From 8281b8c3e53a13b702d443bf0c0cd672d91537ca Mon Sep 17 00:00:00 2001 From: zuhdil Date: Fri, 31 Oct 2025 02:10:06 +0700 Subject: [PATCH] Add JSONB array translation support for project highlights and outcomes --- backend/src/gpml/db/topic/translation.sql | 5 +- backend/src/gpml/domain/translation.clj | 6 +- .../src/gpml/service/topic/translation.clj | 116 ++++++- .../gpml/service/topic/translation_test.clj | 151 +++++++++- doc/GOOGLE_TRANSLATE_INTEGRATION.md | 284 +++++++++++++++++- 5 files changed, 539 insertions(+), 23 deletions(-) diff --git a/backend/src/gpml/db/topic/translation.sql b/backend/src/gpml/db/topic/translation.sql index e04896e89..a29428f02 100644 --- a/backend/src/gpml/db/topic/translation.sql +++ b/backend/src/gpml/db/topic/translation.sql @@ -123,6 +123,7 @@ WHERE id IN (:v*:topic-ids); -- :name get-project-source-data :? :* -- Get source language content for projects -- FIXED: Removed non-existent 'description' column, added missing text fields +-- Includes JSONB array fields (highlights, outcomes) for translation SELECT 'project' AS topic_type, id AS topic_id, @@ -131,6 +132,8 @@ SELECT summary, background, -- Add missing translatable text field purpose, -- Add missing translatable text field - info_docs -- Add missing translatable field + info_docs, -- Add missing translatable field + highlights, -- JSONB array field (list of text items) + outcomes -- JSONB array field (list of text items) FROM project WHERE id IN (:v*:topic-ids); \ No newline at end of file diff --git a/backend/src/gpml/domain/translation.clj b/backend/src/gpml/domain/translation.clj index 2d4ca6b56..653ce5066 100644 --- a/backend/src/gpml/domain/translation.clj +++ b/backend/src/gpml/domain/translation.clj @@ -48,7 +48,7 @@ :q15 :q16 :q17 :q18 :q19 :q20 :q21 :q22 :q23 :q24 :title :summary :info_docs} "case_study" #{:title :summary :challenge_and_solution} - "project" #{:title :summary :background :purpose :info_docs}}) + "project" #{:title :summary :background :purpose 
:info_docs :highlights :outcomes}}) (def topic-type->table "Maps topic types to their database table names. @@ -116,7 +116,9 @@ :summary :summary :background :background :purpose :purpose - :info_docs :info_docs} + :info_docs :info_docs + :highlights :highlights + :outcomes :outcomes} "initiative" {:title :q2 :summary :q3 :info_docs :info_docs diff --git a/backend/src/gpml/service/topic/translation.clj b/backend/src/gpml/service/topic/translation.clj index 51c47b1b6..bf6ddc0ff 100644 --- a/backend/src/gpml/service/topic/translation.clj +++ b/backend/src/gpml/service/topic/translation.clj @@ -201,32 +201,110 @@ (group-by :language records)) (defn- extract-translatable-texts - "Extract all translatable text fields from source data. + "Extract all translatable text fields from source data, including JSONB arrays. Returns {:texts [vector of text strings] - :index-map [vector of {:topic-key [type id] :field field-keyword}]} + :index-map [vector of {:topic-key [type id] :field field-keyword + :array-index idx :object-key key}]} - The index-map allows mapping translated texts back to their source resource/field." + The index-map allows mapping translated texts back to their source resource/field. + For JSONB arrays: + - :array-index tracks position in array (nil for non-array fields) + - :object-key tracks property name in object (e.g., :text for highlights, nil for simple arrays)" [source-data-records] (let [texts (atom []) index-map (atom [])] (doseq [record source-data-records] (let [topic-type (:topic_type record) topic-id (:topic_id record) + topic-key [topic-type topic-id] translatable-fields (get dom.translation/translatable-fields-by-topic topic-type #{})] (doseq [field translatable-fields] - (when-let [text (get record field)] - (when (and (string? text) (not (clojure.string/blank? text))) - (swap! texts conj text) - (swap! 
index-map conj {:topic-key [topic-type topic-id] - :field field})))))) + (let [value (get record field)] + (cond + ;; Case 1: Regular text field (string) + (and (string? value) (not (clojure.string/blank? value))) + (do + (swap! texts conj value) + (swap! index-map conj {:topic-key topic-key + :field field + :array-index nil + :object-key nil})) + + ;; Case 2: JSONB array field (vector) + (sequential? value) + (doseq [[idx item] (map-indexed vector value)] + (cond + ;; Simple string in array (e.g., outcomes) + (and (string? item) (not (clojure.string/blank? item))) + (do + (swap! texts conj item) + (swap! index-map conj {:topic-key topic-key + :field field + :array-index idx + :object-key nil})) + + ;; Object with :text property (e.g., highlights: [{:url "..." :text "..."}]) + (and (map? item) (:text item) (string? (:text item)) (not (clojure.string/blank? (:text item)))) + (do + (swap! texts conj (:text item)) + (swap! index-map conj {:topic-key topic-key + :field field + :array-index idx + :object-key :text})) + + ;; Skip nil, numbers, or objects without text property + :else nil)) + + ;; Case 3: Nil or unsupported type - skip + :else nil))))) {:texts @texts :index-map @index-map})) +(defn- build-source-data-map + "Build a map of source data indexed by [topic-type topic-id] for efficient lookup. + Used to preserve non-translated properties in JSONB object arrays (e.g., :url in highlights). + Returns map of {[topic-type topic-id] {field value}}." + [source-data-records] + (reduce (fn [acc record] + (let [topic-key [(:topic_type record) (:topic_id record)]] + (assoc acc topic-key (dissoc record :topic_type :topic_id :language)))) + {} + source-data-records)) + (defn- map-translations-back - "Map translated texts back to their original resource/field locations. 
- Returns map of {[topic-type topic-id] {:content {field translated-text}}}}" - [translated-texts index-map] - (reduce (fn [acc [text {:keys [topic-key field]}]] - (assoc-in acc [topic-key :content field] text)) + "Map translated texts back to their original resource/field locations, reconstructing JSONB arrays. + Returns map of {[topic-type topic-id] {:content {field translated-text-or-array}}} + + For JSONB arrays: + - Simple arrays (outcomes): Reconstructs as vector of strings + - Object arrays (highlights): Reconstructs as vector of maps with translated :text property + + Note: Arrays are seeded from source data so skipped items keep their index and non-translated properties (e.g., :url) are preserved" + [translated-texts index-map source-data-map] + (reduce (fn [acc [text {:keys [topic-key field array-index object-key]}]] + (cond + ;; Regular field (no array) + (and (nil? array-index) (nil? object-key)) + (assoc-in acc [topic-key :content field] text) + + ;; Simple array item (e.g., outcomes: ["text1", "text2"]); seeded from source so a skipped item cannot leave an out-of-range index + (and array-index (nil? object-key)) + (update-in acc [topic-key :content field] + (fn [arr] + (let [v (or arr (vec (get-in source-data-map [topic-key field])))] + (assoc v array-index text)))) + + ;; Object array item with :text property (e.g., highlights: [{:url "..." :text "..."}]); seeded from source for the same reason + (and array-index object-key) + (update-in acc [topic-key :content field] + (fn [arr] + (let [v (or arr (vec (get-in source-data-map [topic-key field]))) + ;; Get original object from source data to preserve other properties (e.g., :url) + source-obj (get-in source-data-map [topic-key field array-index]) + ;; Merge translated text with original object structure + updated-obj (assoc source-obj object-key text)] + (assoc v array-index updated-obj)))) + + :else acc)) {} (map vector translated-texts index-map))) @@ -254,13 +332,16 @@ (defn- build-same-language-records "Build translation records for same-language sources (copy source content as-is). - Returns map of {[topic-type topic-id] {:content {field value}}}." + Returns map of {[topic-type topic-id] {:content {field value}}}. 
+ + JSONB arrays are preserved as-is (no translation needed when source == target language)." [source-records] (reduce (fn [acc record] (let [topic-type (:topic_type record) topic-id (:topic_id record) topic-key [topic-type topic-id] translatable-fields (get dom.translation/translatable-fields-by-topic topic-type #{}) + ;; Select only translatable fields, excluding metadata fields (topic_type, topic_id, language) content (select-keys record translatable-fields)] (assoc acc topic-key {:content content}))) {} @@ -348,13 +429,16 @@ {:success? true :translations filtered-result}) ;; Step 3d: Translate records grouped by source language - (let [;; Translate each source language group separately + (let [;; Build source data map for preserving JSONB object properties (e.g., :url in highlights) + source-data-map (build-source-data-map source-data) + + ;; Translate each source language group separately translated-maps (reduce (fn [acc [source-lang records]] (let [{:keys [texts index-map]} (extract-translatable-texts records) translated-texts (port.translate/translate-texts translate-adapter texts language source-lang) - mapped (map-translations-back translated-texts index-map)] + mapped (map-translations-back translated-texts index-map source-data-map)] (merge acc mapped))) {} grouped-by-language) diff --git a/backend/test/gpml/service/topic/translation_test.clj b/backend/test/gpml/service/topic/translation_test.clj index 1aa7ea0d5..dd1ab812b 100644 --- a/backend/test/gpml/service/topic/translation_test.clj +++ b/backend/test/gpml/service/topic/translation_test.clj @@ -315,4 +315,153 @@ ;; Mock adapter adds [ES] prefix when translating to Spanish (is (clojure.string/starts-with? 
(get-in translation [:content :title]) "[ES]")) ;; Verify translation was created (not skipped) - (is (= "es" (:language translation)))))))) \ No newline at end of file + (is (= "es" (:language translation)))))))) +;; JSONB Array Translation Tests + +(deftest get-bulk-translations-with-auto-translate-project-outcomes-array-test + (let [system (ig/init fixtures/*system* [:duct.database.sql/hikaricp :duct/const]) + config (get system [:duct/const :gpml.config/common]) + conn (test-util/db-test-conn)] + ;; Setup: Insert project with outcomes array (simple string array) + (jdbc/execute! conn ["INSERT INTO language (iso_code, english_name, native_name) VALUES ('en', 'English', 'English'), ('es', 'Spanish', 'Español') ON CONFLICT (iso_code) DO NOTHING"]) + (jdbc/execute! conn ["INSERT INTO project (id, language, title, summary, outcomes) + VALUES (99930, 'en', 'Climate Project', 'Climate initiative', + '[\"10,000 tons plastic removed\", \"50 communities engaged\", \"3 recycling centers\"]'::jsonb) + ON CONFLICT (id) DO UPDATE SET language = EXCLUDED.language, title = EXCLUDED.title, + summary = EXCLUDED.summary, outcomes = EXCLUDED.outcomes"]) + + (testing "Project outcomes array should be translated" + (let [topic-filters [{:topic-type "project" :topic-id 99930}] + result (svc.topic.translation/get-bulk-translations-with-auto-translate config topic-filters "es" nil)] + (is (:success? result)) + (is (= 1 (count (:translations result)))) + (let [translation (first (:translations result)) + outcomes (get-in translation [:content :outcomes])] + ;; Verify outcomes is an array + (is (sequential? outcomes)) + ;; Verify all 3 items were translated + (is (= 3 (count outcomes))) + ;; Mock adapter adds [ES] prefix to each translated string + (is (every? #(clojure.string/starts-with? 
% "[ES]") outcomes))))))) + +(deftest get-bulk-translations-with-auto-translate-project-highlights-object-array-test + (let [system (ig/init fixtures/*system* [:duct.database.sql/hikaricp :duct/const]) + config (get system [:duct/const :gpml.config/common]) + conn (test-util/db-test-conn)] + ;; Setup: Insert project with highlights array (array of objects with url + text) + (jdbc/execute! conn ["INSERT INTO language (iso_code, english_name, native_name) VALUES ('en', 'English', 'English'), ('es', 'Spanish', 'Español') ON CONFLICT (iso_code) DO NOTHING"]) + (jdbc/execute! conn ["INSERT INTO project (id, language, title, summary, highlights) + VALUES (99931, 'en', 'Ocean Project', 'Ocean conservation', + '[{\"url\": \"https://example.com/report\", \"text\": \"Annual impact report published\"}, + {\"url\": \"\", \"text\": \"Award-winning innovation\"}, + {\"url\": \"https://news.com/article\", \"text\": \"Featured in media\"}]'::jsonb) + ON CONFLICT (id) DO UPDATE SET language = EXCLUDED.language, title = EXCLUDED.title, + summary = EXCLUDED.summary, highlights = EXCLUDED.highlights"]) + + (testing "Project highlights object array should translate text property and preserve url" + (let [topic-filters [{:topic-type "project" :topic-id 99931}] + result (svc.topic.translation/get-bulk-translations-with-auto-translate config topic-filters "es" nil)] + (is (:success? result)) + (is (= 1 (count (:translations result)))) + (let [translation (first (:translations result)) + highlights (get-in translation [:content :highlights])] + ;; Verify highlights is an array + (is (sequential? highlights)) + ;; Verify all 3 items exist + (is (= 3 (count highlights))) + ;; Verify each item is a map with :url and :text properties + (is (every? map? highlights)) + (is (every? #(contains? % :url) highlights)) + (is (every? #(contains? 
% :text) highlights)) + ;; Verify URLs are preserved exactly (not translated) + (is (= "https://example.com/report" (:url (nth highlights 0)))) + (is (= "" (:url (nth highlights 1)))) + (is (= "https://news.com/article" (:url (nth highlights 2)))) + ;; Verify text properties were translated (mock adapter adds [ES] prefix) + (is (every? #(clojure.string/starts-with? (:text %) "[ES]") highlights))))))) + +(deftest get-bulk-translations-with-auto-translate-project-both-arrays-test + (let [system (ig/init fixtures/*system* [:duct.database.sql/hikaricp :duct/const]) + config (get system [:duct/const :gpml.config/common]) + conn (test-util/db-test-conn)] + ;; Setup: Insert project with both outcomes and highlights arrays + (jdbc/execute! conn ["INSERT INTO language (iso_code, english_name, native_name) VALUES ('en', 'English', 'English'), ('es', 'Spanish', 'Español') ON CONFLICT (iso_code) DO NOTHING"]) + (jdbc/execute! conn ["INSERT INTO project (id, language, title, summary, outcomes, highlights) + VALUES (99932, 'en', 'Full Project', 'Complete project', + '[\"Outcome 1\", \"Outcome 2\"]'::jsonb, + '[{\"url\": \"http://test.com\", \"text\": \"Highlight 1\"}, {\"url\": \"\", \"text\": \"Highlight 2\"}]'::jsonb) + ON CONFLICT (id) DO UPDATE SET language = EXCLUDED.language, title = EXCLUDED.title, + summary = EXCLUDED.summary, outcomes = EXCLUDED.outcomes, + highlights = EXCLUDED.highlights"]) + + (testing "Project with both outcomes and highlights arrays" + (let [topic-filters [{:topic-type "project" :topic-id 99932}] + result (svc.topic.translation/get-bulk-translations-with-auto-translate config topic-filters "es" nil)] + (is (:success? result)) + (is (= 1 (count (:translations result)))) + (let [translation (first (:translations result)) + outcomes (get-in translation [:content :outcomes]) + highlights (get-in translation [:content :highlights])] + ;; Verify both arrays exist and are correct type + (is (sequential? outcomes)) + (is (sequential? 
highlights)) + ;; Verify outcomes array structure (simple strings) + (is (= 2 (count outcomes))) + (is (every? string? outcomes)) + (is (every? #(clojure.string/starts-with? % "[ES]") outcomes)) + ;; Verify highlights array structure (objects with url + text) + (is (= 2 (count highlights))) + (is (every? map? highlights)) + (is (= "http://test.com" (:url (first highlights)))) + (is (every? #(clojure.string/starts-with? (:text %) "[ES]") highlights))))))) + +(deftest get-bulk-translations-with-auto-translate-project-empty-arrays-test + (let [system (ig/init fixtures/*system* [:duct.database.sql/hikaricp :duct/const]) + config (get system [:duct/const :gpml.config/common]) + conn (test-util/db-test-conn)] + ;; Setup: Insert project with empty arrays + (jdbc/execute! conn ["INSERT INTO language (iso_code, english_name, native_name) VALUES ('en', 'English', 'English'), ('es', 'Spanish', 'Español') ON CONFLICT (iso_code) DO NOTHING"]) + (jdbc/execute! conn ["INSERT INTO project (id, language, title, summary, outcomes, highlights) + VALUES (99933, 'en', 'Empty Arrays Project', 'Project with empty arrays', + '[]'::jsonb, '[]'::jsonb) + ON CONFLICT (id) DO UPDATE SET language = EXCLUDED.language, title = EXCLUDED.title, + summary = EXCLUDED.summary, outcomes = EXCLUDED.outcomes, + highlights = EXCLUDED.highlights"]) + + (testing "Project with empty arrays should handle gracefully" + (let [topic-filters [{:topic-type "project" :topic-id 99933}] + result (svc.topic.translation/get-bulk-translations-with-auto-translate config topic-filters "es" nil)] + (is (:success? result)) + (is (= 1 (count (:translations result)))) + (let [translation (first (:translations result))] + ;; Empty arrays are not included in translation (no translatable content) + ;; This is correct behavior - we only translate fields with content + (is (nil? (get-in translation [:content :outcomes]))) + (is (nil? 
(get-in translation [:content :highlights]))) + ;; Verify other fields were still translated + (is (clojure.string/starts-with? (get-in translation [:content :title]) "[ES]"))))))) + +(deftest get-bulk-translations-with-auto-translate-project-null-arrays-test + (let [system (ig/init fixtures/*system* [:duct.database.sql/hikaricp :duct/const]) + config (get system [:duct/const :gpml.config/common]) + conn (test-util/db-test-conn)] + ;; Setup: Insert project with null arrays + (jdbc/execute! conn ["INSERT INTO language (iso_code, english_name, native_name) VALUES ('en', 'English', 'English'), ('es', 'Spanish', 'Español') ON CONFLICT (iso_code) DO NOTHING"]) + (jdbc/execute! conn ["INSERT INTO project (id, language, title, summary, outcomes, highlights) + VALUES (99934, 'en', 'Null Arrays Project', 'Project with null arrays', + NULL, NULL) + ON CONFLICT (id) DO UPDATE SET language = EXCLUDED.language, title = EXCLUDED.title, + summary = EXCLUDED.summary, outcomes = EXCLUDED.outcomes, + highlights = EXCLUDED.highlights"]) + + (testing "Project with null arrays should handle gracefully" + (let [topic-filters [{:topic-type "project" :topic-id 99934}] + result (svc.topic.translation/get-bulk-translations-with-auto-translate config topic-filters "es" nil)] + (is (:success? result)) + (is (= 1 (count (:translations result)))) + (let [translation (first (:translations result))] + ;; Verify null fields are not included in translation + (is (nil? (get-in translation [:content :outcomes]))) + (is (nil? (get-in translation [:content :highlights]))) + ;; Verify other fields were still translated + (is (clojure.string/starts-with? 
(get-in translation [:content :title]) "[ES]"))))))) diff --git a/doc/GOOGLE_TRANSLATE_INTEGRATION.md b/doc/GOOGLE_TRANSLATE_INTEGRATION.md index 9d3b7b237..fe9ebaecf 100644 --- a/doc/GOOGLE_TRANSLATE_INTEGRATION.md +++ b/doc/GOOGLE_TRANSLATE_INTEGRATION.md @@ -2106,6 +2106,283 @@ curl "http://localhost:3000/api/bulk-translations?topics=policy:1,event:2&langua --- +## JSONB Array Field Translation Strategy + +### Overview + +The `project` topic type contains two JSONB array fields that require translation: + +1. **`outcomes`**: Simple JSONB array of text strings + - Schema: `["text1", "text2", "text3", ...]` + - Translation: Translate each string directly + +2. **`highlights`**: JSONB array of objects with `url` and `text` properties + - Schema: `[{url: "https://...", text: "Highlight text"}, ...]` + - Translation: Only translate the `text` property, preserve `url` as-is + +### Data Structures + +**Example Source Data (English)**: +```json +{ + "topic_type": "project", + "topic_id": 123, + "language": "en", + "title": "Climate Initiative", + "outcomes": [ + "10,000 tons of plastic removed", + "50 communities engaged", + "3 recycling centers established" + ], + "highlights": [ + {"url": "https://example.com/report", "text": "Annual impact report published"}, + {"url": "", "text": "Award-winning innovation"}, + {"url": "https://news.com/article", "text": "Featured in international media"} + ] +} +``` + +**Example Translated Data (Spanish)**: +```json +{ + "topic_type": "project", + "topic_id": 123, + "language": "es", + "content": { + "title": "Iniciativa Climática", + "outcomes": [ + "10,000 toneladas de plástico eliminadas", + "50 comunidades involucradas", + "3 centros de reciclaje establecidos" + ], + "highlights": [ + {"url": "https://example.com/report", "text": "Informe de impacto anual publicado"}, + {"url": "", "text": "Innovación galardonada"}, + {"url": "https://news.com/article", "text": "Destacado en medios internacionales"} + ] + } +} +``` + +### 
Implementation Approach + +#### Step 1: Field Extraction with Index Tracking + +Extract translatable texts from JSONB arrays while tracking their position and structure: + +```clojure +(defn extract-translatable-texts-from-resource + "Extract translatable texts from a resource, handling JSONB arrays. + Returns {:texts [...] :index-map [...]}" + [resource translatable-fields] + (let [texts (atom []) + index-map (atom []) + resource-key [(:topic_type resource) (:topic_id resource)]] + + (doseq [field translatable-fields] + (let [value (get resource field)] + (cond + ;; Case 1: Regular text field (string) + (string? value) + (do + (swap! texts conj value) + (swap! index-map conj {:resource-key resource-key + :field field + :array-index nil + :object-key nil})) + + ;; Case 2: JSONB array field + (sequential? value) + (doseq [[idx item] (map-indexed vector value)] + (cond + ;; Simple string in array (e.g., outcomes) + (string? item) + (do + (swap! texts conj item) + (swap! index-map conj {:resource-key resource-key + :field field + :array-index idx + :object-key nil})) + + ;; Object with text property (e.g., highlights) + (and (map? item) (:text item) (string? (:text item))) + (do + (swap! texts conj (:text item)) + (swap! index-map conj {:resource-key resource-key + :field field + :array-index idx + :object-key :text})) + + ;; Skip nil, numbers, or objects without text property + :else nil)) + + ;; Case 3: Nil or unsupported type - skip + :else nil))) + + {:texts @texts :index-map @index-map})) +``` + +**Example Extraction Result**: +```clojure +;; Input: project with outcomes and highlights +{:topic_type "project" :topic_id 123 + :outcomes ["10,000 tons removed", "50 communities engaged"] + :highlights [{:url "http://..." 
:text "Report published"} + {:url "" :text "Award-winning"}]} + +;; Output: +{:texts ["10,000 tons removed" + "50 communities engaged" + "Report published" + "Award-winning"] + + :index-map [{:resource-key ["project" 123] :field :outcomes :array-index 0 :object-key nil} + {:resource-key ["project" 123] :field :outcomes :array-index 1 :object-key nil} + {:resource-key ["project" 123] :field :highlights :array-index 0 :object-key :text} + {:resource-key ["project" 123] :field :highlights :array-index 1 :object-key :text}]} +``` + +#### Step 2: Translation + +Translate all extracted texts in a single batch: + +```clojure +;; All texts from extraction sent to Google Translate in one API call +(port/translate-texts adapter + ["10,000 tons removed" "50 communities engaged" "Report published" "Award-winning"] + "es" ;; target language + "en") ;; source language + +;; Result: +["10,000 toneladas eliminadas" "50 comunidades involucradas" + "Informe publicado" "Galardonado"] +``` + +#### Step 3: Reconstruction with Structure Preservation + +Map translated texts back to their original structure: + +```clojure +(defn map-translations-back-to-resources + "Map translated texts back to resources, preserving JSONB array structure." + [translated-texts index-map source-data-map] + (reduce + (fn [acc [text {:keys [resource-key field array-index object-key]}]] + (cond + ;; Regular field (no array) + (and (nil? array-index) (nil? object-key)) + (assoc-in acc [resource-key :content field] text) + + ;; Simple array item (e.g., outcomes) + (and array-index (nil? 
object-key)) + (update-in acc [resource-key :content field] + (fn [arr] + (let [v (or arr [])] + (assoc v array-index text)))) + + ;; Object array item with text property (e.g., highlights) + (and array-index object-key) + (update-in acc [resource-key :content field] + (fn [arr] + (let [v (or arr []) + ;; Get original object from source data to preserve other properties (url) + source-obj (get-in source-data-map [resource-key field array-index]) + ;; Merge translated text with original object structure + updated-obj (assoc source-obj object-key text)] + (assoc v array-index updated-obj)))) + + :else acc)) + {} + (map vector translated-texts index-map))) +``` + +**Example Reconstruction Result**: +```clojure +;; Input: +;; - Translated texts: ["10,000 toneladas eliminadas" "50 comunidades involucradas" +;; "Informe publicado" "Galardonado"] +;; - Index map from Step 1 +;; - Source data (for preserving url fields) + +;; Output: +{["project" 123] + {:content + {:outcomes ["10,000 toneladas eliminadas" + "50 comunidades involucradas"] + :highlights [{:url "http://..." :text "Informe publicado"} + {:url "" :text "Galardonado"}]}}} +``` + +### Key Design Decisions + +1. **URL Preservation**: For `highlights`, the `url` property is never translated, only `text` +2. **Array Order**: Index tracking ensures translated texts map back to correct array positions +3. **Batch Efficiency**: All array items translated in single Google Translate API call +4. **Type Safety**: Non-string elements (nil, numbers, objects without `text` property) are skipped +5. **Structure Preservation**: Original JSONB structure (arrays, objects) maintained after translation +6. 
**Storage Format**: Translated arrays stored as JSONB in same format as source + +### Database Storage + +**Before (Source Data in `project` table)**: +```sql +-- project table (id=123, language='en') +highlights: [ + {"url": "https://example.com/report", "text": "Annual impact report published"}, + {"url": "", "text": "Award-winning innovation"} +] +outcomes: ["10,000 tons removed", "50 communities engaged"] +``` + +**After (Translated Data in `topic_translation` table)**: +```sql +-- topic_translation table (topic_type='project', topic_id=123, language='es') +content: { + "title": "Iniciativa Climática", + "highlights": [ + {"url": "https://example.com/report", "text": "Informe de impacto anual publicado"}, + {"url": "", "text": "Innovación galardonada"} + ], + "outcomes": ["10,000 toneladas eliminadas", "50 comunidades involucradas"] +} +``` + +### Implementation Checklist for Phase 3 + +When implementing the auto-translation service (`get-bulk-translations-with-auto-translate`), ensure: + +- [ ] **Extraction Logic**: Detect JSONB arrays vs strings when extracting translatable fields +- [ ] **Index Tracking**: Track `array-index` and `object-key` for each extracted text +- [ ] **Source Data Preservation**: Keep original source data accessible for merging during reconstruction +- [ ] **Object Property Filtering**: For object arrays, only extract string values from `text` property +- [ ] **Reconstruction Logic**: Rebuild JSONB arrays with correct structure (simple arrays vs object arrays) +- [ ] **URL Preservation**: Ensure `url` properties in highlights are never modified +- [ ] **Type Validation**: Handle edge cases (nil items, empty arrays, objects without `text` property) + +### Testing Scenarios + +1. **Simple Array Translation** (outcomes): + - Source: `["English text 1", "English text 2"]` + - Expected: `["Texto en español 1", "Texto en español 2"]` + +2. 
**Object Array Translation** (highlights): + - Source: `[{url: "http://...", text: "English"}, {url: "", text: "Text"}]` + - Expected: `[{url: "http://...", text: "Español"}, {url: "", text: "Texto"}]` + +3. **Mixed Content**: + - Arrays with nil items, empty strings, objects with missing `text` property + - Should skip invalid items gracefully + +4. **Empty Arrays**: + - Source: `{outcomes: [], highlights: []}` + - Expected: Fields not included in translation (empty arrays have no translatable content) + +5. **Null Fields**: + - Source: `{outcomes: null, highlights: null}` + - Expected: Fields not included in translation (only translate non-null fields) + +--- + ## Conclusion This implementation plan provides a comprehensive, production-ready approach to integrating Google Translate API into the backend translation system. The design prioritizes: @@ -2120,12 +2397,13 @@ The existing translation infrastructure is well-designed and requires minimal ch --- -**Document Version**: 1.7 -**Last Updated**: 2025-10-29 +**Document Version**: 1.8 +**Last Updated**: 2025-10-30 **Author**: Claude Code -**Status**: Phase 2 Complete +**Status**: Phase 2 Complete + JSONB Array Translation Strategy Added **Changelog**: +- v1.8 (2025-10-30): **Added JSONB Array Translation Strategy** - Documented comprehensive approach for translating project `outcomes` (simple string arrays) and `highlights` (object arrays with url+text). Updated `translatable-fields-by-topic` and `normalized-field->db-column` mappings to include these fields. Updated SQL query `get-project-source-data` to fetch JSONB columns. Provided detailed implementation guidance with extraction, translation, and reconstruction logic. Includes code examples, testing scenarios, and Phase 3 implementation checklist. - v1.7 (2025-10-29): **Phase 2 Complete - Schema Alignment Implemented** - Normalized all field names to match detail API response schema. 
Policy: `abstract` → `summary`, Event: `description` → `summary`, Technology: `name` → `title` + `remarks` → `summary`, Initiative: JSONB casting `q2` → `title` + `q3` → `summary`, Case Study: `description` → `summary` + added `challenge_and_solution`, Project: Fixed critical bug (removed non-existent `description` field, added `background` and `purpose`). All SQL queries updated with aliases, all 91 tests passing. API now consistently uses `title` and `summary` across all topic types. - v1.6 (2025-10-21): **Added source language detection strategy** - Documents critical schema constraint (one language per topic record), current limitation (hardcoded English assumption), problem scenarios (wasteful same-language translation, incorrect source language), and proposed solution (add language column to queries, filter same-language records, group by source language for efficient batching). Includes implementation checklist, benefits analysis, and updated Phase 3 tasks with source language handling requirements. - v1.5 (2025-10-14): **Added translation cache invalidation strategy** - Strategy 1 (Immediate Deletion): Delete all translations when source content is updated to prevent stale translations. Includes implementation details, trade-offs analysis, and future optimization path (Strategy 2: Smart Field-Level Invalidation).