Add script to extract strings for translation

hotosm · Jun 19, 2019 · 878b100 · 878b100
1 parent 59a7743
commit 878b100
Show file tree

Hide file tree

Showing 3 changed files with 110 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -17,6 +17,9 @@ server/web/static/dist/
 logs/
 *.log
 
+# Ignore translation exports
+translations/
+
 #python
 venv/
 *pyc

diff --git a/devops/translation/README.md b/devops/translation/README.md
@@ -0,0 +1,24 @@
+# Translation script
+
+These scripts facilitate the management of translations for the Tasking Manager.
+
+## Usual workflow
+
+* Extract the strings from the Tasking Manager with `gen_strings.py`.
+* Upload the file manually to Transifex.
+* Invite people to translate them.
+* Use the local `tx` client to pull down any changes.
+* Open a PR to submit the new ones added in.
+
+## Scripts
+
+* `gen_strings.py` - Generates a JSON file of English translation strings
+
+## How to extract strings
+
+* `pip3 install glob2` - install dependencies
+* Make sure you are in the root directory of the Tasking Manager and run the script: `python3 devops/translation/gen_strings.py`
+* You get three files:
+  * `translations/export/en.json` (Strings for translation from the current code base)
+  * `translations/export/en_new.json` (Strings that are new)
+  * `translations/export/en_removed.json` (Strings that are not used anymore)
diff --git a/devops/translation/gen_strings.py b/devops/translation/gen_strings.py
@@ -0,0 +1,83 @@
+#!/usr/bin/python3
+
+# Generates a JSON file of English translation strings
+# Further, it exports two JSON files containing only the new and the removed strings
+#
+
+import os
+import re
+import glob2  # Eventually I can move this off of glob2 and instead use os's dir walk
+
+# Source files containing translation strings
+files = glob2.glob('./client/app/**')
+files.append('./client/index.html')
+
+# Regular expression to detect translation strings
+translate_string = re.compile('\{\{\s+[\'\"]([^\|]*)[\'\"]\s+\|\s+translate\s+\}\}')
+
+# Create directory for translations
+try:
+    os.makedirs("translations/export")
+except FileExistsError:
+    pass
+
+# Assemble the data
+strings = []
+for f in files:
+
+    # Read in frontend files
+    try:
+        text = open(f, 'r').readlines()
+    except Exception:
+        continue
+    for line in text:
+
+        # Check for translation string
+        matches = translate_string.findall(line)
+        if len(matches) > 0:
+            flag = 1
+
+            # Add translation string to strings array
+            for match in matches:
+                if match not in strings:
+                    strings.append(match)
+
+# Write results to disk
+output = open('translations/export/en.json', 'w')
+output.write('{\n')
+for string in sorted(strings):
+    output.write('\t"%s": "%s",\n' % (string, string))
+output.write('}\n')
+output.close()
+
+# Read in the just generated export of strings
+output_new = open('translations/export/en.json', 'r')
+new = output_new.readlines()[1:-1]
+output_new.close()
+
+# Read in the strings from the code base
+output_old = open('client/locale/en.json', 'r')
+old = output_old.readlines()[1:-1]
+output_old.close()
+
+# Compare strings between old and new stage create diff files
+output_missing = open('translations/export/en_removed.json', 'w')
+output_new = open('translations/export/en_new.json', 'w')
+
+cn = 0
+co = 0
+for word in new:
+    if word not in old:
+        output_new.write(word)
+        cn += 1
+for word in old:
+    if word not in new:
+        output_missing.write(word)
+        co += 1
+
+# Write files with string comparison to disk
+output_missing.close()
+output_new.close()
+
+# Print out some stats
+print(cn, ' have been added, ', co, ' are no longer with is.', len(strings), ' were extracted in total.')