From 878b100903486761fe571535006000b61022c098 Mon Sep 17 00:00:00 2001 From: Felix Delattre Date: Wed, 19 Jun 2019 10:14:23 +0000 Subject: [PATCH] Add script to extract strings for translation --- .gitignore | 3 ++ devops/translation/README.md | 24 +++++++++ devops/translation/gen_strings.py | 83 +++++++++++++++++++++++++++++++ 3 files changed, 110 insertions(+) create mode 100644 devops/translation/README.md create mode 100644 devops/translation/gen_strings.py diff --git a/.gitignore b/.gitignore index f997918520..fdf86c2b90 100644 --- a/.gitignore +++ b/.gitignore @@ -17,6 +17,9 @@ server/web/static/dist/ logs/ *.log +# Ignore translation exports +translations/ + #python venv/ *pyc diff --git a/devops/translation/README.md b/devops/translation/README.md new file mode 100644 index 0000000000..e20e7da029 --- /dev/null +++ b/devops/translation/README.md @@ -0,0 +1,24 @@ +# Translation script + +These scripts facilitate the management of translations for the Tasking Manager. + +## Usual workflow: + +* Extract the strings from the Tasking Manager with `gen_strings.py`. +* Upload the file manually to Transifex. +* Invite people to translate them. +* Use the local `tx` client to pull down any changes. +* Open a PR to submit the newly pulled translations. 
#!/usr/bin/python3
"""Extract the English translation strings of the Tasking Manager frontend.

Scripts
-------
``gen_strings.py`` generates a JSON file of English translation strings.
It also exports two JSON files containing only the new and the removed
strings.

How to extract strings
----------------------
* No third-party packages are required; the formerly needed ``glob2`` has
  been replaced by the standard library ``glob`` module.
* Make sure you are in the root directory of the Tasking Manager and run
  the script: ``python3 devops/translation/gen_strings.py``
* You get three files:

  * ``translations/export/en.json``
    (strings for translation from the current code base)
  * ``translations/export/en_new.json`` (strings that are new)
  * ``translations/export/en_removed.json``
    (strings that are not used anymore)
"""

import glob
import json
import os
import re

# Source files containing translation strings
SOURCE_PATTERN = './client/app/**'
EXTRA_SOURCES = ('./client/index.html',)

# Strings currently shipped with the code base; the extraction is compared
# against this file.
REFERENCE_FILE = 'client/locale/en.json'

# Directory receiving the generated files
EXPORT_DIR = 'translations/export'

# Regular expression to detect translation strings such as
# ``{{ 'Some text' | translate }}``. A raw string keeps the regex escapes
# (``\{``, ``\s``, ``\|``) from being read as (invalid) Python escapes.
translate_string = re.compile(
    r"\{\{\s+['\"]([^\|]*)['\"]\s+\|\s+translate\s+\}\}"
)


def find_source_files():
    """Return the paths of all frontend files that may hold strings.

    The recursive glob also yields directories; they are filtered out later
    when opening them fails.
    """
    paths = glob.glob(SOURCE_PATTERN, recursive=True)
    paths.extend(EXTRA_SOURCES)
    return paths


def extract_strings(lines):
    """Return the set of translation strings found in *lines*.

    Matching is done line by line, so an expression that spans several
    lines is not detected.
    """
    found = set()
    for line in lines:
        found.update(translate_string.findall(line))
    return found


def collect_strings(paths):
    """Return the set of translation strings found in the files at *paths*."""
    strings = set()
    for path in paths:
        try:
            with open(path, 'r', encoding='utf-8') as source:
                strings |= extract_strings(source)
        except (OSError, UnicodeDecodeError):
            # Best effort: directories, unreadable files and binary assets
            # are expected among the globbed paths and are simply skipped.
            continue
    return strings


def diff_strings(current, reference):
    """Compare two string mappings by key.

    Returns a tuple ``(added, removed)``: the entries of *current* whose key
    is missing from *reference*, and the entries of *reference* whose key is
    missing from *current*.
    """
    added = {key: value for key, value in current.items()
             if key not in reference}
    removed = {key: value for key, value in reference.items()
               if key not in current}
    return added, removed


def render_json(mapping):
    """Serialise *mapping* as sorted, tab-indented JSON with a final newline.

    Using the ``json`` module guarantees valid output: quotes and
    backslashes are escaped and no trailing comma is emitted.
    """
    text = json.dumps(mapping, ensure_ascii=False, indent='\t', sort_keys=True)
    return text + '\n'


def write_json(path, mapping):
    """Write *mapping* to *path* in the format produced by render_json."""
    with open(path, 'w', encoding='utf-8') as output:
        output.write(render_json(mapping))


def main():
    """Extract the strings, export them and report what changed.

    Raises:
        OSError: if the reference file cannot be read, e.g. because the
            script was not started from the repository root.
        ValueError: if the reference file is not valid JSON.
    """
    # Create directory for translations
    os.makedirs(EXPORT_DIR, exist_ok=True)

    # Assemble the data; every English string is its own key
    strings = collect_strings(find_source_files())
    current = {string: string for string in strings}

    # Write results to disk
    write_json(os.path.join(EXPORT_DIR, 'en.json'), current)

    # Read in the strings from the code base
    # NOTE(review): assumes the reference file is a flat JSON object keyed
    # by the English string - confirm against client/locale/en.json.
    with open(REFERENCE_FILE, 'r', encoding='utf-8') as reference_file:
        reference = json.load(reference_file)

    # Compare by key rather than by raw text line, so that indentation and
    # trailing commas in the reference file cannot cause false differences.
    added, removed = diff_strings(current, reference)
    write_json(os.path.join(EXPORT_DIR, 'en_new.json'), added)
    write_json(os.path.join(EXPORT_DIR, 'en_removed.json'), removed)

    # Print out some stats
    print(len(added), ' have been added, ', len(removed),
          ' are no longer with us.', len(strings),
          ' were extracted in total.')


if __name__ == '__main__':
    main()