diff options
Diffstat (limited to 'scripts/code_tools/modified_strings.py')
-rw-r--r-- | scripts/code_tools/modified_strings.py | 403 |
1 files changed, 403 insertions, 0 deletions
diff --git a/scripts/code_tools/modified_strings.py b/scripts/code_tools/modified_strings.py new file mode 100644 index 0000000000..6a763b6ec5 --- /dev/null +++ b/scripts/code_tools/modified_strings.py @@ -0,0 +1,403 @@ +#!/usr/bin/env python +"""\ + +This script scans the SL codebase for translation-related strings. + +$LicenseInfo:firstyear=2020&license=viewerlgpl$ +Second Life Viewer Source Code +Copyright (C) 2020, Linden Research, Inc. + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; +version 2.1 of the License only. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA +$/LicenseInfo$ +""" + +from __future__ import print_function + +import xml.etree.ElementTree as ET +import argparse +import os +import sys +from git import Repo, Git # requires the gitpython package +import pandas as pd +import re +from datetime import datetime + +usage_msg="""%(prog)s [options] + +Analyze the XUI configuration files to find text that may need to +be translated. Works by comparing two specified revisions, one +specified by --rev (default HEAD) and one specified by --rev_base +(default master). The script works by comparing xui contents of the +two revisions, and outputs a spreadsheet listing any areas of +difference. The target language must be specified using the --lang +option. Output is an excel file, which can be used as-is or imported +into google sheets. + +If the --rev revision already contains a translation for the text, it +will be included in the spreadsheet for reference. + +Normally you would want --rev_base to be the last revision to have +translations added, and --rev to be the tip of the current +project. You can find the last commit with translation work using "git log --grep INTL- | head" + +The --missing argument can be used to find all text with missing +translations, regardless of when it was added. If translations are being kept +reasonably current, you will normally not need this argument. +""" + +translate_attribs = [ + "title", + "short_title", + "value", + "label", + "label_selected", + "tool_tip", + "ignoretext", + "yestext", + "notext", + "canceltext", + "description", + "longdescription" +] + +def codify_for_print(val): + if isinstance(val, unicode): + return val.encode("utf-8") + else: + return unicode(val, 'utf-8').encode("utf-8") + +# Returns a dict of { name => xml_node } +def read_xml_elements(blob): + try: + contents = blob.data_stream.read() + except: + # default - pretend we read a file with no elements of interest. + # Parser will complain if it gets no elements at all. + contents = '<?xml version="1.0" encoding="utf-8" standalone="yes" ?><strings></strings>' + xml = ET.fromstring(contents) + elts = {} + for child in xml.iter(): + if "name" in child.attrib: + name = child.attrib['name'] + elts[name] = child + return elts + +def failure(*msg): + print(*msg) + sys.exit(1) + +# return True iff any element of lis is "in" thing +def has_any(thing,lis): + for l in lis: + if l in thing: + return True + return False + +def should_translate(filename, elt, field, val): + if val is None: + return False + # Should translate apply recursively? + if "translate" in elt.attrib and elt.attrib["translate"] == "false": + return False + if has_any(filename,["floater_test","floater_aaa","floater_ui_preview"]): + return False + if "TestString PleaseIgnore" in val: + return False + val = re.sub(r"\[.*?\]","",val) + if len(val) == 0: + return False + if val.isspace(): + return False + val = val.strip() + if val.isdigit(): + return False + if not re.search('\w+', val): + return False + if re.match(r"^\s*\d*\s*x\s*\d*\s*$", val): + #print(val, "matches resolution string, will ignore") + return False + # "value" attribute is a hairball, mostly used to encode non-display info but a few exceptions + if field == "value": + if elt.text is not None and len(elt.text) > 0: + #print("value has text, ignoring", ET.tostring(elt)) + return False + if has_any(elt.attrib,["label"]): + return False + if elt.tag in ["string","text"]: + return True + #print("including value attribute", val, "tag", elt.tag,"in", ET.tostring(elt)) + return True + return True + +def make_translation_table(mod_tree, base_tree, lang, args): + + xui_path = "{}/{}".format(xui_base, args.base_lang) + try: + mod_xui_tree = mod_tree[xui_path] + except: + failure("xui tree not found for base language", args.base_lang,"or target lang", lang) + + if args.rev == args.rev_base: + failure("Revs are the same, nothing to compare") + + + data = [] + # For all files to be checked for translations + all_en_strings = set() + for mod_blob in mod_xui_tree.traverse(): + filename = mod_blob.path + if mod_blob.type == "tree": # directory, skip + continue + + if args.verbose: + print(filename) + + try: + base_blob = base_tree[filename] + except: + if args.verbose: + print("No matching base file found for", filename) + base_blob = None + + try: + transl_filename = filename.replace("/xui/{}/".format(args.base_lang), "/xui/{}/".format(lang)) + transl_blob = mod_tree[transl_filename] + except: + if args.verbose: + print("No matching translation file found at", transl_filename) + transl_blob = None + + mod_dict = read_xml_elements(mod_blob) + base_dict = read_xml_elements(base_blob) + transl_dict = read_xml_elements(transl_blob) + + rows = 0 + for name in mod_dict.keys(): + if not name in base_dict or mod_dict[name].text != base_dict[name].text or (args.missing and not name in transl_dict): + elt = mod_dict[name] + val = elt.text + field = "text" + if should_translate(filename, elt, field, val): + transl_val = "--" + if name in transl_dict: + transl_val = transl_dict[name].text + if val in all_en_strings: + new_val = "(DUPLICATE)" + else: + new_val = "" + data.append([val, transl_val, new_val, "", "", filename, name, field]) + all_en_strings.add(val) + rows += 1 + for attr in translate_attribs: + if attr in mod_dict[name].attrib: + if name not in base_dict \ + or attr not in base_dict[name].attrib \ + or mod_dict[name].attrib[attr] != base_dict[name].attrib[attr] \ + or (args.missing and (not name in transl_dict or not attr in transl_dict[name].attrib)): + elt = mod_dict[name] + val = elt.attrib[attr] + if should_translate(filename, elt, attr, val): + transl_val = "--" + if name in transl_dict and attr in transl_dict[name].attrib: + transl_val = transl_dict[name].attrib[attr] + if val in all_en_strings: + new_val = "(DUPLICATE)" + else: + new_val = "" + #attr = attr + ":" + ET.tostring(elt) + data.append([val, transl_val, new_val, "", "", filename, name, attr]) + all_en_strings.add(val) + rows += 1 + + return data + +def find_deletions(mod_tree, base_tree, lang, args, f): + + transl_xui_path = "{}/{}".format(xui_base, lang) + try: + transl_xui_tree = mod_tree[transl_xui_path] + except: + failure("xui tree not found for base language", args.base_lang,"or target lang", lang) + + for transl_blob in transl_xui_tree.traverse(): + if transl_blob.type == "tree": # directory, skip + continue + transl_filename = transl_blob.path + mod_filename = transl_filename.replace("/xui/{}/".format(lang), "/xui/{}/".format(args.base_lang)) + #print("checking",transl_filename,"against",mod_filename) + try: + mod_blob = mod_tree[mod_filename] + except: + print(" delete file", transl_filename, file=f) + continue + mod_dict = read_xml_elements(mod_blob) + if len(mod_dict) == 0: + print(" delete file", transl_filename, file=f) + continue + transl_dict = read_xml_elements(transl_blob) + #print("mod vs transl", len(mod_dict), len(transl_dict)) + lines = 0 + for elt_key in transl_dict: + if not elt_key in mod_dict: + if lines == 0: + print(" in file", transl_filename, file=f) + lines += 1 + print(" delete element", elt_key, file=f) + else: + transl_elt = transl_dict[elt_key] + mod_elt = mod_dict[elt_key] + for a in transl_elt.attrib: + if not a in mod_elt.attrib: + if lines == 0: + print(" in file", transl_filename, file=f) + lines += 1 + print(" delete attribute", a, "from", elt_key, file=f) + if transl_elt.text and (not mod_elt.text): + if lines == 0: + print(" in file", transl_filename, file=f) + lines += 1 + print(" delete text from", elt_key, file=f) + +def save_translation_file(per_lang_data, aux_data, outfile): + + langs = sorted(per_lang_data.keys()) + print("Saving languages", ",".join(langs),"as",outfile) + + writer = pd.ExcelWriter(outfile, engine='xlsxwriter') + + workbook = writer.book + wrap_format = workbook.add_format({'text_wrap': True}) + bold_wrap_format = workbook.add_format({'text_wrap': True, 'bold': True}) + wrap_unlocked_format = workbook.add_format({'text_wrap': True, 'locked': False}) + + for lang in langs: + data = per_lang_data[lang] + num_translations = len(data) + cols = ["EN", "Previous Translation ({})".format(lang.upper()), "ENTER NEW TRANSLATION ({})".format(lang.upper()), "Translator Questions", "Notes", "File", "Element", "Field"] + df = pd.DataFrame(data, columns=cols) + df.to_excel(writer, index=False, sheet_name = lang.upper()) + + worksheet = writer.sheets[lang.upper()] + + # Translators primarily care about columns A-C, and should write + # only in column C. Hide the others. Set widths. + worksheet.protect() + worksheet.set_column('A:B', 60, wrap_format) + worksheet.set_column('C:C', 60, wrap_unlocked_format) + worksheet.set_column('D:E', 40, wrap_unlocked_format) + worksheet.set_column('F:F', 50, wrap_format, {'hidden': True}) + worksheet.set_column('G:H', 30, wrap_format, {'hidden': True}) + + # Lock the top row (column headers) in place while scrolling + worksheet.freeze_panes(1, 0) + print("Added", num_translations, "rows for language", lang) + + # Reference info, not for translation + for aux, data in aux_data.items(): + df = pd.DataFrame(data, columns = ["Key", "Value"]) + df.to_excel(writer, index=False, sheet_name=aux) + worksheet = writer.sheets[aux] + worksheet.set_column('A:A', 50, bold_wrap_format) + worksheet.set_column('B:B', 80, wrap_format) + + print("Writing", outfile) + writer.save() + +if __name__ == "__main__": + + parser = argparse.ArgumentParser(description="analyze viewer xui files for needed translations", usage=usage_msg) + parser.add_argument("-v","--verbose", action="store_true", help="verbose flag") + parser.add_argument("--missing", action="store_true", default = False, help="include all fields for which a translation does not exist") + parser.add_argument("--deleted", action="store_true", default = False, help="show all translated entities which don't exist in english") + parser.add_argument("--skip_spreadsheet", action="store_true", default = False, help="skip creating the translation spreadsheet") + parser.add_argument("--rev", help="revision with modified strings, default HEAD", default="HEAD") + parser.add_argument("--rev_base", help="previous revision to compare against, default master", default="master") + parser.add_argument("--base_lang", help="base language, default en (normally leave unchanged - other values are only useful for testing)", default="en") + parser.add_argument("--lang", help="target languages, or 'all_valid' or 'supported'; default is 'supported'", nargs="+", default = ["supported"]) + args = parser.parse_args() + + cwd = os.getcwd() + rootdir = Git(cwd).rev_parse("--show-toplevel") + repo = Repo(rootdir) + try: + mod_commit = repo.commit(args.rev) + except: + failure(args.rev,"is not a valid commit") + try: + base_commit = repo.commit(args.rev_base) + except: + failure(args.rev_base,"is not a valid commit") + + print("Will identify changes in", args.rev, "not present in", args.rev_base) + if args.missing: + print("Will also include any text for which no corresponding translation exists, regardless of when it was added") + sys.stdout.flush() + + mod_tree = mod_commit.tree + base_tree = base_commit.tree + + xui_base = "indra/newview/skins/default/xui" + xui_base_tree = mod_tree[xui_base] + + # Find target languages + # all languages present in the codebase + valid_langs = [tree.name.lower() for tree in xui_base_tree if tree.name.lower() != args.base_lang.lower()] + # offically supported languages + supported_langs = ["fr", "es", "it", "pt", "ja", "de"] + langs = [l.lower() for l in args.lang] + if "supported" in args.lang: + langs = supported_langs + if "all_valid" in args.lang: + langs = valid_langs + langs = sorted(langs) + for lang in langs: + if not lang in valid_langs: + failure("Unknown target language {}. Valid values are {}".format(lang,", ".join(sorted(valid_langs) + ["all_valid","supported"]))) + print("Target language(s) are", ",".join(sorted(langs))) + sys.stdout.flush() + + outfile = "SL_Translations.xlsx" + try: + f = open(outfile,"a+") + f.close() + except: + failure("Can't write to output file",outfile,". Is it already open?") + + aux_data = { "REFERENCE": [["Command", " ".join(sys.argv)], + ["Date", str(datetime.now())], + ["Mod Commit", mod_commit.hexsha], + ["Base Commit", base_commit.hexsha], + ] } + + if not args.skip_spreadsheet: + per_lang_data = {} + for lang in langs: + print("Creating spreadsheet for language", lang) + sys.stdout.flush() + + per_lang_data[lang] = make_translation_table(mod_tree, base_tree, lang, args) + + print("Saving output file", outfile) + save_translation_file(per_lang_data, aux_data, outfile) + + if args.deleted: + deletion_file = "Translate_deletions.txt" + print("Saving deletion info to", deletion_file) + with open(deletion_file,"w") as f: + for lang in langs: + find_deletions(mod_tree, base_tree, lang, args, f) + |