diff options
author | Brad Payne (Vir Linden) <vir@lindenlab.com> | 2020-08-13 14:54:45 +0100 |
---|---|---|
committer | Brad Payne (Vir Linden) <vir@lindenlab.com> | 2020-08-13 14:54:45 +0100 |
commit | f28437adc7ae856d55edfe77596f5ce7331778f4 (patch) | |
tree | 60b7caa19cc1eb74733bdae6566644aaeada9665 /scripts/code_tools | |
parent | 1f9852f04c4c430631d10794cce4a3e8186470b5 (diff) |
SL-13705 - modified_strings.py handles more cases
Diffstat (limited to 'scripts/code_tools')
-rw-r--r-- | scripts/code_tools/modified_strings.py | 341 |
1 files changed, 183 insertions, 158 deletions
diff --git a/scripts/code_tools/modified_strings.py b/scripts/code_tools/modified_strings.py index bb42628f55..dc3357fe8e 100644 --- a/scripts/code_tools/modified_strings.py +++ b/scripts/code_tools/modified_strings.py @@ -1,158 +1,183 @@ -"""\
-
-This module contains tools for scanning the SL codebase for translation-related strings.
-
-$LicenseInfo:firstyear=2020&license=viewerlgpl$
-Second Life Viewer Source Code
-Copyright (C) 2020, Linden Research, Inc.
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation;
-version 2.1 of the License only.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA
-$/LicenseInfo$
-"""
-
-from __future__ import print_function
-
-# packages required include: gitpython, pandas
-
-import xml.etree.ElementTree as ET
-import argparse
-import os
-import sys
-from git import Repo, Git # requires the gitpython package
-import pandas as pd
-
-translate_attribs = [
- "title",
- "short_title",
- "value",
- "label",
- "label_selected",
- "tool_tip",
- "ignoretext",
- "yestext",
- "notext",
- "canceltext",
- "description",
- "longdescription"
-]
-
def codify(val):
    """Return *val* as UTF-8 encoded bytes.

    Text (``unicode`` on Python 2, ``str`` on Python 3) is encoded as UTF-8.
    Byte strings are decoded as UTF-8 and re-encoded, which validates them
    and leaves well-formed input unchanged.

    Raises:
        TypeError: if *val* is neither text nor a byte string.
        UnicodeDecodeError: if a byte string is not valid UTF-8.
    """
    # type(u"") is `unicode` on Python 2 and `str` on Python 3, so no
    # reference to the Python-2-only name `unicode` is needed.
    text_type = type(u"")
    if isinstance(val, text_type):
        return val.encode("utf-8")
    if isinstance(val, bytes):
        return val.decode("utf-8").encode("utf-8")
    raise TypeError(
        "codify() expects text or bytes, got {}".format(type(val).__name__))
-
def failure(*msg):
    """Report a fatal error and terminate the script with exit status 1.

    The positional arguments are printed space-separated, exactly as
    ``print`` would format them. The message goes to stderr so that it is
    not mixed into the regular output written to stdout.

    Raises:
        SystemExit: always, with code 1.
    """
    print(*msg, file=sys.stderr)
    sys.exit(1)
-
if __name__ == "__main__":
    # Command-line entry point: compare the base-language XUI files of two
    # git revisions and write every new or modified translatable string to
    # an Excel sheet for the translators.

    parser = argparse.ArgumentParser(description="analyze viewer xui files")
    parser.add_argument("--verbose", action="store_true", help="verbose flag")
    parser.add_argument("--rev", help="revision with modified strings, default HEAD", default="HEAD")
    parser.add_argument("--rev_base", help="previous revision to compare against, default master", default="master")
    parser.add_argument("--base_lang", help="base language, default en (useful only for testing)", default="en")
    parser.add_argument("--lang", help="target language, default fr", default="fr")
    args = parser.parse_args()

    if args.rev == args.rev_base:
        failure("Revs are the same, nothing to compare")

    print("Finding changes in", args.rev, "not present in", args.rev_base)

    # Locate the repository root from wherever the script was started.
    cwd = os.getcwd()
    rootdir = Git(cwd).rev_parse("--show-toplevel")
    repo = Repo(rootdir)
    # `except Exception` (not a bare except) so that Ctrl-C and the
    # SystemExit raised by failure() are never swallowed.
    try:
        mod_commit = repo.commit(args.rev)
    except Exception:
        failure(args.rev, "is not a valid commit")
    try:
        base_commit = repo.commit(args.rev_base)
    except Exception:
        failure(args.rev_base, "is not a valid commit")

    mod_tree = mod_commit.tree
    base_tree = base_commit.tree

    try:
        mod_xui_tree = mod_tree["indra/newview/skins/default/xui/{}".format(args.base_lang)]
    except KeyError:
        failure("xui tree not found for language", args.base_lang)

    # One row per string to translate: [file, element, field, EN, translation]
    data = []
    # For all files to be checked for translations
    for mod_blob in mod_xui_tree.traverse():
        print(mod_blob.path)
        filename = mod_blob.path
        if mod_blob.type == "tree":  # directory, skip
            continue

        mod_contents = mod_blob.data_stream.read()
        try:
            base_contents = base_tree[filename].data_stream.read()
        except KeyError:
            # File is new in the modified revision: compare against an
            # empty document so that every string counts as changed.
            print("No matching base file found for", filename)
            base_contents = '<?xml version="1.0" encoding="utf-8" standalone="yes" ?><strings></strings>'

        mod_xml = ET.fromstring(mod_contents)
        base_xml = ET.fromstring(base_contents)

        # Index every element that carries a "name" attribute by that name
        # (a later duplicate name replaces an earlier one).
        mod_dict = {}
        for child in mod_xml.iter():
            if "name" in child.attrib:
                mod_dict[child.attrib["name"]] = child
        base_dict = {}
        for child in base_xml.iter():
            if "name" in child.attrib:
                base_dict[child.attrib["name"]] = child

        for name, mod_elt in mod_dict.items():
            base_elt = base_dict.get(name)
            # Compare with `is None`: an Element without children is falsy.
            if base_elt is None or mod_elt.text != base_elt.text:
                data.append([filename, name, "text", mod_elt.text, ""])
            for attr in translate_attribs:
                if attr not in mod_elt.attrib:
                    continue
                # The attribute must be looked up in `.attrib`; testing
                # `attr in element` searches the element's children and
                # would flag every attribute as modified.
                if (base_elt is None
                        or attr not in base_elt.attrib
                        or mod_elt.attrib[attr] != base_elt.attrib[attr]):
                    data.append([filename, name, attr, mod_elt.attrib[attr], ""])

    cols = ["File", "Element", "Field", "EN", "Translation ({})".format(args.lang)]
    df = pd.DataFrame(data, columns=cols)
    df.to_excel("SL_Translations_{}.xlsx".format(args.lang.upper()), index=False)
-
-
-
-
"""\

This module contains tools for scanning the SL codebase for translation-related strings.

$LicenseInfo:firstyear=2020&license=viewerlgpl$
Second Life Viewer Source Code
Copyright (C) 2020, Linden Research, Inc.

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation;
version 2.1 of the License only.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA
$/LicenseInfo$
"""

from __future__ import print_function

# packages required include: gitpython, pandas
# (both are imported inside main() so that the helper functions below can be
# imported without the third-party packages being installed)

import xml.etree.ElementTree as ET
import argparse
import os
import sys

# Repository-relative directory holding the XUI files of one language.
XUI_DIR_FORMAT = "indra/newview/skins/default/xui/{}"

# XML attributes whose values are user-visible text and need translation.
translate_attribs = [
    "title",
    "short_title",
    "value",
    "label",
    "label_selected",
    "tool_tip",
    "ignoretext",
    "yestext",
    "notext",
    "canceltext",
    "description",
    "longdescription"
]


def codify_for_print(val):
    """Return *val* as UTF-8 encoded bytes.

    Text is encoded as UTF-8; byte strings are decoded as UTF-8 and
    re-encoded, which validates them. Works on Python 2 and Python 3.

    Raises:
        TypeError: if *val* is neither text nor a byte string.
    """
    # type(u"") is `unicode` on Python 2 and `str` on Python 3.
    text_type = type(u"")
    if isinstance(val, text_type):
        return val.encode("utf-8")
    if isinstance(val, bytes):
        return val.decode("utf-8").encode("utf-8")
    raise TypeError(
        "codify_for_print() expects text or bytes, got {}".format(type(val).__name__))


def read_xml_elements(blob):
    """Return a dict of { name => xml_node } for the XML document in *blob*.

    *blob* is a git blob (anything exposing ``data_stream.read()``), or None
    when the file does not exist in the revision, in which case the result
    is an empty dict. Only elements with a "name" attribute are included;
    if a name occurs more than once the last element wins.
    """
    if blob is None:
        return {}
    root = ET.fromstring(blob.data_stream.read())
    return {node.attrib["name"]: node
            for node in root.iter() if "name" in node.attrib}


def failure(*msg):
    """Print *msg* to stderr and terminate the script with exit status 1."""
    print(*msg, file=sys.stderr)
    sys.exit(1)


def can_translate(val):
    """Return True if *val* is text worth sending to a translator.

    None, empty or whitespace-only strings, and strings consisting only of
    digits (ignoring surrounding whitespace) are not translatable.
    """
    if val is None:
        return False
    val = val.strip()
    if not val:
        return False
    return not val.isdigit()


def translation_path(filename, base_lang, lang):
    """Map a base-language XUI file path to the same file for *lang*.

    Only the language directory directly below the xui directory is
    swapped; the rest of the path is left alone, so a file name that merely
    contains the language code (e.g. "en" in "menu_viewer.xml") is not
    altered.

    Raises:
        ValueError: if *filename* is not inside the *base_lang* xui directory.
    """
    base_dir = XUI_DIR_FORMAT.format(base_lang) + "/"
    if not filename.startswith(base_dir):
        raise ValueError("{} is not under {}".format(filename, base_dir))
    return XUI_DIR_FORMAT.format(lang) + "/" + filename[len(base_dir):]


def find_modified_strings(filename, mod_dict, base_dict, transl_dict):
    """Return spreadsheet rows for the strings of one file needing translation.

    Each of *mod_dict*, *base_dict* and *transl_dict* maps element name to
    XML element, for the modified revision, the base revision and the
    existing translation respectively. A row is produced for the text, and
    for each attribute listed in ``translate_attribs``, of every element in
    *mod_dict* that is missing from or different in *base_dict* and that
    passes ``can_translate``.

    Each row is ``[filename, name, field, value, previous_translation, ""]``
    where the previous translation is "--" when none exists and the last
    cell is left empty for the new translation.
    """
    rows = []
    for name, mod_elt in mod_dict.items():
        # Compare with `is None`: an Element without children is falsy.
        base_elt = base_dict.get(name)
        transl_elt = transl_dict.get(name)

        if base_elt is None or mod_elt.text != base_elt.text:
            val = mod_elt.text
            if can_translate(val):
                transl_val = "--" if transl_elt is None else transl_elt.text
                rows.append([filename, name, "text", val, transl_val, ""])

        for attr in translate_attribs:
            if attr not in mod_elt.attrib:
                continue
            val = mod_elt.attrib[attr]
            unchanged = (base_elt is not None
                         and attr in base_elt.attrib
                         and base_elt.attrib[attr] == val)
            if unchanged or not can_translate(val):
                continue
            transl_val = "--"
            if transl_elt is not None and attr in transl_elt.attrib:
                transl_val = transl_elt.attrib[attr]
            rows.append([filename, name, attr, val, transl_val, ""])
    return rows


def main():
    """Command-line entry point.

    Compares the base-language XUI files of two git revisions and writes
    every new or modified translatable string, together with its current
    translation if any, to SL_Translations_<LANG>.xlsx.
    """
    from git import Repo, Git  # requires the gitpython package
    import pandas as pd

    parser = argparse.ArgumentParser(description="analyze viewer xui files")
    parser.add_argument("-v","--verbose", action="store_true", help="verbose flag")
    parser.add_argument("--rev", help="revision with modified strings, default HEAD", default="HEAD")
    parser.add_argument("--rev_base", help="previous revision to compare against, default master", default="master")
    parser.add_argument("--base_lang", help="base language, default en (useful only for testing)", default="en")
    parser.add_argument("--lang", help="target language, default fr", default="fr")
    args = parser.parse_args()

    if args.rev == args.rev_base:
        failure("Revs are the same, nothing to compare")

    print("Finding changes in", args.rev, "not present in", args.rev_base)
    sys.stdout.flush()

    # Locate the repository root from wherever the script was started.
    cwd = os.getcwd()
    rootdir = Git(cwd).rev_parse("--show-toplevel")
    repo = Repo(rootdir)
    # `except Exception` (not a bare except) so that Ctrl-C and the
    # SystemExit raised by failure() are never swallowed.
    try:
        mod_commit = repo.commit(args.rev)
    except Exception:
        failure(args.rev, "is not a valid commit")
    try:
        base_commit = repo.commit(args.rev_base)
    except Exception:
        failure(args.rev_base, "is not a valid commit")

    mod_tree = mod_commit.tree
    base_tree = base_commit.tree

    xui_path = XUI_DIR_FORMAT.format(args.base_lang)
    try:
        mod_xui_tree = mod_tree[xui_path]
    except KeyError:
        failure("xui tree not found for language", args.base_lang)

    data = []
    # For all files to be checked for translations
    for mod_blob in mod_xui_tree.traverse():
        filename = mod_blob.path
        if mod_blob.type == "tree":  # directory, skip
            continue

        if args.verbose:
            print(filename)
        try:
            base_blob = base_tree[filename]
        except KeyError:
            if args.verbose:
                print("No matching base file found for", filename)
            base_blob = None

        transl_filename = translation_path(filename, args.base_lang, args.lang)
        try:
            transl_blob = mod_tree[transl_filename]
        except KeyError:
            if args.verbose:
                print("No matching translation file found at", transl_filename)
            transl_blob = None

        mod_dict = read_xml_elements(mod_blob)
        base_dict = read_xml_elements(base_blob)
        transl_dict = read_xml_elements(transl_blob)

        rows = find_modified_strings(filename, mod_dict, base_dict, transl_dict)
        data.extend(rows)
        if args.verbose and rows:
            print(" ", len(rows), "rows added")

    outfile = "SL_Translations_{}.xlsx".format(args.lang.upper())
    cols = ["File", "Element", "Field", "EN", "Previous Translation ({})".format(args.lang.upper()), "ENTER NEW TRANSLATION ({})".format(args.lang.upper())]
    num_translations = len(data)
    df = pd.DataFrame(data, columns=cols)
    df.to_excel(outfile, index=False)
    if num_translations > 0:
        print("Wrote", num_translations, "rows to file", outfile)
    else:
        print("Nothing to translate,", outfile, "is empty")


if __name__ == "__main__":
    main()