Diffstat (limited to 'scripts/metrics')
-rwxr-xr-x | scripts/metrics/viewerstats.py | 226 |
1 files changed, 226 insertions, 0 deletions
diff --git a/scripts/metrics/viewerstats.py b/scripts/metrics/viewerstats.py
new file mode 100755
index 0000000000..f7be3d967e
--- /dev/null
+++ b/scripts/metrics/viewerstats.py
@@ -0,0 +1,226 @@
+#!runpy.sh
+
+"""\
+
+This module contains code for analyzing ViewerStats data as uploaded by the viewer.
+
+$LicenseInfo:firstyear=2021&license=viewerlgpl$
+Second Life Viewer Source Code
+Copyright (C) 2021, Linden Research, Inc.
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation;
+version 2.1 of the License only.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA
+$/LicenseInfo$
+"""
+
+import argparse
+import numpy as np
+import pandas as pd
+import json
+from collections import Counter, defaultdict
+from llbase import llsd
+import io
+import re
+import os
+import sys
+
+def show_stats_by_key(recs,indices,settings_sd = None):
+    result = ()
+    cnt = Counter()
+    per_key_cnt = defaultdict(Counter)
+    for r in recs:
+        try:
+            d = r
+            for idx in indices:
+                d = d[idx]
+            for k,v in d.items():
+                if isinstance(v,dict):
+                    continue
+                cnt[k] += 1
+                if isinstance(v,list):
+                    v = tuple(v)
+                per_key_cnt[k][v] += 1
+        except Exception as e:
+            print "err", e
+            print "d", d, "k", k, "v", v
+            raise
+    mc = cnt.most_common()
+    print "========================="
+    keyprefix = ""
+    if len(indices)>0:
+        keyprefix = ".".join(indices) + "."
+    for i,m in enumerate(mc):
+        k = m[0]
+        bigc = m[1]
+        unset_cnt = len(recs) - bigc
+        kmc = per_key_cnt[k].most_common(5)
+        print i, keyprefix+str(k), bigc
+        if settings_sd is not None and k in settings_sd and "Value" in settings_sd[k]:
+            print " ", "default",settings_sd[k]["Value"],"count",unset_cnt
+        for v in kmc:
+            print " ", "value",v[0],"count",v[1]
+    if settings_sd is not None:
+        print "Total keys in settings", len(settings_sd.keys())
+        unused_keys = list(set(settings_sd.keys()) - set(cnt.keys()))
+        unused_keys_non_str = [k for k in unused_keys if settings_sd[k]["Type"] != "String"]
+        unused_keys_str = [k for k in unused_keys if settings_sd[k]["Type"] == "String"]
+
+        # Things that no one in the sample has set to a non-default value. Possible candidates for removal.
+        print "\nUnused_keys_non_str", len(unused_keys_non_str)
+        print "======================"
+        print "\n".join(sorted(unused_keys_non_str))
+
+        # Strings are not currently logged, so we have no info on usage.
+        print "\nString keys (usage unknown)", len(unused_keys_str)
+        print "======================"
+        print "\n".join(sorted(unused_keys_str))
+
+        # Things that someone has set but that aren't recognized settings.
+        unrec_keys = list(set(cnt.keys()) - set(settings_sd.keys()))
+        print "\nUnrecognized keys", len(unrec_keys)
+        print "======================"
+        print "\n".join(sorted(unrec_keys))
+
+        result = (settings_sd.keys(), unused_keys_str, unused_keys_non_str, unrec_keys)
+    return result
+
+def parse_settings_xml(fname):
+    # assume we're in scripts/metrics
+    fname = "../../indra/newview/app_settings/" + fname
+    with open(fname,"r") as f:
+        contents = f.read()
+        return llsd.parse_xml(contents)
+
+def read_raw_settings_xml(fname):
+    # assume we're in scripts/metrics
+    fname = "../../indra/newview/app_settings/" + fname
+    contents = None
+    with open(fname,"r") as f:
+        contents = f.read()
+    return contents
+
+def write_settings_xml(fname, contents):
+    # assume we're in scripts/metrics
+    fname = "../../indra/newview/app_settings/" + fname
+    with open(fname,"w") as f:
+        f.write(llsd.format_pretty_xml(contents))
+        f.close()
+
+def write_raw_settings_xml(fname, string):
+    # assume we're in scripts/metrics
+    fname = "../../indra/newview/app_settings/" + fname
+    with io.open(fname,"w", newline='\n') as f:
+        f.write(string.decode('latin1'))
+        f.close()
+
+def remove_settings(string, to_remove):
+    for r in to_remove:
+        subs_str = r"<key>" + r + r"<.*?</map>\n"
+        string = re.sub(subs_str,"",string,flags=re.S|re.DOTALL)
+    return string
+
+def get_used_strings(root_dir):
+    used_str = set()
+    skipped_ext = set()
+    for dir_name, sub_dir_list, file_list in os.walk(root_dir):
+        for fname in file_list:
+            if fname in ["settings.xml", "settings.xml.edit", "settings_per_account.xml"]:
+                print "skip", fname
+                continue
+            (base,ext) = os.path.splitext(fname)
+            #if ext not in [".cpp", ".hpp", ".h", ".xml"]:
+            #    skipped_ext.add(ext)
+            #    continue
+
+            full_name = os.path.join(dir_name,fname)
+
+            with open(full_name,"r") as f:
+                #print full_name
+                lines = f.readlines()
+                for l in lines:
+                    ms = re.findall(r'[>\"]([A-Za-z0-9_]+)[\"<]',l)
+                    for m in ms:
+                        #print "used_str",m
+                        used_str.add(m)
+    print "skipped extensions", skipped_ext
+    print "got used_str", len(used_str)
+    return used_str
+
+
+if __name__ == "__main__":
+
+    parser = argparse.ArgumentParser(description="process tab-separated table containing viewerstats logs")
+    parser.add_argument("--verbose", action="store_true",help="verbose flag")
+    parser.add_argument("--preferences", action="store_true", help="analyze preference info")
+    parser.add_argument("--remove_unused", action="store_true", help="remove unused preferences")
+    parser.add_argument("--column", help="name of column containing viewerstats info")
+    parser.add_argument("infiles", nargs="+", help="name of .tsv files to process")
+    args = parser.parse_args()
+
+    for fname in args.infiles:
+        print "process", fname
+        df = pd.read_csv(fname,sep='\t')
+        #print "DF", df.describe()
+        jstrs = df['RAW_LOG:BODY']
+        #print "JSTRS", jstrs.describe()
+        recs = []
+        for i,jstr in enumerate(jstrs):
+            recs.append(json.loads(jstr))
+        show_stats_by_key(recs,[])
+        show_stats_by_key(recs,["agent"])
+        if args.preferences:
+            print "\nSETTINGS.XML"
+            settings_sd = parse_settings_xml("settings.xml")
+            #for skey,svals in settings_sd.items():
+            #    print skey, "=>", svals
+            (all_str,_,_,_) = show_stats_by_key(recs,["preferences","settings"],settings_sd)
+            print
+
+            #print "\nSETTINGS_PER_ACCOUNT.XML"
+            #settings_pa_sd = parse_settings_xml("settings_per_account.xml")
+            #show_stats_by_key(recs,["preferences","settings_per_account"],settings_pa_sd)
+
+            if args.remove_unused:
+                # walk codebase looking for strings
+                all_str_set = set(all_str)
+                used_strings = get_used_strings("../../indra")
+                used_strings_set = set(used_strings)
+                unref_strings = all_str_set-used_strings_set
+                # Some settings names are generated by appending to a prefix. Need to look for this case.
+                prefix_used = set()
+                print "checking unref_strings", len(unref_strings)
+                for u in unref_strings:
+                    for k in range(6,len(u)):
+                        prefix = u[0:k]
+                        if prefix in all_str_set and prefix in used_strings_set:
+                            prefix_used.add(u)
+                            #print "PREFIX_USED",u,prefix
+                print "PREFIX_USED", len(prefix_used), ",".join(list(prefix_used))
+                print
+                unref_strings = unref_strings - prefix_used
+
+                print "\nUNREF_IN_CODE " + str(len(unref_strings)) + "\n"
+                print "\n".join(list(unref_strings))
+                settings_str = read_raw_settings_xml("settings.xml")
+                # Do this via direct string munging to generate minimal changeset
+                settings_edited = remove_settings(settings_str,unref_strings)
+                write_raw_settings_xml("settings.xml.edit",settings_edited)
+
+
+
+
+
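
A few illustrative notes, not part of the commit itself. Each row of the input .tsv carries a JSON blob in its RAW_LOG:BODY column, and show_stats_by_key() walks a nested dict path inside each parsed record, counting how often each key appears and which values it takes. A minimal sketch of that tallying step, using invented records and setting names rather than real viewer log data:

    from collections import Counter, defaultdict

    # Invented records; real viewer logs have a different, larger schema.
    recs = [
        {"preferences": {"settings": {"RenderFoo": 1, "ChatBar": True}}},
        {"preferences": {"settings": {"RenderFoo": 2}}},
    ]

    cnt = Counter()                     # how many records report each key
    per_key_cnt = defaultdict(Counter)  # distribution of observed values per key
    for r in recs:
        d = r["preferences"]["settings"]
        for k, v in d.items():
            cnt[k] += 1
            per_key_cnt[k][v] += 1

    print(cnt.most_common())                        # [('RenderFoo', 2), ('ChatBar', 1)]
    print(per_key_cnt["RenderFoo"].most_common(5))  # [(1, 1), (2, 1)]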
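
The --remove_unused pass deliberately spares settings whose names never appear verbatim in the source because they are generated by appending a suffix to another setting's name: an unreferenced name is kept if some prefix of it (six characters or longer) is both a known setting and present in the strings scraped from the code. A toy illustration of that check, with invented names:

    # Invented names for illustration only.
    all_str_set = {"SnapshotQuality", "SnapshotQuality1", "ObsoleteThing"}
    used_strings_set = {"SnapshotQuality"}                  # tokens scraped from source files
    unref_strings = {"SnapshotQuality1", "ObsoleteThing"}   # settings never seen verbatim in code

    prefix_used = set()
    for u in unref_strings:
        for k in range(6, len(u)):
            prefix = u[0:k]
            if prefix in all_str_set and prefix in used_strings_set:
                prefix_used.add(u)

    # SnapshotQuality1 is spared; only ObsoleteThing remains a removal candidate.
    print(sorted(unref_strings - prefix_used))  # ['ObsoleteThing']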
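
remove_settings() edits the raw settings.xml text with a regex rather than round-tripping it through the LLSD serializer, so the resulting changeset stays minimal. A small sketch of the same substitution on a hand-written fragment shaped like the viewer's <key>...</key><map>...</map> entries (names and values here are made up):

    import re

    def remove_settings(string, to_remove):
        # Same pattern as in the patch above.
        for r in to_remove:
            subs_str = r"<key>" + r + r"<.*?</map>\n"
            string = re.sub(subs_str, "", string, flags=re.S | re.DOTALL)
        return string

    sample = (
        "<key>ObsoleteThing</key>\n"
        "<map>\n"
        "  <key>Type</key><string>Boolean</string>\n"
        "  <key>Value</key><integer>0</integer>\n"
        "</map>\n"
        "<key>KeepThisSetting</key>\n"
        "<map>\n"
        "  <key>Type</key><string>F32</string>\n"
        "  <key>Value</key><real>1.0</real>\n"
        "</map>\n"
    )

    # Only the KeepThisSetting block should survive.
    print(remove_settings(sample, ["ObsoleteThing"]))

Because the script writes its output to settings.xml.edit rather than overwriting settings.xml, the edited file can be diffed against the original before anything is committed.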