From 61ab98212073f0093f044bc78a6ccefac0f63761 Mon Sep 17 00:00:00 2001 From: "Brad Payne (Vir Linden)" Date: Fri, 26 Mar 2021 14:42:40 +0100 Subject: SL-15031 - ViewerStats analysis script --- scripts/metrics/viewerstats.py | 92 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 scripts/metrics/viewerstats.py (limited to 'scripts/metrics/viewerstats.py') diff --git a/scripts/metrics/viewerstats.py b/scripts/metrics/viewerstats.py new file mode 100644 index 0000000000..1e54d2cfd0 --- /dev/null +++ b/scripts/metrics/viewerstats.py @@ -0,0 +1,92 @@ +#!runpy.sh + +"""\ + +This module contains code for analyzing ViewerStats data as uploaded by the viewer. + +$LicenseInfo:firstyear=2021&license=viewerlgpl$ +Second Life Viewer Source Code +Copyright (C) 2021, Linden Research, Inc. + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; +version 2.1 of the License only. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA +$/LicenseInfo$ +""" + +import argparse +import numpy as np +import pandas as pd +import json +from collections import Counter, defaultdict + +def show_stats_by_key(recs,indices): + cnt = Counter() + per_key_cnt = defaultdict(Counter) + for r in recs: + try: + d = r + for idx in indices: + d = d[idx] + for k,v in d.items(): + if isinstance(v,dict): + continue + cnt[k] += 1 + if isinstance(v,list): + v = tuple(v) + per_key_cnt[k][v] += 1 + except Exception as e: + print "err", e + print "d", d, "k", k, "v", v + raise + mc = cnt.most_common(100) + print "=========================" + keyprefix = "" + if len(indices)>0: + keyprefix = ".".join(indices) + "." + for i,m in enumerate(mc): + k = m[0] + bigc = m[1] + kmc = per_key_cnt[k].most_common(5) + print i, keyprefix+str(k), bigc + for v in kmc: + print " ", "value",v[0],"count",v[1] + +if __name__ == "__main__": + + parser = argparse.ArgumentParser(description="process tab-separated table containing viewerstats logs") + parser.add_argument("--verbose", action="store_true",help="verbose flag") + parser.add_argument("--preferences", action="store_true", help="analyze preference info") + parser.add_argument("--column", help="name of column containing viewerstats info") + parser.add_argument("infiles", nargs="+", help="name of .tsv files to process") + args = parser.parse_args() + + for fname in args.infiles: + print "process", fname + df = pd.read_csv(fname,sep='\t') + #print "DF", df.describe() + jstrs = df['RAW_LOG:BODY'] + #print "JSTRS", jstrs.describe() + recs = [] + for i,jstr in enumerate(jstrs): + recs.append(json.loads(jstr)) + show_stats_by_key(recs,[]) + show_stats_by_key(recs,["agent"]) + if args.preferences: + show_stats_by_key(recs,["preferences","settings"]) + + + + -- cgit v1.2.3 From e0b414921590152c51a1efc31d0c22c3957d437f Mon Sep 17 00:00:00 2001 From: "Brad Payne (Vir Linden)" Date: Fri, 26 Mar 2021 14:43:05 +0100 Subject: SL-15031 - chmod flag --- scripts/metrics/viewerstats.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 scripts/metrics/viewerstats.py (limited to 'scripts/metrics/viewerstats.py') diff --git a/scripts/metrics/viewerstats.py b/scripts/metrics/viewerstats.py old mode 100644 new mode 100755 -- cgit v1.2.3 From 2e255caae1cb4f82c9f7f3d9233c9865230d599b Mon Sep 17 00:00:00 2001 From: "Brad Payne (Vir Linden)" Date: Tue, 30 Mar 2021 16:59:17 +0100 Subject: SL-15031 - ViewerStats analysis compare to settings.xml --- scripts/metrics/viewerstats.py | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) (limited to 'scripts/metrics/viewerstats.py') diff --git a/scripts/metrics/viewerstats.py b/scripts/metrics/viewerstats.py index 1e54d2cfd0..740540b3dd 100755 --- a/scripts/metrics/viewerstats.py +++ b/scripts/metrics/viewerstats.py @@ -31,8 +31,9 @@ import numpy as np import pandas as pd import json from collections import Counter, defaultdict +from llbase import llsd -def show_stats_by_key(recs,indices): +def show_stats_by_key(recs,indices,settings_sd = None): cnt = Counter() per_key_cnt = defaultdict(Counter) for r in recs: @@ -51,7 +52,7 @@ def show_stats_by_key(recs,indices): print "err", e print "d", d, "k", k, "v", v raise - mc = cnt.most_common(100) + mc = cnt.most_common() print "=========================" keyprefix = "" if len(indices)>0: @@ -59,10 +60,30 @@ def show_stats_by_key(recs,indices): for i,m in enumerate(mc): k = m[0] bigc = m[1] + unset_cnt = len(recs) - bigc kmc = per_key_cnt[k].most_common(5) print i, keyprefix+str(k), bigc + if settings_sd is not None and k in settings_sd and "Value" in settings_sd[k]: + print " ", "default",settings_sd[k]["Value"],"count",unset_cnt for v in kmc: print " ", "value",v[0],"count",v[1] + if settings_sd is not None: + print "Total keys in settings", len(settings_sd.keys()) + unused_keys = list(set(settings_sd.keys()) - set(cnt.keys())) + print "\nUnused_keys", len(unused_keys) + print "======================" + print "\n".join(sorted(unused_keys)) + unrec_keys = list(set(cnt.keys()) - set(settings_sd.keys())) + print "\nUnrecognized keys", len(unrec_keys) + print "======================" + print "\n".join(sorted(unrec_keys)) + +def parse_settings_xml(): + # assume we're in scripts/metrics + fname = "../../indra/newview/app_settings/settings.xml" + with open(fname,"r") as f: + contents = f.read() + return llsd.parse_xml(contents) if __name__ == "__main__": @@ -85,7 +106,10 @@ if __name__ == "__main__": show_stats_by_key(recs,[]) show_stats_by_key(recs,["agent"]) if args.preferences: - show_stats_by_key(recs,["preferences","settings"]) + settings_sd = parse_settings_xml() + #for skey,svals in settings_sd.items(): + # print skey, "=>", svals + show_stats_by_key(recs,["preferences","settings"],settings_sd) -- cgit v1.2.3 From 6c1dc74b75fc3d8967c54a3f95e5ce57ba4c4716 Mon Sep 17 00:00:00 2001 From: "Brad Payne (Vir Linden)" Date: Thu, 1 Apr 2021 15:55:57 +0100 Subject: SL-15031 - include settings_per_account stats --- scripts/metrics/viewerstats.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) (limited to 'scripts/metrics/viewerstats.py') diff --git a/scripts/metrics/viewerstats.py b/scripts/metrics/viewerstats.py index 740540b3dd..1e2b8bc4ff 100755 --- a/scripts/metrics/viewerstats.py +++ b/scripts/metrics/viewerstats.py @@ -70,17 +70,28 @@ def show_stats_by_key(recs,indices,settings_sd = None): if settings_sd is not None: print "Total keys in settings", len(settings_sd.keys()) unused_keys = list(set(settings_sd.keys()) - set(cnt.keys())) - print "\nUnused_keys", len(unused_keys) + unused_keys_non_str = [k for k in unused_keys if settings_sd[k]["Type"] != "String"] + unused_keys_str = [k for k in unused_keys if settings_sd[k]["Type"] == "String"] + + # Things that no one in the sample has set to a non-default value. Possible candidates for removal. + print "\nUnused_keys_non_str", len(unused_keys_non_str) + print "======================" + print "\n".join(sorted(unused_keys_non_str)) + + # Strings are not currently logged, so we have no info on usage. + print "\nString keys (usage unknown)", len(unused_keys_str) print "======================" - print "\n".join(sorted(unused_keys)) + print "\n".join(sorted(unused_keys_str)) + + # Things that someone has set but that aren't recognized settings. unrec_keys = list(set(cnt.keys()) - set(settings_sd.keys())) print "\nUnrecognized keys", len(unrec_keys) print "======================" print "\n".join(sorted(unrec_keys)) -def parse_settings_xml(): +def parse_settings_xml(fname): # assume we're in scripts/metrics - fname = "../../indra/newview/app_settings/settings.xml" + fname = "../../indra/newview/app_settings/" + fname with open(fname,"r") as f: contents = f.read() return llsd.parse_xml(contents) @@ -106,11 +117,16 @@ if __name__ == "__main__": show_stats_by_key(recs,[]) show_stats_by_key(recs,["agent"]) if args.preferences: - settings_sd = parse_settings_xml() + print "\nSETTINGS.XML" + settings_sd = parse_settings_xml("settings.xml") #for skey,svals in settings_sd.items(): # print skey, "=>", svals show_stats_by_key(recs,["preferences","settings"],settings_sd) - + print + + print "\nSETTINGS_PER_ACCOUNT.XML" + settings_pa_sd = parse_settings_xml("settings_per_account.xml") + show_stats_by_key(recs,["preferences","settings_per_account"],settings_pa_sd) -- cgit v1.2.3 From 704aeb09d81f0fa37e343460edb1ab11ba3dbc8c Mon Sep 17 00:00:00 2001 From: "Brad Payne (Vir Linden)" Date: Tue, 6 Apr 2021 16:32:19 +0100 Subject: SL-15031 - option to remove unreferenced settings --- scripts/metrics/viewerstats.py | 55 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) (limited to 'scripts/metrics/viewerstats.py') diff --git a/scripts/metrics/viewerstats.py b/scripts/metrics/viewerstats.py index 1e2b8bc4ff..c34ed440c9 100755 --- a/scripts/metrics/viewerstats.py +++ b/scripts/metrics/viewerstats.py @@ -32,8 +32,11 @@ import pandas as pd import json from collections import Counter, defaultdict from llbase import llsd +import io +import re def show_stats_by_key(recs,indices,settings_sd = None): + result = () cnt = Counter() per_key_cnt = defaultdict(Counter) for r in recs: @@ -89,6 +92,9 @@ def show_stats_by_key(recs,indices,settings_sd = None): print "======================" print "\n".join(sorted(unrec_keys)) + result = (settings_sd.keys(), unused_keys_str, unused_keys_non_str, unrec_keys) + return result + def parse_settings_xml(fname): # assume we're in scripts/metrics fname = "../../indra/newview/app_settings/" + fname @@ -96,11 +102,40 @@ def parse_settings_xml(fname): contents = f.read() return llsd.parse_xml(contents) +def read_settings_xml(fname): + # assume we're in scripts/metrics + fname = "../../indra/newview/app_settings/" + fname + contents = None + with open(fname,"r") as f: + contents = f.read() + return contents + +def write_settings_xml(fname, contents): + # assume we're in scripts/metrics + fname = "../../indra/newview/app_settings/" + fname + with open(fname,"w") as f: + f.write(llsd.format_pretty_xml(contents)) + f.close() + +def write_raw_settings_xml(fname, string): + # assume we're in scripts/metrics + fname = "../../indra/newview/app_settings/" + fname + with io.open(fname,"w", newline='\n') as f: + f.write(string.decode('latin1')) + f.close() + +def remove_settings(string, to_remove): + for r in to_remove: + subs_str = r"" + r + r"<.*?" + string = re.sub(subs_str,"",string,flags=re.S|re.DOTALL) + return string + if __name__ == "__main__": parser = argparse.ArgumentParser(description="process tab-separated table containing viewerstats logs") parser.add_argument("--verbose", action="store_true",help="verbose flag") parser.add_argument("--preferences", action="store_true", help="analyze preference info") + parser.add_argument("--remove_unused", action="store_true", help="remove unused preferences") parser.add_argument("--column", help="name of column containing viewerstats info") parser.add_argument("infiles", nargs="+", help="name of .tsv files to process") args = parser.parse_args() @@ -121,12 +156,30 @@ if __name__ == "__main__": settings_sd = parse_settings_xml("settings.xml") #for skey,svals in settings_sd.items(): # print skey, "=>", svals - show_stats_by_key(recs,["preferences","settings"],settings_sd) + (all_str,_,_,_) = show_stats_by_key(recs,["preferences","settings"],settings_sd) print print "\nSETTINGS_PER_ACCOUNT.XML" settings_pa_sd = parse_settings_xml("settings_per_account.xml") show_stats_by_key(recs,["preferences","settings_per_account"],settings_pa_sd) + if args.remove_unused: + # quotestrings created by + # % find . -name '*.cpp' -o -name '*.hpp' -o -name '*.h' -o -name '*cmd_line.xml' | xargs grep -ohP '[\">][a-zA-Z0-9]*[\"<]' | sort | uniq > ../scripts/metrics/quotestrings.out + with open("quotestrings.out", "r") as f: + used_strings = f.readlines() + used_strings = [u[1:-2] for u in used_strings] + #print "\n".join(used_strings) + unref_strings = list(set(all_str)-set(used_strings)) + print "\nUNREF_IN_CODE " + str(len(unref_strings)) + "\n" + print "\n".join(unref_strings) + settings_str = read_settings_xml("settings.xml") + # Do this via direct string munging to generate minimal changeset + settings_edited = remove_settings(settings_str,unref_strings) + write_raw_settings_xml("settings.xml",settings_edited) + + + + -- cgit v1.2.3 From d2f76612a7e47073e5e780a99866afc81efbbda9 Mon Sep 17 00:00:00 2001 From: "Brad Payne (Vir Linden)" Date: Wed, 7 Apr 2021 19:48:27 +0100 Subject: SL-15031 - more viewerstats.py support for cleaning up unused settings --- scripts/metrics/viewerstats.py | 77 ++++++++++++++++++++++++++++++++---------- 1 file changed, 59 insertions(+), 18 deletions(-) (limited to 'scripts/metrics/viewerstats.py') diff --git a/scripts/metrics/viewerstats.py b/scripts/metrics/viewerstats.py index c34ed440c9..f7be3d967e 100755 --- a/scripts/metrics/viewerstats.py +++ b/scripts/metrics/viewerstats.py @@ -34,6 +34,8 @@ from collections import Counter, defaultdict from llbase import llsd import io import re +import os +import sys def show_stats_by_key(recs,indices,settings_sd = None): result = () @@ -102,7 +104,7 @@ def parse_settings_xml(fname): contents = f.read() return llsd.parse_xml(contents) -def read_settings_xml(fname): +def read_raw_settings_xml(fname): # assume we're in scripts/metrics fname = "../../indra/newview/app_settings/" + fname contents = None @@ -126,10 +128,38 @@ def write_raw_settings_xml(fname, string): def remove_settings(string, to_remove): for r in to_remove: - subs_str = r"" + r + r"<.*?" + subs_str = r"" + r + r"<.*?\n" string = re.sub(subs_str,"",string,flags=re.S|re.DOTALL) return string +def get_used_strings(root_dir): + used_str = set() + skipped_ext = set() + for dir_name, sub_dir_list, file_list in os.walk(root_dir): + for fname in file_list: + if fname in ["settings.xml", "settings.xml.edit", "settings_per_account.xml"]: + print "skip", fname + continue + (base,ext) = os.path.splitext(fname) + #if ext not in [".cpp", ".hpp", ".h", ".xml"]: + # skipped_ext.add(ext) + # continue + + full_name = os.path.join(dir_name,fname) + + with open(full_name,"r") as f: + #print full_name + lines = f.readlines() + for l in lines: + ms = re.findall(r'[>\"]([A-Za-z0-9_]+)[\"<]',l) + for m in ms: + #print "used_str",m + used_str.add(m) + print "skipped extensions", skipped_ext + print "got used_str", len(used_str) + return used_str + + if __name__ == "__main__": parser = argparse.ArgumentParser(description="process tab-separated table containing viewerstats logs") @@ -159,24 +189,35 @@ if __name__ == "__main__": (all_str,_,_,_) = show_stats_by_key(recs,["preferences","settings"],settings_sd) print - print "\nSETTINGS_PER_ACCOUNT.XML" - settings_pa_sd = parse_settings_xml("settings_per_account.xml") - show_stats_by_key(recs,["preferences","settings_per_account"],settings_pa_sd) + #print "\nSETTINGS_PER_ACCOUNT.XML" + #settings_pa_sd = parse_settings_xml("settings_per_account.xml") + #show_stats_by_key(recs,["preferences","settings_per_account"],settings_pa_sd) if args.remove_unused: - # quotestrings created by - # % find . -name '*.cpp' -o -name '*.hpp' -o -name '*.h' -o -name '*cmd_line.xml' | xargs grep -ohP '[\">][a-zA-Z0-9]*[\"<]' | sort | uniq > ../scripts/metrics/quotestrings.out - with open("quotestrings.out", "r") as f: - used_strings = f.readlines() - used_strings = [u[1:-2] for u in used_strings] - #print "\n".join(used_strings) - unref_strings = list(set(all_str)-set(used_strings)) - print "\nUNREF_IN_CODE " + str(len(unref_strings)) + "\n" - print "\n".join(unref_strings) - settings_str = read_settings_xml("settings.xml") - # Do this via direct string munging to generate minimal changeset - settings_edited = remove_settings(settings_str,unref_strings) - write_raw_settings_xml("settings.xml",settings_edited) + # walk codebase looking for strings + all_str_set = set(all_str) + used_strings = get_used_strings("../../indra") + used_strings_set = set(used_strings) + unref_strings = all_str_set-used_strings_set + # Some settings names are generated by appending to a prefix. Need to look for this case. + prefix_used = set() + print "checking unref_strings", len(unref_strings) + for u in unref_strings: + for k in range(6,len(u)): + prefix = u[0:k] + if prefix in all_str_set and prefix in used_strings_set: + prefix_used.add(u) + #print "PREFIX_USED",u,prefix + print "PREFIX_USED", len(prefix_used), ",".join(list(prefix_used)) + print + unref_strings = unref_strings - prefix_used + + print "\nUNREF_IN_CODE " + str(len(unref_strings)) + "\n" + print "\n".join(list(unref_strings)) + settings_str = read_raw_settings_xml("settings.xml") + # Do this via direct string munging to generate minimal changeset + settings_edited = remove_settings(settings_str,unref_strings) + write_raw_settings_xml("settings.xml.edit",settings_edited) -- cgit v1.2.3