1 files changed, 185 insertions, 0 deletions
diff --git a/scripts/metrics/viewerstats.py b/scripts/metrics/viewerstats.py
new file mode 100755
index 0000000000..c34ed440c9
--- /dev/null
+++ b/scripts/metrics/viewerstats.py
@@ -0,0 +1,185 @@
+#!runpy.sh
+
+"""\
+
+This module contains code for analyzing ViewerStats data as uploaded by the viewer.
+
+$LicenseInfo:firstyear=2021&license=viewerlgpl$
+Second Life Viewer Source Code
+Copyright (C) 2021, Linden Research, Inc.
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation;
+version 2.1 of the License only.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+
+Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
+$/LicenseInfo$
+"""
+
+import argparse
+import numpy as np
+import pandas as pd
+import json
+from collections import Counter, defaultdict
+from llbase import llsd
+import io
+import re
+
+def show_stats_by_key(recs,indices,settings_sd = None):
+    result = ()
+    cnt = Counter()
+    per_key_cnt = defaultdict(Counter)
+    for r in recs:
+        try:
+            d = r
+            for idx in indices:
+                d = d[idx]
+            for k,v in d.items():
+                if isinstance(v,dict):
+                    continue
+                cnt[k] += 1
+                if isinstance(v,list):
+                    v = tuple(v)
+                per_key_cnt[k][v] += 1
+        except Exception as e:
+            print "err", e
+            print "d", d, "k", k, "v", v
+            raise
+    mc = cnt.most_common()
+    print "========================="
+    keyprefix = ""
+    if len(indices)>0:
+        keyprefix = ".".join(indices) + "."
+    for i,m in enumerate(mc):
+        k = m[0]
+        bigc = m[1]
+        unset_cnt = len(recs) - bigc
+        kmc = per_key_cnt[k].most_common(5)
+        print i, keyprefix+str(k), bigc
+        if settings_sd is not None and k in settings_sd and "Value" in settings_sd[k]:
+            print "    ", "default",settings_sd[k]["Value"],"count",unset_cnt
+        for v in kmc:
+            print "    ", "value",v[0],"count",v[1]
+    if settings_sd is not None:
+        print "Total keys in settings", len(settings_sd.keys())
+        unused_keys = list(set(settings_sd.keys()) - set(cnt.keys()))
+        unused_keys_non_str = [k for k in unused_keys if settings_sd[k]["Type"] != "String"]
+        unused_keys_str = [k for k in unused_keys if settings_sd[k]["Type"] == "String"]
+
+        # Things that no one in the sample has set to a non-default value. Possible candidates for removal.
+        print "\nUnused_keys_non_str", len(unused_keys_non_str)
+        print   "======================"
+        print "\n".join(sorted(unused_keys_non_str))
+
+        # Strings are not currently logged, so we have no info on usage.
+        print "\nString keys (usage unknown)", len(unused_keys_str)
+        print   "======================"
+        print "\n".join(sorted(unused_keys_str))
+
+        # Things that someone has set but that aren't recognized settings.
+        unrec_keys = list(set(cnt.keys()) - set(settings_sd.keys()))
+        print "\nUnrecognized keys", len(unrec_keys)
+        print   "======================"
+        print "\n".join(sorted(unrec_keys))
+
+        result = (settings_sd.keys(), unused_keys_str, unused_keys_non_str, unrec_keys)
+    return result
+
+def parse_settings_xml(fname):
+    # assume we're in scripts/metrics
+    fname = "../../indra/newview/app_settings/" + fname
+    with open(fname,"r") as f:
+        contents = f.read()
+        return llsd.parse_xml(contents)
+
+def read_settings_xml(fname):
+    # assume we're in scripts/metrics
+    fname = "../../indra/newview/app_settings/" + fname
+    contents = None
+    with open(fname,"r") as f:
+        contents = f.read()
+    return contents
+
+def write_settings_xml(fname, contents):
+    # assume we're in scripts/metrics
+    fname = "../../indra/newview/app_settings/" + fname
+    with open(fname,"w") as f:
+        f.write(llsd.format_pretty_xml(contents))
+        f.close()
+
+def write_raw_settings_xml(fname, string):
+    # assume we're in scripts/metrics
+    fname = "../../indra/newview/app_settings/" + fname
+    with io.open(fname,"w", newline='\n') as f:
+        f.write(string.decode('latin1'))
+        f.close()
+
+def remove_settings(string, to_remove):
+    for r in to_remove:
+        subs_str = r"<key>" + r + r"<.*?</map>"
+        string = re.sub(subs_str,"",string,flags=re.S|re.DOTALL)
+    return string
+
+if __name__ == "__main__":
+
+    parser = argparse.ArgumentParser(description="process tab-separated table containing viewerstats logs")
+    parser.add_argument("--verbose", action="store_true",help="verbose flag")
+    parser.add_argument("--preferences", action="store_true", help="analyze preference info")
+    parser.add_argument("--remove_unused", action="store_true", help="remove unused preferences")
+    parser.add_argument("--column", help="name of column containing viewerstats info")
+    parser.add_argument("infiles", nargs="+", help="name of .tsv files to process")
+    args = parser.parse_args()
+
+    for fname in args.infiles:
+        print "process", fname
+        df = pd.read_csv(fname,sep='\t')
+        #print "DF", df.describe()
+        jstrs = df['RAW_LOG:BODY']
+        #print "JSTRS", jstrs.describe()
+        recs = []
+        for i,jstr in enumerate(jstrs):
+            recs.append(json.loads(jstr))
+        show_stats_by_key(recs,[])
+        show_stats_by_key(recs,["agent"])
+        if args.preferences:
+            print "\nSETTINGS.XML"
+            settings_sd = parse_settings_xml("settings.xml")
+            #for skey,svals in settings_sd.items(): 
+            #    print skey, "=>", svals
+            (all_str,_,_,_) = show_stats_by_key(recs,["preferences","settings"],settings_sd)
+            print
+
+            print "\nSETTINGS_PER_ACCOUNT.XML"
+            settings_pa_sd = parse_settings_xml("settings_per_account.xml")
+            show_stats_by_key(recs,["preferences","settings_per_account"],settings_pa_sd)
+
+            if args.remove_unused:
+                # quotestrings created by
+                # % find . -name '*.cpp' -o -name '*.hpp' -o -name '*.h' -o -name '*cmd_line.xml'  | xargs grep -ohP '[\">][a-zA-Z0-9]*[\"<]' | sort | uniq > ../scripts/metrics/quotestrings.out
+                with open("quotestrings.out", "r") as f:
+                    used_strings = f.readlines()
+                    used_strings = [u[1:-2] for u in used_strings]
+                    #print "\n".join(used_strings)
+                    unref_strings = list(set(all_str)-set(used_strings))
+                    print "\nUNREF_IN_CODE " + str(len(unref_strings)) + "\n"
+                    print "\n".join(unref_strings)
+                    settings_str = read_settings_xml("settings.xml")
+                    # Do this via direct string munging to generate minimal changeset
+                    settings_edited = remove_settings(settings_str,unref_strings)
+                    write_raw_settings_xml("settings.xml",settings_edited)
+                    
+
+
+
+        
+