summaryrefslogtreecommitdiff
path: root/scripts/metrics
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/metrics')
-rwxr-xr-xscripts/metrics/viewerstats.py226
1 files changed, 226 insertions, 0 deletions
diff --git a/scripts/metrics/viewerstats.py b/scripts/metrics/viewerstats.py
new file mode 100755
index 0000000000..f7be3d967e
--- /dev/null
+++ b/scripts/metrics/viewerstats.py
@@ -0,0 +1,226 @@
+#!runpy.sh
+
+"""\
+
+This module contains code for analyzing ViewerStats data as uploaded by the viewer.
+
+$LicenseInfo:firstyear=2021&license=viewerlgpl$
+Second Life Viewer Source Code
+Copyright (C) 2021, Linden Research, Inc.
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation;
+version 2.1 of the License only.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA
+$/LicenseInfo$
+"""
+
+import argparse
+import numpy as np
+import pandas as pd
+import json
+from collections import Counter, defaultdict
+from llbase import llsd
+import io
+import re
+import os
+import sys
+
def show_stats_by_key(recs, indices, settings_sd=None):
    """Print frequency statistics for the dict found at recs[i][indices...].

    For each key in that sub-dict, prints how many records set it and the
    five most common values.  If settings_sd (a parsed settings.xml LLSD
    map) is given, also prints each setting's default value and reports
    keys that are unused or unrecognized.

    Args:
        recs: list of per-viewer stat records (nested dicts).
        indices: path of keys to descend before counting, e.g. ["agent"].
        settings_sd: optional mapping of setting name -> {"Value", "Type", ...}.

    Returns:
        () when settings_sd is None; otherwise a 4-tuple
        (all setting keys, unused string keys, unused non-string keys,
         unrecognized keys).
    """
    result = ()
    cnt = Counter()
    per_key_cnt = defaultdict(Counter)
    for r in recs:
        try:
            d = r
            for idx in indices:
                d = d[idx]
            for k, v in d.items():
                if isinstance(v, dict):
                    continue
                cnt[k] += 1
                if isinstance(v, list):
                    # Lists are unhashable; count them as tuples instead.
                    v = tuple(v)
                per_key_cnt[k][v] += 1
        except Exception as e:
            # NOTE(review): d/k/v may be unbound if the failure happened
            # before they were assigned; this is best-effort debug output.
            print("err", e)
            print("d", d, "k", k, "v", v)
            raise
    mc = cnt.most_common()
    print("=========================")
    keyprefix = ""
    if len(indices) > 0:
        keyprefix = ".".join(indices) + "."
    for i, m in enumerate(mc):
        k = m[0]
        bigc = m[1]
        unset_cnt = len(recs) - bigc
        kmc = per_key_cnt[k].most_common(5)
        print(i, keyprefix + str(k), bigc)
        if settings_sd is not None and k in settings_sd and "Value" in settings_sd[k]:
            print("  ", "default", settings_sd[k]["Value"], "count", unset_cnt)
        for v in kmc:
            print("  ", "value", v[0], "count", v[1])
    if settings_sd is not None:
        print("Total keys in settings", len(settings_sd.keys()))
        unused_keys = list(set(settings_sd.keys()) - set(cnt.keys()))
        unused_keys_non_str = [k for k in unused_keys if settings_sd[k]["Type"] != "String"]
        unused_keys_str = [k for k in unused_keys if settings_sd[k]["Type"] == "String"]

        # Things that no one in the sample has set to a non-default value.
        # Possible candidates for removal.
        print("\nUnused_keys_non_str", len(unused_keys_non_str))
        print("======================")
        print("\n".join(sorted(unused_keys_non_str)))

        # Strings are not currently logged, so we have no info on usage.
        print("\nString keys (usage unknown)", len(unused_keys_str))
        print("======================")
        print("\n".join(sorted(unused_keys_str)))

        # Things that someone has set but that aren't recognized settings.
        unrec_keys = list(set(cnt.keys()) - set(settings_sd.keys()))
        print("\nUnrecognized keys", len(unrec_keys))
        print("======================")
        print("\n".join(sorted(unrec_keys)))

        result = (settings_sd.keys(), unused_keys_str, unused_keys_non_str, unrec_keys)
    return result
+
def parse_settings_xml(fname):
    """Parse an LLSD settings file from the viewer's app_settings directory.

    Assumes the working directory is scripts/metrics.
    """
    path = "../../indra/newview/app_settings/" + fname
    with open(path, "r") as settings_file:
        return llsd.parse_xml(settings_file.read())
+
def read_raw_settings_xml(fname):
    """Return the raw (unparsed) text of an app_settings file.

    Assumes the working directory is scripts/metrics.
    """
    path = "../../indra/newview/app_settings/" + fname
    with open(path, "r") as settings_file:
        return settings_file.read()
+
def write_settings_xml(fname, contents):
    """Serialize `contents` as pretty-printed LLSD XML into app_settings/fname.

    Assumes the working directory is scripts/metrics.
    """
    fname = "../../indra/newview/app_settings/" + fname
    # `with` closes the file on exit; the original's explicit f.close()
    # inside the with-block was a redundant no-op and has been dropped.
    with open(fname, "w") as f:
        f.write(llsd.format_pretty_xml(contents))
+
def write_raw_settings_xml(fname, string):
    """Write raw settings XML text to app_settings/fname with Unix newlines.

    Accepts either text or latin-1-encoded bytes.  The original always
    called string.decode('latin1'), which assumed a Python 2 byte string
    and raises AttributeError on text; decode only when actually given
    bytes.  Assumes the working directory is scripts/metrics.
    """
    fname = "../../indra/newview/app_settings/" + fname
    if isinstance(string, bytes):
        string = string.decode("latin1")
    # `with` closes the file; the redundant explicit close() is gone.
    with io.open(fname, "w", newline='\n') as f:
        f.write(string)
+
def remove_settings(string, to_remove):
    """Delete the <key>NAME</key>...</map> entry for each name in to_remove.

    Operates on the raw XML text (rather than re-serializing the LLSD) so
    the resulting diff against the original file stays minimal.

    Args:
        string: raw settings.xml text.
        to_remove: iterable of setting names to strip out.

    Returns:
        The text with each matching entry removed.
    """
    for name in to_remove:
        # re.escape guards against regex metacharacters in a setting name;
        # ordinary alphanumeric names match exactly as before.  DOTALL lets
        # '.' span the newlines inside an entry, and the non-greedy '.*?'
        # stops at the first closing </map>.  (The original passed
        # re.S|re.DOTALL, which is the same flag twice.)
        pattern = r"<key>" + re.escape(name) + r"<.*?</map>\n"
        string = re.sub(pattern, "", string, flags=re.DOTALL)
    return string
+
def get_used_strings(root_dir):
    """Collect identifier-like strings referenced anywhere under root_dir.

    Scans every file (except the settings definition files themselves) for
    tokens wrapped in quotes or XML tags, e.g. "RenderFoo" or >RenderFoo<,
    and returns them as a set.  Used to decide which settings the codebase
    still references.
    """
    used_str = set()
    skipped_ext = set()
    # Pre-compile the token pattern once instead of per line.
    token_re = re.compile(r'[>\"]([A-Za-z0-9_]+)[\"<]')
    for dir_name, sub_dir_list, file_list in os.walk(root_dir):
        for fname in file_list:
            # Don't let the settings definitions themselves count as uses.
            if fname in ["settings.xml", "settings.xml.edit", "settings_per_account.xml"]:
                print("skip", fname)
                continue

            full_name = os.path.join(dir_name, fname)

            # errors="ignore": the tree contains binary files that would
            # otherwise raise UnicodeDecodeError when read as text.
            with open(full_name, "r", errors="ignore") as f:
                for line in f:
                    for m in token_re.findall(line):
                        used_str.add(m)
    print("skipped extensions", skipped_ext)
    print("got used_str", len(used_str))
    return used_str
+
+
if __name__ == "__main__":

    parser = argparse.ArgumentParser(description="process tab-separated table containing viewerstats logs")
    parser.add_argument("--verbose", action="store_true", help="verbose flag")
    parser.add_argument("--preferences", action="store_true", help="analyze preference info")
    parser.add_argument("--remove_unused", action="store_true", help="remove unused preferences")
    parser.add_argument("--column", help="name of column containing viewerstats info")
    parser.add_argument("infiles", nargs="+", help="name of .tsv files to process")
    args = parser.parse_args()

    # --remove_unused needs the settings list produced by the
    # --preferences pass; the original crashed with a NameError on
    # `all_str` if run without it.
    if args.remove_unused and not args.preferences:
        parser.error("--remove_unused requires --preferences")

    for fname in args.infiles:
        print("process", fname)
        df = pd.read_csv(fname, sep='\t')
        # Each row's RAW_LOG:BODY column holds one JSON-encoded stats record.
        jstrs = df['RAW_LOG:BODY']
        recs = [json.loads(jstr) for jstr in jstrs]
        show_stats_by_key(recs, [])
        show_stats_by_key(recs, ["agent"])
        if args.preferences:
            print("\nSETTINGS.XML")
            settings_sd = parse_settings_xml("settings.xml")
            (all_str, _, _, _) = show_stats_by_key(recs, ["preferences", "settings"], settings_sd)
            print()

        if args.remove_unused:
            # Walk the codebase looking for referenced setting names.
            all_str_set = set(all_str)
            used_strings_set = set(get_used_strings("../../indra"))
            unref_strings = all_str_set - used_strings_set
            # Some settings names are generated by appending to a prefix;
            # treat a setting as used if any prefix (>= 6 chars) is both a
            # known setting and referenced in code.
            prefix_used = set()
            print("checking unref_strings", len(unref_strings))
            for u in unref_strings:
                for k in range(6, len(u)):
                    prefix = u[0:k]
                    if prefix in all_str_set and prefix in used_strings_set:
                        prefix_used.add(u)
            print("PREFIX_USED", len(prefix_used), ",".join(list(prefix_used)))
            print()
            unref_strings = unref_strings - prefix_used

            print("\nUNREF_IN_CODE " + str(len(unref_strings)) + "\n")
            print("\n".join(list(unref_strings)))
            settings_str = read_raw_settings_xml("settings.xml")
            # Do this via direct string munging to generate a minimal changeset.
            settings_edited = remove_settings(settings_str, unref_strings)
            write_raw_settings_xml("settings.xml.edit", settings_edited)