#!runpy.sh

"""\

This module contains code for analyzing ViewerStats data as uploaded by the viewer.

$LicenseInfo:firstyear=2021&license=viewerlgpl$
Second Life Viewer Source Code
Copyright (C) 2021, Linden Research, Inc.

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation;
version 2.1 of the License only.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
$/LicenseInfo$
"""

import argparse
import numpy as np
import pandas as pd
import json
from collections import Counter, defaultdict
import llsd
import io
import re
import os
import sys

def show_stats_by_key(recs,indices,settings_sd = None):
    """Print a usage report for the keys found at a nested path in each record.

    recs is a sequence of nested dicts. indices is the list of keys to
    descend through in every record to reach the dict being analyzed; an
    empty list analyzes the top level of each record. Entries whose value is
    itself a dict are skipped, and list values are counted as tuples so they
    can be tallied.

    For every remaining key the report shows how many records contain it and
    its (up to) five most common values. When settings_sd (a mapping of
    setting name to a definition dict with "Type" and optionally "Value") is
    given, the report also lists settings never seen in the records and seen
    keys that are not known settings.

    Returns () when settings_sd is None, otherwise the tuple
    (settings_sd.keys(), unused_keys_str, unused_keys_non_str, unrec_keys)
    with the three lists sorted by name.

    Any exception raised while processing a record is reported along with the
    offending data and then re-raised unchanged.
    """
    result = ()
    cnt = Counter()
    per_key_cnt = defaultdict(Counter)
    for r in recs:
        d = r
        # Reset per record so the diagnostic print below can neither fail on
        # unbound names nor show stale values from an earlier record.
        k = v = None
        try:
            for idx in indices:
                d = d[idx]
            for k,v in d.items():
                if isinstance(v,dict):
                    continue
                cnt[k] += 1
                if isinstance(v,list):
                    # lists are unhashable; count them as tuples
                    v = tuple(v)
                per_key_cnt[k][v] += 1
        except Exception as e:
            print("err", e)
            print("d", d, "k", k, "v", v)
            raise
    print("=========================")
    keyprefix = ""
    if len(indices)>0:
        keyprefix = ".".join(indices) + "."
    for i,(k,bigc) in enumerate(cnt.most_common()):
        # Records that do not mention the key at all.
        unset_cnt = len(recs) - bigc
        print(i, keyprefix+str(k), bigc)
        if settings_sd is not None and k in settings_sd and "Value" in settings_sd[k]:
            print("    ", "default",settings_sd[k]["Value"],"count",unset_cnt)
        for value,value_cnt in per_key_cnt[k].most_common(5):
            print("    ", "value",value,"count",value_cnt)
    if settings_sd is not None:
        print("Total keys in settings", len(settings_sd.keys()))
        unused_keys = set(settings_sd.keys()) - set(cnt.keys())
        unused_keys_non_str = sorted(k for k in unused_keys if settings_sd[k]["Type"] != "String")
        unused_keys_str = sorted(k for k in unused_keys if settings_sd[k]["Type"] == "String")

        # Things that no one in the sample has set to a non-default value. Possible candidates for removal.
        print("\nUnused_keys_non_str", len(unused_keys_non_str))
        print(  "======================")
        print("\n".join(unused_keys_non_str))

        # Strings are not currently logged, so we have no info on usage.
        print("\nString keys (usage unknown)", len(unused_keys_str))
        print(  "======================")
        print("\n".join(unused_keys_str))

        # Things that someone has set but that aren't recognized settings.
        unrec_keys = sorted(set(cnt.keys()) - set(settings_sd.keys()))
        print("\nUnrecognized keys", len(unrec_keys))
        print(  "======================")
        print("\n".join(unrec_keys))

        result = (settings_sd.keys(), unused_keys_str, unused_keys_non_str, unrec_keys)
    return result

def parse_settings_xml(fname):
    """Parse the LLSD XML settings file app_settings/<fname> and return the result.

    The file is read as raw bytes and handed to llsd.parse_xml, so the
    parser itself decodes the XML rather than the platform's default
    text encoding.
    """
    # assume we're in scripts/metrics
    fname = "../../indra/newview/app_settings/" + fname
    with open(fname,"rb") as f:
        contents = f.read()
    return llsd.parse_xml(contents)

def read_raw_settings_xml(fname):
    """Return the full text of app_settings/<fname> without parsing it."""
    # assume we're in scripts/metrics
    settings_path = "../../indra/newview/app_settings/" + fname
    with open(settings_path,"r") as handle:
        return handle.read()

def write_settings_xml(fname, contents):
    """Serialize contents with llsd.format_pretty_xml and write it to app_settings/<fname>.

    The data is serialized before the file is opened, so a serialization
    error does not leave a truncated file behind.
    """
    # assume we're in scripts/metrics
    fname = "../../indra/newview/app_settings/" + fname
    formatted = llsd.format_pretty_xml(contents)
    # The serializer may return bytes rather than text (TODO confirm for the
    # llsd version in use); open the file in whichever mode matches so the
    # write cannot fail on a bytes/str mismatch.
    mode = "wb" if isinstance(formatted, bytes) else "w"
    with open(fname,mode) as f:
        f.write(formatted)

def write_raw_settings_xml(fname, string):
    """Write string verbatim to app_settings/<fname> using "\\n" line endings.

    string may be text or bytes; bytes are decoded as latin1 first. Text is
    written as-is, which is what read_raw_settings_xml returns.
    """
    # assume we're in scripts/metrics
    fname = "../../indra/newview/app_settings/" + fname
    # Only byte strings have (or need) decode(); str has no such method.
    if isinstance(string, bytes):
        string = string.decode('latin1')
    # newline='\n' disables platform newline translation on output.
    with io.open(fname,"w", newline='\n') as f:
        f.write(string)

def remove_settings(string, to_remove):
    """Return string with the definition of every setting in to_remove deleted.

    string is the raw text of a settings XML file; to_remove is an iterable
    of setting names. A definition is a <key>NAME</key> element immediately
    followed (whitespace aside) by a <map> element. The whole definition is
    removed: the indentation in front of the key, the key, the map up to its
    matching </map> (maps nested inside it are skipped over), and one
    trailing newline. Names are matched literally, and a key that is not
    followed by a map is left alone. Text that does not match is returned
    unchanged.
    """
    for name in to_remove:
        header = re.compile(r"[ \t]*<key>" + re.escape(name) + r"</key>\s*<map>")
        kept = []
        pos = 0
        while True:
            found = header.search(string, pos)
            if found is None:
                break
            end = _find_map_end(string, found.end())
            if end is None:
                # Unbalanced map: leave the rest of the text untouched.
                break
            if string.startswith("\n", end):
                end += 1
            kept.append(string[pos:found.start()])
            pos = end
        kept.append(string[pos:])
        string = "".join(kept)
    return string

# Map tags relevant to nesting depth; self-closing empty maps are matched
# only so that they can be ignored.
_MAP_TAG_RE = re.compile(r"<map\s*/>|</?map>")

def _find_map_end(text, pos):
    """Return the index just past the </map> closing a map opened right before pos, or None."""
    depth = 1
    for tag in _MAP_TAG_RE.finditer(text, pos):
        token = tag.group()
        if token == "<map>":
            depth += 1
        elif token == "</map>":
            depth -= 1
            if depth == 0:
                return tag.end()
    return None

def get_used_strings(root_dir):
    """Collect identifier-like strings that appear quoted or tag-delimited under root_dir.

    Every file below root_dir is scanned, except files named settings.xml,
    settings.xml.edit or settings_per_account.xml. A string is collected when
    it consists only of letters, digits and underscores, is preceded by '>'
    or '"', and is followed by '"' or '<'.

    Returns the set of collected strings.
    """
    skipped_names = ("settings.xml", "settings.xml.edit", "settings_per_account.xml")
    token_re = re.compile(r'[>\"]([A-Za-z0-9_]+)[\"<]')
    used_str = set()
    # Filtering by file extension is currently disabled, so this stays empty;
    # it is still reported below to keep the output format unchanged.
    skipped_ext = set()
    for dir_name, sub_dir_list, file_list in os.walk(root_dir):
        for fname in file_list:
            if fname in skipped_names:
                print("skip", fname)
                continue

            full_name = os.path.join(dir_name,fname)

            # latin1 maps every byte to a character, so binary or oddly
            # encoded files in the tree cannot raise a decoding error. The
            # pattern only matches ASCII, so the matches are unaffected.
            with open(full_name,"r",encoding="latin1") as f:
                for line in f:
                    used_str.update(token_re.findall(line))
    print("skipped extensions", skipped_ext)
    print("got used_str", len(used_str))
    return used_str
                
    
if __name__ == "__main__":
    # Command-line driver: report key usage for each input table and, on
    # request, compare against settings.xml and write a pruned copy of it.

    parser = argparse.ArgumentParser(description="process tab-separated table containing viewerstats logs")
    parser.add_argument("--verbose", action="store_true",help="verbose flag")
    parser.add_argument("--preferences", action="store_true", help="analyze preference info")
    parser.add_argument("--remove_unused", action="store_true", help="remove unused preferences")
    # The default is the column name this script previously hard-coded.
    parser.add_argument("--column", default="RAW_LOG:BODY",
                        help="name of column containing viewerstats info (default: %(default)s)")
    parser.add_argument("infiles", nargs="+", help="name of .tsv files to process")
    args = parser.parse_args()

    for fname in args.infiles:
        print("process", fname)
        df = pd.read_csv(fname,sep='\t')
        # Every cell of the selected column is decoded as one JSON record.
        recs = [json.loads(jstr) for jstr in df[args.column]]
        show_stats_by_key(recs,[])
        show_stats_by_key(recs,["agent"])
        if args.preferences:
            print("\nSETTINGS.XML")
            settings_sd = parse_settings_xml("settings.xml")
            (all_str,_,_,_) = show_stats_by_key(recs,["preferences","settings"],settings_sd)
            print()

            # Per-account settings analysis is currently disabled:
            #print("\nSETTINGS_PER_ACCOUNT.XML")
            #settings_pa_sd = parse_settings_xml("settings_per_account.xml")
            #show_stats_by_key(recs,["preferences","settings_per_account"],settings_pa_sd)

            # Note: --remove_unused only takes effect together with --preferences.
            if args.remove_unused:
                # walk codebase looking for strings
                all_str_set = set(all_str)
                used_strings_set = set(get_used_strings("../../indra"))
                unref_strings = all_str_set-used_strings_set
                # Some settings names are generated by appending to a prefix. Need to look for this case.
                prefix_used = set()
                print("checking unref_strings", len(unref_strings))
                for u in unref_strings:
                    # Candidate prefixes are at least 6 characters long and
                    # strictly shorter than the name itself.
                    if any(u[0:k] in all_str_set and u[0:k] in used_strings_set
                           for k in range(6,len(u))):
                        prefix_used.add(u)
                print("PREFIX_USED", len(prefix_used), ",".join(sorted(prefix_used)))
                print()
                unref_strings = unref_strings - prefix_used

                print("\nUNREF_IN_CODE " + str(len(unref_strings)) + "\n")
                print("\n".join(sorted(unref_strings)))
                settings_str = read_raw_settings_xml("settings.xml")
                # Do this via direct string munging to generate minimal changeset
                settings_edited = remove_settings(settings_str,unref_strings)
                write_raw_settings_xml("settings.xml.edit",settings_edited)