# diff --git a/database.py b/database.py index 46ad883..3d51ba7 100644 --- a/database.py +++ b/database.py @@ -6,6 +6,7 @@ import os import datetime from cleanup import * from utilities import * +from doreah.logging import log from malojatime import * import sys import unicodedata
# diff --git a/doreah/logging.py b/doreah/logging.py new file mode 100644 index 0000000..183ded0 --- /dev/null +++ b/doreah/logging.py @@ -0,0 +1,102 @@
import datetime
import inspect
import os

from ._internal import defaultarguments, gopen

# current settings of this module, filled by config()
_config = {}

# entries collected while output is locked, and the lock flag itself
_queue = []
_locked = False


# Store the logging settings in the module configuration
# logfolder         folder to store logfiles in
# timeformat        strftime format for log files
# defaultmodule     name for the main running script
# verbosity         higher means more (less important) messages are shown on console
def config(logfolder="logs", timeformat="%Y/%m/%d %H:%M:%S", defaultmodule="main", verbosity=0):
    # the dict is only mutated, never rebound, so no global statement is needed
    _config.update(
        logfolder=logfolder,
        timeformat=timeformat,
        defaultmodule=defaultmodule,
        verbosity=verbosity,
    )


# initial config on import, set everything to default
config()


# Log entry
# module        allows discrimination between modules of a program. Will be prepended in console output and will determine the separate file for disk storage
#               defaults to actual name of the calling module or "main" for the main script
# header        determines the hierarchical position of the entry.
# indent        adds indent to the log entry
# importance    low means important.
#               if higher than the configured verbosity, entry will not be shown on console
def log(*msgs, module=None, header=None, indent=0, importance=0):
    """Write a log entry to the console and to the per-module log file.

    msgs        objects to log; each one is converted with str() and becomes
                its own line. Calling log() without messages logs one empty line.
    module      name printed in brackets on the console and used as the name of
                the log file. Defaults to the name of the calling module, or to
                the configured "defaultmodule" when called from the main script.
    header      1 or 2 surrounds the messages with the matching header decoration.
    indent      number of tab characters put in front of every message.
    importance  the entry is printed to the console only if this is not higher
                than the configured verbosity; it is written to the file either way.

    While the module-level _locked flag is set nothing is printed or written;
    the entries are appended to _queue until flush() is called.
    """
    now = datetime.datetime.utcnow().strftime(_config["timeformat"])

    # make it easier to log data structures and such; log() alone adds an empty line
    msgs = tuple(str(msg) for msg in msgs) or ("",)

    # header formatting
    if header == 2:
        msgs = ("", "", "####") + msgs + ("####", "")
    elif header == 1:
        msgs = ("", "", "", "# # # # #", "") + msgs + ("", "# # # # #", "", "")

    prefix = "\t" * indent

    if module is None:
        # stack index 2: 0 is the helper, 1 is log(), 2 is whoever called log()
        module = _caller_module(2)

    console = importance <= _config["verbosity"]

    if _locked:
        for msg in msgs:
            _queue.append({"time": now, "prefix": prefix, "msg": msg, "module": module, "console": console})
    else:
        _emit(module, now, prefix, msgs, console)


# Name of the module that owns the frame at the given index of the call stack.
# "__main__" is replaced by the configured default module name; if the module
# cannot be determined, "interpreter" is used.
def _caller_module(stackindex):
    try:
        name = inspect.getmodule(inspect.stack()[stackindex][0]).__name__
    except Exception:
        # getmodule() returns None for code without a module; the stack may also be too short
        return "interpreter"
    if name == "__main__":
        return _config["defaultmodule"]
    return name


# Print the messages if wanted and append them, timestamped, to the module's log file.
def _emit(module, time, prefix, msgs, console):
    if console:
        for msg in msgs:
            print("[" + module + "] " + prefix + msg)

    logfilename = _config["logfolder"] + "/" + module + ".log"
    # NOTE(review): no folder is created here; presumably gopen takes care of that - confirm
    with gopen(logfilename, "a") as logfile:
        for msg in msgs:
            logfile.write(time + " " + prefix + msg + "\n")


def flush():
    """Output every entry that was queued while logging was locked, then empty the queue."""
    global _queue
    for entry in _queue:
        _emit(entry["module"], entry["time"], entry["prefix"], (entry["msg"],), entry["console"])

    _queue = []


# Quicker way to add header
# The calling module is resolved here: seen from log(), the caller would always
# be this logging module itself.
def logh1(*args, **kwargs):
    if kwargs.get("module") is None:
        kwargs["module"] = _caller_module(2)
    return log(*args, **kwargs, header=1)


def logh2(*args, **kwargs):
    if kwargs.get("module") is None:
        kwargs["module"] = _caller_module(2)
    return log(*args, **kwargs, header=2)

# diff --git
# a/doreah/persistence.py b/doreah/persistence.py new file mode 100644 index 0000000..1c10363 --- /dev/null +++ b/doreah/persistence.py @@ -0,0 +1,40 @@
import pickle
import os

from ._internal import defaultarguments, gopen

_config = {}


# set configuration
# folder    folder the saved objects are stored in
def config(folder="storage"):
    _config["folder"] = folder


# initial config on import, set everything to default
config()


# Pickle data into the file <folder>/<name>.gilly
# data      object to store
# name      file name without extension
# folder    NOTE(review): presumably filled from the configured folder by
#           defaultarguments when omitted - confirm in ._internal
@defaultarguments(_config, folder="folder")
def save(data, name, folder):

    filename = os.path.join(folder, name + ".gilly")

    # with-block so the file is closed even if writing fails
    with gopen(filename, "wb") as fl:
        fl.write(pickle.dumps(data))


# Read back an object stored with save()
# name      file name without extension
# folder    see save()
# Returns the stored object, or None if the file cannot be opened or unpickled.
@defaultarguments(_config, folder="folder")
def load(name, folder):

    filename = os.path.join(folder, name + ".gilly")

    # NOTE(review): pickle.loads executes code embedded in the file; only load
    # files this program wrote itself
    try:
        with gopen(filename, "rb") as fl:
            return pickle.loads(fl.read())
    except Exception:
        # best effort: a missing or damaged file yields None. The file is only
        # closed if it was opened, so a failed open no longer ends in a NameError
        return None


# diff --git a/doreah/timing.py b/doreah/timing.py new file mode 100644 index 0000000..26fb862 --- /dev/null +++ b/doreah/timing.py @@ -0,0 +1,44 @@
import time

from ._internal import defaultarguments

_config = {}


# set configuration
# si    0 means seconds, 1 ms, 2 μs, 3 ns etc
def config(si=0):
    _config["si"] = si


# initial config on import, set everything to default
config()


# Take clock. Returns time passed since last call of this function. if called with an identifier, will only
# consider calls with that identifier. No identifier means any call is valid.
# identifiers   arbitrary strings to remember different timers. guaranteed to set all timers to exactly the same time for
#               all identifiers in one call.
#               will return tuple of all identifiers, singular value if only one identifier
def clock(*identifiers, lastcalls={None: None}):
    """Return the time passed since the previous call, per identifier.

    identifiers     names of the timers to read and restart. Without any
                    identifier the timer None is used, which is restarted by
                    every call and therefore measures "since the last call".
    lastcalls       storage of the last time stamps. The mutable default is
                    intentional: it is the memory shared between calls, so
                    callers normally do not pass it.

    The elapsed time is in seconds multiplied by 1000 ** si (si from the module
    configuration). A timer that has never been taken yields None. One
    identifier (or none) gives a single value, several give a tuple in the
    order of the identifiers.
    """
    if not identifiers:
        identifiers = (None,)

    now = time.time()
    scale = 1000 ** _config["si"]

    # read all old stamps before any of them is overwritten below
    results = tuple(
        None if last is None else (now - last) * scale
        for last in (lastcalls.get(i) for i in identifiers)
    )
    if len(results) == 1:
        results = results[0]

    # set new stamps, all to exactly the same time
    for i in identifiers:
        lastcalls[i] = now
    lastcalls[None] = now  # always save last overall call so we can directly access it

    return results


# Take clock like clock() and print the result as "<name>: <result>"
def clockp(name, *identifiers):
    # not called "time", which would hide the time module inside this function
    elapsed = clock(*identifiers)
    print(name + ": " + str(elapsed))


# diff --git a/doreah/tsv.py b/doreah/tsv.py new file mode 100644 index 0000000..739154e --- /dev/null +++ b/doreah/tsv.py @@ -0,0 +1,115 @@
import os

from ._internal import defaultarguments

_config = {}

# set configuration
# defaultextension  files with this extension will be regarded as valid files. can be overwritten per request.
# comments          whether files may include commenting (indicated by #)
# multitab          whether fields can be separated by multiple tabs (this makes empty fields impossible except when trailing)
def config(defaultextension=".tsv", comments=True, multitab=True):
    _config["defaultextension"] = defaultextension
    _config["comments"] = comments
    _config["multitab"] = multitab


# initial config on import, set everything to default
config()


# accepted names for the column types of parse()
_LIST_TYPES = ("list", "ls", "array")
_STRING_TYPES = ("string", "str", "text")
_INT_TYPES = ("int", "integer", "num", "number")
_BOOL_TYPES = ("bool", "boolean")
_TRUE_WORDS = ("true", "yes", "1", "y")


# Convert field number i of a split line to the requested column type.
# A missing field (and, for numbers, an unparsable one) gives the empty value
# of that type: [], "", 0 or False.
def _convert_field(kind, data, i):
    value = data[i] if i < len(data) else None

    if kind in _LIST_TYPES:
        return [] if value is None else value.split("␟")
    if kind in _STRING_TYPES:
        return "" if value is None else value
    if kind in _INT_TYPES:
        if value is None:
            return 0
        try:
            return int(value)
        except ValueError:
            return 0
    if kind in _BOOL_TYPES:
        return value is not None and value.lower() in _TRUE_WORDS

    raise TypeError("unknown column type: " + repr(kind))


# Read a tab-separated file into a list of rows
# filename      file to read; if it does not exist, the configured default extension is appended
# args          one type name per column: list/ls/array (split at ␟), string/str/text,
#               int/integer/num/number, bool/boolean (true, yes, 1, y count as True)
# comments      whether # starts an inline comment; \num and \hashtag are then turned back into #
# multitab      whether runs of tabs count as one separator (empty fields are dropped)
# Returns a list with one list of converted values per data line. Raises TypeError
# for an unknown type name, but only once a data line is actually converted.
@defaultarguments(_config, comments="comments", multitab="multitab")
def parse(filename, *args, comments, multitab):

    if not os.path.exists(filename):
        filename = filename + _config["defaultextension"]

    result = []
    # with-block so the file is closed even when a TypeError is raised below
    with open(filename) as f:
        for l in f:
            # NOTE(review): lines starting with # are skipped even if comments is
            # False, so such a line written by add_entry would be lost - confirm intent
            if l.startswith("#") or l.strip() == "":
                continue

            l = l.replace("\n", "")

            # if the file allows comments, we need to replace the escape sequence and properly stop parsing for inline comments
            if comments:
                l = l.split("#")[0]
                l = l.replace(r"\num", "#")
                l = l.replace(r"\hashtag", "#")

            # we either allow multiple tabs, or we don't (in which case empty fields are possible)
            if multitab:
                data = list(filter(None, l.split("\t")))
            else:
                data = l.split("\t")

            result.append([_convert_field(kind, data, i) for i, kind in enumerate(args)])

    return result


# Parse every file in a folder and return all rows in one list
# path          folder to read
# extension     only files ending with this are read
# args, kwargs  passed on to parse()
@defaultarguments(_config, extension="defaultextension")
def parse_all(path, *args, extension, **kwargs):

    result = []
    for f in os.listdir(path + "/"):
        if f.endswith(extension):  # use "" if all files are valid
            result += parse(path + "/" + f, *args, **kwargs)

    return result


# Create an empty file unless something with that name already exists
def create(filename):

    if not os.path.exists(filename):
        with open(filename, "w"):
            pass


# Build the line for one entry: tabs inside values become spaces, values are
# joined by tabs, and # is escaped as \num if the file may contain comments.
def _format_line(a, comments):
    line = "\t".join([str(e).replace("\t", " ") for e in a])
    if comments:
        line = line.replace("#", r"\num")
    return line


# Append one entry (an iterable of values) as a line to the file, creating the file if needed
@defaultarguments(_config, comments="comments")
def add_entry(filename, a, comments):

    create(filename)

    with open(filename, "a") as f:
        f.write(_format_line(a, comments) + "\n")


# Append several entries to the file, one line each, creating the file if needed
@defaultarguments(_config, comments="comments")
def add_entries(filename, al, comments):

    create(filename)

    with open(filename, "a") as f:
        for a in al:
            f.write(_format_line(a, comments) + "\n")

# diff --git a/fixexisting.py b/fixexisting.py index d653b4a..5c4538e 100644 --- a/fixexisting.py +++ b/fixexisting.py @@ -1,7 +1,7 @@ import os import re from cleanup import CleanerAgent -from utilities import log +from doreah.logging import log import difflib wendigo = CleanerAgent() @@ -13,34 +13,34 @@ for fn in os.listdir("scrobbles/"): f = open("scrobbles/" + fn) fnew = open("scrobbles/" + fn + "_new","w") for l in f: - + a,t = re.sub(exp,r"\3",l), re.sub(exp,r"\5",l) r1,r2,r3 = re.sub(exp,r"\1\2",l),re.sub(exp,r"\4",l),re.sub(exp,r"\6\7",l) - + a = a.replace("␟",";") - + (al,t) = wendigo.fullclean(a,t) a = "␟".join(al) fnew.write(r1 + a + r2 + t + r3 + "\n") - + #print("Artists: " + a) #print("Title: " + t) #print("1: " + r1) #print("2: " + r2) #print("3: " + r3) - + f.close() fnew.close() - + #os.system("diff " + "scrobbles/" + fn + "_new" + " " + "scrobbles/" + fn) with open("scrobbles/" + fn + "_new","r") as newfile: with open("scrobbles/" + fn,"r") as oldfile: diff = difflib.unified_diff(oldfile.read().split("\n"),newfile.read().split("\n"),lineterm="") diff = list(diff)[2:] log("Diff for scrobbles/" + fn + "".join("\n\t" + d for d in
diff),module="fixer") - + os.rename("scrobbles/" + fn + "_new","scrobbles/" + fn) - + checkfile = open("scrobbles/" + fn + ".rulestate","w") checkfile.write(wendigo.checksums) checkfile.close() diff --git a/lastfmconverter.py b/lastfmconverter.py index 755632c..162234b 100644 --- a/lastfmconverter.py +++ b/lastfmconverter.py @@ -14,26 +14,26 @@ stamps = [99999999999999] for l in log: l = l.replace("\n","") data = l.split(",") - + artist = data[0] album = data[1] title = data[2] time = data[3] - - + + (artists,title) = c.fullclean(artist,title) - + artistsstr = "␟".join(artists) - - + + timeparts = time.split(" ") (h,m) = timeparts[3].split(":") - + months = {"Jan":1,"Feb":2,"Mar":3,"Apr":4,"May":5,"Jun":6,"Jul":7,"Aug":8,"Sep":9,"Oct":10,"Nov":11,"Dec":12} - + timestamp = int(datetime.datetime(int(timeparts[2]),months[timeparts[1]],int(timeparts[0]),int(h),int(m)).timestamp()) - - + + ## We prevent double timestamps in the database creation, so we technically don't need them in the files ## however since the conversion from lastfm to maloja is a one-time thing, we should take any effort to make the file as good as possible if (timestamp < stamps[-1]): @@ -43,24 +43,21 @@ for l in log: else: while(timestamp in stamps): timestamp -= 1 - - if (timestamp < stamps[-1]): + + if (timestamp < stamps[-1]): stamps.append(timestamp) else: stamps.insert(0,timestamp) - - + + entry = "\t".join([str(timestamp),artistsstr,title,album]) entry = entry.replace("#",r"\num") - + outputlog.write(entry) outputlog.write("\n") - + checksumfile.write(c.checksums) - + log.close() outputlog.close() checksumfile.close() - - - diff --git a/server.py b/server.py index 14e59a5..acb264b 100755 --- a/server.py +++ b/server.py @@ -10,6 +10,7 @@ from utilities import * from htmlgenerators import KeySplit # doreah toolkit from doreah import settings +from doreah.logging import log # technical from importlib.machinery import SourceFileLoader import _thread diff --git a/utilities.py b/utilities.py index 
d7a7efb..ab858fc 100644 --- a/utilities.py +++ b/utilities.py @@ -164,27 +164,29 @@ def addEntries(filename,al,escape=True): ### Logging +# now handled by doreah -def log(msg,module=None): - now = datetime.datetime.utcnow().strftime("%Y/%m/%d %H:%M:%S") - if module is None: - import inspect - module = inspect.getmodule(inspect.stack()[1][0]).__name__ - if module == "__main__": module = "mainserver" - print("[" + module + "] " + msg) - with open("logs/" + module + ".log","a") as logfile: - logfile.write(now + " " + msg + "\n") +#def log(msg,module=None): +# now = datetime.datetime.utcnow().strftime("%Y/%m/%d %H:%M:%S") +# if module is None: +# import inspect +# module = inspect.getmodule(inspect.stack()[1][0]).__name__ +# if module == "__main__": module = "mainserver" +# print("[" + module + "] " + msg) +# with open("logs/" + module + ".log","a") as logfile: +# logfile.write(now + " " + msg + "\n") ### not meant to be precise, just for a rough idea -measurement = 0 -def clock(*args): - import time - global measurement - now = time.time() - if len(args) > 0: - print(args[0] + ": " + str(now - measurement)) - measurement = now +# now handled by doreah +#measurement = 0 +#def clock(*args): +# import time +# global measurement +# now = time.time() +# if len(args) > 0: +# print(args[0] + ": " + str(now - measurement)) +# measurement = now diff --git a/website/artist.html b/website/artist.html index c563d2e..7546f66 100644 --- a/website/artist.html +++ b/website/artist.html @@ -5,7 +5,7 @@