Major restructuring of the database from dicts to tuples

This commit is contained in:
Krateng 2018-11-25 18:17:14 +01:00
parent 8a3a03b9f3
commit 91d4f79aa4

View File

@ -6,8 +6,70 @@ import datetime
DATABASE = [] DATABASE = []
ARTISTS = [] SCROBBLES = [] # Format: tuple(track_ref,timestamp,saved)
TRACKS = [] ARTISTS = [] # Format: artist
TRACKS = [] # Format: tuple(frozenset(artist_ref,...),title)
# by id
#def getScrobbleObject(o):
# #return {"artists":getTrackObject(SCROBBLES[o][0])["artists"],"title":getTrackObject(SCROBBLES[o][0])["title"],"time":SCROBBLES[o][1],"saved":SCROBBLES[o][2]}
# return {"artists":getTrackObject(SCROBBLES[o][0])["artists"],"title":getTrackObject(SCROBBLES[o][0])["title"],"time":SCROBBLES[o][1]}
#
#def getArtistObject(o):
# return ARTISTS[o]
#
#def getTrackObject(o):
# return {"artists":[getArtistObject(a) for a in TRACKS[o][0]],"title":TRACKS[o][1]}
# by object
def getScrobbleObject(o):
    """Expand a scrobble tuple into a plain dict.

    `o` is a scrobble in the SCROBBLES format (track_ref, timestamp, saved).
    The track reference is resolved through TRACKS and getTrackObject; the
    result carries "artists", "title" and "time". The saved flag is not part
    of the result.
    """
    track_ref = o[0]
    resolved = getTrackObject(TRACKS[track_ref])
    return {
        "artists": resolved["artists"],
        "title": resolved["title"],
        "time": o[1],
    }
def getArtistObject(o):
    """Return the artist entry unchanged.

    An ARTISTS entry is already the value that gets handed out, so no
    conversion takes place here.
    """
    return o
def getTrackObject(o):
    """Expand a track tuple into a plain dict.

    `o` is a track in the TRACKS format (frozenset of artist refs, title).
    Every artist reference is looked up in ARTISTS and passed through
    getArtistObject; the result carries "artists" (a list) and "title".
    """
    resolved = []
    for ref in o[0]:
        resolved.append(getArtistObject(ARTISTS[ref]))
    return {"artists": resolved, "title": o[1]}
def createScrobble(artists, title, time):
    """Append a scrobble to SCROBBLES with its saved flag set to False.

    The track reference comes from getTrackID(artists, title); the stored
    tuple is (track_ref, time, False).
    """
    SCROBBLES.append((getTrackID(artists, title), time, False))
def readScrobble(artists, title, time):
    """Append a scrobble to SCROBBLES with its saved flag set to True.

    The track reference comes from getTrackID(artists, title); the stored
    tuple is (track_ref, time, True).
    """
    SCROBBLES.append((getTrackID(artists, title), time, True))
def getArtistID(name):
    """Return the index of `name` in ARTISTS, registering it if it is new.

    Args:
        name: the artist entry to look up (compared by equality).

    Returns:
        The position of the artist in ARTISTS. An artist that is not yet
        present is appended, and the index of that new entry is returned.
    """
    try:
        return ARTISTS.index(name)
    except ValueError:
        # list.index signals "not found" with ValueError. Catching only that
        # keeps KeyboardInterrupt and unrelated errors from being mistaken
        # for an unknown artist.
        i = len(ARTISTS)
        ARTISTS.append(name)
        return i
def getTrackID(artists, title):
    """Return the index of a track in TRACKS, registering it if it is new.

    Args:
        artists: iterable of artist names; each one is resolved (and, if
            unknown, registered) through getArtistID.
        title: the track title.

    Returns:
        The position of (frozenset(artist_refs), title) in TRACKS. A track
        that is not yet present is appended, and the index of that new entry
        is returned.
    """
    # A frozenset makes the artist combination order-independent and keeps
    # the track tuple comparable against the entries already in TRACKS.
    artist_refs = frozenset(getArtistID(name=a) for a in artists)
    obj = (artist_refs, title)
    try:
        return TRACKS.index(obj)
    except ValueError:
        # list.index signals "not found" with ValueError. Catching only that
        # keeps KeyboardInterrupt and unrelated errors from being mistaken
        # for an unknown track.
        i = len(TRACKS)
        TRACKS.append(obj)
        return i
@route("/scrobbles") @route("/scrobbles")
@ -33,9 +95,25 @@ def get_scrobbles():
def get_tracks(): def get_tracks():
artist = request.query.get("artist") artist = request.query.get("artist")
global TRACKS
# turn the tupel of frozensets into a jsonable object # turn the tupel of frozensets into a jsonable object
tracklist = [{"artists":list(a[0]),"title":a[1]} for a in TRACKS] #tracklist = [{"artists":list(a[0]),"title":a[1]} for a in TRACKS]
ls = [t for t in tracklist if (artist in t["artists"]) or (artist==None)]
#ls = [t for t in tracklist if (artist in t["artists"]) or (artist==None)]
### WHICH ONE IS FASTER
import time
# Option 1
ls = [getTrackObject(t) for t in TRACKS if (artist in t[0]) or (artist==None)]
# Option 2 is a bit more elegant but much slower
#tracklist = [getTrackObject(t) for t in TRACKS]
#ls = [t for t in tracklist if (artist in t["artists"]) or (artist==None)]
return {"list":ls} return {"list":ls}
@route("/artists") @route("/artists")
@ -48,18 +126,52 @@ def get_artists():
def get_charts(): def get_charts():
since = request.query.get("since") since = request.query.get("since")
to = request.query.get("to") to = request.query.get("to")
results = db_query(since=since,to=to)
return {"list":results} #better do something here to sum up the totals on db level (before converting to dicts)
#results = db_query(since=since,to=to)
#return {"list":results}
# Starts the server # Starts the server
def runserver(DATABASE_PORT): def runserver(DATABASE_PORT):
reload() #reload()
buildh() #buildh()
build_db()
run(host='0.0.0.0', port=DATABASE_PORT, server='waitress') run(host='0.0.0.0', port=DATABASE_PORT, server='waitress')
def build_db():
    """Load every scrobble found in the CSV logs under logs/ into memory.

    Each file in logs/ whose name contains ".csv" is read line by line and
    every line is passed to readScrobble, which stores it with the saved
    flag set. The function only appends; scrobbles already in SCROBBLES are
    left in place.

    Expected columns per line: timestamp, artists separated by "/", title.
    An album column may follow but is not read.
    """
    for f in os.listdir("logs/"):
        if not (".csv" in f):
            continue

        # The context manager closes the log file even if a line fails to parse.
        with open("logs/" + f) as logfile:
            for l in logfile:
                l = l.replace("\n", "")
                # NOTE(review): a plain split means a comma inside an artist
                # or title shifts the columns - confirm the log format
                # guarantees this cannot happen.
                data = l.split(",")

                # The album is deliberately not used: it should not be a
                # defining part of the track (same song from album or EP),
                # but derived information.
                artists = data[1].split("/")
                title = data[2]
                time = int(data[0])

                readScrobble(artists, title, time)
# builds database of artists and tracks # builds database of artists and tracks
# UNUSED as it is very resource-heavy, use buildh() instead # UNUSED as it is very resource-heavy, use buildh() instead
def build(): def build():
@ -105,6 +217,7 @@ def build():
# builds database of artists and tracks # builds database of artists and tracks
# uses better data types to quickly find all unique tracks # uses better data types to quickly find all unique tracks
# now also UNUSED since we build everything in one step with build_db()
def buildh(): def buildh():
global ARTISTS global ARTISTS
global TRACKS global TRACKS
@ -130,6 +243,7 @@ def buildh():
# Rebuilds the database from disk, keeps cached entries # Rebuilds the database from disk, keeps cached entries
# unused, this is now done in build_db()
def reload(): def reload():
newdb = [t for t in DATABASE if not t["saved"]] newdb = [t for t in DATABASE if not t["saved"]]
@ -174,7 +288,7 @@ def flush():
# Queries the database # Queries the database
def db_query(artist=None,title=None,since=0,to=9999999999): def db_query(artist=None,track=None,since=0,to=9999999999):
if isinstance(since, str): if isinstance(since, str):
sdate = [int(x) for x in since.split("/")] sdate = [int(x) for x in since.split("/")]
date = [1970,1,1,0,0] date = [1970,1,1,0,0]
@ -186,8 +300,18 @@ def db_query(artist=None,title=None,since=0,to=9999999999):
date[:len(sdate)] = sdate date[:len(sdate)] = sdate
to = int(datetime.datetime(date[0],date[1],date[2],date[3],date[4],tzinfo=datetime.timezone.utc).timestamp()) to = int(datetime.datetime(date[0],date[1],date[2],date[3],date[4],tzinfo=datetime.timezone.utc).timestamp())
thingsweneed = ["artists","title","time"] # this is not meant as a search function. we *can* query the db with a string, but it only works if it matches exactly (and title string simply picks the first track with that name)
return [{key:t[key] for key in thingsweneed} for t in DATABASE if (artist in t["artists"] or artist==None) and (t["title"]==title or title==None) and (since < t["time"] < to)] if isinstance(artist, str):
artist = ARTISTS.index(artist)
if isinstance(track, str):
track = TRACKS.index(track)
return [getScrobbleObject(s) for s in SCROBBLES if (s[0] == track or track==None) and (artist in TRACKS[s[0]][0] or artist==None) and (since < s[1] < to)]
# pointless to check for artist when track is checked because every track has a fixed set of artists, but it's more elegant this way
#thingsweneed = ["artists","title","time"]
#return [{key:t[key] for key in thingsweneed} for t in DATABASE if (artist in t["artists"] or artist==None) and (t["title"]==title or title==None) and (since < t["time"] < to)]
# Search for strings # Search for strings
def db_search(query,type=None): def db_search(query,type=None):