Improved performance of database build and readability (namedtuples)

This commit is contained in:
Krateng 2019-04-07 14:43:36 +02:00
parent a6472c11b0
commit 4701228c3f

View File

@ -15,6 +15,7 @@ import datetime
import sys import sys
import unicodedata import unicodedata
import json import json
from collections import namedtuple
# url handling # url handling
from importlib.machinery import SourceFileLoader from importlib.machinery import SourceFileLoader
import urllib import urllib
@ -27,12 +28,17 @@ dbserver = Bottle()
# In-memory database. Entries reference each other by list index.
SCROBBLES = []  # Format: Scrobble(track=track_ref, timestamp=timestamp, saved=saved)
ARTISTS = []  # Format: artist
TRACKS = []  # Format: Track(artists=frozenset(artist_ref,...), title=title)

# Record types for the lists above; still plain tuples, so positional
# indexing (x[0], x[1], ...) keeps working alongside attribute access.
Track = namedtuple("Track", ["artists", "title"])
Scrobble = namedtuple("Scrobble", ["track", "timestamp", "saved"])

### OPTIMIZATION
SCROBBLESDICT = {}  # timestamps to scrobble mapping
STAMPS = []  # sorted
#STAMPS_SET = set()  # as set for easier check if exists
# Lower-cased lookup lists used for case-insensitive matching, so the
# lower-cased form does not have to be rebuilt on every lookup.
TRACKS_LOWER = []
ARTISTS_LOWER = []

MEDALS = {}  # literally only changes once per year, no need to calculate that on the fly
MEDALS_TRACKS = {}
@ -65,15 +71,15 @@ def checkAPIkey(k):
#### ####
def getScrobbleObject(o):
    """Convert an internal scrobble record into a plain dict.

    o is a Scrobble namedtuple (fields: track, timestamp, saved), where
    track is an index into TRACKS.

    Returns a dict with the keys "artists", "title" and "time".
    """
    track = getTrackObject(TRACKS[o.track])
    # The namedtuple field is called "timestamp"; only the output key is "time".
    return {"artists": track["artists"], "title": track["title"], "time": o.timestamp}
def getArtistObject(o):
    """Return the outward-facing representation of an artist entry.

    Artist entries are returned unchanged.
    """
    return o
def getTrackObject(o):
    """Convert an internal track record into a plain dict.

    o is a Track namedtuple (fields: artists, title), where artists is a
    collection of indices into ARTISTS.

    Returns a dict with the keys "artists" (a list) and "title".
    """
    artists = [getArtistObject(ARTISTS[a]) for a in o.artists]
    return {"artists": artists, "title": o.title}
#### ####
@ -86,7 +92,7 @@ def createScrobble(artists,title,time,volatile=False):
while (time in SCROBBLESDICT): while (time in SCROBBLESDICT):
time += 1 time += 1
i = getTrackID(artists,title) i = getTrackID(artists,title)
obj = (i,time,volatile) # if volatile generated, we simply pretend we have already saved it to disk obj = Scrobble(i,time,volatile) # if volatile generated, we simply pretend we have already saved it to disk
#SCROBBLES.append(obj) #SCROBBLES.append(obj)
# immediately insert scrobble correctly so we can guarantee sorted list # immediately insert scrobble correctly so we can guarantee sorted list
index = insert(SCROBBLES,obj,key=lambda x:x[1]) index = insert(SCROBBLES,obj,key=lambda x:x[1])
@ -100,7 +106,7 @@ def readScrobble(artists,title,time):
while (time in SCROBBLESDICT): while (time in SCROBBLESDICT):
time += 1 time += 1
i = getTrackID(artists,title) i = getTrackID(artists,title)
obj = (i,time,True) obj = Scrobble(i,time,True)
SCROBBLES.append(obj) SCROBBLES.append(obj)
SCROBBLESDICT[time] = obj SCROBBLESDICT[time] = obj
#STAMPS.append(time) #STAMPS.append(time)
@ -117,29 +123,34 @@ def getArtistID(name):
except: except:
pass pass
try: try:
return [a.lower() for a in ARTISTS].index(objlower) return ARTISTS_LOWER.index(objlower)
except: except:
i = len(ARTISTS) i = len(ARTISTS)
ARTISTS.append(obj) ARTISTS.append(obj)
ARTISTS_LOWER.append(objlower)
return i return i
def getTrackID(artists,title):
    """Return the index of a track in TRACKS, registering it if unknown.

    artists is an iterable of artist names; each is resolved to an index
    via getArtistID. title is the track title.

    An exact match is tried first, then a match that ignores the case of
    the title. If neither is found, the track is appended to TRACKS (and
    its lower-cased form to TRACKS_LOWER) and the new index is returned.
    """
    artistset = frozenset(getArtistID(name=a) for a in artists)
    obj = Track(artists=artistset, title=title)
    objlower = Track(artists=artistset, title=title.lower())

    # Exact match first. list.index raises ValueError when nothing matches.
    try:
        return TRACKS.index(obj)
    except ValueError:
        pass

    # Case-insensitive match against the lower-cased list.
    # NOTE(review): assumes TRACKS_LOWER is kept index-aligned with TRACKS
    # everywhere it is modified - confirm against the rest of the file.
    try:
        return TRACKS_LOWER.index(objlower)
    except ValueError:
        pass

    i = len(TRACKS)
    TRACKS.append(obj)
    # Store the lower-cased form here; appending obj would make later
    # case-insensitive lookups miss and create duplicate tracks.
    TRACKS_LOWER.append(objlower)
    return i
@ -149,6 +160,8 @@ def getTrackID(artists,title):
######## ########
######## ########
## HTTP requests and their associated functions ## HTTP requests and their associated functions
@ -292,7 +305,7 @@ def get_tracks(artist=None):
artistid = None artistid = None
# Option 1 # Option 1
return [getTrackObject(t) for t in TRACKS if (artistid in t[0]) or (artistid==None)] return [getTrackObject(t) for t in TRACKS if (artistid in t.artists) or (artistid==None)]
# Option 2 is a bit more elegant but much slower # Option 2 is a bit more elegant but much slower
#tracklist = [getTrackObject(t) for t in TRACKS] #tracklist = [getTrackObject(t) for t in TRACKS]
@ -788,7 +801,6 @@ def build_db():
#start regular tasks #start regular tasks
update_medals() update_medals()
scheduletest()