More catalog tweaks

This commit is contained in:
Kovid Goyal 2010-01-31 13:15:52 -07:00
parent ddb593a814
commit 4d1cfec950

View File

@ -4,7 +4,7 @@ from collections import namedtuple
from datetime import date from datetime import date
from xml.sax.saxutils import escape from xml.sax.saxutils import escape
from calibre import filesystem_encoding, prints, strftime from calibre import filesystem_encoding, prints, prepare_string_for_xml, strftime
from calibre.customize import CatalogPlugin from calibre.customize import CatalogPlugin
from calibre.customize.conversion import OptionRecommendation, DummyReporter from calibre.customize.conversion import OptionRecommendation, DummyReporter
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, Tag, NavigableString from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, Tag, NavigableString
@ -524,6 +524,7 @@ class EPUB_MOBI(CatalogPlugin):
self.opts.output_profile and \ self.opts.output_profile and \
self.opts.output_profile.startswith("kindle")) else False self.opts.output_profile.startswith("kindle")) else False
self.__genres = None self.__genres = None
self.__genre_tags_dict = None
self.__htmlFileList = [] self.__htmlFileList = []
self.__markerTags = self.getMarkerTags() self.__markerTags = self.getMarkerTags()
self.__ncxSoup = None self.__ncxSoup = None
@ -630,6 +631,13 @@ class EPUB_MOBI(CatalogPlugin):
self.__genres = val self.__genres = val
return property(fget=fget, fset=fset) return property(fget=fget, fset=fset)
@dynamic_property @dynamic_property
def genre_tags_dict(self):
def fget(self):
return self.__genre_tags_dict
def fset(self, val):
self.__genre_tags_dict = val
return property(fget=fget, fset=fset)
@dynamic_property
def htmlFileList(self): def htmlFileList(self):
def fget(self): def fget(self):
return self.__htmlFileList return self.__htmlFileList
@ -797,10 +805,13 @@ class EPUB_MOBI(CatalogPlugin):
os.path.join(self.catalogPath, file[0])) os.path.join(self.catalogPath, file[0]))
# Create the custom masthead image overwriting default # Create the custom masthead image overwriting default
try: # If failure, default mastheadImage.gif should still be in place
self.generate_masthead_image(os.path.join(self.catalogPath, 'images/mastheadImage.gif')) if self.generateForKindle:
except: try:
pass self.generate_masthead_image(os.path.join(self.catalogPath,
'images/mastheadImage.gif'))
except:
pass
def fetchBooksByTitle(self): def fetchBooksByTitle(self):
self.updateProgressFullStep("Fetching database") self.updateProgressFullStep("Fetching database")
@ -853,7 +864,11 @@ class EPUB_MOBI(CatalogPlugin):
this_title['date'] = strftime(u'%B %Y', record['pubdate'].timetuple()) this_title['date'] = strftime(u'%B %Y', record['pubdate'].timetuple())
this_title['timestamp'] = record['timestamp'] this_title['timestamp'] = record['timestamp']
if record['comments']: if record['comments']:
this_title['description'] = re.sub('&', '&', record['comments']) #this_title['description'] = re.sub('&', '&', record['comments'])
if re.search('<(?P<tag>.+)>.+</(?P=tag)>|<!--.+-->|<.+/>',record['comments']):
self.opts.log(" %d: %s (%s) contains suspect metadata" % \
(this_title['id'], this_title['title'],this_title['author']))
this_title['description'] = prepare_string_for_xml(record['comments'])
this_title['short_description'] = self.generateShortDescription(this_title['description']) this_title['short_description'] = self.generateShortDescription(this_title['description'])
else: else:
this_title['description'] = None this_title['description'] = None
@ -1003,7 +1018,8 @@ class EPUB_MOBI(CatalogPlugin):
for tag in title['tags']: for tag in title['tags']:
aTag = Tag(soup,'a') aTag = Tag(soup,'a')
aTag['href'] = "Genre%s.html" % re.sub("\W","",self.convertHTMLEntities(tag)) #print "aTag: %s" % "Genre_%s.html" % re.sub("\W","",tag.lower())
aTag['href'] = "Genre_%s.html" % re.sub("\W","",tag.lower())
aTag.insert(0,escape(NavigableString(tag))) aTag.insert(0,escape(NavigableString(tag)))
emTag = Tag(soup, "em") emTag = Tag(soup, "em")
emTag.insert(0, aTag) emTag.insert(0, aTag)
@ -1435,17 +1451,22 @@ class EPUB_MOBI(CatalogPlugin):
self.updateProgressFullStep("'Genres'") self.updateProgressFullStep("'Genres'")
# Filter out REMOVE_TAGS, sort # filtered_tags = {friendly:normalized, }
filtered_tags = self.filterDbTags(self.db.all_tags()) self.genre_tags_dict = self.filterDbTags(self.db.all_tags())
# Extract books matching filtered_tags # Extract books matching filtered_tags
genre_list = [] genre_list = []
for tag in filtered_tags: for friendly_tag in self.genre_tags_dict:
#print "\ngenerateHTMLByTags(): looking for books with friendly_tag '%s'" % friendly_tag
# tag_list => {'tag': '<normalized_genre_tag>', 'books':[{}, {}, {}]}
tag_list = {} tag_list = {}
tag_list['tag'] = tag tag_list['tag'] = self.genre_tags_dict[friendly_tag]
tag_list['books'] = [] tag_list['books'] = []
for book in self.booksByAuthor: for book in self.booksByAuthor:
if 'tags' in book and tag in book['tags']: # Scan each book for tag matching friendly_tag
#if 'tags' in book: print " evaluating %s with tags: %s" % (book['title'], book['tags'])
if 'tags' in book and friendly_tag in book['tags']:
#print " adding '%s'" % (book['title'])
this_book = {} this_book = {}
this_book['author'] = book['author'] this_book['author'] = book['author']
this_book['title'] = book['title'] this_book['title'] = book['title']
@ -1455,8 +1476,36 @@ class EPUB_MOBI(CatalogPlugin):
tag_list['books'].append(this_book) tag_list['books'].append(this_book)
if len(tag_list['books']): if len(tag_list['books']):
# Possible to have an empty tag list if the books were excluded genre_exists = False
genre_list.append(tag_list) book_not_in_genre = True
if not genre_list:
#print " genre_list empty, adding '%s'" % tag_list['tag']
genre_list.append(tag_list)
else:
# Check for existing_genre
for genre in genre_list:
if genre['tag'] == tag_list['tag']:
genre_exists = True
# Check to see if the book is already in this list
for existing_book in genre['books']:
if this_book['title'] == existing_book['title']:
#print "%s already in %s" % (this_book['title'], genre)
book_not_in_genre = False
break
break
if genre_exists:
if book_not_in_genre:
#print " adding %s to existing genre '%s'" % (this_book['title'],genre['tag'])
genre['books'].append(this_book)
else:
#print " appending genre '%s'" % tag_list['tag']
genre_list.append(tag_list)
if self.opts.verbose:
self.opts.log.info(" Genre summary: %d active genres" % len(genre_list))
for genre in genre_list:
self.opts.log.info(" %s: %d titles" % (genre['tag'], len(genre['books'])))
# Write the results # Write the results
# genre_list = [ [tag_list], [tag_list] ...] # genre_list = [ [tag_list], [tag_list] ...]
@ -1489,12 +1538,11 @@ class EPUB_MOBI(CatalogPlugin):
if not author in unique_authors: if not author in unique_authors:
unique_authors.append(author) unique_authors.append(author)
''' '''
# Write the genre book list as an article # Write the genre book list as an article
titles_spanned = self.generateHTMLByGenre(genre['tag'], True if index==0 else False, genre['books'], titles_spanned = self.generateHTMLByGenre(genre['tag'], True if index==0 else False, genre['books'],
"%s/Genre%s.html" % (self.contentDir, re.sub("\W","", self.convertHTMLEntities(genre['tag'])))) "%s/Genre_%s.html" % (self.contentDir, genre['tag']))
tag_file = "content/Genre%s.html" % (re.sub("\W","", self.convertHTMLEntities(genre['tag']))) tag_file = "content/Genre_%s.html" % genre['tag']
master_genre_list.append({'tag':genre['tag'], master_genre_list.append({'tag':genre['tag'],
'file':tag_file, 'file':tag_file,
'authors':unique_authors, 'authors':unique_authors,
@ -1586,7 +1634,7 @@ class EPUB_MOBI(CatalogPlugin):
def generateOPF(self): def generateOPF(self):
self.updateProgressFullStep("Saving OPF") self.updateProgressFullStep("Generating OPF")
header = ''' header = '''
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
@ -2107,9 +2155,6 @@ class EPUB_MOBI(CatalogPlugin):
self.updateProgressFullStep("NCX 'Genres'") self.updateProgressFullStep("NCX 'Genres'")
if not len(self.genres): if not len(self.genres):
self.opts.log.warn(" No genres found in tags.\n" self.opts.log.warn(" No genres found in tags.\n"
" No Genre section added to Catalog") " No Genre section added to Catalog")
@ -2136,13 +2181,12 @@ class EPUB_MOBI(CatalogPlugin):
navPointTag.insert(nptc, navLabelTag) navPointTag.insert(nptc, navLabelTag)
nptc += 1 nptc += 1
contentTag = Tag(ncx_soup,"content") contentTag = Tag(ncx_soup,"content")
contentTag['src'] = "content/Genre%s.html#section_start" % (re.sub("\W","", self.convertHTMLEntities(self.genres[0]['tag']))) contentTag['src'] = "content/Genre_%s.html#section_start" % self.genres[0]['tag']
navPointTag.insert(nptc, contentTag) navPointTag.insert(nptc, contentTag)
nptc += 1 nptc += 1
for genre in self.genres: for genre in self.genres:
# Add an article for each genre # Add an article for each genre
navPointVolumeTag = Tag(ncx_soup, 'navPoint') navPointVolumeTag = Tag(ncx_soup, 'navPoint')
navPointVolumeTag['class'] = "article" navPointVolumeTag['class'] = "article"
navPointVolumeTag['id'] = "genre-%s-ID" % genre['tag'] navPointVolumeTag['id'] = "genre-%s-ID" % genre['tag']
@ -2150,13 +2194,18 @@ class EPUB_MOBI(CatalogPlugin):
self.playOrder += 1 self.playOrder += 1
navLabelTag = Tag(ncx_soup, "navLabel") navLabelTag = Tag(ncx_soup, "navLabel")
textTag = Tag(ncx_soup, "text") textTag = Tag(ncx_soup, "text")
textTag.insert(0, self.formatNCXText(NavigableString(genre['tag'])))
# GwR *** Can this be optimized?
normalized_tag = None
for genre_tag in self.genre_tags_dict:
if self.genre_tags_dict[genre_tag] == genre['tag']:
normalized_tag = self.genre_tags_dict[genre_tag]
break
textTag.insert(0, self.formatNCXText(NavigableString(genre_tag)))
navLabelTag.insert(0,textTag) navLabelTag.insert(0,textTag)
navPointVolumeTag.insert(0,navLabelTag) navPointVolumeTag.insert(0,navLabelTag)
contentTag = Tag(ncx_soup, "content") contentTag = Tag(ncx_soup, "content")
genre_name = re.sub("\W","", self.convertHTMLEntities(genre['tag'])) contentTag['src'] = "content/Genre_%s.html#Genre_%s" % (normalized_tag, normalized_tag)
contentTag['src'] = "content/Genre%s.html#Genre%s" % (genre_name, genre_name)
navPointVolumeTag.insert(1, contentTag) navPointVolumeTag.insert(1, contentTag)
if self.generateForKindle: if self.generateForKindle:
@ -2268,16 +2317,10 @@ class EPUB_MOBI(CatalogPlugin):
def filterDbTags(self, tags): def filterDbTags(self, tags):
# Remove the special marker tags from the database's tag list, # Remove the special marker tags from the database's tag list,
# return sorted list of tags representing valid genres # return sorted list of normalized genre tags
def next_tag(tags): normalized_tags = []
for (i, tag) in enumerate(tags): friendly_tags = []
if i < len(tags) - 1:
yield tag + ", "
else:
yield tag
filtered_tags = []
for tag in tags: for tag in tags:
if tag[0] in self.markerTags: if tag[0] in self.markerTags:
continue continue
@ -2286,32 +2329,39 @@ class EPUB_MOBI(CatalogPlugin):
if tag == ' ': if tag == ' ':
continue continue
filtered_tags.append(tag) normalized_tags.append(re.sub('\W','',tag).lower())
friendly_tags.append(tag)
filtered_tags.sort()
# Enable this code to force certain tags to the front of the genre list genre_tags_dict = dict(zip(friendly_tags,normalized_tags))
if False:
for (i, tag) in enumerate(filtered_tags): # Test for multiple genres resolving to same normalized form
if tag == 'Fiction': normalized_set = set(normalized_tags)
filtered_tags.insert(0, (filtered_tags.pop(i))) for normalized in normalized_set:
elif tag == 'Nonfiction': if normalized_tags.count(normalized) > 1:
filtered_tags.insert(1, (filtered_tags.pop(i))) self.opts.log.warn(" Warning: multiple tags resolving to genre '%s':" % normalized)
else: for key in genre_tags_dict:
continue if genre_tags_dict[key] == normalized:
self.opts.log.warn(" %s" % key)
if self.verbose: if self.verbose:
self.opts.log.info(u' %d Genre tags in database (exclude_genre: %s):' % \ def next_tag(tags):
(len(filtered_tags), self.opts.exclude_genre)) for (i, tag) in enumerate(tags):
out_buf = '' if i < len(tags) - 1:
yield tag + ", "
else:
yield tag
for tag in next_tag(filtered_tags): self.opts.log.info(u' %d total genre tags in database (exclude_genre: %s):' % \
out_buf += tag (len(genre_tags_dict), self.opts.exclude_genre))
if len(out_buf) > 72:
self.opts.log(u' %s' % out_buf.rstrip())
out_buf = ''
self.opts.log(u' %s' % out_buf)
return filtered_tags # Display friendly/normalized genres
# friendly => normalized
sorted_tags = ['%s => %s' % (key, genre_tags_dict[key]) for key in sorted(genre_tags_dict.keys())]
for tag in next_tag(sorted_tags):
self.opts.log(u' %s' % tag)
return genre_tags_dict
def formatNCXText(self, description): def formatNCXText(self, description):
# Kindle TOC descriptions won't render certain characters # Kindle TOC descriptions won't render certain characters
@ -2343,15 +2393,23 @@ class EPUB_MOBI(CatalogPlugin):
body.insert(btc, aTag) body.insert(btc, aTag)
btc += 1 btc += 1
# Insert the anchor with spaces stripped # Create an anchor from the tag
aTag = Tag(soup, 'a') aTag = Tag(soup, 'a')
aTag['name'] = "Genre%s" % re.sub("\W","", genre) #aTag['name'] = "Genre%s" % re.sub("\W","", genre)
aTag['name'] = "Genre_%s" % genre
body.insert(btc,aTag) body.insert(btc,aTag)
btc += 1 btc += 1
# Insert the genre title # Insert the genre title using the friendly name
# GwR *** optimize
for genre_tag in self.genre_tags_dict:
if self.genre_tags_dict[genre_tag] == genre:
friendly_tag = genre_tag
break
titleTag = body.find(attrs={'class':'title'}) titleTag = body.find(attrs={'class':'title'})
titleTag.insert(0,NavigableString('<b><i>%s</i></b>' % escape(genre))) titleTag.insert(0,NavigableString('<b><i>%s</i></b>' % escape(friendly_tag)))
# Insert the books by author list # Insert the books by author list
divTag = body.find(attrs={'class':'authors'}) divTag = body.find(attrs={'class':'authors'})
@ -2512,6 +2570,8 @@ class EPUB_MOBI(CatalogPlugin):
return soup return soup
def generate_masthead_image(self, out_path): def generate_masthead_image(self, out_path):
font_path = P('fonts/liberation/LiberationSerif-Bold.ttf')
MI_WIDTH = 600 MI_WIDTH = 600
MI_HEIGHT = 60 MI_HEIGHT = 60
@ -2523,7 +2583,7 @@ class EPUB_MOBI(CatalogPlugin):
img = Image.new('RGB', (MI_WIDTH, MI_HEIGHT), 'white') img = Image.new('RGB', (MI_WIDTH, MI_HEIGHT), 'white')
draw = ImageDraw.Draw(img) draw = ImageDraw.Draw(img)
font = ImageFont.truetype(P('fonts/liberation/LiberationSerif-Bold.ttf'), 48) font = ImageFont.truetype(font_path, 48)
text = self.title.encode('utf-8') text = self.title.encode('utf-8')
width, height = draw.textsize(text, font=font) width, height = draw.textsize(text, font=font)
left = max(int((MI_WIDTH - width)/2.), 0) left = max(int((MI_WIDTH - width)/2.), 0)
@ -2671,7 +2731,7 @@ class EPUB_MOBI(CatalogPlugin):
opts.descriptionClip = 380 if op.endswith('dx') or 'kindle' not in op else 90 opts.descriptionClip = 380 if op.endswith('dx') or 'kindle' not in op else 90
opts.basename = "Catalog" opts.basename = "Catalog"
opts.plugin_path = self.plugin_path opts.plugin_path = self.plugin_path
opts.cli_environment = getattr(opts,'sync',True) opts.cli_environment = not hasattr(opts,'sync')
if opts.verbose: if opts.verbose:
opts_dict = vars(opts) opts_dict = vars(opts)