Handle various types of bad metadata when generating the catalog

This commit is contained in:
Kovid Goyal 2010-01-26 08:56:01 -07:00
commit e87c1cc343

View File

@@ -3,7 +3,7 @@ import os, re, shutil, htmlentitydefs
from collections import namedtuple from collections import namedtuple
from xml.sax.saxutils import escape from xml.sax.saxutils import escape
from calibre import filesystem_encoding from calibre import filesystem_encoding, prints
from calibre.customize import CatalogPlugin from calibre.customize import CatalogPlugin
from calibre.customize.conversion import OptionRecommendation, DummyReporter from calibre.customize.conversion import OptionRecommendation, DummyReporter
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, Tag, NavigableString from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, Tag, NavigableString
@@ -330,13 +330,15 @@ class EPUB_MOBI(CatalogPlugin):
tensComponentString = "%s" % tensPart tensComponentString = "%s" % tensPart
# Concatenate the results # Concatenate the results
result = ''
if hundredsComponent and not tensComponent: if hundredsComponent and not tensComponent:
result = hundredsComponentString result = hundredsComponentString
if not hundredsComponent and tensComponent: elif not hundredsComponent and tensComponent:
result = tensComponentString result = tensComponentString
if hundredsComponent and tensComponent: elif hundredsComponent and tensComponent:
result = hundredsComponentString + " " + tensComponentString result = hundredsComponentString + " " + tensComponentString
else:
prints(" NumberToText.stringFromInt(): empty result translating %d" % intToTranslate)
return result return result
def numberTranslate(self): def numberTranslate(self):
@@ -837,7 +839,8 @@ class EPUB_MOBI(CatalogPlugin):
title = this_title['title'] = self.convertHTMLEntities(record['title']) title = this_title['title'] = self.convertHTMLEntities(record['title'])
this_title['title_sort'] = self.generateSortTitle(title) this_title['title_sort'] = self.generateSortTitle(title)
this_title['author'] = " & ".join(record['authors']) this_title['author'] = " & ".join(record['authors'])
this_title['author_sort'] = record['author_sort'] this_title['author_sort'] = record['author_sort'] if len(record['author_sort']) \
else this_title['author']
this_title['id'] = record['id'] this_title['id'] = record['id']
if record['publisher']: if record['publisher']:
this_title['publisher'] = re.sub('&', '&amp;', record['publisher']) this_title['publisher'] = re.sub('&', '&amp;', record['publisher'])
@@ -920,11 +923,11 @@ class EPUB_MOBI(CatalogPlugin):
books_by_current_author)) books_by_current_author))
else: else:
books_by_current_author += 1 books_by_current_author += 1
else:
# Allow for single-author dataset # Add final author to list or single-author dataset
if not multiple_authors: if (current_author == author and len(authors) > 1) or not multiple_authors:
unique_authors.append((current_author[0], current_author[1].title(), unique_authors.append((current_author[0], current_author[1].title(),
books_by_current_author)) books_by_current_author))
if False and self.verbose: if False and self.verbose:
self.opts.log.info("\nfetchBooksByauthor(): %d unique authors" % len(unique_authors)) self.opts.log.info("\nfetchBooksByauthor(): %d unique authors" % len(unique_authors))
@@ -1877,6 +1880,11 @@ class EPUB_MOBI(CatalogPlugin):
self.opts.log.info(self.updateProgressFullStep("generateNCXByTags()")) self.opts.log.info(self.updateProgressFullStep("generateNCXByTags()"))
if not len(self.genres):
self.opts.log.warn(" No genres found in tags.\n"
" No Genre section added to Catalog")
return
ncx_soup = self.ncxSoup ncx_soup = self.ncxSoup
body = ncx_soup.find("navPoint") body = ncx_soup.find("navPoint")
btc = len(body.contents) btc = len(body.contents)
@@ -2048,6 +2056,10 @@ class EPUB_MOBI(CatalogPlugin):
filtered_tags.insert(1, (filtered_tags.pop(i))) filtered_tags.insert(1, (filtered_tags.pop(i)))
else: else:
continue continue
if self.verbose:
self.opts.log.info(' %d Genre tags in database (exclude_genre: %s):' % \
(len(filtered_tags), self.opts.exclude_genre))
self.opts.log.info(' %s' % ', '.join(filtered_tags))
return filtered_tags return filtered_tags