Catalog refactoring, wip

This commit is contained in:
GRiker 2012-09-01 06:04:42 -06:00
parent 31308e5811
commit 2b85633a87
3 changed files with 224 additions and 178 deletions

View File

@ -11,7 +11,7 @@ import re, sys
from functools import partial from functools import partial
from calibre.ebooks.conversion.config import load_defaults from calibre.ebooks.conversion.config import load_defaults
from calibre.gui2 import gprefs, question_dialog from calibre.gui2 import gprefs, info_dialog, question_dialog
from calibre.utils.icu import sort_key from calibre.utils.icu import sort_key
from catalog_epub_mobi_ui import Ui_Form from catalog_epub_mobi_ui import Ui_Form
@ -75,7 +75,6 @@ class PluginWidget(QWidget,Ui_Form):
# LineEditControls # LineEditControls
option_fields += zip(['exclude_genre'],['\[.+\]|\+'],['line_edit']) option_fields += zip(['exclude_genre'],['\[.+\]|\+'],['line_edit'])
#option_fields += zip(['exclude_genre_results'],['excluded genres will appear here'],['line_edit'])
# TextEditControls # TextEditControls
#option_fields += zip(['exclude_genre_results'],['excluded genres will appear here'],['text_edit']) #option_fields += zip(['exclude_genre_results'],['excluded genres will appear here'],['text_edit'])
@ -172,7 +171,7 @@ class PluginWidget(QWidget,Ui_Form):
if hit: if hit:
excluded_tags.append(hit.string) excluded_tags.append(hit.string)
if excluded_tags: if excluded_tags:
results = ', '.join(excluded_tags) results = ', '.join(sorted(excluded_tags))
finally: finally:
if self.DEBUG: if self.DEBUG:
print(results) print(results)
@ -334,16 +333,21 @@ class PluginWidget(QWidget,Ui_Form):
elif self.merge_after.isChecked(): elif self.merge_after.isChecked():
checked = 'after' checked = 'after'
include_hr = self.include_hr.isChecked() include_hr = self.include_hr.isChecked()
opts_dict['merge_comments'] = "%s:%s:%s" % \ opts_dict['merge_comments_rule'] = "%s:%s:%s" % \
(self.merge_source_field_name, checked, include_hr) (self.merge_source_field_name, checked, include_hr)
opts_dict['header_note_source_field'] = self.header_note_source_field_name opts_dict['header_note_source_field'] = self.header_note_source_field_name
# Fix up exclude_genre regex if blank. Assume blank = no exclusions
if opts_dict['exclude_genre'] == '':
opts_dict['exclude_genre'] = 'a^'
# Append the output profile # Append the output profile
try: try:
opts_dict['output_profile'] = [load_defaults('page_setup')['output_profile']] opts_dict['output_profile'] = [load_defaults('page_setup')['output_profile']]
except: except:
opts_dict['output_profile'] = ['default'] opts_dict['output_profile'] = ['default']
if self.DEBUG: if self.DEBUG:
print "opts_dict" print "opts_dict"
for opt in sorted(opts_dict.keys(), key=sort_key): for opt in sorted(opts_dict.keys(), key=sort_key):

View File

@ -120,9 +120,9 @@ class EPUB_MOBI(CatalogPlugin):
help=_("Custom field containing note text to insert in Description header.\n" help=_("Custom field containing note text to insert in Description header.\n"
"Default: '%default'\n" "Default: '%default'\n"
"Applies to: AZW3, ePub, MOBI output formats")), "Applies to: AZW3, ePub, MOBI output formats")),
Option('--merge-comments', Option('--merge-comments-rule',
default='::', default='::',
dest='merge_comments', dest='merge_comments_rule',
action = None, action = None,
help=_("#<custom field>:[before|after]:[True|False] specifying:\n" help=_("#<custom field>:[before|after]:[True|False] specifying:\n"
" <custom field> Custom field containing notes to merge with Comments\n" " <custom field> Custom field containing notes to merge with Comments\n"
@ -182,8 +182,8 @@ class EPUB_MOBI(CatalogPlugin):
else: else:
op = "kindle" op = "kindle"
opts.descriptionClip = 380 if op.endswith('dx') or 'kindle' not in op else 100 opts.description_clip = 380 if op.endswith('dx') or 'kindle' not in op else 100
opts.authorClip = 100 if op.endswith('dx') or 'kindle' not in op else 60 opts.author_clip = 100 if op.endswith('dx') or 'kindle' not in op else 60
opts.output_profile = op opts.output_profile = op
opts.basename = "Catalog" opts.basename = "Catalog"
@ -198,11 +198,12 @@ class EPUB_MOBI(CatalogPlugin):
(self.name,self.fmt,'for %s ' % opts.output_profile if opts.output_profile else '', (self.name,self.fmt,'for %s ' % opts.output_profile if opts.output_profile else '',
'CLI' if opts.cli_environment else 'GUI')) 'CLI' if opts.cli_environment else 'GUI'))
# If exclude_genre is blank, assume user wants all genre tags included # If exclude_genre is blank, assume user wants all tags as genres
if opts.exclude_genre.strip() == '': if opts.exclude_genre.strip() == '':
opts.exclude_genre = '\[^.\]' #opts.exclude_genre = '\[^.\]'
build_log.append(" converting empty exclude_genre to '\[^.\]'") #build_log.append(" converting empty exclude_genre to '\[^.\]'")
opts.exclude_genre = 'a^'
build_log.append(" converting empty exclude_genre to 'a^'")
if opts.connected_device['is_device_connected'] and \ if opts.connected_device['is_device_connected'] and \
opts.connected_device['kind'] == 'device': opts.connected_device['kind'] == 'device':
if opts.connected_device['serial']: if opts.connected_device['serial']:
@ -304,10 +305,10 @@ class EPUB_MOBI(CatalogPlugin):
keys.sort() keys.sort()
build_log.append(" opts:") build_log.append(" opts:")
for key in keys: for key in keys:
if key in ['catalog_title','authorClip','connected_kindle','descriptionClip', if key in ['catalog_title','author_clip','connected_kindle','description_clip',
'exclude_book_marker','exclude_genre','exclude_tags', 'exclude_book_marker','exclude_genre','exclude_tags',
'exclusion_rules', 'exclusion_rules', 'fmt',
'header_note_source_field','merge_comments', 'header_note_source_field','merge_comments_rule',
'output_profile','prefix_rules','read_book_marker', 'output_profile','prefix_rules','read_book_marker',
'search_text','sort_by','sort_descriptions_by_author','sync', 'search_text','sort_by','sort_descriptions_by_author','sync',
'thumb_width','wishlist_tag']: 'thumb_width','wishlist_tag']:
@ -323,10 +324,7 @@ class EPUB_MOBI(CatalogPlugin):
if opts.verbose: if opts.verbose:
log.info(" Begin catalog source generation") log.info(" Begin catalog source generation")
catalog.createDirectoryStructure() catalog_source_built = catalog.build_sources()
catalog.copyResources()
catalog.calculateThumbnailSize()
catalog_source_built = catalog.buildSources()
if opts.verbose: if opts.verbose:
if catalog_source_built: if catalog_source_built:
@ -388,7 +386,7 @@ class EPUB_MOBI(CatalogPlugin):
# Run ebook-convert # Run ebook-convert
from calibre.ebooks.conversion.plumber import Plumber from calibre.ebooks.conversion.plumber import Plumber
plumber = Plumber(os.path.join(catalog.catalogPath, plumber = Plumber(os.path.join(catalog.catalog_path,
opts.basename + '.opf'), path_to_output, log, report_progress=notification, opts.basename + '.opf'), path_to_output, log, report_progress=notification,
abort_after_input_dump=False) abort_after_input_dump=False)
plumber.merge_ui_recommendations(recommendations) plumber.merge_ui_recommendations(recommendations)

View File

@ -9,6 +9,7 @@ from xml.sax.saxutils import escape
from calibre import (prepare_string_for_xml, strftime, force_unicode) from calibre import (prepare_string_for_xml, strftime, force_unicode)
from calibre.customize.conversion import DummyReporter from calibre.customize.conversion import DummyReporter
from calibre.customize.ui import output_profiles
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, Tag, NavigableString from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, Tag, NavigableString
from calibre.ebooks.chardet import substitute_entites from calibre.ebooks.chardet import substitute_entites
from calibre.ptempfile import PersistentTemporaryDirectory from calibre.ptempfile import PersistentTemporaryDirectory
@ -32,7 +33,7 @@ class CatalogBuilder(object):
Options managed in gui2.catalog.catalog_epub_mobi.py Options managed in gui2.catalog.catalog_epub_mobi.py
''' '''
DEBUG = True DEBUG = False
# A single number creates 'Last x days' only. # A single number creates 'Last x days' only.
# Multiple numbers create 'Last x days', 'x to y days ago' ... # Multiple numbers create 'Last x days', 'x to y days ago' ...
@ -46,78 +47,21 @@ class CatalogBuilder(object):
# basename output file basename # basename output file basename
# creator dc:creator in OPF metadata # creator dc:creator in OPF metadata
# description_clip limits size of NCX descriptions (Kindle only) # description_clip limits size of NCX descriptions (Kindle only)
# includeSources Used in filter_excluded_tags to skip tags like '[SPL]' # includeSources Used in filter_excluded_genres to skip tags like '[SPL]'
# notification Used to check for cancel, report progress # notification Used to check for cancel, report progress
# stylesheet CSS stylesheet # stylesheet CSS stylesheet
# title dc:title in OPF metadata, NCX periodical # title dc:title in OPF metadata, NCX periodical
# verbosity level of diagnostic printout # verbosity level of diagnostic printout
def __init__(self, db, opts, plugin, """ property decorators for attributes """
report_progress=DummyReporter(), if True:
stylesheet="content/stylesheet.css",
init_resources=True):
''' active database '''
@property
def db(self):
return self.__db
self.__db = db
''' opts passed from gui2.catalog.catalog_epub_mobi.py '''
@property
def opts(self):
return self.__opts
self.__opts = opts
''' catalog??? device??? '''
@property
def plugin(self):
return self.__plugin
self.__plugin = plugin
''' Progress Reporter for Jobs '''
@property
def reporter(self):
return self.__reporter
self.__reporter = report_progress
''' stylesheet to include with catalog '''
@property
def stylesheet(self):
return self.__stylesheet
self.__stylesheet = stylesheet
# Initialize properties with dependents in _initialize()
''' directory to store cached thumbs '''
@property
def cache_dir(self):
return self.__cache_dir
self.__cache_dir = os.path.join(config_dir, 'caches', 'catalog')
''' temp dir to store generated catalog '''
@property
def catalog_path(self):
return self.__catalog_path
self.__catalog_path = PersistentTemporaryDirectory("_epub_mobi_catalog", prefix='')
''' True if generating for Kindle in MOBI format '''
@property
def generate_for_kindle(self):
return self.__generate_for_kindle
self.__generate_for_kindle = True if (opts.fmt == 'mobi' and
opts.output_profile and
opts.output_profile.startswith("kindle")) else False
self._initialize(init_resources)
def _initialize(self,init_resources):
# continue with initialization
''' list of unique authors ''' ''' list of unique authors '''
@property @property
def authors(self): def authors(self):
return self.__authors return self.__authors
self.__authors = None @authors.setter
def authors(self, val):
self.__authors = val
''' dict of bookmarked books ''' ''' dict of bookmarked books '''
@property @property
@ -126,7 +70,6 @@ class CatalogBuilder(object):
@bookmarked_books.setter @bookmarked_books.setter
def bookmarked_books(self, val): def bookmarked_books(self, val):
self.__bookmarked_books = val self.__bookmarked_books = val
self.__bookmarked_books = None
''' list of bookmarked books, sorted by date read ''' ''' list of bookmarked books, sorted by date read '''
@property @property
@ -135,7 +78,6 @@ class CatalogBuilder(object):
@bookmarked_books_by_date_read.setter @bookmarked_books_by_date_read.setter
def bookmarked_books_by_date_read(self, val): def bookmarked_books_by_date_read(self, val):
self.__bookmarked_books_by_date_read = val self.__bookmarked_books_by_date_read = val
self.__bookmarked_books_by_date_read = None
''' list of books, sorted by author ''' ''' list of books, sorted by author '''
@property @property
@ -144,7 +86,6 @@ class CatalogBuilder(object):
@books_by_author.setter @books_by_author.setter
def books_by_author(self, val): def books_by_author(self, val):
self.__books_by_author = val self.__books_by_author = val
self.__books_by_author = None
''' list of books, grouped by date range (30 days) ''' ''' list of books, grouped by date range (30 days) '''
@property @property
@ -153,7 +94,6 @@ class CatalogBuilder(object):
@books_by_date_range.setter @books_by_date_range.setter
def books_by_date_range(self, val): def books_by_date_range(self, val):
self.__books_by_date_range = val self.__books_by_date_range = val
self.__books_by_date_range = None
''' list of books, by date added reverse (most recent first) ''' ''' list of books, by date added reverse (most recent first) '''
@property @property
@ -162,7 +102,6 @@ class CatalogBuilder(object):
@books_by_month.setter @books_by_month.setter
def books_by_month(self, val): def books_by_month(self, val):
self.__books_by_month = val self.__books_by_month = val
self.__books_by_month = None
''' list of books in series ''' ''' list of books in series '''
@property @property
@ -171,7 +110,6 @@ class CatalogBuilder(object):
@books_by_series.setter @books_by_series.setter
def books_by_series(self, val): def books_by_series(self, val):
self.__books_by_series = val self.__books_by_series = val
self.__books_by_series = None
''' list of books, sorted by title ''' ''' list of books, sorted by title '''
@property @property
@ -180,22 +118,29 @@ class CatalogBuilder(object):
@books_by_title.setter @books_by_title.setter
def books_by_title(self, val): def books_by_title(self, val):
self.__books_by_title = val self.__books_by_title = val
self.__books_by_title = None
''' list of books in series, without series prefix ''' ''' list of books in series, without series prefix '''
@property @property
def books_by_title_no_series_prefix(self): def books_by_title_no_series_prefix(self):
return books_by_title_no_series_prefix.__prop return self.__books_by_title_no_series_prefix
@books_by_title_no_series_prefix.setter @books_by_title_no_series_prefix.setter
def books_by_title_no_series_prefix(self, val): def books_by_title_no_series_prefix(self, val):
self.__books_by_title_no_series_prefix = val self.__books_by_title_no_series_prefix = val
self.__books_by_title_no_series_prefix = None
''' directory to store cached thumbs '''
@property
def cache_dir(self):
return self.__cache_dir
''' temp dir to store generated catalog '''
@property
def catalog_path(self):
return self.__catalog_path
''' content dir in generated catalog ''' ''' content dir in generated catalog '''
@property @property
def content_dir(self): def content_dir(self):
return self.__content_dir return self.__content_dir
self.__content_dir = os.path.join(self.catalog_path, "content")
''' track Job progress ''' ''' track Job progress '''
@property @property
@ -204,7 +149,11 @@ class CatalogBuilder(object):
@current_step.setter @current_step.setter
def current_step(self, val): def current_step(self, val):
self.__current_step = val self.__current_step = val
self.__current_step = 0.0
''' active database '''
@property
def db(self):
return self.__db
''' cumulative error messages to report at conclusion ''' ''' cumulative error messages to report at conclusion '''
@property @property
@ -213,21 +162,21 @@ class CatalogBuilder(object):
@error.setter @error.setter
def error(self, val): def error(self, val):
self.__error = val self.__error = val
self.__error = []
''' tags to exclude as genres ''' ''' tags to exclude as genres '''
@property @property
def excluded_tags(self): def excluded_tags(self):
return self.__excluded_tags return self.__excluded_tags
self.__excluded_tags = self.get_excluded_tags()
''' True if generating for Kindle in MOBI format '''
@property
def generate_for_kindle(self):
return self.__generate_for_kindle
''' True if connected Kindle and generating for Kindle ''' ''' True if connected Kindle and generating for Kindle '''
@property @property
def generate_recently_read(self): def generate_recently_read(self):
return self.__generate_recently_read return self.__generate_recently_read
self.__generate_recently_read = True if (opts.generate_recently_added and
opts.connected_kindle and
self.generate_for_kindle) else False
''' list of dicts with books by genre ''' ''' list of dicts with books by genre '''
@property @property
@ -236,7 +185,6 @@ class CatalogBuilder(object):
@genres.setter @genres.setter
def genres(self, val): def genres(self, val):
self.__genres = val self.__genres = val
self.__genres = []
''' dict of enabled genre tags ''' ''' dict of enabled genre tags '''
@property @property
@ -245,7 +193,6 @@ class CatalogBuilder(object):
@genre_tags_dict.setter @genre_tags_dict.setter
def genre_tags_dict(self, val): def genre_tags_dict(self, val):
self.__genre_tags_dict = val self.__genre_tags_dict = val
self.__genre_tags_dict = None
''' Author, Title, Series sections ''' ''' Author, Title, Series sections '''
@property @property
@ -254,7 +201,6 @@ class CatalogBuilder(object):
@html_filelist_1.setter @html_filelist_1.setter
def html_filelist_1(self, val): def html_filelist_1(self, val):
self.__html_filelist_1 = val self.__html_filelist_1 = val
self.__html_filelist_1 = []
''' Date Added, Date Read ''' ''' Date Added, Date Read '''
@property @property
@ -263,15 +209,11 @@ class CatalogBuilder(object):
@html_filelist_2.setter @html_filelist_2.setter
def html_filelist_2(self, val): def html_filelist_2(self, val):
self.__html_filelist_2 = val self.__html_filelist_2 = val
self.__html_filelist_2 = []
''' additional field to include before/after comments ''' ''' additional field to include before/after comments '''
@property @property
def merge_comments_rule(self): def merge_comments_rule(self):
return self.__merge_comments_rule return self.__merge_comments_rule
#f, p, hr = opts.merge_comments_rule.split(':')
#self.__merge_comments_rule = {'field':f, 'position':p, 'hr':hr}
self.__merge_comments_rule = dict(zip(['field','position','hr'],opts.merge_comments_rule.split(':')))
''' cumulative HTML for NCX file ''' ''' cumulative HTML for NCX file '''
@property @property
@ -280,18 +222,16 @@ class CatalogBuilder(object):
@ncx_soup.setter @ncx_soup.setter
def ncx_soup(self, val): def ncx_soup(self, val):
self.__ncx_soup = val self.__ncx_soup = val
self.__ncx_soup = None
''' opts passed from gui2.catalog.catalog_epub_mobi.py '''
@property
def opts(self):
return self.__opts
''' output_profile declares special symbols ''' ''' output_profile declares special symbols '''
@property @property
def output_profile(self): def output_profile(self):
return self.__output_profile return self.__output_profile
self.__output_profile = None
from calibre.customize.ui import output_profiles
for profile in output_profiles():
if profile.short_name == opts.output_profile:
self.__output_profile = profile
break
''' playOrder value for building NCX ''' ''' playOrder value for building NCX '''
@property @property
@ -300,7 +240,11 @@ class CatalogBuilder(object):
@play_order.setter @play_order.setter
def play_order(self, val): def play_order(self, val):
self.__play_order = val self.__play_order = val
self.__play_order = 1
''' catalog??? device??? '''
@property
def plugin(self):
return self.__plugin
''' dict of prefix rules ''' ''' dict of prefix rules '''
@property @property
@ -309,7 +253,6 @@ class CatalogBuilder(object):
@prefix_rules.setter @prefix_rules.setter
def prefix_rules(self, val): def prefix_rules(self, val):
self.__prefix_rules = val self.__prefix_rules = val
self.__prefix_rules = self.get_prefix_rules()
''' used with ProgressReporter() ''' ''' used with ProgressReporter() '''
@property @property
@ -318,7 +261,6 @@ class CatalogBuilder(object):
@progress_int.setter @progress_int.setter
def progress_int(self, val): def progress_int(self, val):
self.__progress_int = val self.__progress_int = val
self.__progress_int = 0.0
''' used with ProgressReporter() ''' ''' used with ProgressReporter() '''
@property @property
@ -327,7 +269,16 @@ class CatalogBuilder(object):
@progress_string.setter @progress_string.setter
def progress_string(self, val): def progress_string(self, val):
self.__progress_string = val self.__progress_string = val
self.__progress_string = ''
''' Progress Reporter for Jobs '''
@property
def reporter(self):
return self.__reporter
''' stylesheet to include with catalog '''
@property
def stylesheet(self):
return self.__stylesheet
''' device-specific symbol (default empty star) ''' ''' device-specific symbol (default empty star) '''
@property @property
@ -369,7 +320,6 @@ class CatalogBuilder(object):
@thumb_height.setter @thumb_height.setter
def thumb_height(self, val): def thumb_height(self, val):
self.__thumb_height = val self.__thumb_height = val
self.__thumb_height = 0
@property @property
def thumb_width(self): def thumb_width(self):
@ -377,7 +327,6 @@ class CatalogBuilder(object):
@thumb_width.setter @thumb_width.setter
def thumb_width(self, val): def thumb_width(self, val):
self.__thumb_width = val self.__thumb_width = val
self.__thumb_width = 0
''' list of generated thumbs ''' ''' list of generated thumbs '''
@property @property
@ -386,27 +335,78 @@ class CatalogBuilder(object):
@thumbs.setter @thumbs.setter
def thumbs(self, val): def thumbs(self, val):
self.__thumbs = val self.__thumbs = val
self.__thumbs = None
''' full path to thumbs archive ''' ''' full path to thumbs archive '''
@property @property
def thumbs_path(self): def thumbs_path(self):
return self.__thumbs_path return self.__thumbs_path
self.__thumbs_path = os.path.join(self.cache_dir, "thumbs.zip")
''' used with ProgressReporter() ''' ''' used with ProgressReporter() '''
@property @property
def total_steps(self): def total_steps(self):
return self.__total_steps return self.__total_steps
self.__total_steps = 6.0 @total_steps.setter
def total_steps(self, val):
self.__total_steps = val
''' switch controlling format of series books in Titles section ''' ''' switch controlling format of series books in Titles section '''
@property @property
def use_series_prefix_in_titles_section(self): def use_series_prefix_in_titles_section(self):
return self.__use_series_prefix_in_titles_section return self.__use_series_prefix_in_titles_section
def __init__(self, db, _opts, plugin,
report_progress=DummyReporter(),
stylesheet="content/stylesheet.css",
init_resources=True):
self.__db = db
self.__opts = _opts
self.__plugin = plugin
self.__reporter = report_progress
self.__stylesheet = stylesheet
self.__cache_dir = os.path.join(config_dir, 'caches', 'catalog')
self.__catalog_path = PersistentTemporaryDirectory("_epub_mobi_catalog", prefix='')
self.__generate_for_kindle = True if (_opts.fmt == 'mobi' and
_opts.output_profile and
_opts.output_profile.startswith("kindle")) else False
self.__authors = None
self.__bookmarked_books = None
self.__bookmarked_books_by_date_read = None
self.__books_by_author = None
self.__books_by_date_range = None
self.__books_by_month = None
self.__books_by_series = None
self.__books_by_title = None
self.__books_by_title_no_series_prefix = None
self.__content_dir = os.path.join(self.catalog_path, "content")
self.__current_step = 0.0
self.__error = []
self.__excluded_tags = self.get_excluded_tags()
self.__generate_recently_read = True if (_opts.generate_recently_added and
_opts.connected_kindle and
self.generate_for_kindle) else False
self.__genres = []
self.__genre_tags_dict = None
self.__html_filelist_1 = []
self.__html_filelist_2 = []
self.__merge_comments_rule = dict(zip(['field','position','hr'],_opts.merge_comments_rule.split(':')))
self.__ncx_soup = None
self.__output_profile = None
self.__output_profile = self.get_output_profile(_opts)
self.__play_order = 1
self.__prefix_rules = self.get_prefix_rules()
self.__progress_int = 0.0
self.__progress_string = ''
self.__thumb_height = 0
self.__thumb_width = 0
self.__thumbs = None
self.__thumbs_path = os.path.join(self.cache_dir, "thumbs.zip")
self.__total_steps = 6.0
self.__use_series_prefix_in_titles_section = False self.__use_series_prefix_in_titles_section = False
self.compute_total_steps() self.compute_total_steps()
self.calculate_thumbnail_dimensions()
self.confirm_thumbs_archive() self.confirm_thumbs_archive()
self.load_section_templates() self.load_section_templates()
if init_resources: if init_resources:
@ -414,7 +414,7 @@ class CatalogBuilder(object):
""" key() functions """ """ key() functions """
def kf_author_to_author_sort(self, author): def _kf_author_to_author_sort(self, author):
""" Compute author_sort value from author """ Compute author_sort value from author
Tokenize author string, return capitalized string with last token first Tokenize author string, return capitalized string with last token first
@ -431,10 +431,11 @@ class CatalogBuilder(object):
tokens[0] += ',' tokens[0] += ','
return ' '.join(tokens).capitalize() return ' '.join(tokens).capitalize()
def kf_books_by_author_sorter_author(self, book): def _kf_books_by_author_sorter_author(self, book):
""" Generate book sort key with computed author_sort. """ Generate book sort key with computed author_sort.
Generate a sort key of computed author_sort, title. Generate a sort key of computed author_sort, title. Used to look for
author_sort mismatches.
Twiddle included to force series to sort after non-series books. Twiddle included to force series to sort after non-series books.
'Smith, john Star Wars' 'Smith, john Star Wars'
'Smith, john ~Star Wars 0001.0000' 'Smith, john ~Star Wars 0001.0000'
@ -446,25 +447,23 @@ class CatalogBuilder(object):
(str): sort key (str): sort key
""" """
if not book['series']: if not book['series']:
key = '%s %s' % (self.kf_author_to_author_sort(book['author']), key = '%s %s' % (self._kf_author_to_author_sort(book['author']),
capitalize(book['title_sort'])) capitalize(book['title_sort']))
else: else:
index = book['series_index'] index = book['series_index']
integer = int(index) integer = int(index)
fraction = index-integer fraction = index-integer
series_index = '%04d%s' % (integer, str('%0.4f' % fraction).lstrip('0')) series_index = '%04d%s' % (integer, str('%0.4f' % fraction).lstrip('0'))
key = '%s ~%s %s' % (self.kf_author_to_author_sort(book['author']), key = '%s ~%s %s' % (self._kf_author_to_author_sort(book['author']),
self.generate_sort_title(book['series']), self.generate_sort_title(book['series']),
series_index) series_index)
return key return key
def kf_books_by_author_sorter_author_sort(self, book): def _kf_books_by_author_sorter_author_sort(self, book, longest_author_sort=60):
""" Generate book sort key with supplied author_sort. """ Generate book sort key with supplied author_sort.
Generate a sort key of author_sort, title. Generate a sort key of author_sort, title.
Twiddle included to force series to sort after non-series books. Bang, tilde included to force series to sort after non-series books.
'Smith, john Star Wars'
'Smith, john ~Star Wars 0001.0000'
Args: Args:
book (dict): book metadata book (dict): book metadata
@ -473,19 +472,20 @@ class CatalogBuilder(object):
(str): sort key (str): sort key
""" """
if not book['series']: if not book['series']:
key = '%s ~%s' % (capitalize(book['author_sort']), fs = '{:<%d}!{!s}' % longest_author_sort
key = fs.format(capitalize(book['author_sort']),
capitalize(book['title_sort'])) capitalize(book['title_sort']))
else: else:
index = book['series_index'] index = book['series_index']
integer = int(index) integer = int(index)
fraction = index-integer fraction = index-integer
series_index = '%04d%s' % (integer, str('%0.4f' % fraction).lstrip('0')) series_index = '%04d%s' % (integer, str('%0.4f' % fraction).lstrip('0'))
key = '%s %s %s' % (capitalize(book['author_sort']), fs = '{:<%d}~{!s}{!s}' % longest_author_sort
key = fs.format(capitalize(book['author_sort']),
self.generate_sort_title(book['series']), self.generate_sort_title(book['series']),
series_index) series_index)
return key return key
""" Methods """ """ Methods """
def build_sources(self): def build_sources(self):
@ -557,7 +557,6 @@ class CatalogBuilder(object):
self.write_ncx() self.write_ncx()
return True return True
'''
def calculate_thumbnail_dimensions(self): def calculate_thumbnail_dimensions(self):
""" Calculate thumb dimensions based on device DPI. """ Calculate thumb dimensions based on device DPI.
@ -587,9 +586,9 @@ class CatalogBuilder(object):
self.thumb_height = self.thumb_height/2 self.thumb_height = self.thumb_height/2
break break
if self.opts.verbose: if self.opts.verbose:
self.opts.log(" Thumbnails:")
self.opts.log(" DPI = %d; thumbnail dimensions: %d x %d" % \ self.opts.log(" DPI = %d; thumbnail dimensions: %d x %d" % \
(x.dpi, self.thumb_width, self.thumb_height)) (x.dpi, self.thumb_width, self.thumb_height))
'''
def compute_total_steps(self): def compute_total_steps(self):
""" Calculate number of build steps to generate catalog. """ Calculate number of build steps to generate catalog.
@ -665,7 +664,6 @@ class CatalogBuilder(object):
self.opts.log.info(' existing thumb cache at %s, cached_thumb_width: %1.2f"' % self.opts.log.info(' existing thumb cache at %s, cached_thumb_width: %1.2f"' %
(self.thumbs_path, float(cached_thumb_width))) (self.thumbs_path, float(cached_thumb_width)))
def convert_html_entities(self, s): def convert_html_entities(self, s):
""" Convert string containing HTML entities to its unicode equivalent. """ Convert string containing HTML entities to its unicode equivalent.
@ -854,11 +852,12 @@ class CatalogBuilder(object):
cl_list[idx] = last_c cl_list[idx] = last_c
if self.DEBUG and self.opts.verbose: if self.DEBUG and self.opts.verbose:
print(" establish_equivalencies():")
if key: if key:
for idx, item in enumerate(item_list): for idx, item in enumerate(item_list):
print("%s %s" % (cl_list[idx],item[sort_field])) print(" %s %s" % (cl_list[idx],item[sort_field]))
else: else:
print("%s %s" % (cl_list[0], item)) print(" %s %s" % (cl_list[0], item))
return cl_list return cl_list
@ -883,9 +882,10 @@ class CatalogBuilder(object):
""" """
self.update_progress_full_step(_("Sorting database")) self.update_progress_full_step(_("Sorting database"))
self.books_by_author = sorted(list(self.books_by_title), key=self.kf_books_by_author_sorter_author)
# Build the unique_authors set from existing data, test for author_sort mismatches # First pass: Sort by author, test for author_sort mismatches
self.books_by_author = sorted(list(self.books_by_title), key=self._kf_books_by_author_sorter_author)
authors = [(record['author'], record['author_sort']) for record in self.books_by_author] authors = [(record['author'], record['author_sort']) for record in self.books_by_author]
current_author = authors[0] current_author = authors[0]
for (i,author) in enumerate(authors): for (i,author) in enumerate(authors):
@ -920,8 +920,20 @@ Author '{0}':
current_author = author current_author = author
# Second pass: Sort using sort_key to normalize accented letters
# Determine the longest author_sort length before sorting
asl = [i['author_sort'] for i in self.books_by_author]
las = max(asl, key=len)
self.books_by_author = sorted(self.books_by_author, self.books_by_author = sorted(self.books_by_author,
key=lambda x: sort_key(self.kf_books_by_author_sorter_author_sort(x))) key=lambda x: sort_key(self._kf_books_by_author_sorter_author_sort(x, len(las))))
if self.DEBUG and self.opts.verbose:
tl = [i['title'] for i in self.books_by_author]
lt = max(tl, key=len)
fs = '{:<6}{:<%d} {:<%d} {!s}' % (len(lt),len(las))
print(fs.format('','Title','Author','Series'))
for i in self.books_by_author:
print(fs.format('', i['title'],i['author_sort'],i['series']))
# Build the unique_authors set from existing data # Build the unique_authors set from existing data
authors = [(record['author'], capitalize(record['author_sort'])) for record in self.books_by_author] authors = [(record['author'], capitalize(record['author_sort'])) for record in self.books_by_author]
@ -1029,7 +1041,7 @@ Author '{0}':
if 'author_sort' in record and record['author_sort'].strip(): if 'author_sort' in record and record['author_sort'].strip():
this_title['author_sort'] = record['author_sort'] this_title['author_sort'] = record['author_sort']
else: else:
this_title['author_sort'] = self.kf_author_to_author_sort(this_title['author']) this_title['author_sort'] = self._kf_author_to_author_sort(this_title['author'])
if record['publisher']: if record['publisher']:
this_title['publisher'] = re.sub('&', '&amp;', record['publisher']) this_title['publisher'] = re.sub('&', '&amp;', record['publisher'])
@ -1076,7 +1088,7 @@ Author '{0}':
this_title['prefix'] = self.discover_prefix(record) this_title['prefix'] = self.discover_prefix(record)
if record['tags']: if record['tags']:
this_title['tags'] = self.filter_excluded_tags(record['tags'], this_title['tags'] = self.filter_excluded_genres(record['tags'],
self.opts.exclude_genre) self.opts.exclude_genre)
if record['formats']: if record['formats']:
formats = [] formats = []
@ -1097,7 +1109,7 @@ Author '{0}':
notes = ' &middot; '.join(notes) notes = ' &middot; '.join(notes)
elif field_md['datatype'] == 'datetime': elif field_md['datatype'] == 'datetime':
notes = format_date(notes,'dd MMM yyyy') notes = format_date(notes,'dd MMM yyyy')
this_title['notes'] = {'source':field_md['name'], this_title['notes'] = {'source':field_md['name'],'content':notes}
return this_title return this_title
@ -1143,7 +1155,7 @@ Author '{0}':
self.opts.log.info(" %-40s %-40s" % ('title', 'title_sort')) self.opts.log.info(" %-40s %-40s" % ('title', 'title_sort'))
for title in self.books_by_title: for title in self.books_by_title:
self.opts.log.info((u" %-40s %-40s" % (title['title'][0:40], self.opts.log.info((u" %-40s %-40s" % (title['title'][0:40],
title['title_sort'][0:40])).decode('mac-roman')) title['title_sort'][0:40])).encode('utf-8'))
return True return True
else: else:
error_msg = _("No books found to catalog.\nCheck 'Excluded books' criteria in E-book options.\n") error_msg = _("No books found to catalog.\nCheck 'Excluded books' criteria in E-book options.\n")
@ -1311,7 +1323,7 @@ Author '{0}':
if tag == ' ': if tag == ' ':
continue continue
normalized_tags.append(re.sub('\W','',ascii_text(tag)).lower()) normalized_tags.append(self.normalize_tag(tag))
friendly_tags.append(tag) friendly_tags.append(tag)
genre_tags_dict = dict(zip(friendly_tags,normalized_tags)) genre_tags_dict = dict(zip(friendly_tags,normalized_tags))
@ -1330,7 +1342,7 @@ Author '{0}':
return genre_tags_dict return genre_tags_dict
def filter_excluded_tags(self, tags, regex): def filter_excluded_genres(self, tags, regex):
""" Remove excluded tags from a tag list """ Remove excluded tags from a tag list
Run regex against list of tags, remove matching tags. Return filtered list. Run regex against list of tags, remove matching tags. Return filtered list.
@ -1352,7 +1364,7 @@ Author '{0}':
else: else:
tag_list.append(tag) tag_list.append(tag)
except: except:
self.opts.log.error("\tfilter_excluded_tags(): malformed --exclude-genre regex pattern: %s" % regex) self.opts.log.error("\tfilter_excluded_genres(): malformed --exclude-genre regex pattern: %s" % regex)
return tags return tags
return tag_list return tag_list
@ -1490,8 +1502,6 @@ Author '{0}':
# Establish initial letter equivalencies # Establish initial letter equivalencies
sort_equivalents = self.establish_equivalencies(self.books_by_author,key='author_sort') sort_equivalents = self.establish_equivalencies(self.books_by_author,key='author_sort')
#for book in sorted(self.books_by_author, key = self.kf_books_by_author_sorter_author_sort):
#for book in self.books_by_author:
for idx, book in enumerate(self.books_by_author): for idx, book in enumerate(self.books_by_author):
book_count += 1 book_count += 1
if self.letter_or_symbol(sort_equivalents[idx]) != current_letter : if self.letter_or_symbol(sort_equivalents[idx]) != current_letter :
@ -1680,8 +1690,11 @@ Author '{0}':
def _add_books_to_html_by_month(this_months_list, dtc): def _add_books_to_html_by_month(this_months_list, dtc):
if len(this_months_list): if len(this_months_list):
# Determine the longest author_sort_length before sorting
this_months_list = sorted(this_months_list, key=lambda x: sort_key(self.kf_books_by_author_sorter_author_sort)(x))) asl = [i['author_sort'] for i in this_months_list]
las = max(asl, key=len)
this_months_list = sorted(this_months_list,
key=lambda x: sort_key(self._kf_books_by_author_sorter_author_sort(x, len(las))))
# Create a new month anchor # Create a new month anchor
date_string = strftime(u'%B %Y', current_date.timetuple()) date_string = strftime(u'%B %Y', current_date.timetuple())
@ -1722,8 +1735,6 @@ Author '{0}':
pSeriesTag['class'] = "series_mobi" pSeriesTag['class'] = "series_mobi"
if self.opts.generate_series: if self.opts.generate_series:
aTag = Tag(soup,'a') aTag = Tag(soup,'a')
if self.letter_or_symbol(new_entry['series']) == self.SYMBOLS:
aTag['href'] = "%s.html#%s" % ('BySeries',self.generate_series_anchor(new_entry['series'])) aTag['href'] = "%s.html#%s" % ('BySeries',self.generate_series_anchor(new_entry['series']))
aTag.insert(0, new_entry['series']) aTag.insert(0, new_entry['series'])
pSeriesTag.insert(0, aTag) pSeriesTag.insert(0, aTag)
@ -2740,7 +2751,7 @@ Author '{0}':
for (i, tag) in enumerate(sorted(book.get('tags', []))): for (i, tag) in enumerate(sorted(book.get('tags', []))):
aTag = Tag(_soup,'a') aTag = Tag(_soup,'a')
if self.opts.generate_genres: if self.opts.generate_genres:
aTag['href'] = "Genre_%s.html" % re.sub("\W","",ascii_text(tag).lower()) aTag['href'] = "Genre_%s.html" % self.normalize_tag(tag)
aTag.insert(0,escape(NavigableString(tag))) aTag.insert(0,escape(NavigableString(tag)))
genresTag.insert(gtc, aTag) genresTag.insert(gtc, aTag)
gtc += 1 gtc += 1
@ -2852,6 +2863,7 @@ Author '{0}':
newEmptyTag.insert(0,NavigableString('&nbsp;')) newEmptyTag.insert(0,NavigableString('&nbsp;'))
mt.replaceWith(newEmptyTag) mt.replaceWith(newEmptyTag)
return soup
def generate_html_descriptions(self): def generate_html_descriptions(self):
""" Generate Description HTML for each book. """ Generate Description HTML for each book.
@ -2933,7 +2945,6 @@ Author '{0}':
bodyTag.insert(1,divTag) bodyTag.insert(1,divTag)
return soup return soup
def generate_masthead_image(self, out_path): def generate_masthead_image(self, out_path):
""" Generate a Kindle masthead image. """ Generate a Kindle masthead image.
@ -4314,9 +4325,6 @@ Author '{0}':
# process # process
pass pass
if self.DEBUG and self.opts.verbose:
self.opts.log.info(" generate_thumbnail():")
# Generate crc for current cover # Generate crc for current cover
with open(title['cover'], 'rb') as f: with open(title['cover'], 'rb') as f:
data = f.read() data = f.read()
@ -4466,11 +4474,12 @@ Author '{0}':
# Report excluded books # Report excluded books
if self.opts.verbose and excluded_tags: if self.opts.verbose and excluded_tags:
self.opts.log.info(" Excluded books:")
data = self.db.get_data_as_dict(ids=self.opts.ids) data = self.db.get_data_as_dict(ids=self.opts.ids)
for record in data: for record in data:
matched = list(set(record['tags']) & set(excluded_tags)) matched = list(set(record['tags']) & set(excluded_tags))
if matched : if matched :
self.opts.log.info(" - %s by %s (Exclusion rule Tags: '%s')" % self.opts.log.info(" - '%s' by %s (Exclusion rule Tags: '%s')" %
(record['title'], record['authors'][0], str(matched[0]))) (record['title'], record['authors'][0], str(matched[0])))
return excluded_tags return excluded_tags
@ -4491,6 +4500,19 @@ Author '{0}':
if self.genre_tags_dict[friendly_tag] == genre: if self.genre_tags_dict[friendly_tag] == genre:
return friendly_tag return friendly_tag
def get_output_profile(self, _opts):
    """ Look up the output profile selected in the build options.

    Scans the profiles yielded by output_profiles() in order and picks
    the first one whose short_name equals _opts.output_profile.

    Input:
     _opts (object): build options object exposing output_profile

    Return:
     (profile): first profile with a matching short_name, or None when
                no profile matches
    """
    # The option is read inside the generator so it is only touched when
    # there is at least one profile to compare against.
    candidates = (candidate for candidate in output_profiles()
                  if candidate.short_name == _opts.output_profile)
    return next(candidates, None)
def get_prefix_rules(self): def get_prefix_rules(self):
""" Convert opts.prefix_rules to dict. """ Convert opts.prefix_rules to dict.
@ -4502,7 +4524,6 @@ Author '{0}':
Return: Return:
(list): list of prefix_rules dicts (list): list of prefix_rules dicts
""" """
pr = [] pr = []
if self.opts.prefix_rules: if self.opts.prefix_rules:
try: try:
@ -4721,6 +4742,28 @@ Author '{0}':
return merged return merged
def normalize_tag(self, tag):
    """ Generate an anchor-safe string from tag.

    The tag is passed through ascii_text(), lower-cased and stripped of
    all whitespace. Each remaining non-word character (anything the regex
    class W-uppercase matches) is then replaced by whatever
    self.generate_unicode_name() returns for that character; word
    characters are kept unchanged.

    Args:
     tag (str): tag name possibly containing symbols

    Return:
     normalized (str): massaged tag with non-word characters substituted
    """
    # Raw strings keep the regex escapes from being read as (invalid)
    # string escape sequences.
    massaged = re.sub(r'\s', '', ascii_text(tag).lower())

    # Nothing to substitute: return the massaged tag unchanged.
    if not re.search(r'\W', massaged):
        return massaged

    # Build the result in one pass instead of repeated concatenation.
    return ''.join(
        self.generate_unicode_name(c) if re.search(r'\W', c) else c
        for c in massaged)
def process_exclusions(self, data_set): def process_exclusions(self, data_set):
""" Filter data_set based on exclusion_rules. """ Filter data_set based on exclusion_rules.
@ -4744,7 +4787,6 @@ Author '{0}':
exclusion_pairs.append((field,pat)) exclusion_pairs.append((field,pat))
else: else:
continue continue
if exclusion_pairs: if exclusion_pairs:
for record in data_set: for record in data_set:
for exclusion_pair in exclusion_pairs: for exclusion_pair in exclusion_pairs:
@ -4786,6 +4828,8 @@ Author '{0}':
self.current_step += 1 self.current_step += 1
self.progress_string = description self.progress_string = description
self.progress_int = float((self.current_step-1)/self.total_steps) self.progress_int = float((self.current_step-1)/self.total_steps)
if not self.progress_int:
self.progress_int = 0.01
self.reporter(self.progress_int, self.progress_string) self.reporter(self.progress_int, self.progress_string)
if self.opts.cli_environment: if self.opts.cli_environment:
self.opts.log(u"%3.0f%% %s" % (self.progress_int*100, self.progress_string)) self.opts.log(u"%3.0f%% %s" % (self.progress_int*100, self.progress_string))