Catalog refactoring, wip

2025-12-05 12:45:02 -05:00 · 2012-09-01 06:04:42 -06:00 · 2012-09-01 06:04:42 -06:00 · 2b85633a87
commit 2b85633a87
parent 31308e5811
3 changed files with 224 additions and 178 deletions
--- a/src/calibre/gui2/catalog/catalog_epub_mobi.py
+++ b/src/calibre/gui2/catalog/catalog_epub_mobi.py
@ -11,7 +11,7 @@ import re, sys
 from functools import partial
 from calibre.ebooks.conversion.config import load_defaults
-from calibre.gui2 import gprefs, question_dialog
+from calibre.gui2 import gprefs, info_dialog, question_dialog
 from calibre.utils.icu import sort_key
 from catalog_epub_mobi_ui import Ui_Form
@ -75,7 +75,6 @@ class PluginWidget(QWidget,Ui_Form):
        # LineEditControls
        option_fields += zip(['exclude_genre'],['\[.+\]|\+'],['line_edit'])
        #option_fields += zip(['exclude_genre_results'],['excluded genres will appear here'],['line_edit'])
        # TextEditControls
        #option_fields += zip(['exclude_genre_results'],['excluded genres will appear here'],['text_edit'])
@ -172,7 +171,7 @@ class PluginWidget(QWidget,Ui_Form):
                if hit:
                    excluded_tags.append(hit.string)
            if excluded_tags:
-                results = ', '.join(excluded_tags)
+                results = ', '.join(sorted(excluded_tags))
        finally:
            if self.DEBUG:
                print(results)
@ -334,16 +333,21 @@ class PluginWidget(QWidget,Ui_Form):
        elif self.merge_after.isChecked():
            checked = 'after'
        include_hr = self.include_hr.isChecked()
-        opts_dict['merge_comments'] = "%s:%s:%s" % \
+        opts_dict['merge_comments_rule'] = "%s:%s:%s" % \
            (self.merge_source_field_name, checked, include_hr)
        opts_dict['header_note_source_field'] = self.header_note_source_field_name
        # Fix up exclude_genre regex if blank. Assume blank = no exclusions
        if opts_dict['exclude_genre'] == '':
            opts_dict['exclude_genre'] = 'a^'
        # Append the output profile
        try:
            opts_dict['output_profile'] = [load_defaults('page_setup')['output_profile']]
        except:
            opts_dict['output_profile'] = ['default']
        if self.DEBUG:
            print "opts_dict"
            for opt in sorted(opts_dict.keys(), key=sort_key):
--- a/src/calibre/library/catalogs/epub_mobi.py
+++ b/src/calibre/library/catalogs/epub_mobi.py
@ -120,9 +120,9 @@ class EPUB_MOBI(CatalogPlugin):
                          help=_("Custom field containing note text to insert in Description header.\n"
                          "Default: '%default'\n"
                          "Applies to: AZW3, ePub, MOBI output formats")),
-                   Option('--merge-comments',
+                   Option('--merge-comments-rule',
                          default='::',
-                          dest='merge_comments',
+                          dest='merge_comments_rule',
                          action = None,
                          help=_("#<custom field>:[before|after]:[True|False] specifying:\n"
                          " <custom field> Custom field containing notes to merge with Comments\n"
@ -182,8 +182,8 @@ class EPUB_MOBI(CatalogPlugin):
            else:
                op = "kindle"
-        opts.descriptionClip = 380 if op.endswith('dx') or 'kindle' not in op else 100
+        opts.description_clip = 380 if op.endswith('dx') or 'kindle' not in op else 100
-        opts.authorClip = 100 if op.endswith('dx') or 'kindle' not in op else 60
+        opts.author_clip = 100 if op.endswith('dx') or 'kindle' not in op else 60
        opts.output_profile = op
        opts.basename = "Catalog"
@ -198,11 +198,12 @@ class EPUB_MOBI(CatalogPlugin):
            (self.name,self.fmt,'for %s ' % opts.output_profile if opts.output_profile else '',
             'CLI' if opts.cli_environment else 'GUI'))
-        # If exclude_genre is blank, assume user wants all genre tags included
+        # If exclude_genre is blank, assume user wants all tags as genres
        if opts.exclude_genre.strip() == '':
-            opts.exclude_genre = '\[^.\]'
+            #opts.exclude_genre = '\[^.\]'
-            build_log.append(" converting empty exclude_genre to '\[^.\]'")
+            #build_log.append(" converting empty exclude_genre to '\[^.\]'")
-
+            opts.exclude_genre = 'a^'
            build_log.append(" converting empty exclude_genre to 'a^'")
        if opts.connected_device['is_device_connected'] and \
           opts.connected_device['kind'] == 'device':
            if opts.connected_device['serial']:
@ -304,10 +305,10 @@ class EPUB_MOBI(CatalogPlugin):
        keys.sort()
        build_log.append(" opts:")
        for key in keys:
-            if key in ['catalog_title','authorClip','connected_kindle','descriptionClip',
+            if key in ['catalog_title','author_clip','connected_kindle','description_clip',
                       'exclude_book_marker','exclude_genre','exclude_tags',
-                       'exclusion_rules',
+                       'exclusion_rules', 'fmt',
-                       'header_note_source_field','merge_comments',
+                       'header_note_source_field','merge_comments_rule',
                       'output_profile','prefix_rules','read_book_marker',
                       'search_text','sort_by','sort_descriptions_by_author','sync',
                       'thumb_width','wishlist_tag']:
@ -323,10 +324,7 @@ class EPUB_MOBI(CatalogPlugin):
        if opts.verbose:
            log.info(" Begin catalog source generation")
-        catalog.createDirectoryStructure()
+        catalog_source_built = catalog.build_sources()
        catalog.copyResources()
        catalog.calculateThumbnailSize()
        catalog_source_built = catalog.buildSources()
        if opts.verbose:
            if catalog_source_built:
@ -388,7 +386,7 @@ class EPUB_MOBI(CatalogPlugin):
            # Run ebook-convert
            from calibre.ebooks.conversion.plumber import Plumber
-            plumber = Plumber(os.path.join(catalog.catalogPath,
+            plumber = Plumber(os.path.join(catalog.catalog_path,
                            opts.basename + '.opf'), path_to_output, log, report_progress=notification,
                            abort_after_input_dump=False)
            plumber.merge_ui_recommendations(recommendations)
--- a/src/calibre/library/catalogs/epub_mobi_builder.py
+++ b/src/calibre/library/catalogs/epub_mobi_builder.py
@ -9,6 +9,7 @@ from xml.sax.saxutils import escape
 from calibre import (prepare_string_for_xml, strftime, force_unicode)
 from calibre.customize.conversion import DummyReporter
 from calibre.customize.ui import output_profiles
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, Tag, NavigableString
 from calibre.ebooks.chardet import substitute_entites
 from calibre.ptempfile import PersistentTemporaryDirectory
@ -32,7 +33,7 @@ class CatalogBuilder(object):
    Options managed in gui2.catalog.catalog_epub_mobi.py
    '''
-    DEBUG = True
+    DEBUG = False
    # A single number creates 'Last x days' only.
    # Multiple numbers create 'Last x days', 'x to y days ago' ...
@ -46,78 +47,21 @@ class CatalogBuilder(object):
    # basename              output file basename
    # creator               dc:creator in OPF metadata
    # description_clip       limits size of NCX descriptions (Kindle only)
-    # includeSources        Used in filter_excluded_tags to skip tags like '[SPL]'
+    # includeSources        Used in filter_excluded_genres to skip tags like '[SPL]'
    # notification          Used to check for cancel, report progress
    # stylesheet            CSS stylesheet
    # title                 dc:title in OPF metadata, NCX periodical
    # verbosity             level of diagnostic printout
-    def __init__(self, db, opts, plugin,
+    """ property decorators for attributes """
-                    report_progress=DummyReporter(),
+    if True:
                    stylesheet="content/stylesheet.css",
                    init_resources=True):
        ''' active database '''
        @property
        def db(self):
            return self.__db
        self.__db = db
        ''' opts passed from gui2.catalog.catalog_epub_mobi.py '''
        @property
        def opts(self):
            return self.__opts
        self.__opts = opts
        ''' catalog??? device??? '''
        @property
        def plugin(self):
            return self.__plugin
        self.__plugin = plugin
        ''' Progress Reporter for Jobs '''
        @property
        def reporter(self):
            return self.__reporter
        self.__reporter = report_progress
        ''' stylesheet to include with catalog '''
        @property
        def stylesheet(self):
            return self.__stylesheet
        self.__stylesheet = stylesheet
        # Initialize properties with dependents in _initialize()
        ''' directory to store cached thumbs '''
        @property
        def cache_dir(self):
            return self.__cache_dir
        self.__cache_dir = os.path.join(config_dir, 'caches', 'catalog')
        ''' temp dir to store generated catalog '''
        @property
        def catalog_path(self):
            return self.__catalog_path
        self.__catalog_path = PersistentTemporaryDirectory("_epub_mobi_catalog", prefix='')
        ''' True if generating for Kindle in MOBI format '''
        @property
        def generate_for_kindle(self):
            return self.__generate_for_kindle
        self.__generate_for_kindle = True if (opts.fmt == 'mobi' and
                                              opts.output_profile and
                                              opts.output_profile.startswith("kindle")) else False
        self._initialize(init_resources)
    def _initialize(self,init_resources):
        # continue with initialization
        ''' list of unique authors '''
        @property
        def authors(self):
            return self.__authors
-        self.__authors = None
+        @authors.setter
        def authors(self, val):
            self.__authors = val
        ''' dict of bookmarked books '''
        @property
@ -126,7 +70,6 @@ class CatalogBuilder(object):
        @bookmarked_books.setter
        def bookmarked_books(self, val):
            self.__bookmarked_books = val
        self.__bookmarked_books = None
        ''' list of bookmarked books, sorted by date read '''
        @property
@ -135,7 +78,6 @@ class CatalogBuilder(object):
        @bookmarked_books_by_date_read.setter
        def bookmarked_books_by_date_read(self, val):
            self.__bookmarked_books_by_date_read = val
        self.__bookmarked_books_by_date_read = None
        ''' list of books, sorted by author '''
        @property
@ -144,7 +86,6 @@ class CatalogBuilder(object):
        @books_by_author.setter
        def books_by_author(self, val):
            self.__books_by_author = val
        self.__books_by_author = None
        ''' list of books, grouped by date range (30 days) '''
        @property
@ -153,7 +94,6 @@ class CatalogBuilder(object):
        @books_by_date_range.setter
        def books_by_date_range(self, val):
            self.__books_by_date_range = val
        self.__books_by_date_range = None
        ''' list of books, by date added reverse (most recent first) '''
        @property
@ -162,7 +102,6 @@ class CatalogBuilder(object):
        @books_by_month.setter
        def books_by_month(self, val):
            self.__books_by_month = val
        self.__books_by_month = None
        ''' list of books in series '''
        @property
@ -171,7 +110,6 @@ class CatalogBuilder(object):
        @books_by_series.setter
        def books_by_series(self, val):
            self.__books_by_series = val
        self.__books_by_series = None
        ''' list of books, sorted by title '''
        @property
@ -180,22 +118,29 @@ class CatalogBuilder(object):
        @books_by_title.setter
        def books_by_title(self, val):
            self.__books_by_title = val
        self.__books_by_title = None
        ''' list of books in series, without series prefix '''
        @property
        def books_by_title_no_series_prefix(self):
-            return books_by_title_no_series_prefix.__prop
+            return self.__books_by_title_no_series_prefix
        @books_by_title_no_series_prefix.setter
        def books_by_title_no_series_prefix(self, val):
            self.__books_by_title_no_series_prefix = val
-        self.__books_by_title_no_series_prefix = None
+
        ''' directory to store cached thumbs '''
        @property
        def cache_dir(self):
            return self.__cache_dir
        ''' temp dir to store generated catalog '''
        @property
        def catalog_path(self):
            return self.__catalog_path
        ''' content dir in generated catalog '''
        @property
        def content_dir(self):
            return self.__content_dir
        self.__content_dir = os.path.join(self.catalog_path, "content")
        ''' track Job progress '''
        @property
@ -204,7 +149,11 @@ class CatalogBuilder(object):
        @current_step.setter
        def current_step(self, val):
            self.__current_step = val
-        self.__current_step = 0.0
+
        ''' active database '''
        @property
        def db(self):
            return self.__db
        ''' cumulative error messages to report at conclusion  '''
        @property
@ -213,21 +162,21 @@ class CatalogBuilder(object):
        @error.setter
        def error(self, val):
            self.__error = val
        self.__error = []
        ''' tags to exclude as genres '''
        @property
        def excluded_tags(self):
            return self.__excluded_tags
-        self.__excluded_tags = self.get_excluded_tags()
+
        ''' True if generating for Kindle in MOBI format '''
        @property
        def generate_for_kindle(self):
            return self.__generate_for_kindle
        ''' True if connected Kindle and generating for Kindle '''
        @property
        def generate_recently_read(self):
            return self.__generate_recently_read
        self.__generate_recently_read = True if (opts.generate_recently_added and
                                                 opts.connected_kindle and
                                                 self.generate_for_kindle) else False
        ''' list of dicts with books by genre '''
        @property
@ -236,7 +185,6 @@ class CatalogBuilder(object):
        @genres.setter
        def genres(self, val):
            self.__genres = val
        self.__genres = []
        ''' dict of enabled genre tags '''
        @property
@ -245,7 +193,6 @@ class CatalogBuilder(object):
        @genre_tags_dict.setter
        def genre_tags_dict(self, val):
            self.__genre_tags_dict = val
        self.__genre_tags_dict = None
        ''' Author, Title, Series sections '''
        @property
@ -254,7 +201,6 @@ class CatalogBuilder(object):
        @html_filelist_1.setter
        def html_filelist_1(self, val):
            self.__html_filelist_1 = val
        self.__html_filelist_1 = []
        ''' Date Added, Date Read '''
        @property
@ -263,15 +209,11 @@ class CatalogBuilder(object):
        @html_filelist_2.setter
        def html_filelist_2(self, val):
            self.__html_filelist_2 = val
        self.__html_filelist_2 = []
        ''' additional field to include before/after comments '''
        @property
        def merge_comments_rule(self):
            return self.__merge_comments_rule
        #f, p, hr = opts.merge_comments_rule.split(':')
        #self.__merge_comments_rule = {'field':f, 'position':p, 'hr':hr}
        self.__merge_comments_rule = dict(zip(['field','position','hr'],opts.merge_comments_rule.split(':')))
        ''' cumulative HTML for NCX file '''
        @property
@ -280,18 +222,16 @@ class CatalogBuilder(object):
        @ncx_soup.setter
        def ncx_soup(self, val):
            self.__ncx_soup = val
-        self.__ncx_soup = None
+
        ''' opts passed from gui2.catalog.catalog_epub_mobi.py '''
        @property
        def opts(self):
            return self.__opts
        ''' output_profile declares special symbols '''
        @property
        def output_profile(self):
            return self.__output_profile
        self.__output_profile = None
        from calibre.customize.ui import output_profiles
        for profile in output_profiles():
            if profile.short_name == opts.output_profile:
                self.__output_profile = profile
                break
        ''' playOrder value for building NCX '''
        @property
@ -300,7 +240,11 @@ class CatalogBuilder(object):
        @play_order.setter
        def play_order(self, val):
            self.__play_order = val
-        self.__play_order = 1
+
        ''' catalog??? device??? '''
        @property
        def plugin(self):
            return self.__plugin
        ''' dict of prefix rules '''
        @property
@ -309,7 +253,6 @@ class CatalogBuilder(object):
        @prefix_rules.setter
        def prefix_rules(self, val):
            self.__prefix_rules = val
        self.__prefix_rules = self.get_prefix_rules()
        ''' used with ProgressReporter() '''
        @property
@ -318,7 +261,6 @@ class CatalogBuilder(object):
        @progress_int.setter
        def progress_int(self, val):
            self.__progress_int = val
        self.__progress_int = 0.0
        ''' used with ProgressReporter() '''
        @property
@ -327,7 +269,16 @@ class CatalogBuilder(object):
        @progress_string.setter
        def progress_string(self, val):
            self.__progress_string = val
-        self.__progress_string = ''
+
        ''' Progress Reporter for Jobs '''
        @property
        def reporter(self):
            return self.__reporter
        ''' stylesheet to include with catalog '''
        @property
        def stylesheet(self):
            return self.__stylesheet
        ''' device-specific symbol (default empty star) '''
        @property
@ -369,7 +320,6 @@ class CatalogBuilder(object):
        @thumb_height.setter
        def thumb_height(self, val):
            self.__thumb_height = val
        self.__thumb_height = 0
        @property
        def thumb_width(self):
@ -377,7 +327,6 @@ class CatalogBuilder(object):
        @thumb_width.setter
        def thumb_width(self, val):
            self.__thumb_width = val
        self.__thumb_width = 0
        ''' list of generated thumbs '''
        @property
@ -386,27 +335,78 @@ class CatalogBuilder(object):
        @thumbs.setter
        def thumbs(self, val):
            self.__thumbs = val
        self.__thumbs = None
        ''' full path to thumbs archive '''
        @property
        def thumbs_path(self):
            return self.__thumbs_path
        self.__thumbs_path = os.path.join(self.cache_dir, "thumbs.zip")
        ''' used with ProgressReporter() '''
        @property
        def total_steps(self):
            return self.__total_steps
-        self.__total_steps = 6.0
+        @total_steps.setter
        def total_steps(self, val):
            self.__total_steps = val
        ''' switch controlling format of series books in Titles section '''
        @property
        def use_series_prefix_in_titles_section(self):
            return self.__use_series_prefix_in_titles_section
    def __init__(self, db, _opts, plugin,
                    report_progress=DummyReporter(),
                    stylesheet="content/stylesheet.css",
                    init_resources=True):
        self.__db = db
        self.__opts = _opts
        self.__plugin = plugin
        self.__reporter = report_progress
        self.__stylesheet = stylesheet
        self.__cache_dir = os.path.join(config_dir, 'caches', 'catalog')
        self.__catalog_path = PersistentTemporaryDirectory("_epub_mobi_catalog", prefix='')
        self.__generate_for_kindle = True if (_opts.fmt == 'mobi' and
                                              _opts.output_profile and
                                              _opts.output_profile.startswith("kindle")) else False
        self.__authors = None
        self.__bookmarked_books = None
        self.__bookmarked_books_by_date_read = None
        self.__books_by_author = None
        self.__books_by_date_range = None
        self.__books_by_month = None
        self.__books_by_series = None
        self.__books_by_title = None
        self.__books_by_title_no_series_prefix = None
        self.__content_dir = os.path.join(self.catalog_path, "content")
        self.__current_step = 0.0
        self.__error = []
        self.__excluded_tags = self.get_excluded_tags()
        self.__generate_recently_read = True if (_opts.generate_recently_added and
                                                 _opts.connected_kindle and
                                                 self.generate_for_kindle) else False
        self.__genres = []
        self.__genre_tags_dict = None
        self.__html_filelist_1 = []
        self.__html_filelist_2 = []
        self.__merge_comments_rule = dict(zip(['field','position','hr'],_opts.merge_comments_rule.split(':')))
        self.__ncx_soup = None
        self.__output_profile = None
        self.__output_profile = self.get_output_profile(_opts)
        self.__play_order = 1
        self.__prefix_rules = self.get_prefix_rules()
        self.__progress_int = 0.0
        self.__progress_string = ''
        self.__thumb_height = 0
        self.__thumb_width = 0
        self.__thumbs = None
        self.__thumbs_path = os.path.join(self.cache_dir, "thumbs.zip")
        self.__total_steps = 6.0
        self.__use_series_prefix_in_titles_section = False
        self.compute_total_steps()
        self.calculate_thumbnail_dimensions()
        self.confirm_thumbs_archive()
        self.load_section_templates()
        if init_resources:
@ -414,7 +414,7 @@ class CatalogBuilder(object):
    """ key() functions """
-    def kf_author_to_author_sort(self, author):
+    def _kf_author_to_author_sort(self, author):
        """ Compute author_sort value from author
        Tokenize author string, return capitalized string with last token first
@ -431,10 +431,11 @@ class CatalogBuilder(object):
            tokens[0] += ','
        return ' '.join(tokens).capitalize()
-    def kf_books_by_author_sorter_author(self, book):
+    def _kf_books_by_author_sorter_author(self, book):
        """ Generate book sort key with computed author_sort.
-        Generate a sort key of computed author_sort, title.
+        Generate a sort key of computed author_sort, title. Used to look for
        author_sort mismatches.
        Twiddle included to force series to sort after non-series books.
         'Smith, john Star Wars'
         'Smith, john ~Star Wars 0001.0000'
@ -446,25 +447,23 @@ class CatalogBuilder(object):
         (str): sort key
        """
        if not book['series']:
-            key = '%s %s' % (self.kf_author_to_author_sort(book['author']),
+            key = '%s %s' % (self._kf_author_to_author_sort(book['author']),
                                capitalize(book['title_sort']))
        else:
            index = book['series_index']
            integer = int(index)
            fraction = index-integer
            series_index = '%04d%s' % (integer, str('%0.4f' % fraction).lstrip('0'))
-            key = '%s ~%s %s' % (self.kf_author_to_author_sort(book['author']),
+            key = '%s ~%s %s' % (self._kf_author_to_author_sort(book['author']),
                                    self.generate_sort_title(book['series']),
                                    series_index)
        return key
-    def kf_books_by_author_sorter_author_sort(self, book):
+    def _kf_books_by_author_sorter_author_sort(self, book, longest_author_sort=60):
        """ Generate book sort key with supplied author_sort.
        Generate a sort key of author_sort, title.
-        Twiddle included to force series to sort after non-series books.
+        Bang, tilde included to force series to sort after non-series books.
         'Smith, john Star Wars'
         'Smith, john ~Star Wars 0001.0000'
        Args:
         book (dict): book metadata
@ -473,19 +472,20 @@ class CatalogBuilder(object):
         (str): sort key
        """
        if not book['series']:
-            key = '%s ~%s' % (capitalize(book['author_sort']),
+            fs = '{:<%d}!{!s}' % longest_author_sort
            key = fs.format(capitalize(book['author_sort']),
                            capitalize(book['title_sort']))
        else:
            index = book['series_index']
            integer = int(index)
            fraction = index-integer
            series_index = '%04d%s' % (integer, str('%0.4f' % fraction).lstrip('0'))
-            key = '%s %s %s' % (capitalize(book['author_sort']),
+            fs = '{:<%d}~{!s}{!s}' % longest_author_sort
            key = fs.format(capitalize(book['author_sort']),
                            self.generate_sort_title(book['series']),
                            series_index)
        return key
    """ Methods """
    def build_sources(self):
@ -557,7 +557,6 @@ class CatalogBuilder(object):
        self.write_ncx()
        return True
    '''
    def calculate_thumbnail_dimensions(self):
        """ Calculate thumb dimensions based on device DPI.
@ -587,9 +586,9 @@ class CatalogBuilder(object):
                    self.thumb_height = self.thumb_height/2
                break
        if self.opts.verbose:
            self.opts.log(" Thumbnails:")
            self.opts.log("  DPI = %d; thumbnail dimensions: %d x %d" % \
                            (x.dpi, self.thumb_width, self.thumb_height))
    '''
    def compute_total_steps(self):
        """ Calculate number of build steps to generate catalog.
@ -665,7 +664,6 @@ class CatalogBuilder(object):
                    self.opts.log.info('  existing thumb cache at %s, cached_thumb_width: %1.2f"' %
                                            (self.thumbs_path, float(cached_thumb_width)))
    def convert_html_entities(self, s):
        """ Convert string containing HTML entities to its unicode equivalent.
@ -854,11 +852,12 @@ class CatalogBuilder(object):
            cl_list[idx] = last_c
        if self.DEBUG and self.opts.verbose:
            print("     establish_equivalencies():")
            if key:
                for idx, item in enumerate(item_list):
-                    print("%s %s" % (cl_list[idx],item[sort_field]))
+                    print("      %s %s" % (cl_list[idx],item[sort_field]))
            else:
-                    print("%s %s" % (cl_list[0], item))
+                    print("      %s %s" % (cl_list[0], item))
        return cl_list
@ -883,9 +882,10 @@ class CatalogBuilder(object):
        """
        self.update_progress_full_step(_("Sorting database"))
        self.books_by_author = sorted(list(self.books_by_title), key=self.kf_books_by_author_sorter_author)
-        # Build the unique_authors set from existing data, test for author_sort mismatches
+        # First pass: Sort by author, test for author_sort mismatches
        self.books_by_author = sorted(list(self.books_by_title), key=self._kf_books_by_author_sorter_author)
        authors = [(record['author'], record['author_sort']) for record in self.books_by_author]
        current_author = authors[0]
        for (i,author) in enumerate(authors):
@ -920,8 +920,20 @@ Author '{0}':
                current_author = author
        # Second pass: Sort using sort_key to normalize accented letters
        # Determine the longest author_sort length before sorting
        asl = [i['author_sort'] for i in self.books_by_author]
        las = max(asl, key=len)
        self.books_by_author = sorted(self.books_by_author,
-                                    key=lambda x: sort_key(self.kf_books_by_author_sorter_author_sort(x)))
+            key=lambda x: sort_key(self._kf_books_by_author_sorter_author_sort(x, len(las))))
        if self.DEBUG and self.opts.verbose:
            tl = [i['title'] for i in self.books_by_author]
            lt = max(tl, key=len)
            fs = '{:<6}{:<%d} {:<%d} {!s}' % (len(lt),len(las))
            print(fs.format('','Title','Author','Series'))
            for i in self.books_by_author:
                print(fs.format('', i['title'],i['author_sort'],i['series']))
        # Build the unique_authors set from existing data
        authors = [(record['author'], capitalize(record['author_sort'])) for record in self.books_by_author]
@ -1029,7 +1041,7 @@ Author '{0}':
            if 'author_sort' in record and record['author_sort'].strip():
                this_title['author_sort'] = record['author_sort']
            else:
-                this_title['author_sort'] = self.kf_author_to_author_sort(this_title['author'])
+                this_title['author_sort'] = self._kf_author_to_author_sort(this_title['author'])
            if record['publisher']:
                this_title['publisher'] = re.sub('&', '&amp;', record['publisher'])
@ -1076,7 +1088,7 @@ Author '{0}':
            this_title['prefix'] = self.discover_prefix(record)
            if record['tags']:
-                this_title['tags'] = self.filter_excluded_tags(record['tags'],
+                this_title['tags'] = self.filter_excluded_genres(record['tags'],
                                        self.opts.exclude_genre)
            if record['formats']:
                formats = []
@ -1097,7 +1109,7 @@ Author '{0}':
                            notes = ' &middot; '.join(notes)
                    elif field_md['datatype'] == 'datetime':
                        notes = format_date(notes,'dd MMM yyyy')
-                    this_title['notes'] = {'source':field_md['name'],
+                    this_title['notes'] = {'source':field_md['name'],'content':notes}
            return this_title
@ -1143,7 +1155,7 @@ Author '{0}':
                self.opts.log.info(" %-40s %-40s" % ('title', 'title_sort'))
                for title in self.books_by_title:
                    self.opts.log.info((u" %-40s %-40s" % (title['title'][0:40],
-                                                            title['title_sort'][0:40])).decode('mac-roman'))
+                                                            title['title_sort'][0:40])).encode('utf-8'))
            return True
        else:
            error_msg = _("No books found to catalog.\nCheck 'Excluded books' criteria in E-book options.\n")
@ -1311,7 +1323,7 @@ Author '{0}':
            if tag == ' ':
                continue
-            normalized_tags.append(re.sub('\W','',ascii_text(tag)).lower())
+            normalized_tags.append(self.normalize_tag(tag))
            friendly_tags.append(tag)
        genre_tags_dict = dict(zip(friendly_tags,normalized_tags))
@ -1330,7 +1342,7 @@ Author '{0}':
        return genre_tags_dict
-    def filter_excluded_tags(self, tags, regex):
+    def filter_excluded_genres(self, tags, regex):
        """ Remove excluded tags from a tag list
        Run regex against list of tags, remove matching tags. Return filtered list.
@ -1352,7 +1364,7 @@ Author '{0}':
                else:
                    tag_list.append(tag)
        except:
-            self.opts.log.error("\tfilter_excluded_tags(): malformed --exclude-genre regex pattern: %s" % regex)
+            self.opts.log.error("\tfilter_excluded_genres(): malformed --exclude-genre regex pattern: %s" % regex)
            return tags
        return tag_list
@ -1490,8 +1502,6 @@ Author '{0}':
        # Establish initial letter equivalencies
        sort_equivalents = self.establish_equivalencies(self.books_by_author,key='author_sort')
        #for book in sorted(self.books_by_author, key = self.kf_books_by_author_sorter_author_sort):
        #for book in self.books_by_author:
        for idx, book in enumerate(self.books_by_author):
            book_count += 1
            if self.letter_or_symbol(sort_equivalents[idx]) != current_letter :
@ -1680,8 +1690,11 @@ Author '{0}':
        def _add_books_to_html_by_month(this_months_list, dtc):
            if len(this_months_list):
-
+                # Determine the longest author_sort_length before sorting
-                this_months_list = sorted(this_months_list, key=lambda x: sort_key(self.kf_books_by_author_sorter_author_sort)(x)))
+                asl = [i['author_sort'] for i in this_months_list]
                las = max(asl, key=len)
                this_months_list = sorted(this_months_list,
                    key=lambda x: sort_key(self._kf_books_by_author_sorter_author_sort(x, len(las))))
                # Create a new month anchor
                date_string = strftime(u'%B %Y', current_date.timetuple())
@ -1722,8 +1735,6 @@ Author '{0}':
                            pSeriesTag['class'] = "series_mobi"
                        if self.opts.generate_series:
                            aTag = Tag(soup,'a')
                            if self.letter_or_symbol(new_entry['series']) == self.SYMBOLS:
                            aTag['href'] = "%s.html#%s" % ('BySeries',self.generate_series_anchor(new_entry['series']))
                            aTag.insert(0, new_entry['series'])
                            pSeriesTag.insert(0, aTag)
@ -2740,7 +2751,7 @@ Author '{0}':
            for (i, tag) in enumerate(sorted(book.get('tags', []))):
                aTag = Tag(_soup,'a')
                if self.opts.generate_genres:
-                    aTag['href'] = "Genre_%s.html" % re.sub("\W","",ascii_text(tag).lower())
+                    aTag['href'] = "Genre_%s.html" % self.normalize_tag(tag)
                aTag.insert(0,escape(NavigableString(tag)))
                genresTag.insert(gtc, aTag)
                gtc += 1
@ -2852,6 +2863,7 @@ Author '{0}':
            newEmptyTag.insert(0,NavigableString('&nbsp;'))
            mt.replaceWith(newEmptyTag)
        return soup
    def generate_html_descriptions(self):
        """ Generate Description HTML for each book.
@ -2933,7 +2945,6 @@ Author '{0}':
        bodyTag.insert(1,divTag)
        return soup
    def generate_masthead_image(self, out_path):
        """ Generate a Kindle masthead image.
@ -4314,9 +4325,6 @@ Author '{0}':
                # process
                pass
        if self.DEBUG and self.opts.verbose:
            self.opts.log.info(" generate_thumbnail():")
        # Generate crc for current cover
        with open(title['cover'], 'rb') as f:
            data = f.read()
@ -4466,11 +4474,12 @@ Author '{0}':
        # Report excluded books
        if self.opts.verbose and excluded_tags:
            self.opts.log.info(" Excluded books:")
            data = self.db.get_data_as_dict(ids=self.opts.ids)
            for record in data:
                matched = list(set(record['tags']) & set(excluded_tags))
                if matched :
-                    self.opts.log.info("     - %s by %s (Exclusion rule Tags: '%s')" %
+                    self.opts.log.info("  - '%s' by %s (Exclusion rule Tags: '%s')" %
                        (record['title'], record['authors'][0], str(matched[0])))
        return excluded_tags
@ -4491,6 +4500,19 @@ Author '{0}':
            if self.genre_tags_dict[friendly_tag] == genre:
                return friendly_tag
    def get_output_profile(self, _opts):
        """ Look up the output profile selected in the build options.

        Walks the profiles produced by output_profiles() and picks the first
        one whose short_name equals _opts.output_profile.

        Input:
         _opts (object): build options object; its output_profile attribute
                         is compared against each profile's short_name

        Return:
         (profile): first matching output profile, or None when no profile
                    has a matching short_name
        """
        # Lazy scan: stops at the first match, exactly like an early return
        candidates = (candidate for candidate in output_profiles()
                      if candidate.short_name == _opts.output_profile)
        return next(candidates, None)
    def get_prefix_rules(self):
        """ Convert opts.prefix_rules to dict.
@ -4502,7 +4524,6 @@ Author '{0}':
        Return:
         (list): list of prefix_rules dicts
        """
        pr = []
        if self.opts.prefix_rules:
            try:
@ -4721,6 +4742,28 @@ Author '{0}':
        return merged
    def normalize_tag(self, tag):
        """ Generate an XHTML-legal anchor string from tag.

        The tag is passed through ascii_text(), lowercased and stripped of
        all whitespace. Every remaining non-word character (regex \\W) is
        replaced by the value self.generate_unicode_name() returns for it;
        word characters are kept unchanged.

        Args:
         tag (str): tag name possibly containing symbols

        Return:
         normalized (str): massaged tag with each non-word character
                           replaced by its generate_unicode_name() value
        """
        # Raw-string patterns: '\s' and '\W' in plain literals are invalid
        # escape sequences and trigger warnings on newer interpreters.
        non_word = re.compile(r'\W')
        massaged = re.sub(r'\s', '', ascii_text(tag).lower())

        if not non_word.search(massaged):
            # Only word characters left - nothing to substitute
            return massaged

        # Single join instead of repeated string concatenation
        return ''.join(
            self.generate_unicode_name(c) if non_word.search(c) else c
            for c in massaged)
    def process_exclusions(self, data_set):
        """ Filter data_set based on exclusion_rules.
@ -4744,7 +4787,6 @@ Author '{0}':
                exclusion_pairs.append((field,pat))
            else:
                continue
        if exclusion_pairs:
            for record in data_set:
                for exclusion_pair in exclusion_pairs:
@ -4786,6 +4828,8 @@ Author '{0}':
        self.current_step += 1
        self.progress_string = description
        self.progress_int = float((self.current_step-1)/self.total_steps)
        if not self.progress_int:
            self.progress_int = 0.01
        self.reporter(self.progress_int, self.progress_string)
        if self.opts.cli_environment:
            self.opts.log(u"%3.0f%% %s" % (self.progress_int*100, self.progress_string))