From 2742bd7e293d5d0c6814da8bb7a6ff74b80a6e4b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 11 Sep 2019 10:23:17 +0530 Subject: [PATCH] Dont use BS to generate NCX files for ebook catalogs It is very slow and buggy --- .../library/catalogs/epub_mobi_builder.py | 665 ++++-------------- 1 file changed, 136 insertions(+), 529 deletions(-) diff --git a/src/calibre/library/catalogs/epub_mobi_builder.py b/src/calibre/library/catalogs/epub_mobi_builder.py index e2a89e65d4..d6daa1b319 100644 --- a/src/calibre/library/catalogs/epub_mobi_builder.py +++ b/src/calibre/library/catalogs/epub_mobi_builder.py @@ -23,9 +23,7 @@ from calibre import ( from calibre.constants import cache_dir, isosx from calibre.customize.conversion import DummyReporter from calibre.customize.ui import output_profiles -from calibre.ebooks.BeautifulSoup import ( - BeautifulSoup, BeautifulStoneSoup, NavigableString, prettify -) +from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, prettify from calibre.ebooks.chardet import substitute_entites from calibre.ebooks.metadata import author_to_author_sort from calibre.ebooks.oeb.polish.pretty import pretty_opf, pretty_xml_tree @@ -53,7 +51,7 @@ def makeelement(tag_name, parent, **attrs): ans = parent.makeelement(tag_name) for k, v in attrs.items(): k = k.replace('_', '-').rstrip('-') - ans.set(k, v) + ans.set(k, unicode_type(v)) parent.append(ans) ans.tail = '\n' return ans @@ -190,7 +188,7 @@ class CatalogBuilder(object): self.individual_authors = None self.merge_comments_rule = dict(zip(['field', 'position', 'hr'], _opts.merge_comments_rule.split(':'))) - self.ncx_soup = None + self.ncx_root = None self.output_profile = self.get_output_profile(_opts) self.play_order = 1 self.prefix_rules = self.get_prefix_rules() @@ -2988,70 +2986,69 @@ class CatalogBuilder(object): play_order (int) Outputs: - ncx_soup (file): NCX foundation + ncx_root (file): NCX foundation """ self.update_progress_full_step(_("NCX header")) header = ''' - + ''' - soup = BeautifulStoneSoup(header) - - ncx = soup.find('ncx') - navMapTag = soup.new_tag('navMap') + root = self.ncx_root = etree.fromstring(header) + navMapTag = root[0] if self.generate_for_kindle_mobi: # Build a top-level navPoint for Kindle periodicals - navPointTag = soup.new_tag('navPoint') - navPointTag['class'] = "periodical" - navPointTag['id'] = "title" - navPointTag['playOrder'] = self.play_order + navPointTag = makeelement('navPoint', navMapTag, class_='periodical', id='title', playOrder=self.play_order) self.play_order += 1 - navLabelTag = soup.new_tag('navLabel') - textTag = soup.new_tag('text') - textTag.insert(0, NavigableString(self.opts.catalog_title)) - navLabelTag.insert(0, textTag) - navPointTag.insert(0, navLabelTag) + makeelement('{http://calibre.kovidgoyal.net/2009/metadata}meta-img', navPointTag, id="mastheadImage", src="images/mastheadImage.gif") + navLabelTag = makeelement('navLabel', navPointTag) + makeelement('text', navLabelTag).text = self.opts.catalog_title if self.opts.generate_authors: - contentTag = soup.new_tag('content') - contentTag['src'] = "content/ByAlphaAuthor.html" - navPointTag.insert(1, contentTag) + makeelement('content', navPointTag, src="content/ByAlphaAuthor.html") elif self.opts.generate_titles: - contentTag = soup.new_tag('content') - contentTag['src'] = "content/ByAlphaTitle.html" - navPointTag.insert(1, contentTag) + makeelement('content', navPointTag, src="content/ByAlphaTitle.html") elif self.opts.generate_series: - contentTag = soup.new_tag('content') - contentTag['src'] = "content/BySeries.html" - navPointTag.insert(1, contentTag) + makeelement('content', navPointTag, src="content/BySeries.html") elif self.opts.generate_genres: - contentTag = soup.new_tag('content') - # contentTag['src'] = "content/ByGenres.html" - contentTag['src'] = "%s" % self.genres[0]['file'] - navPointTag.insert(1, contentTag) + makeelement('content', navPointTag, src="%s" % self.genres[0]['file']) elif self.opts.generate_recently_added: - contentTag = soup.new_tag('content') - contentTag['src'] = "content/ByDateAdded.html" - navPointTag.insert(1, contentTag) + makeelement('content', navPointTag, src="content/ByDateAdded.html") elif self.opts.generate_descriptions: # Descriptions only - contentTag = soup.new_tag('content') - contentTag['src'] = "content/book_%d.html" % int(self.books_by_description[0]['id']) - navPointTag.insert(1, contentTag) + makeelement('content', navPointTag, src="content/book_%d.html" % int(self.books_by_description[0]['id'])) - if self.generate_for_kindle_mobi: - cmiTag = soup.new_tag('calibre:meta-img') - cmiTag['id'] = "mastheadImage" - cmiTag['src'] = "images/mastheadImage.gif" - navPointTag.insert(2, cmiTag) - navMapTag.insert(0, navPointTag) + def generate_ncx_section_header(self, section_id, section_header, content_src): + root = self.ncx_root + if self.generate_for_kindle_mobi: + body = root.xpath('//*[local-name()="navPoint"]')[0] + else: + body = root.xpath('//*[local-name()="navMap"]')[0] + navPointTag = makeelement('navPoint', body, id=section_id, playOrder=self.play_order) + if self.generate_for_kindle_mobi: + navPointTag.set('class', 'section') + self.play_order += 1 + navLabelTag = makeelement('navLabel', navPointTag) + textTag = makeelement('text', navLabelTag) + textTag.text = section_header + makeelement('content', navPointTag, src=content_src) + return navPointTag - ncx.insert(0, navMapTag) - self.ncx_soup = soup + def generate_ncx_subsection(self, navPointTag, section_id, section_text, content_src, cm_tags={}): + navPointVolumeTag = makeelement('navPoint', navPointTag, id=section_id, playOrder=self.play_order) + if self.generate_for_kindle_mobi: + navPointVolumeTag.set('class', "article") + self.play_order += 1 + navLabelTag = makeelement("navLabel", navPointVolumeTag) + makeelement("text", navLabelTag).text = section_text + makeelement("content", navPointVolumeTag, src=content_src) + + if self.generate_for_kindle_mobi: + for name, text in cm_tags.items(): + makeelement('{http://calibre.kovidgoyal.net/2009/metadata}meta', navPointVolumeTag, name=name).text = text def generate_ncx_descriptions(self, tocTitle): """ Add Descriptions to the basic NCX file. @@ -3067,62 +3064,31 @@ class CatalogBuilder(object): Outputs: ncx_soup (file): updated """ + section_header = '%s [%d]' % (tocTitle, len(self.books_by_description)) + if self.generate_for_kindle_mobi: + section_header = tocTitle + navPointTag = self.generate_ncx_section_header('bydescription-ID', section_header, "content/book_%d.html" % int(self.books_by_description[0]['id'])) self.update_progress_full_step(_("NCX for Descriptions")) # --- Construct the 'Descriptions' section --- - ncx_soup = self.ncx_soup - if self.generate_for_kindle_mobi: - body = ncx_soup.find("navPoint") - else: - body = ncx_soup.find('navMap') - btc = len(body.contents) - # Add the section navPoint - navPointTag = ncx_soup.new_tag('navPoint') - if self.generate_for_kindle_mobi: - navPointTag['class'] = "section" - navPointTag['id'] = "bydescription-ID" - navPointTag['playOrder'] = self.play_order - self.play_order += 1 - navLabelTag = ncx_soup.new_tag('navLabel') - textTag = ncx_soup.new_tag('text') - section_header = '%s [%d]' % (tocTitle, len(self.books_by_description)) - if self.generate_for_kindle_mobi: - section_header = tocTitle - textTag.insert(0, NavigableString(section_header)) - navLabelTag.insert(0, textTag) - nptc = 0 - navPointTag.insert(nptc, navLabelTag) - nptc += 1 - contentTag = ncx_soup.new_tag("content") - contentTag['src'] = "content/book_%d.html" % int(self.books_by_description[0]['id']) - navPointTag.insert(nptc, contentTag) - nptc += 1 - # Loop over the titles for book in self.books_by_description: - navPointVolumeTag = ncx_soup.new_tag('navPoint') - if self.generate_for_kindle_mobi: - navPointVolumeTag['class'] = "article" - navPointVolumeTag['id'] = "book%dID" % int(book['id']) - navPointVolumeTag['playOrder'] = self.play_order - self.play_order += 1 - navLabelTag = ncx_soup.new_tag("navLabel") - textTag = ncx_soup.new_tag("text") + sec_id = "book%dID" % int(book['id']) if book['series']: series_index = unicode_type(book['series_index']) if series_index.endswith('.0'): series_index = series_index[:-2] if self.generate_for_kindle_mobi: # Don't include Author for Kindle - textTag.insert(0, NavigableString(self.format_ncx_text('%s (%s [%s])' % - (book['title'], book['series'], series_index), dest='title'))) + sec_text = self.format_ncx_text('%s (%s [%s])' % + (book['title'], book['series'], series_index), dest='title') else: # Include Author for non-Kindle - textTag.insert(0, NavigableString(self.format_ncx_text('%s (%s [%s]) · %s ' % - (book['title'], book['series'], series_index, book['author']), dest='title'))) + sec_text = self.format_ncx_text('%s (%s [%s]) · %s ' % + (book['title'], book['series'], series_index, book['author']), dest='title') else: if self.generate_for_kindle_mobi: # Don't include Author for Kindle @@ -3135,50 +3101,29 @@ class CatalogBuilder(object): title_str += ' %s%s' % (dot_string,empty_dots) ''' title_str += '*' - textTag.insert(0, NavigableString(title_str)) + sec_text = title_str else: # Include Author for non-Kindle - textTag.insert(0, NavigableString(self.format_ncx_text('%s · %s' % - (book['title'], book['author']), dest='title'))) - navLabelTag.insert(0, textTag) - navPointVolumeTag.insert(0, navLabelTag) + sec_text = self.format_ncx_text('%s · %s' % + (book['title'], book['author']), dest='title') - contentTag = ncx_soup.new_tag("content") - contentTag['src'] = "content/book_%d.html#book%d" % (int(book['id']), int(book['id'])) - navPointVolumeTag.insert(1, contentTag) + content_src="content/book_%d.html#book%d" % (int(book['id']), int(book['id'])) + cm_tags = {} - if self.generate_for_kindle_mobi: - # Add the author tag - cmTag = ncx_soup.new_tag('calibre:meta') - cmTag['name'] = "author" + if book['date']: + navStr = '%s | %s' % (self.format_ncx_text(book['author'], dest='author'), + book['date'].split()[1]) + else: + navStr = '%s' % (self.format_ncx_text(book['author'], dest='author')) - if book['date']: - navStr = '%s | %s' % (self.format_ncx_text(book['author'], dest='author'), - book['date'].split()[1]) - else: - navStr = '%s' % (self.format_ncx_text(book['author'], dest='author')) + if 'tags' in book and len(book['tags']): + navStr = self.format_ncx_text(navStr + ' | ' + ' · '.join(sorted(book['tags'])), dest='author') + cm_tags['author'] = navStr - if 'tags' in book and len(book['tags']): - navStr = self.format_ncx_text(navStr + ' | ' + ' · '.join(sorted(book['tags'])), dest='author') - cmTag.insert(0, NavigableString(navStr)) - navPointVolumeTag.insert(2, cmTag) - - # Add the description tag - if book['short_description']: - cmTag = ncx_soup.new_tag('calibre:meta') - cmTag['name'] = "description" - cmTag.insert(0, NavigableString(self.format_ncx_text(book['short_description'], dest='description'))) - navPointVolumeTag.insert(3, cmTag) - - # Add this volume to the section tag - navPointTag.insert(nptc, navPointVolumeTag) - nptc += 1 - - # Add this section to the body - body.insert(btc, navPointTag) - btc += 1 - - self.ncx_soup = ncx_soup + # Add the description tag + if book['short_description']: + cm_tags['description'] = self.format_ncx_text(book['short_description'], dest='description') + self.generate_ncx_subsection(navPointTag, sec_id, sec_text, content_src, cm_tags) def generate_ncx_by_series(self, tocTitle): """ Add Series to the basic NCX file. @@ -3202,35 +3147,12 @@ class CatalogBuilder(object): current_series_list = self.format_ncx_text(current_series_list, dest="description") series_by_letter.append(current_series_list) - ncx_soup = self.ncx_soup - output = "BySeries" - if self.generate_for_kindle_mobi: - body = ncx_soup.find("navPoint") - else: - body = ncx_soup.find('navMap') - btc = len(body.contents) - # --- Construct the 'Books By Series' section --- - navPointTag = ncx_soup.new_tag('navPoint') - if self.generate_for_kindle_mobi: - navPointTag['class'] = "section" - navPointTag['id'] = "byseries-ID" - navPointTag['playOrder'] = self.play_order - self.play_order += 1 - navLabelTag = ncx_soup.new_tag('navLabel') - textTag = ncx_soup.new_tag('text') section_header = '%s [%d]' % (tocTitle, len(self.all_series)) if self.generate_for_kindle_mobi: section_header = tocTitle - textTag.insert(0, NavigableString(section_header)) - navLabelTag.insert(0, textTag) - nptc = 0 - navPointTag.insert(nptc, navLabelTag) - nptc += 1 - contentTag = ncx_soup.new_tag("content") - contentTag['src'] = "content/%s.html#section_start" % (output) - navPointTag.insert(nptc, contentTag) - nptc += 1 + output = "BySeries" + navPointTag = self.generate_ncx_section_header('byseries-ID', section_header, "content/%s.html#section_start" % (output)) series_by_letter = [] # Establish initial letter equivalencies @@ -3271,45 +3193,19 @@ class CatalogBuilder(object): # Add *article* entries for each populated series title letter for (i, books) in enumerate(series_by_letter): - navPointByLetterTag = ncx_soup.new_tag('navPoint') - if self.generate_for_kindle_mobi: - navPointByLetterTag['class'] = "article" - navPointByLetterTag['id'] = "%sSeries-ID" % (title_letters[i].upper()) - navPointTag['playOrder'] = self.play_order - self.play_order += 1 - navLabelTag = ncx_soup.new_tag('navLabel') - textTag = ncx_soup.new_tag('text') + sec_id = "%sSeries-ID" % (title_letters[i].upper()) if len(title_letters[i]) > 1: - fmt_string = _(u"Series beginning with %s") + fmt_string = _("Series beginning with %s") else: - fmt_string = _(u"Series beginning with '%s'") - textTag.insert(0, NavigableString(fmt_string % - (title_letters[i] if len(title_letters[i]) > 1 else title_letters[i]))) - navLabelTag.insert(0, textTag) - navPointByLetterTag.insert(0, navLabelTag) - contentTag = ncx_soup.new_tag('content') - # contentTag['src'] = "content/%s.html#%s_series" % (output, title_letters[i]) + fmt_string = _("Series beginning with '%s'") + sec_text = fmt_string % (title_letters[i] if len(title_letters[i]) > 1 else title_letters[i]) if title_letters[i] == self.SYMBOLS: - contentTag['src'] = "content/%s.html#%s_series" % (output, self.SYMBOLS) + content_src = "content/%s.html#%s_series" % (output, self.SYMBOLS) else: - contentTag['src'] = "content/%s.html#%s_series" % (output, self.generate_unicode_name(title_letters[i])) + content_src = "content/%s.html#%s_series" % (output, self.generate_unicode_name(title_letters[i])) - navPointByLetterTag.insert(1, contentTag) - - if self.generate_for_kindle_mobi: - cmTag = ncx_soup.new_tag('calibre:meta') - cmTag['name'] = "description" - cmTag.insert(0, NavigableString(self.format_ncx_text(books, dest='description'))) - navPointByLetterTag.insert(2, cmTag) - - navPointTag.insert(nptc, navPointByLetterTag) - nptc += 1 - - # Add this section to the body - body.insert(btc, navPointTag) - btc += 1 - - self.ncx_soup = ncx_soup + cm_tags = {'description': self.format_ncx_text(books, dest='description')} + self.generate_ncx_subsection(navPointTag, sec_id, sec_text, content_src, cm_tags) def generate_ncx_by_title(self, tocTitle): """ Add Titles to the basic NCX file. @@ -3333,35 +3229,12 @@ class CatalogBuilder(object): current_book_list = self.format_ncx_text(current_book_list, dest="description") books_by_letter.append(current_book_list) - ncx_soup = self.ncx_soup - output = "ByAlphaTitle" - if self.generate_for_kindle_mobi: - body = ncx_soup.find("navPoint") - else: - body = ncx_soup.find('navMap') - btc = len(body.contents) - # --- Construct the 'Books By Title' section --- - navPointTag = ncx_soup.new_tag('navPoint') - if self.generate_for_kindle_mobi: - navPointTag['class'] = "section" - navPointTag['id'] = "byalphatitle-ID" - navPointTag['playOrder'] = self.play_order - self.play_order += 1 - navLabelTag = ncx_soup.new_tag('navLabel') - textTag = ncx_soup.new_tag('text') section_header = '%s [%d]' % (tocTitle, len(self.books_by_title)) if self.generate_for_kindle_mobi: section_header = tocTitle - textTag.insert(0, NavigableString(section_header)) - navLabelTag.insert(0, textTag) - nptc = 0 - navPointTag.insert(nptc, navLabelTag) - nptc += 1 - contentTag = ncx_soup.new_tag("content") - contentTag['src'] = "content/%s.html#section_start" % (output) - navPointTag.insert(nptc, contentTag) - nptc += 1 + output = "ByAlphaTitle" + navPointTag = self.generate_ncx_section_header("byalphatitle-ID", section_header, "content/%s.html#section_start" % (output)) books_by_letter = [] @@ -3404,43 +3277,19 @@ class CatalogBuilder(object): # Add *article* entries for each populated title letter for (i, books) in enumerate(books_by_letter): - navPointByLetterTag = ncx_soup.new_tag('navPoint') - if self.generate_for_kindle_mobi: - navPointByLetterTag['class'] = "article" - navPointByLetterTag['id'] = "%sTitles-ID" % (title_letters[i].upper()) - navPointTag['playOrder'] = self.play_order - self.play_order += 1 - navLabelTag = ncx_soup.new_tag('navLabel') - textTag = ncx_soup.new_tag('text') + sec_id = "%sTitles-ID" % (title_letters[i].upper()) if len(title_letters[i]) > 1: fmt_string = _(u"Titles beginning with %s") else: fmt_string = _(u"Titles beginning with '%s'") - textTag.insert(0, NavigableString(fmt_string % - (title_letters[i] if len(title_letters[i]) > 1 else title_letters[i]))) - navLabelTag.insert(0, textTag) - navPointByLetterTag.insert(0, navLabelTag) - contentTag = ncx_soup.new_tag('content') + sec_text = fmt_string % (title_letters[i] if len(title_letters[i]) > 1 else title_letters[i]) if title_letters[i] == self.SYMBOLS: - contentTag['src'] = "content/%s.html#%s_titles" % (output, self.SYMBOLS) + content_src = "content/%s.html#%s_titles" % (output, self.SYMBOLS) else: - contentTag['src'] = "content/%s.html#%s_titles" % (output, self.generate_unicode_name(title_letters[i])) - navPointByLetterTag.insert(1, contentTag) + content_src = "content/%s.html#%s_titles" % (output, self.generate_unicode_name(title_letters[i])) - if self.generate_for_kindle_mobi: - cmTag = ncx_soup.new_tag('calibre:meta') - cmTag['name'] = "description" - cmTag.insert(0, NavigableString(self.format_ncx_text(books, dest='description'))) - navPointByLetterTag.insert(2, cmTag) - - navPointTag.insert(nptc, navPointByLetterTag) - nptc += 1 - - # Add this section to the body - body.insert(btc, navPointTag) - btc += 1 - - self.ncx_soup = ncx_soup + cm_tags = {'description': self.format_ncx_text(books, dest='description')} + self.generate_ncx_subsection(navPointTag, sec_id, sec_text, content_src, cm_tags) def generate_ncx_by_author(self, tocTitle): """ Add Authors to the basic NCX file. @@ -3464,37 +3313,15 @@ class CatalogBuilder(object): current_author_list = self.format_ncx_text(current_author_list, dest="description") master_author_list.append((current_author_list, current_letter)) - ncx_soup = self.ncx_soup HTML_file = "content/ByAlphaAuthor.html" - if self.generate_for_kindle_mobi: - body = ncx_soup.find("navPoint") - else: - body = ncx_soup.find('navMap') - btc = len(body.contents) # --- Construct the 'Books By Author' *section* --- - navPointTag = ncx_soup.new_tag('navPoint') - if self.generate_for_kindle_mobi: - navPointTag['class'] = "section" file_ID = "%s" % tocTitle.lower() file_ID = file_ID.replace(" ", "") - navPointTag['id'] = "%s-ID" % file_ID - navPointTag['playOrder'] = self.play_order - self.play_order += 1 - navLabelTag = ncx_soup.new_tag('navLabel') - textTag = ncx_soup.new_tag('text') section_header = '%s [%d]' % (tocTitle, len(self.individual_authors)) if self.generate_for_kindle_mobi: section_header = tocTitle - textTag.insert(0, NavigableString(section_header)) - navLabelTag.insert(0, textTag) - nptc = 0 - navPointTag.insert(nptc, navLabelTag) - nptc += 1 - contentTag = ncx_soup.new_tag("content") - contentTag['src'] = "%s#section_start" % HTML_file - navPointTag.insert(nptc, contentTag) - nptc += 1 + navPointTag = self.generate_ncx_section_header("%s-ID" % file_ID, section_header, "%s#section_start" % HTML_file) # Create an NCX article entry for each populated author index letter # Loop over the sorted_authors list, find start of each letter, @@ -3527,42 +3354,19 @@ class CatalogBuilder(object): # Add *article* entries for each populated author initial letter # master_author_list{}: [0]:author list [1]:Initial letter for authors_by_letter in master_author_list: - navPointByLetterTag = ncx_soup.new_tag('navPoint') - if self.generate_for_kindle_mobi: - navPointByLetterTag['class'] = "article" - navPointByLetterTag['id'] = "%sauthors-ID" % (authors_by_letter[1]) - navPointTag['playOrder'] = self.play_order - self.play_order += 1 - navLabelTag = ncx_soup.new_tag('navLabel') - textTag = ncx_soup.new_tag('text') + sec_id = "%sauthors-ID" % (authors_by_letter[1]) if authors_by_letter[1] == self.SYMBOLS: fmt_string = _(u"Authors beginning with %s") else: fmt_string = _(u"Authors beginning with '%s'") - textTag.insert(0, NavigableString(fmt_string % authors_by_letter[1])) - navLabelTag.insert(0, textTag) - navPointByLetterTag.insert(0, navLabelTag) - contentTag = ncx_soup.new_tag('content') + sec_text = fmt_string % authors_by_letter[1] if authors_by_letter[1] == self.SYMBOLS: - contentTag['src'] = "%s#%s_authors" % (HTML_file, authors_by_letter[1]) + content_src = "%s#%s_authors" % (HTML_file, authors_by_letter[1]) else: - contentTag['src'] = "%s#%s_authors" % (HTML_file, self.generate_unicode_name(authors_by_letter[1])) - navPointByLetterTag.insert(1, contentTag) + content_src = "%s#%s_authors" % (HTML_file, self.generate_unicode_name(authors_by_letter[1])) - if self.generate_for_kindle_mobi: - cmTag = ncx_soup.new_tag('calibre:meta') - cmTag['name'] = "description" - cmTag.insert(0, NavigableString(authors_by_letter[0])) - navPointByLetterTag.insert(2, cmTag) - - navPointTag.insert(nptc, navPointByLetterTag) - nptc += 1 - - # Add this section to the body - body.insert(btc, navPointTag) - btc += 1 - - self.ncx_soup = ncx_soup + cm_tags = {'description': authors_by_letter[0]} + self.generate_ncx_subsection(navPointTag, sec_id, sec_text, content_src, cm_tags) def generate_ncx_by_date_added(self, tocTitle): """ Add Recently Added to the basic NCX file. @@ -3593,34 +3397,12 @@ class CatalogBuilder(object): current_titles_list = self.format_ncx_text(current_titles_list, dest='description') master_date_range_list.append((current_titles_list, date_range, book_count)) - ncx_soup = self.ncx_soup HTML_file = "content/ByDateAdded.html" - if self.generate_for_kindle_mobi: - body = ncx_soup.find("navPoint") - else: - body = ncx_soup.find('navMap') - btc = len(body.contents) # --- Construct the 'Recently Added' *section* --- - navPointTag = ncx_soup.new_tag('navPoint') - if self.generate_for_kindle_mobi: - navPointTag['class'] = "section" file_ID = "%s" % tocTitle.lower() file_ID = file_ID.replace(" ", "") - navPointTag['id'] = "%s-ID" % file_ID - navPointTag['playOrder'] = self.play_order - self.play_order += 1 - navLabelTag = ncx_soup.new_tag('navLabel') - textTag = ncx_soup.new_tag('text') - textTag.insert(0, NavigableString('%s' % tocTitle)) - navLabelTag.insert(0, textTag) - nptc = 0 - navPointTag.insert(nptc, navLabelTag) - nptc += 1 - contentTag = ncx_soup.new_tag("content") - contentTag['src'] = "%s#section_start" % HTML_file - navPointTag.insert(nptc, contentTag) - nptc += 1 + navPointTag = self.generate_ncx_section_header("%s-ID" % file_ID, tocTitle, "%s#section_start" % HTML_file) # Create an NCX article entry for each date range current_titles_list = [] @@ -3647,38 +3429,14 @@ class CatalogBuilder(object): # Add *article* entries for each populated date range # master_date_range_list{}: [0]:titles list [1]:datestr for books_by_date_range in master_date_range_list: - navPointByDateRangeTag = ncx_soup.new_tag('navPoint') - if self.generate_for_kindle_mobi: - navPointByDateRangeTag['class'] = "article" - navPointByDateRangeTag['id'] = "%s-ID" % books_by_date_range[1].replace(' ', '') - navPointTag['playOrder'] = self.play_order - self.play_order += 1 - navLabelTag = ncx_soup.new_tag('navLabel') - textTag = ncx_soup.new_tag('text') - textTag.insert(0, NavigableString(books_by_date_range[1])) - navLabelTag.insert(0, textTag) - navPointByDateRangeTag.insert(0, navLabelTag) - contentTag = ncx_soup.new_tag('content') - contentTag['src'] = "%s#bda_%s" % (HTML_file, + sec_id = "%s-ID" % books_by_date_range[1].replace(' ', '') + sec_text = books_by_date_range[1] + content_src = "%s#bda_%s" % (HTML_file, books_by_date_range[1].replace(' ', '')) - - navPointByDateRangeTag.insert(1, contentTag) - - if self.generate_for_kindle_mobi: - cmTag = ncx_soup.new_tag('calibre:meta') - cmTag['name'] = "description" - cmTag.insert(0, NavigableString(books_by_date_range[0])) - navPointByDateRangeTag.insert(2, cmTag) - - cmTag = ncx_soup.new_tag('calibre:meta') - cmTag['name'] = "author" - navStr = '%d titles' % books_by_date_range[2] if books_by_date_range[2] > 1 else \ - '%d title' % books_by_date_range[2] - cmTag.insert(0, NavigableString(navStr)) - navPointByDateRangeTag.insert(3, cmTag) - - navPointTag.insert(nptc, navPointByDateRangeTag) - nptc += 1 + navStr = '%d titles' % books_by_date_range[2] if books_by_date_range[2] > 1 else \ + '%d title' % books_by_date_range[2] + cm_tags = {'description': books_by_date_range[0], 'author': navStr} + self.generate_ncx_subsection(navPointTag, sec_id, sec_text, content_src, cm_tags) # Create an NCX article entry for each populated month # Loop over the booksByDate list, find start of each month, @@ -3707,43 +3465,14 @@ class CatalogBuilder(object): # master_months_list{}: [0]:titles list [1]:date for books_by_month in master_month_list: datestr = strftime('%B %Y', books_by_month[1].timetuple()) - navPointByMonthTag = ncx_soup.new_tag('navPoint') - if self.generate_for_kindle_mobi: - navPointByMonthTag['class'] = "article" - navPointByMonthTag['id'] = "bda_%s-%s-ID" % (books_by_month[1].year, books_by_month[1].month) - navPointTag['playOrder'] = self.play_order - self.play_order += 1 - navLabelTag = ncx_soup.new_tag('navLabel') - textTag = ncx_soup.new_tag('text') - textTag.insert(0, NavigableString(datestr)) - navLabelTag.insert(0, textTag) - navPointByMonthTag.insert(0, navLabelTag) - contentTag = ncx_soup.new_tag('content') - contentTag['src'] = "%s#bda_%s-%s" % (HTML_file, + sec_id = "bda_%s-%s-ID" % (books_by_month[1].year, books_by_month[1].month) + sec_text = datestr + content_src = "%s#bda_%s-%s" % (HTML_file, books_by_month[1].year, books_by_month[1].month) - - navPointByMonthTag.insert(1, contentTag) - - if self.generate_for_kindle_mobi: - cmTag = ncx_soup.new_tag('calibre:meta') - cmTag['name'] = "description" - cmTag.insert(0, NavigableString(books_by_month[0])) - navPointByMonthTag.insert(2, cmTag) - - cmTag = ncx_soup.new_tag('calibre:meta') - cmTag['name'] = "author" - navStr = '%d titles' % books_by_month[2] if books_by_month[2] > 1 else \ - '%d title' % books_by_month[2] - cmTag.insert(0, NavigableString(navStr)) - navPointByMonthTag.insert(3, cmTag) - - navPointTag.insert(nptc, navPointByMonthTag) - nptc += 1 - - # Add this section to the body - body.insert(btc, navPointTag) - btc += 1 - self.ncx_soup = ncx_soup + navStr = '%d titles' % books_by_month[2] if books_by_month[2] > 1 else \ + '%d title' % books_by_month[2] + cm_tags = {'description': books_by_month[0], 'author': navStr} + self.generate_ncx_subsection(navPointTag, sec_id, sec_text, content_src, cm_tags) def generate_ncx_by_date_read(self, tocTitle): """ Add By Date Read to the basic NCX file. @@ -3777,34 +3506,12 @@ class CatalogBuilder(object): if not self.bookmarked_books_by_date_read: return - ncx_soup = self.ncx_soup HTML_file = "content/ByDateRead.html" - if self.generate_for_kindle_mobi: - body = ncx_soup.find("navPoint") - else: - body = ncx_soup.find('navMap') - btc = len(body.contents) # --- Construct the 'Recently Read' *section* --- - navPointTag = ncx_soup.new_tag('navPoint') - if self.generate_for_kindle_mobi: - navPointTag['class'] = "section" file_ID = "%s" % tocTitle.lower() file_ID = file_ID.replace(" ", "") - navPointTag['id'] = "%s-ID" % file_ID - navPointTag['playOrder'] = self.play_order - self.play_order += 1 - navLabelTag = ncx_soup.new_tag('navLabel') - textTag = ncx_soup.new_tag('text') - textTag.insert(0, NavigableString('%s' % tocTitle)) - navLabelTag.insert(0, textTag) - nptc = 0 - navPointTag.insert(nptc, navLabelTag) - nptc += 1 - contentTag = ncx_soup.new_tag("content") - contentTag['src'] = "%s#section_start" % HTML_file - navPointTag.insert(nptc, contentTag) - nptc += 1 + navPointTag = self.generate_ncx_section_header("%s-ID" % file_ID, tocTitle, "%s#section_start" % HTML_file) # Create an NCX article entry for each date range current_titles_list = [] @@ -3857,47 +3564,18 @@ class CatalogBuilder(object): # master_day_list{}: [0]:titles list [1]:date for books_by_day in master_day_list: datestr = strftime('%A, %B %d', books_by_day[1].timetuple()) - navPointByDayTag = ncx_soup.new_tag('navPoint') - if self.generate_for_kindle_mobi: - navPointByDayTag['class'] = "article" - navPointByDayTag['id'] = "bdr_%s-%s-%sID" % (books_by_day[1].year, + sec_id = "bdr_%s-%s-%sID" % (books_by_day[1].year, books_by_day[1].month, books_by_day[1].day) - navPointTag['playOrder'] = self.play_order - self.play_order += 1 - navLabelTag = ncx_soup.new_tag('navLabel') - textTag = ncx_soup.new_tag('text') - textTag.insert(0, NavigableString(datestr)) - navLabelTag.insert(0, textTag) - navPointByDayTag.insert(0, navLabelTag) - contentTag = ncx_soup.new_tag('content') - contentTag['src'] = "%s#bdr_%s-%s-%s" % (HTML_file, + sec_text = datestr + content_src = "%s#bdr_%s-%s-%s" % (HTML_file, books_by_day[1].year, books_by_day[1].month, books_by_day[1].day) - - navPointByDayTag.insert(1, contentTag) - - if self.generate_for_kindle_mobi: - cmTag = ncx_soup.new_tag('calibre:meta') - cmTag['name'] = "description" - cmTag.insert(0, NavigableString(books_by_day[0])) - navPointByDayTag.insert(2, cmTag) - - cmTag = ncx_soup.new_tag('calibre:meta') - cmTag['name'] = "author" - navStr = '%d titles' % books_by_day[2] if books_by_day[2] > 1 else \ - '%d title' % books_by_day[2] - cmTag.insert(0, NavigableString(navStr)) - navPointByDayTag.insert(3, cmTag) - - navPointTag.insert(nptc, navPointByDayTag) - nptc += 1 - - # Add this section to the body - body.insert(btc, navPointTag) - btc += 1 - self.ncx_soup = ncx_soup + navStr = '%d titles' % books_by_day[2] if books_by_day[2] > 1 else \ + '%d title' % books_by_day[2] + cm_tags = {'description': books_by_day[0], 'author': navStr} + self.generate_ncx_subsection(navPointTag, sec_id, sec_text, content_src, cm_tags) def generate_ncx_by_genre(self, tocTitle): """ Add Genres to the basic NCX file. @@ -3921,105 +3599,36 @@ class CatalogBuilder(object): " No Genre section added to Catalog") return - ncx_soup = self.ncx_soup - if self.generate_for_kindle_mobi: - body = ncx_soup.find("navPoint") - else: - body = ncx_soup.find('navMap') - btc = len(body.contents) - # --- Construct the 'Books By Genre' *section* --- - navPointTag = ncx_soup.new_tag('navPoint') - if self.generate_for_kindle_mobi: - navPointTag['class'] = "section" file_ID = "%s" % tocTitle.lower() file_ID = file_ID.replace(" ", "") - navPointTag['id'] = "%s-ID" % file_ID - navPointTag['playOrder'] = self.play_order - self.play_order += 1 - navLabelTag = ncx_soup.new_tag('navLabel') - textTag = ncx_soup.new_tag('text') section_header = '%s [%d]' % (tocTitle, len(self.genres)) if self.generate_for_kindle_mobi: section_header = tocTitle - textTag.insert(0, NavigableString(section_header)) - navLabelTag.insert(0, textTag) - nptc = 0 - navPointTag.insert(nptc, navLabelTag) - nptc += 1 - contentTag = ncx_soup.new_tag("content") - contentTag['src'] = "content/Genre_%s.html#section_start" % self.genres[0]['tag'] - navPointTag.insert(nptc, contentTag) - nptc += 1 + navPointTag = self.generate_ncx_section_header("%s-ID" % file_ID, section_header, "content/Genre_%s.html#section_start" % self.genres[0]['tag']) for genre in self.genres: # Add an article for each genre - navPointVolumeTag = ncx_soup.new_tag('navPoint') - if self.generate_for_kindle_mobi: - navPointVolumeTag['class'] = "article" - navPointVolumeTag['id'] = "genre-%s-ID" % genre['tag'] - navPointVolumeTag['playOrder'] = self.play_order - self.play_order += 1 - navLabelTag = ncx_soup.new_tag("navLabel") - textTag = ncx_soup.new_tag("text") - + sec_id = "genre-%s-ID" % genre['tag'] # GwR *** Can this be optimized? normalized_tag = None for friendly_tag in self.genre_tags_dict: if self.genre_tags_dict[friendly_tag] == genre['tag']: normalized_tag = self.genre_tags_dict[friendly_tag] break - textTag.insert(0, self.format_ncx_text(NavigableString(friendly_tag), dest='description')) - navLabelTag.insert(0, textTag) - navPointVolumeTag.insert(0, navLabelTag) - contentTag = ncx_soup.new_tag("content") - contentTag['src'] = "content/Genre_%s.html#Genre_%s" % (normalized_tag, normalized_tag) - navPointVolumeTag.insert(1, contentTag) - - if self.generate_for_kindle_mobi: - # Build the author tag - cmTag = ncx_soup.new_tag('calibre:meta') - cmTag['name'] = "author" - # First - Last author - - if len(genre['titles_spanned']) > 1: - author_range = "%s - %s" % (genre['titles_spanned'][0][0], genre['titles_spanned'][1][0]) - else: - author_range = "%s" % (genre['titles_spanned'][0][0]) - - cmTag.insert(0, NavigableString(author_range)) - navPointVolumeTag.insert(2, cmTag) - - # Build the description tag - cmTag = ncx_soup.new_tag('calibre:meta') - cmTag['name'] = "description" - - if False: - # Form 1: Titles spanned - if len(genre['titles_spanned']) > 1: - title_range = "%s -\n%s" % (genre['titles_spanned'][0][1], genre['titles_spanned'][1][1]) - else: - title_range = "%s" % (genre['titles_spanned'][0][1]) - cmTag.insert(0, NavigableString(self.format_ncx_text(title_range, dest='description'))) - else: - # Form 2: title • title • title ... - titles = [] - for title in genre['books']: - titles.append(title['title']) - titles = sorted(titles, key=lambda x: (self.generate_sort_title(x), self.generate_sort_title(x))) - titles_list = self.generate_short_description(" • ".join(titles), dest="description") - cmTag.insert(0, NavigableString(self.format_ncx_text(titles_list, dest='description'))) - - navPointVolumeTag.insert(3, cmTag) - - # Add this volume to the section tag - navPointTag.insert(nptc, navPointVolumeTag) - nptc += 1 - - # Add this section to the body - body.insert(btc, navPointTag) - btc += 1 - self.ncx_soup = ncx_soup + sec_text = self.format_ncx_text(NavigableString(friendly_tag), dest='description') + content_src = "content/Genre_%s.html#Genre_%s" % (normalized_tag, normalized_tag) + if len(genre['titles_spanned']) > 1: + author_range = "%s - %s" % (genre['titles_spanned'][0][0], genre['titles_spanned'][1][0]) + else: + author_range = "%s" % (genre['titles_spanned'][0][0]) + titles = [] + for title in genre['books']: + titles.append(title['title']) + titles = sorted(titles, key=lambda x: (self.generate_sort_title(x), self.generate_sort_title(x))) + titles_list = self.generate_short_description(" • ".join(titles), dest="description") + cm_tags = {'author': author_range, 'description': self.format_ncx_text(titles_list, dest='description')} + self.generate_ncx_subsection(navPointTag, sec_id, sec_text, content_src, cm_tags) def generate_opf(self): """ Generate the OPF file. @@ -4783,9 +4392,7 @@ class CatalogBuilder(object): """ self.update_progress_full_step(_("Saving NCX")) - ncx = self.ncx_soup.prettify(encoding='utf-8') - if isinstance(ncx, unicode_type): - ncx = ncx.encode('utf-8') - + pretty_xml_tree(self.ncx_root) + ncx = etree.tostring(self.ncx_root, encoding='utf-8') with lopen("%s/%s.ncx" % (self.catalog_path, self.opts.basename), 'wb') as outfile: - outfile.write(ncx.strip()) + outfile.write(ncx)