diff --git a/src/calibre/library/catalogs/epub_mobi_builder.py b/src/calibre/library/catalogs/epub_mobi_builder.py
index 1a795810fc..83736fb65a 100644
--- a/src/calibre/library/catalogs/epub_mobi_builder.py
+++ b/src/calibre/library/catalogs/epub_mobi_builder.py
@@ -1,34 +1,49 @@
-# -*- coding: utf-8 -*-
+#!/usr/bin/env python2
+# vim:fileencoding=utf-8
+# License: GPLv3 Copyright: 2010, Greg Riker
from __future__ import print_function
-__license__ = 'GPL v3'
-__copyright__ = '2010, Greg Riker'
-import datetime, os, platform, re, shutil, time, unicodedata, zlib
+import datetime
+import os
+import platform
+import re
+import shutil
+import time
+import unicodedata
+import zlib
from copy import deepcopy
from xml.sax.saxutils import escape
from calibre import (
- prepare_string_for_xml, strftime, force_unicode, isbytestring, replace_entities, as_unicode, xml_replace_entities)
-from calibre.constants import isosx, cache_dir
+ as_unicode, force_unicode, isbytestring, replace_entities, strftime,
+ xml_replace_entities
+)
+from calibre.constants import cache_dir, isosx
from calibre.customize.conversion import DummyReporter
from calibre.customize.ui import output_profiles
-from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, NavigableString, prettify
+from calibre.ebooks.BeautifulSoup import (
+ BeautifulSoup, BeautifulStoneSoup, NavigableString, prettify
+)
from calibre.ebooks.chardet import substitute_entites
from calibre.ebooks.metadata import author_to_author_sort
-from calibre.library.catalogs import AuthorSortMismatchException, EmptyCatalogException, \
- InvalidGenresSourceFieldException
+from calibre.library.catalogs import (
+ AuthorSortMismatchException, EmptyCatalogException,
+ InvalidGenresSourceFieldException
+)
+from calibre.library.comments import comments_to_html
from calibre.ptempfile import PersistentTemporaryDirectory
-from calibre.utils.date import format_date, is_date_undefined, now as nowf, as_local_time
+from calibre.utils.date import (
+ as_local_time, format_date, is_date_undefined, now as nowf
+)
from calibre.utils.filenames import ascii_text, shorten_components_to
from calibre.utils.formatter import TemplateFormatter
from calibre.utils.icu import capitalize, collation_order, sort_key
from calibre.utils.img import scale_image
-from calibre.utils.zipfile import ZipFile
from calibre.utils.localization import get_lang, lang_as_iso639_1
+from calibre.utils.zipfile import ZipFile
from polyglot.builtins import unicode_type
-
NBSP = u'\u00a0'
@@ -953,7 +968,7 @@ class CatalogBuilder(object):
if ad_offset >= 0:
record['comments'] = record['comments'][:ad_offset]
- this_title['description'] = self.massage_comments(record['comments'])
+ this_title['description'] = comments_to_html(record['comments'])
# Create short description
paras = BeautifulSoup(this_title['description']).findAll('p')
@@ -4016,17 +4031,17 @@ class CatalogBuilder(object):
if lang_as_iso639_1(lang):
lang = lang_as_iso639_1(lang)
- header = '''
-
-
s - 'plain text' returns as -
plain text
- - 'plain text with minimal markup' returns as -plain text with minimal markup
- - 'pre-formatted text
returns untouched - - 'A line of text\n\nFollowed by a line of text' returns as -A line of text
-Followed by a line of text
- - 'A line of text.\nA second line of text.\rA third line of text' returns as -A line of text.
A second line of text.
A third line of text.
...end of a paragraph.
-Somehow the break was lost...
- - Deprecated HTML returns as HTML via BeautifulSoup() - - Args: - comments (str): comments from metadata, possibly HTML - - Return: - result (BeautifulSoup): massaged comments in HTML form - """ - - # Hackish - ignoring sentences ending or beginning in numbers to avoid - # confusion with decimal points. - - # Explode lost CRs to \n\n - for lost_cr in re.finditer('([a-z])([\\.\\?!])([A-Z])', comments): - comments = comments.replace(lost_cr.group(), - '%s%s\n\n%s' % (lost_cr.group(1), - lost_cr.group(2), - lost_cr.group(3))) - # Extract pre-built elements - annotations, etc. - if not isinstance(comments, unicode_type): - comments = comments.decode('utf-8', 'replace') - soup = BeautifulSoup(comments) - elems = soup.findAll('div') - for elem in elems: - elem.extract() - - # Reconstruct comments w/os
- if re.search('\n\n', comments):
- soup = BeautifulSoup()
- split_ps = comments.split(u'\n\n')
- tsc = 0
- for p in split_ps:
- pTag = soup.new_tag('p')
- pTag.insert(0, p)
- soup.insert(tsc, pTag)
- tsc += 1
- comments = soup.decode_contents()
-
- # Convert solo returns to
- comments = re.sub('[\r\n]', '
', comments)
-
- # Convert two hypens to emdash
- comments = re.sub('--', '—', comments)
- soup = BeautifulSoup(comments)
- result = BeautifulSoup()
- rtc = 0
- open_pTag = False
-
- all_tokens = list(soup.contents)
- for token in all_tokens:
- if type(token) is NavigableString:
- if not open_pTag:
- pTag = result.new_tag('p')
- open_pTag = True
- ptc = 0
- pTag.insert(ptc, prepare_string_for_xml(token))
- ptc += 1
-
- elif token.name in ['br', 'b', 'i', 'em']:
- if not open_pTag:
- pTag = result.new_tag('p')
- open_pTag = True
- ptc = 0
- pTag.insert(ptc, token)
- ptc += 1
-
- else:
- if open_pTag:
- result.insert(rtc, pTag)
- rtc += 1
- open_pTag = False
- ptc = 0
- # Clean up NavigableStrings for xml
- sub_tokens = list(token.contents)
- for sub_token in sub_tokens:
- if type(sub_token) is NavigableString:
- sub_token.replaceWith(prepare_string_for_xml(sub_token))
- result.insert(rtc, token)
- rtc += 1
-
- if open_pTag:
- result.insert(rtc, pTag)
- rtc += 1
-
- paras = result.findAll('p')
- for p in paras:
- p['class'] = 'description'
-
- # Add back