From e80fcc13fcffef68f7eccb7d0f135f08dce91f12 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 25 Apr 2009 12:22:39 -0700 Subject: [PATCH 1/3] More miscellaneous fixes --- src/calibre/ebooks/oeb/base.py | 5 ++++- src/calibre/ebooks/oeb/transforms/structure.py | 1 + src/calibre/gui2/dialogs/metadata_single.py | 7 +++++-- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index 33bb44840b..9d8598c766 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -941,7 +941,10 @@ class Manifest(object): href = urlunparse(purl) path, frag = urldefrag(href) if not path: - return '#'.join((self.href, frag)) + if frag: + return '#'.join((self.href, frag)) + else: + return self.href if '/' not in self.href: return href dirname = os.path.dirname(self.href) diff --git a/src/calibre/ebooks/oeb/transforms/structure.py b/src/calibre/ebooks/oeb/transforms/structure.py index 197a265139..605cdaa7cf 100644 --- a/src/calibre/ebooks/oeb/transforms/structure.py +++ b/src/calibre/ebooks/oeb/transforms/structure.py @@ -102,6 +102,7 @@ class DetectStructure(object): play_order=self.oeb.toc.next_play_order()) + def elem_to_link(self, item, elem, counter): text = u' '.join([t.strip() for t in elem.xpath('descendant::text()')]) text = text[:100].strip() diff --git a/src/calibre/gui2/dialogs/metadata_single.py b/src/calibre/gui2/dialogs/metadata_single.py index e3e2080cc0..4d5471caf0 100644 --- a/src/calibre/gui2/dialogs/metadata_single.py +++ b/src/calibre/gui2/dialogs/metadata_single.py @@ -159,9 +159,12 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog): row = self.formats.currentRow() fmt = self.formats.item(row) if fmt is None: - error_dialog(self, _('No format selected'), + if self.formats.count() == 1: + fmt = self.formats.item(0) + if fmt is None: + error_dialog(self, _('No format selected'), _('No format selected')).exec_() - return + return ext = 
fmt.ext.lower() if fmt.path is None: stream = self.db.format(self.row, ext, as_file=True) From 0d07ad2610b8b58d237075392353fb35e45d2ae7 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 25 Apr 2009 14:12:23 -0700 Subject: [PATCH 2/3] Strip 0 bytes from HTML before parsing --- src/calibre/ebooks/conversion/preprocess.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index fb55ee74fb..42e6654127 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -26,9 +26,9 @@ def sanitize_head(match): def chap_head(match): chap = match.group('chap') title = match.group('title') - if not title: + if not title: return '

'+chap+'


' - else: + else: return '

'+chap+'
'+title+'


' @@ -49,19 +49,19 @@ def line_length(raw, percent): total = sum(lengths) avg = total / len(lengths) max_line = avg * 2 - + lengths = sorted(lengths) for i in range(len(lengths) - 1, -1, -1): if lengths[i] > max_line: del lengths[i] - + if percent > 1: percent = 1 if percent < 0: percent = 0 index = int(len(lengths) * percent) - 1 - + return lengths[index] @@ -110,17 +110,17 @@ class HTMLPreProcessor(object): # Remove non breaking spaces (re.compile(ur'\u00a0'), lambda match : ' '), - + # Detect Chapters to match default XPATH in GUI (re.compile(r'(<br[^>]*>)?(</?p[^>]*>)?s*(?P<chap>(Chapter|Epilogue|Prologue|Book|Part)\s*(\d+|\w+)?)(</?p[^>]*>|<br[^>]*>)\n?((?=(<i>)?\s*\w+(\s+\w+)?(</i>)?(<br[^>]*>|</?p[^>]*>))((?P<title>.*)(<br[^>]*>|</?p[^>]*>)))?', re.IGNORECASE), chap_head), (re.compile(r'(<br[^>]*>)?(</?p[^>]*>)?s*(?P<chap>([A-Z \'"!]{5,})\s*(\d+|\w+)?)(</?p[^>]*>|<br[^>]*>)\n?((?=(<i>)?\s*\w+(\s+\w+)?(</i>)?(<br[^>]*>|</?p[^>]*>))((?P<title>.*)(<br[^>]*>|</?p[^>]*>)))?'), chap_head), - + # Have paragraphs show better (re.compile(r'<br.*?>'), lambda match : '<p>'), - + # Un wrap lines (re.compile(r'(?<=[^\.^\^?^!^"^”])\s*(</(i|b|u)>)*\s*<p.*?>\s*(<(i|b|u)>)*\s*(?=[a-z0-9I])', re.UNICODE), lambda match: ' '), - + # Clean up spaces (re.compile(u'(?<=[\.,:;\?!”"\'])[\s^ ]*(?=<)'), lambda match: ' '), # Add space before and after italics @@ -157,6 +157,7 @@ class HTMLPreProcessor(object): def __call__(self, html, remove_special_chars=None): if remove_special_chars is not None: html = remove_special_chars.sub('', html) + html = html.replace('\0', '') if self.is_baen(html): rules = [] elif self.is_book_designer(html): @@ -166,7 +167,7 @@ class HTMLPreProcessor(object): #line_length_rules = [ # (re.compile('%i' % line_length(html, .85)), lambda match:) #] - + rules = self.PDFTOHTML # + line_length_rules else: rules = [] From d253544a1f311aa692e78e5ff333af6d870fece3 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sat, 25 Apr 2009 14:38:23 -0700 Subject: [PATCH 3/3] Implement a --page-breaks-before 
option --- src/calibre/ebooks/conversion/cli.py | 2 +- src/calibre/ebooks/conversion/plumber.py | 8 ++++++++ src/calibre/ebooks/oeb/transforms/structure.py | 8 ++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/conversion/cli.py b/src/calibre/ebooks/conversion/cli.py index ae0af532ab..e12686a36c 100644 --- a/src/calibre/ebooks/conversion/cli.py +++ b/src/calibre/ebooks/conversion/cli.py @@ -128,7 +128,7 @@ def add_pipeline_options(parser, plumber): [ 'dont_split_on_page_breaks', 'chapter', 'chapter_mark', 'prefer_metadata_cover', 'remove_first_image', - 'insert_comments', + 'insert_comments', 'page_breaks_before', ] ), diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index f55d677d08..da9c9f11e2 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -227,6 +227,14 @@ OptionRecommendation(name='extra_css', 'rules.') ), +OptionRecommendation(name='page_breaks_before', + recommended_value="//*[name()='h1' or name()='h2']", + level=OptionRecommendation.LOW, + help=_('An XPath expression. Page breaks are inserted ' + 'before the specified elements.') + ), + + OptionRecommendation(name='margin_top', recommended_value=5.0, level=OptionRecommendation.LOW, help=_('Set the top margin in pts. 
Default is %default')), diff --git a/src/calibre/ebooks/oeb/transforms/structure.py b/src/calibre/ebooks/oeb/transforms/structure.py index 605cdaa7cf..8ec3c7737a 100644 --- a/src/calibre/ebooks/oeb/transforms/structure.py +++ b/src/calibre/ebooks/oeb/transforms/structure.py @@ -45,6 +45,14 @@ class DetectStructure(object): if not node.title or regexp.search(node.title) is not None: self.oeb.toc.remove(node) + if opts.page_breaks_before is not None: + pb_xpath = XPath(opts.page_breaks_before) + for item in oeb.spine: + for elem in pb_xpath(item.data): + style = elem.get('style', '') + if style: + style += '; ' + elem.set('style', style+'page-break-before:always') def detect_chapters(self): self.detected_chapters = []