From 4a044b8e9d6b5f0168ef4e65d6a3e9aa47f182b4 Mon Sep 17 00:00:00 2001 From: ldolse Date: Mon, 4 Oct 2010 16:16:33 +0800 Subject: [PATCH] small tweak --- src/calibre/ebooks/conversion/utils.py | 8 +++++--- src/calibre/ebooks/mobi/mobiml.py | 4 ++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py index 2faec27b68..976ed6a8f4 100644 --- a/src/calibre/ebooks/conversion/utils.py +++ b/src/calibre/ebooks/conversion/utils.py @@ -146,7 +146,7 @@ class PreProcessor(object): #print "blanks between paragraphs is marked True" else: blanks_between_paragraphs = False - #self.log("\n\n\n\n\n\n\n\n\n\n\n"+html+"\n\n\n\n\n\n\n\n\n\n\n\n\n") + self.log("\n\n\n\n\n\n\n\n\n\n\n"+html+"\n\n\n\n\n\n\n\n\n\n\n\n\n") # detect chapters/sections to match xpath or splitting logic # # Build the Regular Expressions in pieces @@ -166,13 +166,13 @@ class PreProcessor(object): title_line_close = "()?\s*()?\s*(]*>)?\s*" opt_title_close = ")?" - default_title = r"(\s*[\w\'\"-]+){1,5}(?!<)" + default_title = r"(\s*[\w\'\"-]+){1,5}?(?=<)" typical_chapters = r".?(Introduction|Synopsis|Acknowledgements|Chapter|Kapitel|Epilogue|Volume\s|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\:?\s*){0,4}" numeric_chapters = r".?(\d+\.?|(CHAPTER\s*([\dA-Z\-\'\"\?\.!#,]+\s*){1,10}))\s*" uppercase_chapters = r"\s*.?([A-Z#]+(\s|-){0,3}){1,5}\s*" chapter_marker = lookahead+chapter_line_open+chapter_header_open+typical_chapters+chapter_header_close+chapter_line_close+blank_lines+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close - #print chapter_marker + print chapter_marker heading = re.compile(']*>', re.IGNORECASE) self.html_preprocess_sections = len(heading.findall(html)) self.log("found " + unicode(self.html_preprocess_sections) + " pre-existing headings") @@ -184,12 +184,14 @@ class PreProcessor(object): if self.html_preprocess_sections < 10: self.log("not enough chapters, only " + unicode(self.html_preprocess_sections) + ", trying numeric chapters") chapter_marker = lookahead+chapter_line_open+chapter_header_open+numeric_chapters+chapter_header_close+chapter_line_close+blank_lines+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close + print chapter_marker chapdetect2 = re.compile(r'%s' % chapter_marker, re.IGNORECASE) html = chapdetect2.sub(self.chapter_head, html) if self.html_preprocess_sections < 10: self.log("not enough chapters, only " + unicode(self.html_preprocess_sections) + ", trying with uppercase words") chapter_marker = lookahead+chapter_line_open+chapter_header_open+uppercase_chapters+chapter_header_close+chapter_line_close+blank_lines+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close + print chapter_marker chapdetect2 = re.compile(r'%s' % chapter_marker, re.UNICODE) html = chapdetect2.sub(self.chapter_head, html) ###### Unwrap lines ###### diff --git a/src/calibre/ebooks/mobi/mobiml.py b/src/calibre/ebooks/mobi/mobiml.py index 231ad51eee..d4801e637e 100644 --- a/src/calibre/ebooks/mobi/mobiml.py +++ b/src/calibre/ebooks/mobi/mobiml.py @@ -184,12 +184,12 @@ class MobiMLizer(object): elif tag in NESTABLE_TAGS and istate.rendered: para = wrapper = bstate.nested[-1] elif left > 0 and indent >= 0: - para = wrapper = etree.SubElement(parent, XHTML('blockquote')) + para = wrapper = etree.SubElement(parent, XHTML('div')) para = wrapper emleft = int(round(left / self.profile.fbase)) - 1 emleft = min((emleft, 10)) while emleft > 0: - para = etree.SubElement(para, XHTML('blockquote')) + para = etree.SubElement(para, XHTML('div')) emleft -= 1 else: para = wrapper = etree.SubElement(parent, XHTML('p'))