small tweak

2025-07-09 03:04:10 -04:00 · 2010-10-04 16:16:33 +08:00 · 2010-10-04 16:16:33 +08:00 · 4a044b8e9d
commit 4a044b8e9d
parent cc29d2efe8
2 changed files with 7 additions and 5 deletions
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@@ -146,7 +146,7 @@ class PreProcessor(object):
               #print "blanks between paragraphs is marked True"
            else:
                blanks_between_paragraphs = False
-        #self.log("\n\n\n\n\n\n\n\n\n\n\n"+html+"\n\n\n\n\n\n\n\n\n\n\n\n\n")
+        self.log("\n\n\n\n\n\n\n\n\n\n\n"+html+"\n\n\n\n\n\n\n\n\n\n\n\n\n")
        # detect chapters/sections to match xpath or splitting logic
        #
        # Build the Regular Expressions in pieces
@@ -166,13 +166,13 @@ class PreProcessor(object):
        title_line_close = "(</(?P=inner6)>)?\s*(</(?P=inner5)>)?\s*(</(?P=inner4)\s[^>]*>)?\s*</(?P=outer2)>"
        opt_title_close = ")?"

-        default_title = r"(\s*[\w\'\"-]+){1,5}(?!<)"
+        default_title = r"(\s*[\w\'\"-]+){1,5}?(?=<)"
        typical_chapters = r".?(Introduction|Synopsis|Acknowledgements|Chapter|Kapitel|Epilogue|Volume\s|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\:?\s*){0,4}"
        numeric_chapters = r".?(\d+\.?|(CHAPTER\s*([\dA-Z\-\'\"\?\.!#,]+\s*){1,10}))\s*"
        uppercase_chapters = r"\s*.?([A-Z#]+(\s|-){0,3}){1,5}\s*"

        chapter_marker = lookahead+chapter_line_open+chapter_header_open+typical_chapters+chapter_header_close+chapter_line_close+blank_lines+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close
-        #print chapter_marker
+        print chapter_marker
        heading = re.compile('<h[1-3][^>]*>', re.IGNORECASE)
        self.html_preprocess_sections = len(heading.findall(html))
        self.log("found " + unicode(self.html_preprocess_sections) + " pre-existing headings")
@@ -184,12 +184,14 @@ class PreProcessor(object):
        if self.html_preprocess_sections < 10:
            self.log("not enough chapters, only " + unicode(self.html_preprocess_sections) + ", trying numeric chapters")
            chapter_marker = lookahead+chapter_line_open+chapter_header_open+numeric_chapters+chapter_header_close+chapter_line_close+blank_lines+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close
+            print chapter_marker
            chapdetect2 = re.compile(r'%s' % chapter_marker, re.IGNORECASE)
            html = chapdetect2.sub(self.chapter_head, html)

        if self.html_preprocess_sections < 10:
            self.log("not enough chapters, only " + unicode(self.html_preprocess_sections) + ", trying with uppercase words")
            chapter_marker = lookahead+chapter_line_open+chapter_header_open+uppercase_chapters+chapter_header_close+chapter_line_close+blank_lines+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close
+            print chapter_marker
            chapdetect2 = re.compile(r'%s' % chapter_marker,  re.UNICODE)
            html = chapdetect2.sub(self.chapter_head, html)
        ###### Unwrap lines ######
--- a/src/calibre/ebooks/mobi/mobiml.py
+++ b/src/calibre/ebooks/mobi/mobiml.py
@@ -184,12 +184,12 @@ class MobiMLizer(object):
            elif tag in NESTABLE_TAGS and istate.rendered:
                para = wrapper = bstate.nested[-1]
            elif left > 0 and indent >= 0:
-                para = wrapper = etree.SubElement(parent, XHTML('blockquote'))
+                para = wrapper = etree.SubElement(parent, XHTML('div'))
                para = wrapper
                emleft = int(round(left / self.profile.fbase)) - 1
                emleft = min((emleft, 10))
                while emleft > 0:
-                    para = etree.SubElement(para, XHTML('blockquote'))
+                    para = etree.SubElement(para, XHTML('div'))
                    emleft -= 1
            else:
                para = wrapper = etree.SubElement(parent, XHTML('p'))