mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
small tweak
This commit is contained in:
parent
cc29d2efe8
commit
4a044b8e9d
@ -146,7 +146,7 @@ class PreProcessor(object):
|
||||
#print "blanks between paragraphs is marked True"
|
||||
else:
|
||||
blanks_between_paragraphs = False
|
||||
#self.log("\n\n\n\n\n\n\n\n\n\n\n"+html+"\n\n\n\n\n\n\n\n\n\n\n\n\n")
|
||||
self.log("\n\n\n\n\n\n\n\n\n\n\n"+html+"\n\n\n\n\n\n\n\n\n\n\n\n\n")
|
||||
# detect chapters/sections to match xpath or splitting logic
|
||||
#
|
||||
# Build the Regular Expressions in pieces
|
||||
@ -166,13 +166,13 @@ class PreProcessor(object):
|
||||
title_line_close = "(</(?P=inner6)>)?\s*(</(?P=inner5)>)?\s*(</(?P=inner4)\s[^>]*>)?\s*</(?P=outer2)>"
|
||||
opt_title_close = ")?"
|
||||
|
||||
default_title = r"(\s*[\w\'\"-]+){1,5}(?!<)"
|
||||
default_title = r"(\s*[\w\'\"-]+){1,5}?(?=<)"
|
||||
typical_chapters = r".?(Introduction|Synopsis|Acknowledgements|Chapter|Kapitel|Epilogue|Volume\s|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\:?\s*){0,4}"
|
||||
numeric_chapters = r".?(\d+\.?|(CHAPTER\s*([\dA-Z\-\'\"\?\.!#,]+\s*){1,10}))\s*"
|
||||
uppercase_chapters = r"\s*.?([A-Z#]+(\s|-){0,3}){1,5}\s*"
|
||||
|
||||
chapter_marker = lookahead+chapter_line_open+chapter_header_open+typical_chapters+chapter_header_close+chapter_line_close+blank_lines+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close
|
||||
#print chapter_marker
|
||||
print chapter_marker
|
||||
heading = re.compile('<h[1-3][^>]*>', re.IGNORECASE)
|
||||
self.html_preprocess_sections = len(heading.findall(html))
|
||||
self.log("found " + unicode(self.html_preprocess_sections) + " pre-existing headings")
|
||||
@ -184,12 +184,14 @@ class PreProcessor(object):
|
||||
if self.html_preprocess_sections < 10:
|
||||
self.log("not enough chapters, only " + unicode(self.html_preprocess_sections) + ", trying numeric chapters")
|
||||
chapter_marker = lookahead+chapter_line_open+chapter_header_open+numeric_chapters+chapter_header_close+chapter_line_close+blank_lines+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close
|
||||
print chapter_marker
|
||||
chapdetect2 = re.compile(r'%s' % chapter_marker, re.IGNORECASE)
|
||||
html = chapdetect2.sub(self.chapter_head, html)
|
||||
|
||||
if self.html_preprocess_sections < 10:
|
||||
self.log("not enough chapters, only " + unicode(self.html_preprocess_sections) + ", trying with uppercase words")
|
||||
chapter_marker = lookahead+chapter_line_open+chapter_header_open+uppercase_chapters+chapter_header_close+chapter_line_close+blank_lines+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close
|
||||
print chapter_marker
|
||||
chapdetect2 = re.compile(r'%s' % chapter_marker, re.UNICODE)
|
||||
html = chapdetect2.sub(self.chapter_head, html)
|
||||
###### Unwrap lines ######
|
||||
|
@ -184,12 +184,12 @@ class MobiMLizer(object):
|
||||
elif tag in NESTABLE_TAGS and istate.rendered:
|
||||
para = wrapper = bstate.nested[-1]
|
||||
elif left > 0 and indent >= 0:
|
||||
para = wrapper = etree.SubElement(parent, XHTML('blockquote'))
|
||||
para = wrapper = etree.SubElement(parent, XHTML('div'))
|
||||
para = wrapper
|
||||
emleft = int(round(left / self.profile.fbase)) - 1
|
||||
emleft = min((emleft, 10))
|
||||
while emleft > 0:
|
||||
para = etree.SubElement(para, XHTML('blockquote'))
|
||||
para = etree.SubElement(para, XHTML('div'))
|
||||
emleft -= 1
|
||||
else:
|
||||
para = wrapper = etree.SubElement(parent, XHTML('p'))
|
||||
|
Loading…
x
Reference in New Issue
Block a user