mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
small tweak
This commit is contained in:
parent
cc29d2efe8
commit
4a044b8e9d
@ -146,7 +146,7 @@ class PreProcessor(object):
|
|||||||
#print "blanks between paragraphs is marked True"
|
#print "blanks between paragraphs is marked True"
|
||||||
else:
|
else:
|
||||||
blanks_between_paragraphs = False
|
blanks_between_paragraphs = False
|
||||||
#self.log("\n\n\n\n\n\n\n\n\n\n\n"+html+"\n\n\n\n\n\n\n\n\n\n\n\n\n")
|
self.log("\n\n\n\n\n\n\n\n\n\n\n"+html+"\n\n\n\n\n\n\n\n\n\n\n\n\n")
|
||||||
# detect chapters/sections to match xpath or splitting logic
|
# detect chapters/sections to match xpath or splitting logic
|
||||||
#
|
#
|
||||||
# Build the Regular Expressions in pieces
|
# Build the Regular Expressions in pieces
|
||||||
@ -166,13 +166,13 @@ class PreProcessor(object):
|
|||||||
title_line_close = "(</(?P=inner6)>)?\s*(</(?P=inner5)>)?\s*(</(?P=inner4)\s[^>]*>)?\s*</(?P=outer2)>"
|
title_line_close = "(</(?P=inner6)>)?\s*(</(?P=inner5)>)?\s*(</(?P=inner4)\s[^>]*>)?\s*</(?P=outer2)>"
|
||||||
opt_title_close = ")?"
|
opt_title_close = ")?"
|
||||||
|
|
||||||
default_title = r"(\s*[\w\'\"-]+){1,5}(?!<)"
|
default_title = r"(\s*[\w\'\"-]+){1,5}?(?=<)"
|
||||||
typical_chapters = r".?(Introduction|Synopsis|Acknowledgements|Chapter|Kapitel|Epilogue|Volume\s|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\:?\s*){0,4}"
|
typical_chapters = r".?(Introduction|Synopsis|Acknowledgements|Chapter|Kapitel|Epilogue|Volume\s|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\:?\s*){0,4}"
|
||||||
numeric_chapters = r".?(\d+\.?|(CHAPTER\s*([\dA-Z\-\'\"\?\.!#,]+\s*){1,10}))\s*"
|
numeric_chapters = r".?(\d+\.?|(CHAPTER\s*([\dA-Z\-\'\"\?\.!#,]+\s*){1,10}))\s*"
|
||||||
uppercase_chapters = r"\s*.?([A-Z#]+(\s|-){0,3}){1,5}\s*"
|
uppercase_chapters = r"\s*.?([A-Z#]+(\s|-){0,3}){1,5}\s*"
|
||||||
|
|
||||||
chapter_marker = lookahead+chapter_line_open+chapter_header_open+typical_chapters+chapter_header_close+chapter_line_close+blank_lines+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close
|
chapter_marker = lookahead+chapter_line_open+chapter_header_open+typical_chapters+chapter_header_close+chapter_line_close+blank_lines+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close
|
||||||
#print chapter_marker
|
print chapter_marker
|
||||||
heading = re.compile('<h[1-3][^>]*>', re.IGNORECASE)
|
heading = re.compile('<h[1-3][^>]*>', re.IGNORECASE)
|
||||||
self.html_preprocess_sections = len(heading.findall(html))
|
self.html_preprocess_sections = len(heading.findall(html))
|
||||||
self.log("found " + unicode(self.html_preprocess_sections) + " pre-existing headings")
|
self.log("found " + unicode(self.html_preprocess_sections) + " pre-existing headings")
|
||||||
@ -184,12 +184,14 @@ class PreProcessor(object):
|
|||||||
if self.html_preprocess_sections < 10:
|
if self.html_preprocess_sections < 10:
|
||||||
self.log("not enough chapters, only " + unicode(self.html_preprocess_sections) + ", trying numeric chapters")
|
self.log("not enough chapters, only " + unicode(self.html_preprocess_sections) + ", trying numeric chapters")
|
||||||
chapter_marker = lookahead+chapter_line_open+chapter_header_open+numeric_chapters+chapter_header_close+chapter_line_close+blank_lines+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close
|
chapter_marker = lookahead+chapter_line_open+chapter_header_open+numeric_chapters+chapter_header_close+chapter_line_close+blank_lines+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close
|
||||||
|
print chapter_marker
|
||||||
chapdetect2 = re.compile(r'%s' % chapter_marker, re.IGNORECASE)
|
chapdetect2 = re.compile(r'%s' % chapter_marker, re.IGNORECASE)
|
||||||
html = chapdetect2.sub(self.chapter_head, html)
|
html = chapdetect2.sub(self.chapter_head, html)
|
||||||
|
|
||||||
if self.html_preprocess_sections < 10:
|
if self.html_preprocess_sections < 10:
|
||||||
self.log("not enough chapters, only " + unicode(self.html_preprocess_sections) + ", trying with uppercase words")
|
self.log("not enough chapters, only " + unicode(self.html_preprocess_sections) + ", trying with uppercase words")
|
||||||
chapter_marker = lookahead+chapter_line_open+chapter_header_open+uppercase_chapters+chapter_header_close+chapter_line_close+blank_lines+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close
|
chapter_marker = lookahead+chapter_line_open+chapter_header_open+uppercase_chapters+chapter_header_close+chapter_line_close+blank_lines+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close
|
||||||
|
print chapter_marker
|
||||||
chapdetect2 = re.compile(r'%s' % chapter_marker, re.UNICODE)
|
chapdetect2 = re.compile(r'%s' % chapter_marker, re.UNICODE)
|
||||||
html = chapdetect2.sub(self.chapter_head, html)
|
html = chapdetect2.sub(self.chapter_head, html)
|
||||||
###### Unwrap lines ######
|
###### Unwrap lines ######
|
||||||
|
@ -184,12 +184,12 @@ class MobiMLizer(object):
|
|||||||
elif tag in NESTABLE_TAGS and istate.rendered:
|
elif tag in NESTABLE_TAGS and istate.rendered:
|
||||||
para = wrapper = bstate.nested[-1]
|
para = wrapper = bstate.nested[-1]
|
||||||
elif left > 0 and indent >= 0:
|
elif left > 0 and indent >= 0:
|
||||||
para = wrapper = etree.SubElement(parent, XHTML('blockquote'))
|
para = wrapper = etree.SubElement(parent, XHTML('div'))
|
||||||
para = wrapper
|
para = wrapper
|
||||||
emleft = int(round(left / self.profile.fbase)) - 1
|
emleft = int(round(left / self.profile.fbase)) - 1
|
||||||
emleft = min((emleft, 10))
|
emleft = min((emleft, 10))
|
||||||
while emleft > 0:
|
while emleft > 0:
|
||||||
para = etree.SubElement(para, XHTML('blockquote'))
|
para = etree.SubElement(para, XHTML('div'))
|
||||||
emleft -= 1
|
emleft -= 1
|
||||||
else:
|
else:
|
||||||
para = wrapper = etree.SubElement(parent, XHTML('p'))
|
para = wrapper = etree.SubElement(parent, XHTML('p'))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user