small tweak

This commit is contained in:
ldolse 2010-10-04 16:16:33 +08:00
parent cc29d2efe8
commit 4a044b8e9d
2 changed files with 7 additions and 5 deletions

View File

@@ -146,7 +146,7 @@ class PreProcessor(object):
#print "blanks between paragraphs is marked True"
else:
blanks_between_paragraphs = False
#self.log("\n\n\n\n\n\n\n\n\n\n\n"+html+"\n\n\n\n\n\n\n\n\n\n\n\n\n")
self.log("\n\n\n\n\n\n\n\n\n\n\n"+html+"\n\n\n\n\n\n\n\n\n\n\n\n\n")
# detect chapters/sections to match xpath or splitting logic
#
# Build the Regular Expressions in pieces
@@ -166,13 +166,13 @@ class PreProcessor(object):
title_line_close = "(</(?P=inner6)>)?\s*(</(?P=inner5)>)?\s*(</(?P=inner4)\s[^>]*>)?\s*</(?P=outer2)>"
opt_title_close = ")?"
default_title = r"(\s*[\w\'\"-]+){1,5}(?!<)"
default_title = r"(\s*[\w\'\"-]+){1,5}?(?=<)"
typical_chapters = r".?(Introduction|Synopsis|Acknowledgements|Chapter|Kapitel|Epilogue|Volume\s|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\:?\s*){0,4}"
numeric_chapters = r".?(\d+\.?|(CHAPTER\s*([\dA-Z\-\'\"\?\.!#,]+\s*){1,10}))\s*"
uppercase_chapters = r"\s*.?([A-Z#]+(\s|-){0,3}){1,5}\s*"
chapter_marker = lookahead+chapter_line_open+chapter_header_open+typical_chapters+chapter_header_close+chapter_line_close+blank_lines+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close
#print chapter_marker
print chapter_marker
heading = re.compile('<h[1-3][^>]*>', re.IGNORECASE)
self.html_preprocess_sections = len(heading.findall(html))
self.log("found " + unicode(self.html_preprocess_sections) + " pre-existing headings")
@@ -184,12 +184,14 @@ class PreProcessor(object):
if self.html_preprocess_sections < 10:
self.log("not enough chapters, only " + unicode(self.html_preprocess_sections) + ", trying numeric chapters")
chapter_marker = lookahead+chapter_line_open+chapter_header_open+numeric_chapters+chapter_header_close+chapter_line_close+blank_lines+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close
print chapter_marker
chapdetect2 = re.compile(r'%s' % chapter_marker, re.IGNORECASE)
html = chapdetect2.sub(self.chapter_head, html)
if self.html_preprocess_sections < 10:
self.log("not enough chapters, only " + unicode(self.html_preprocess_sections) + ", trying with uppercase words")
chapter_marker = lookahead+chapter_line_open+chapter_header_open+uppercase_chapters+chapter_header_close+chapter_line_close+blank_lines+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close
print chapter_marker
chapdetect2 = re.compile(r'%s' % chapter_marker, re.UNICODE)
html = chapdetect2.sub(self.chapter_head, html)
###### Unwrap lines ######

View File

@@ -184,12 +184,12 @@ class MobiMLizer(object):
elif tag in NESTABLE_TAGS and istate.rendered:
para = wrapper = bstate.nested[-1]
elif left > 0 and indent >= 0:
para = wrapper = etree.SubElement(parent, XHTML('blockquote'))
para = wrapper = etree.SubElement(parent, XHTML('div'))
para = wrapper
emleft = int(round(left / self.profile.fbase)) - 1
emleft = min((emleft, 10))
while emleft > 0:
para = etree.SubElement(para, XHTML('blockquote'))
para = etree.SubElement(para, XHTML('div'))
emleft -= 1
else:
para = wrapper = etree.SubElement(parent, XHTML('p'))