mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
tied mobi into preprocess
This commit is contained in:
parent
0edf1e550e
commit
60c50f3944
@ -236,7 +236,7 @@ class PreProcessor(object):
|
|||||||
print unicode(self.chapters_with_title)+" chapters with titles"
|
print unicode(self.chapters_with_title)+" chapters with titles"
|
||||||
else:
|
else:
|
||||||
html = chapdetect.sub(self.chapter_head, html)
|
html = chapdetect.sub(self.chapter_head, html)
|
||||||
return html
|
return html
|
||||||
|
|
||||||
recurse_patterns(html, True)
|
recurse_patterns(html, True)
|
||||||
html = recurse_patterns(html, False)
|
html = recurse_patterns(html, False)
|
||||||
@ -322,7 +322,8 @@ class PreProcessor(object):
|
|||||||
html = re.sub(ur'\s*<o:p>\s*</o:p>', ' ', html)
|
html = re.sub(ur'\s*<o:p>\s*</o:p>', ' ', html)
|
||||||
# Delete microsoft 'smart' tags
|
# Delete microsoft 'smart' tags
|
||||||
html = re.sub('(?i)</?st1:\w+>', '', html)
|
html = re.sub('(?i)</?st1:\w+>', '', html)
|
||||||
# Get rid of empty span, bold, & italics tags
|
# Get rid of empty span, bold, font, & italics tags
|
||||||
|
html = re.sub(r'\s*<font[^>]*>\s*</font>\s*', '', html)
|
||||||
html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]*>\s*</span>){0,2}\s*</span>\s*", " ", html)
|
html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]*>\s*</span>){0,2}\s*</span>\s*", " ", html)
|
||||||
html = re.sub(r"\s*<[ibu][^>]*>\s*(<[ibu][^>]*>\s*</[ibu]>\s*){0,2}\s*</[ibu]>", " ", html)
|
html = re.sub(r"\s*<[ibu][^>]*>\s*(<[ibu][^>]*>\s*</[ibu]>\s*){0,2}\s*</[ibu]>", " ", html)
|
||||||
html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]>\s*</span>){0,2}\s*</span>\s*", " ", html)
|
html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]>\s*</span>){0,2}\s*</span>\s*", " ", html)
|
||||||
|
@ -5,6 +5,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
from calibre.customize.conversion import InputFormatPlugin
|
from calibre.customize.conversion import InputFormatPlugin
|
||||||
|
from calibre.ebooks.conversion.utils import PreProcessor
|
||||||
|
|
||||||
class MOBIInput(InputFormatPlugin):
|
class MOBIInput(InputFormatPlugin):
|
||||||
|
|
||||||
@ -40,10 +41,6 @@ class MOBIInput(InputFormatPlugin):
|
|||||||
return mr.created_opf_path
|
return mr.created_opf_path
|
||||||
|
|
||||||
def heuristics(self, options, html):
|
def heuristics(self, options, html):
|
||||||
# search for places where a first or second level heading is immediately followed by another
|
self.options = options
|
||||||
# top level heading. demote the second heading to h3 to prevent splitting between chapter
|
preprocessor = PreProcessor(self.options, log=getattr(self, 'log', None))
|
||||||
# headings and titles, images, etc
|
return preprocessor(html)
|
||||||
doubleheading = re.compile(r'(?P<firsthead><h(1|2)[^>]*>.+?</h(1|2)>\s*(<(?!h\d)[^>]*>\s*)*)<h(1|2)(?P<secondhead>[^>]*>.+?)</h(1|2)>', re.IGNORECASE)
|
|
||||||
html = doubleheading.sub('\g<firsthead>'+'\n<h3'+'\g<secondhead>'+'</h3>', html)
|
|
||||||
return html
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user