mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
tweaked preprocess for $, added rtf to new preprocess logic, changed last pdf default
This commit is contained in:
parent
548417ea6b
commit
b73e1b3da5
@ -340,7 +340,7 @@ class HTMLPreProcessor(object):
|
|||||||
# print "The pdf line length returned is " + str(length)
|
# print "The pdf line length returned is " + str(length)
|
||||||
end_rules.append(
|
end_rules.append(
|
||||||
# Un wrap using punctuation
|
# Un wrap using punctuation
|
||||||
(re.compile(r'(?<=.{%i}[a-z,;:)\-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d(])' % length, re.UNICODE), wrap_lines),
|
(re.compile(r'(?<=.{%i}[a-z,;:)\-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),
|
||||||
)
|
)
|
||||||
|
|
||||||
for rule in self.PREPROCESS + start_rules:
|
for rule in self.PREPROCESS + start_rules:
|
||||||
|
@ -8,6 +8,7 @@ from lxml import etree
|
|||||||
|
|
||||||
from calibre.customize.conversion import InputFormatPlugin
|
from calibre.customize.conversion import InputFormatPlugin
|
||||||
from calibre.ebooks.conversion.preprocess import line_length
|
from calibre.ebooks.conversion.preprocess import line_length
|
||||||
|
from calibre.ebooks.conversion.utils import PreProcessor
|
||||||
|
|
||||||
class InlineClass(etree.XSLTExtension):
|
class InlineClass(etree.XSLTExtension):
|
||||||
|
|
||||||
@ -229,16 +230,8 @@ class RTFInput(InputFormatPlugin):
|
|||||||
res = transform.tostring(result)
|
res = transform.tostring(result)
|
||||||
res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
|
res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
|
||||||
if self.options.preprocess_html:
|
if self.options.preprocess_html:
|
||||||
self.log("********* Preprocessing HTML *********")
|
preprocessor = PreProcessor(res)
|
||||||
# Detect Chapters to match the xpath in the GUI
|
res = preprocessor(res)
|
||||||
chapdetect = re.compile(r'<p[^>]*>\s*<span[^>]*>\s*(?P<chap>(<(i|b)>(<(i|b)>)?)?(.?Chapter|Epilogue|Prologue|Book|Part|Dedication)\s*([\d\w-]+(\s\w+)?)?(</(i|b)>(<(/i|b)>)?)?)\s*</span>\s*</p>', re.IGNORECASE)
|
|
||||||
res = chapdetect.sub('<h2>'+'\g<chap>'+'</h2>\n', res)
|
|
||||||
# Unwrap lines using punctation if the median length of all lines is less than 150
|
|
||||||
length = line_length('html', res, 0.4)
|
|
||||||
self.log("*** Median length is " + str(length) + " ***")
|
|
||||||
unwrap = re.compile(r"(?<=.{%i}[a-z,;:\IA])\s*</span>\s*</p>\s*(?P<up2threeblanks><p[^>]*>\s*(<span[^>]*>\s*</span>\s*)</p>\s*){0,3}\s*<p[^>]*>\s*<span[^>]*>\s*" % length, re.UNICODE)
|
|
||||||
if length < 150:
|
|
||||||
res = unwrap.sub(' ', res)
|
|
||||||
f.write(res)
|
f.write(res)
|
||||||
self.write_inline_css(inline_class)
|
self.write_inline_css(inline_class)
|
||||||
stream.seek(0)
|
stream.seek(0)
|
||||||
|
@ -46,7 +46,7 @@
|
|||||||
<double>0.010000000000000</double>
|
<double>0.010000000000000</double>
|
||||||
</property>
|
</property>
|
||||||
<property name="value">
|
<property name="value">
|
||||||
<double>0.500000000000000</double>
|
<double>0.450000000000000</double>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
|
Loading…
x
Reference in New Issue
Block a user