mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Change the value passed to line_length (.3 → .5) so that it works with line lengths from which duplicates have been removed. Enhance the unwrapping regex to account for more cases.
This commit is contained in:
parent
e426c9e60d
commit
3a87f0d065
@ -183,12 +183,12 @@ class HTMLPreProcessor(object):
|
|||||||
elif self.is_book_designer(html):
|
elif self.is_book_designer(html):
|
||||||
rules = self.BOOK_DESIGNER
|
rules = self.BOOK_DESIGNER
|
||||||
elif self.is_pdftohtml(html):
|
elif self.is_pdftohtml(html):
|
||||||
length = line_length(html, .3)
|
length = line_length(html, .5)
|
||||||
line_length_rules = []
|
line_length_rules = []
|
||||||
if length:
|
if length:
|
||||||
line_length_rules = [
|
line_length_rules = [
|
||||||
# Un wrap using punctuation
|
# Un wrap using punctuation
|
||||||
(re.compile(r'(?<=.{%i}[a-z,;:-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>)\s*(?=(<(i|b|u)>)?[\w\d])' % length, re.UNICODE), wrap_lines),
|
(re.compile(r'(?<=.{%i}[a-z\.,;:)-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>)\s*(?=(<(i|b|u)>)?\s*[\w\d(])' % length, re.UNICODE), wrap_lines),
|
||||||
]
|
]
|
||||||
|
|
||||||
rules = self.PDFTOHTML + line_length_rules
|
rules = self.PDFTOHTML + line_length_rules
|
||||||
|
Loading…
x
Reference in New Issue
Block a user