From 3a87f0d065c71e176f86abebe1a26989c362d6c6 Mon Sep 17 00:00:00 2001
From: John Schember
Date: Mon, 22 Jun 2009 21:12:17 -0400
Subject: [PATCH] Change line length amount to work with duplicates removed lengths. Enhance the unwrapping regex to account for more cases.

---
 src/calibre/ebooks/conversion/preprocess.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py
index 1bbc59a6e4..a508a81b95 100644
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ -183,12 +183,12 @@ class HTMLPreProcessor(object):
         elif self.is_book_designer(html):
             rules = self.BOOK_DESIGNER
         elif self.is_pdftohtml(html):
-            length = line_length(html, .3)
+            length = line_length(html, .5)
             line_length_rules = []
             if length:
                 line_length_rules = [
                     # Un wrap using punctuation
-                    (re.compile(r'(?<=.{%i}[a-z,;:-IA])\s*(?P)?\s*()\s*(?=(<(i|b|u)>)?[\w\d])' % length, re.UNICODE), wrap_lines),
+                    (re.compile(r'(?<=.{%i}[a-z\.,;:)-IA])\s*(?P)?\s*()\s*(?=(<(i|b|u)>)?\s*[\w\d(])' % length, re.UNICODE), wrap_lines),
                 ]
 
             rules = self.PDFTOHTML + line_length_rules