From 548417ea6b6157faf1688b3b082f3eac5476636f Mon Sep 17 00:00:00 2001 From: ldolse Date: Mon, 13 Sep 2010 09:18:45 +1000 Subject: [PATCH] comments and minor tweak --- src/calibre/ebooks/conversion/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py index abfa43e7ed..ecf030b27d 100644 --- a/src/calibre/ebooks/conversion/utils.py +++ b/src/calibre/ebooks/conversion/utils.py @@ -111,7 +111,7 @@ class PreProcessor(object): html = add_markup.sub('

\n

', html) # detect chapters/sections to match xpath or splitting logic - heading = re.compile(']*>', re.IGNORECASE) + heading = re.compile(']*>', re.IGNORECASE) self.html_preprocess_sections = len(heading.findall(html)) self.log("found " + str(self.html_preprocess_sections) + " pre-existing headings") # @@ -134,7 +134,7 @@ class PreProcessor(object): self.log("Unwrapping Lines") # Some OCR sourced files have line breaks in the html using a combination of span & p tags # span are used for hard line breaks, p for new paragraphs. Determine which is used so - # that lines can be wrapped across page boundaries + # that lines can be un-wrapped across page boundaries paras_reg = re.compile('<p[^>]*>', re.IGNORECASE) spans_reg = re.compile('<span[^>]*>', re.IGNORECASE) paras = len(paras_reg.findall(html))