From 548417ea6b6157faf1688b3b082f3eac5476636f Mon Sep 17 00:00:00 2001
From: ldolse <ldolse@yahoo.com>
Date: Mon, 13 Sep 2010 09:18:45 +1000
Subject: [PATCH] Count h3 tags as pre-existing headings; fix "un-wrapped" comment

---
 src/calibre/ebooks/conversion/utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py
index abfa43e7ed..ecf030b27d 100644
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@@ -111,7 +111,7 @@ class PreProcessor(object):
              html = add_markup.sub('</p>\n<p>', html)
         
         # detect chapters/sections to match xpath or splitting logic
-        heading = re.compile('<h(1|2)[^>]*>', re.IGNORECASE)
+        heading = re.compile('<h[1-3][^>]*>', re.IGNORECASE)
         self.html_preprocess_sections = len(heading.findall(html))
         self.log("found " + str(self.html_preprocess_sections) + " pre-existing headings")
         # 
@@ -134,7 +134,7 @@ class PreProcessor(object):
         self.log("Unwrapping Lines")
         # Some OCR sourced files have line breaks in the html using a combination of span & p tags
         # span are used for hard line breaks, p for new paragraphs.  Determine which is used so 
-        # that lines can be wrapped across page boundaries
+        # that lines can be un-wrapped across page boundaries
         paras_reg = re.compile('<p[^>]*>', re.IGNORECASE)
         spans_reg = re.compile('<span[^>]*>', re.IGNORECASE)
         paras = len(paras_reg.findall(html))