From 2014e6520e3b9b18a7e7a733deab6ceba9096072 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Fri, 18 Oct 2019 20:29:28 +0530
Subject: [PATCH] ...

---
 src/calibre/ebooks/conversion/preprocess.py | 28 +++++++++++----------
 1 file changed, 15 insertions(+), 13 deletions(-)
diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py
index 061d8ae2b1..4cc3521c1d 100644
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ -347,19 +347,19 @@ class CSSPreProcessor(object):
 class HTMLPreProcessor(object):
 
     PREPROCESS = [
-                  # Remove huge block of contiguous spaces as they slow down
-                  # the following regexes pretty badly
-                  (re.compile(r'\s{10000,}'), lambda m: ''),
-                  # Some idiotic HTML generators (Frontpage I'm looking at you)
-                  # Put all sorts of crap into <head>. This messes up lxml
-                  (re.compile(r'<head[^>]*>\n*(.*?)\n*</head>', re.IGNORECASE|re.DOTALL),
-                   sanitize_head),
-                  # Convert all entities, since lxml doesn't handle them well
-                  (re.compile(r'&(\S+?);'), convert_entities),
-                  # Remove the <![if/endif tags inserted by everybody's darling, MS Word
-                  (re.compile(r'</{0,1}!\[(end){0,1}if\]{0,1}>', re.IGNORECASE),
-                   lambda match: ''),
-                  ]
+        # Remove huge blocks of contiguous whitespace (10000+ characters) as
+        # they slow down the following regexes pretty badly
+        (re.compile(r'\s{10000,}'), lambda m: ''),
+        # Some idiotic HTML generators (Frontpage I'm looking at you)
+        # put all sorts of crap into <head>. This messes up lxml
+        (re.compile(r'<head[^>]*>\n*(.*?)\n*</head>', re.IGNORECASE|re.DOTALL),
+         sanitize_head),
+        # Convert all entities, since lxml doesn't handle them well
+        (re.compile(r'&(\S+?);'), convert_entities),
+        # Remove the <![if/endif tags inserted by everybody's darling, MS Word
+        (re.compile(r'</{0,1}!\[(end){0,1}if\]{0,1}>', re.IGNORECASE),
+         lambda match: ''),
+    ]
 
     # Fix pdftohtml markup
     PDFTOHTML  = [
@@ -636,7 +636,9 @@ class HTMLPreProcessor(object):
 
         for rule in rules + end_rules:
             try:
+                print(rule[0].pattern)
                 html = rule[0].sub(rule[1], html)
+                print(222222222222)
             except Exception as e:
                 if rule in user_sr_rules:
                     self.log.error(