diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index 7f27d7a465..67be59083e 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -363,11 +363,6 @@ class HTMLPreProcessor(object): # Remove gray background (re.compile(r']+>'), lambda match : ''), - # Detect Chapters to match default XPATH in GUI - #(re.compile(r'
<br>\s*(?P<chap>(<[ibu]>){0,2}\s*.?(Introduction|Chapter|Kapitel|Epilogue|Prologue|Book|Part|Dedication|Volume|Preface|Acknowledgments)\s*([\d\w-]+\s*){0,3}\s*(</[ibu]>){0,2})\s*(<br>\s*){1,3}\s*(?P<title>(<[ibu]>){0,2}(\s*\w+){1,4}\s*(</[ibu]>){0,2}\s*<br>)?', re.IGNORECASE), chap_head), - # Cover the case where every letter in a chapter title is separated by a space - #(re.compile(r'<br>\s*(?P<chap>([A-Z]\s+){4,}\s*([\d\w-]+\s*){0,3}\s*)\s*(<br>\s*){1,3}\s*(?P<title>(<[ibu]>){0,2}(\s*\w+){1,4}\s*(</[ibu]>){0,2}\s*(<br>))?'), chap_head), - # Convert line breaks to paragraphs (re.compile(r'<br[^>]*>\s*'), lambda match : '</p>\n<p>'), (re.compile(r'<body[^>]*>\s*'), lambda match : '<body>\n<p>'), diff --git a/src/calibre/utils/wordcount.py b/src/calibre/utils/wordcount.py index 2bc91f4014..b317f99469 100644 --- a/src/calibre/utils/wordcount.py +++ b/src/calibre/utils/wordcount.py @@ -18,7 +18,9 @@ properties counted: * non_asian_words * words -Python License +Sourced from: +http://ginstrom.com/scribbles/2008/05/17/counting-words-etc-in-an-html-file-with-python/ +http://ginstrom.com/scribbles/2007/10/06/counting-words-characters-and-asian-characters-with-python/ """ __version__ = 0.1 __author__ = "Ryan Ginstrom"