mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
cleaned up comments
This commit is contained in:
parent
760d4d2fd3
commit
93bd1df11a
@ -363,11 +363,6 @@ class HTMLPreProcessor(object):
|
|||||||
# Remove gray background
|
# Remove gray background
|
||||||
(re.compile(r'<BODY[^<>]+>'), lambda match : '<BODY>'),
|
(re.compile(r'<BODY[^<>]+>'), lambda match : '<BODY>'),
|
||||||
|
|
||||||
# Detect Chapters to match default XPATH in GUI
|
|
||||||
#(re.compile(r'<br>\s*(?P<chap>(<[ibu]>){0,2}\s*.?(Introduction|Chapter|Kapitel|Epilogue|Prologue|Book|Part|Dedication|Volume|Preface|Acknowledgments)\s*([\d\w-]+\s*){0,3}\s*(</[ibu]>){0,2})\s*(<br>\s*){1,3}\s*(?P<title>(<[ibu]>){0,2}(\s*\w+){1,4}\s*(</[ibu]>){0,2}\s*<br>)?', re.IGNORECASE), chap_head),
|
|
||||||
# Cover the case where every letter in a chapter title is separated by a space
|
|
||||||
#(re.compile(r'<br>\s*(?P<chap>([A-Z]\s+){4,}\s*([\d\w-]+\s*){0,3}\s*)\s*(<br>\s*){1,3}\s*(?P<title>(<[ibu]>){0,2}(\s*\w+){1,4}\s*(</[ibu]>){0,2}\s*(<br>))?'), chap_head),
|
|
||||||
|
|
||||||
# Convert line breaks to paragraphs
|
# Convert line breaks to paragraphs
|
||||||
(re.compile(r'<br[^>]*>\s*'), lambda match : '</p>\n<p>'),
|
(re.compile(r'<br[^>]*>\s*'), lambda match : '</p>\n<p>'),
|
||||||
(re.compile(r'<body[^>]*>\s*'), lambda match : '<body>\n<p>'),
|
(re.compile(r'<body[^>]*>\s*'), lambda match : '<body>\n<p>'),
|
||||||
|
@ -18,7 +18,9 @@ properties counted:
|
|||||||
* non_asian_words
|
* non_asian_words
|
||||||
* words
|
* words
|
||||||
|
|
||||||
Python License
|
Sourced from:
|
||||||
|
http://ginstrom.com/scribbles/2008/05/17/counting-words-etc-in-an-html-file-with-python/
|
||||||
|
http://ginstrom.com/scribbles/2007/10/06/counting-words-characters-and-asian-characters-with-python/
|
||||||
"""
|
"""
|
||||||
__version__ = 0.1
|
__version__ = 0.1
|
||||||
__author__ = "Ryan Ginstrom"
|
__author__ = "Ryan Ginstrom"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user