From 93bd1df11adc6fb33ed518fe898696f99e7ed3d1 Mon Sep 17 00:00:00 2001
From: ldolse
\s*(?P
\s*){1,3}\s*(?P
)?', re.IGNORECASE), chap_head),
- # Cover the case where every letter in a chapter title is separated by a space
- #(re.compile(r'
\s*(?P
\s*){1,3}\s*(?P
))?'), chap_head),
-
# Convert line breaks to paragraphs
(re.compile(r'
]*>\s*'), lambda match : '
'), (re.compile(r'
]*>\s*'), lambda match : '\n'), diff --git a/src/calibre/utils/wordcount.py b/src/calibre/utils/wordcount.py index 2bc91f4014..b317f99469 100644 --- a/src/calibre/utils/wordcount.py +++ b/src/calibre/utils/wordcount.py @@ -18,7 +18,9 @@ properties counted: * non_asian_words * words -Python License +Sourced from: +http://ginstrom.com/scribbles/2008/05/17/counting-words-etc-in-an-html-file-with-python/ +http://ginstrom.com/scribbles/2007/10/06/counting-words-characters-and-asian-characters-with-python/ """ __version__ = 0.1 __author__ = "Ryan Ginstrom"