From 78a97a0e37fe66ca870ee9097d91507b67a9c721 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 27 Oct 2019 12:52:12 +0530 Subject: [PATCH] Handle comments while getting char count --- src/calibre/srv/render_book.py | 12 +++++++++--- src/calibre/srv/tests/content.py | 2 ++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/calibre/srv/render_book.py b/src/calibre/srv/render_book.py index 6f7762a894..1ea6166ad6 100644 --- a/src/calibre/srv/render_book.py +++ b/src/calibre/srv/render_book.py @@ -202,8 +202,11 @@ def get_length(root): strip_space = re.compile(r'\s+') def count(elem): + tag = getattr(elem, 'tag', count) + if callable(tag): + return len(strip_space.sub('', getattr(elem, 'tail', None) or '')) num = 0 - tname = elem.tag.rpartition('}')[-1].lower() + tname = tag.rpartition('}')[-1].lower() if elem.text and tname not in ignore_tags: num += len(strip_space.sub('', elem.text)) if elem.tail: @@ -213,11 +216,14 @@ def get_length(root): return num else: def count(elem): - return fast(elem.tag, elem.text, elem.tail) + tag = getattr(elem, 'tag', count) + if callable(tag): + return fast('', None, getattr(elem, 'tail', None)) + return fast(tag, elem.text, elem.tail) for body in root.iterchildren(XHTML('body')): ans += count(body) - for elem in body.iterdescendants('*'): + for elem in body.iterdescendants(): ans += count(elem) return ans diff --git a/src/calibre/srv/tests/content.py b/src/calibre/srv/tests/content.py index 9cc3122519..ccc51a4045 100644 --- a/src/calibre/srv/tests/content.py +++ b/src/calibre/srv/tests/content.py @@ -234,4 +234,6 @@ class ContentTest(LibraryBaseTest): self.ae(get_length(root), 5) root = html5_parse('ab') self.ae(get_length(root), 1002) + root = html5_parse('

m') + self.ae(get_length(root), 1) # }}}