mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Handle comments while getting char count
This commit is contained in:
parent
a687204ec3
commit
78a97a0e37
@ -202,8 +202,11 @@ def get_length(root):
|
||||
strip_space = re.compile(r'\s+')
|
||||
|
||||
def count(elem):
|
||||
tag = getattr(elem, 'tag', count)
|
||||
if callable(tag):
|
||||
return len(strip_space.sub('', getattr(elem, 'tail', None) or ''))
|
||||
num = 0
|
||||
tname = elem.tag.rpartition('}')[-1].lower()
|
||||
tname = tag.rpartition('}')[-1].lower()
|
||||
if elem.text and tname not in ignore_tags:
|
||||
num += len(strip_space.sub('', elem.text))
|
||||
if elem.tail:
|
||||
@ -213,11 +216,14 @@ def get_length(root):
|
||||
return num
|
||||
else:
|
||||
def count(elem):
|
||||
return fast(elem.tag, elem.text, elem.tail)
|
||||
tag = getattr(elem, 'tag', count)
|
||||
if callable(tag):
|
||||
return fast('', None, getattr(elem, 'tail', None))
|
||||
return fast(tag, elem.text, elem.tail)
|
||||
|
||||
for body in root.iterchildren(XHTML('body')):
|
||||
ans += count(body)
|
||||
for elem in body.iterdescendants('*'):
|
||||
for elem in body.iterdescendants():
|
||||
ans += count(elem)
|
||||
return ans
|
||||
|
||||
|
@ -234,4 +234,6 @@ class ContentTest(LibraryBaseTest):
|
||||
self.ae(get_length(root), 5)
|
||||
root = html5_parse('<script>xyz</script>a<iMg>b')
|
||||
self.ae(get_length(root), 1002)
|
||||
root = html5_parse('<p><!-- abc -->m')
|
||||
self.ae(get_length(root), 1)
|
||||
# }}}
|
||||
|
Loading…
x
Reference in New Issue
Block a user