mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Handle comments while getting char count
This commit is contained in:
parent
a687204ec3
commit
78a97a0e37
@ -202,8 +202,11 @@ def get_length(root):
|
|||||||
strip_space = re.compile(r'\s+')
|
strip_space = re.compile(r'\s+')
|
||||||
|
|
||||||
def count(elem):
|
def count(elem):
|
||||||
|
tag = getattr(elem, 'tag', count)
|
||||||
|
if callable(tag):
|
||||||
|
return len(strip_space.sub('', getattr(elem, 'tail', None) or ''))
|
||||||
num = 0
|
num = 0
|
||||||
tname = elem.tag.rpartition('}')[-1].lower()
|
tname = tag.rpartition('}')[-1].lower()
|
||||||
if elem.text and tname not in ignore_tags:
|
if elem.text and tname not in ignore_tags:
|
||||||
num += len(strip_space.sub('', elem.text))
|
num += len(strip_space.sub('', elem.text))
|
||||||
if elem.tail:
|
if elem.tail:
|
||||||
@ -213,11 +216,14 @@ def get_length(root):
|
|||||||
return num
|
return num
|
||||||
else:
|
else:
|
||||||
def count(elem):
|
def count(elem):
|
||||||
return fast(elem.tag, elem.text, elem.tail)
|
tag = getattr(elem, 'tag', count)
|
||||||
|
if callable(tag):
|
||||||
|
return fast('', None, getattr(elem, 'tail', None))
|
||||||
|
return fast(tag, elem.text, elem.tail)
|
||||||
|
|
||||||
for body in root.iterchildren(XHTML('body')):
|
for body in root.iterchildren(XHTML('body')):
|
||||||
ans += count(body)
|
ans += count(body)
|
||||||
for elem in body.iterdescendants('*'):
|
for elem in body.iterdescendants():
|
||||||
ans += count(elem)
|
ans += count(elem)
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
@ -234,4 +234,6 @@ class ContentTest(LibraryBaseTest):
|
|||||||
self.ae(get_length(root), 5)
|
self.ae(get_length(root), 5)
|
||||||
root = html5_parse('<script>xyz</script>a<iMg>b')
|
root = html5_parse('<script>xyz</script>a<iMg>b')
|
||||||
self.ae(get_length(root), 1002)
|
self.ae(get_length(root), 1002)
|
||||||
|
root = html5_parse('<p><!-- abc -->m')
|
||||||
|
self.ae(get_length(root), 1)
|
||||||
# }}}
|
# }}}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user