mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Calculate text lengths and also detect presence of maths markup
This commit is contained in:
parent
321a80cd73
commit
16e8918d21
@ -4,7 +4,7 @@
|
|||||||
|
|
||||||
from __future__ import (unicode_literals, division, absolute_import,
|
from __future__ import (unicode_literals, division, absolute_import,
|
||||||
print_function)
|
print_function)
|
||||||
import sys, os, json
|
import sys, os, json, re
|
||||||
from base64 import standard_b64encode, standard_b64decode
|
from base64 import standard_b64encode, standard_b64decode
|
||||||
from collections import defaultdict, OrderedDict
|
from collections import defaultdict, OrderedDict
|
||||||
from itertools import count
|
from itertools import count
|
||||||
@ -88,6 +88,34 @@ def transform_sheet(sheet):
|
|||||||
changed = True
|
changed = True
|
||||||
return changed
|
return changed
|
||||||
|
|
||||||
|
def check_for_maths(root):
|
||||||
|
for x in root.iterdescendants('{*}math'):
|
||||||
|
return True
|
||||||
|
for s in root.iterdescendants(XHTML('script')):
|
||||||
|
if s.get('type') == 'text/x-mathjax-config':
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
def get_length(root):
|
||||||
|
strip_space = re.compile(r'\s+')
|
||||||
|
ans = 0
|
||||||
|
def count(elem):
|
||||||
|
num = 0
|
||||||
|
tname = elem.tag.rpartition('}')[-1].lower()
|
||||||
|
if elem.text and tname not in 'script style':
|
||||||
|
num += len(strip_space.sub(elem.text, ''))
|
||||||
|
if elem.tail:
|
||||||
|
num += len(strip_space.sub(elem.tail, ''))
|
||||||
|
if tname in 'img svg':
|
||||||
|
num += 2000
|
||||||
|
return num
|
||||||
|
|
||||||
|
for body in root.iterdescendants(XHTML('body')):
|
||||||
|
ans += count(body)
|
||||||
|
for elem in body.iterdescendants('*'):
|
||||||
|
ans += count(elem)
|
||||||
|
return ans
|
||||||
|
|
||||||
class Container(ContainerBase):
|
class Container(ContainerBase):
|
||||||
|
|
||||||
tweak_mode = True
|
tweak_mode = True
|
||||||
@ -112,6 +140,9 @@ class Container(ContainerBase):
|
|||||||
'is_comic': input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'},
|
'is_comic': input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'},
|
||||||
'raster_cover_name': raster_cover_name,
|
'raster_cover_name': raster_cover_name,
|
||||||
'title_page_name': titlepage_name,
|
'title_page_name': titlepage_name,
|
||||||
|
'has_maths': False,
|
||||||
|
'total_length': 0,
|
||||||
|
'spine_length': 0,
|
||||||
}
|
}
|
||||||
# Mark the spine as dirty since we have to ensure it is normalized
|
# Mark the spine as dirty since we have to ensure it is normalized
|
||||||
for name in data['spine']:
|
for name in data['spine']:
|
||||||
@ -121,12 +152,22 @@ class Container(ContainerBase):
|
|||||||
self.virtualize_resources()
|
self.virtualize_resources()
|
||||||
def manifest_data(name):
|
def manifest_data(name):
|
||||||
mt = (self.mime_map.get(name) or 'application/octet-stream').lower()
|
mt = (self.mime_map.get(name) or 'application/octet-stream').lower()
|
||||||
return {
|
ans = {
|
||||||
'size':os.path.getsize(self.name_path_map[name]),
|
'size':os.path.getsize(self.name_path_map[name]),
|
||||||
'is_virtualized': name in self.virtualized_names,
|
'is_virtualized': name in self.virtualized_names,
|
||||||
'mimetype':mt,
|
'mimetype':mt,
|
||||||
'is_html': mt in OEB_DOCS
|
'is_html': mt in OEB_DOCS,
|
||||||
}
|
}
|
||||||
|
if ans['is_html']:
|
||||||
|
root = self.parsed(name)
|
||||||
|
ans['length'] = l = get_length(root)
|
||||||
|
self.book_render_data['total_length'] += l
|
||||||
|
if name in data['spine']:
|
||||||
|
self.book_render_data['spine_length'] += l
|
||||||
|
ans['has_maths'] = hm = check_for_maths(root)
|
||||||
|
if hm:
|
||||||
|
self.book_render_data['has_maths'] = True
|
||||||
|
return ans
|
||||||
data['files'] = {name:manifest_data(name) for name in set(self.name_path_map) - excluded_names}
|
data['files'] = {name:manifest_data(name) for name in set(self.name_path_map) - excluded_names}
|
||||||
self.commit()
|
self.commit()
|
||||||
for name in excluded_names:
|
for name in excluded_names:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user