mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Edit book: Add a check for too large HTML files when running the check book tool. Fixes #1264680 [[Feature Request]: Edit Book 300/100 kb warning on HTML](https://bugs.launchpad.net/calibre/+bug/1264680)
This commit is contained in:
parent
3bfd3bc07f
commit
c497dc1097
@ -12,7 +12,7 @@ from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES
|
||||
from calibre.ebooks.oeb.polish.container import guess_type
|
||||
from calibre.ebooks.oeb.polish.cover import is_raster_image
|
||||
from calibre.ebooks.oeb.polish.check.base import run_checkers
|
||||
from calibre.ebooks.oeb.polish.check.parsing import check_xml_parsing, check_css_parsing, fix_style_tag
|
||||
from calibre.ebooks.oeb.polish.check.parsing import check_xml_parsing, check_css_parsing, fix_style_tag, check_html_size
|
||||
from calibre.ebooks.oeb.polish.check.images import check_raster_images
|
||||
from calibre.ebooks.oeb.polish.check.links import check_links
|
||||
from calibre.ebooks.oeb.polish.check.fonts import check_fonts
|
||||
@ -37,6 +37,7 @@ def run_checks(container):
|
||||
items = raster_images
|
||||
if items is not None:
|
||||
items.append((name, mt, container.open(name, 'rb').read()))
|
||||
errors.extend(run_checkers(check_html_size, html_items))
|
||||
errors.extend(run_checkers(check_xml_parsing, xml_items))
|
||||
errors.extend(run_checkers(check_xml_parsing, html_items))
|
||||
errors.extend(run_checkers(check_raster_images, raster_images))
|
||||
|
@ -11,11 +11,11 @@ import re
|
||||
from lxml.etree import XMLParser, fromstring, XMLSyntaxError
|
||||
import cssutils
|
||||
|
||||
from calibre import force_unicode
|
||||
from calibre import force_unicode, human_readable
|
||||
from calibre.ebooks.html_entities import html5_entities
|
||||
from calibre.ebooks.oeb.polish.pretty import pretty_script_or_style as fix_style_tag
|
||||
from calibre.ebooks.oeb.polish.utils import PositionFinder
|
||||
from calibre.ebooks.oeb.polish.check.base import BaseError, WARN, ERROR
|
||||
from calibre.ebooks.oeb.polish.check.base import BaseError, WARN, ERROR, INFO
|
||||
from calibre.ebooks.oeb.base import OEB_DOCS
|
||||
|
||||
HTML_ENTITTIES = frozenset(html5_entities)
|
||||
@ -61,6 +61,16 @@ class NamedEntities(BaseError):
|
||||
f.write(nraw.encode('utf-8'))
|
||||
return True
|
||||
|
||||
class TooLarge(BaseError):
    """Check result reported for an HTML file whose raw data exceeds MAX_SIZE."""

    # Severity attached to this result.
    level = INFO
    # Threshold that check_html_size() compares len(raw) against (260 * 1024).
    MAX_SIZE = 260 * 1024
    # Help text; the human-readable form of MAX_SIZE is substituted once,
    # when the class body is executed.
    HELP = _(
        'This HTML file is larger than %s. Too large HTML files can cause performance problems'
        ' on some ebook readers. Consider splitting this file into smaller sections.'
    ) % human_readable(MAX_SIZE)

    def __init__(self, name):
        """Create the result for the file called *name*, with a fixed message."""
        BaseError.__init__(self, _('File too large'), name)
|
||||
|
||||
class BadEntity(BaseError):
|
||||
|
||||
HELP = _('This is an invalid (unrecognized) entity. Replace it with whatever'
|
||||
@ -103,6 +113,12 @@ class EntitityProcessor(object):
|
||||
self.bad_entities.append((m.start(), m.group()))
|
||||
return b' ' * len(m.group())
|
||||
|
||||
def check_html_size(name, mt, raw):
    """Report *name* as too large when its raw data exceeds TooLarge.MAX_SIZE.

    Returns a list holding a single TooLarge instance if ``len(raw)`` is
    strictly greater than ``TooLarge.MAX_SIZE``, otherwise an empty list.
    The *mt* argument is accepted but not used by this check.
    """
    if len(raw) > TooLarge.MAX_SIZE:
        return [TooLarge(name)]
    return []
|
||||
|
||||
entity_pat = re.compile(br'&(#{0,1}[a-zA-Z0-9]{1,8});')
|
||||
|
||||
def check_xml_parsing(name, mt, raw):
|
||||
|
Loading…
x
Reference in New Issue
Block a user