mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Edit book: Add a check for too large HTML files when running the check book tool. Fixes #1264680 [[Feature Request]: Edit Book 300/100 kb warning on HTML](https://bugs.launchpad.net/calibre/+bug/1264680)
This commit is contained in:
parent
3bfd3bc07f
commit
c497dc1097
@ -12,7 +12,7 @@ from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES
|
|||||||
from calibre.ebooks.oeb.polish.container import guess_type
|
from calibre.ebooks.oeb.polish.container import guess_type
|
||||||
from calibre.ebooks.oeb.polish.cover import is_raster_image
|
from calibre.ebooks.oeb.polish.cover import is_raster_image
|
||||||
from calibre.ebooks.oeb.polish.check.base import run_checkers
|
from calibre.ebooks.oeb.polish.check.base import run_checkers
|
||||||
from calibre.ebooks.oeb.polish.check.parsing import check_xml_parsing, check_css_parsing, fix_style_tag
|
from calibre.ebooks.oeb.polish.check.parsing import check_xml_parsing, check_css_parsing, fix_style_tag, check_html_size
|
||||||
from calibre.ebooks.oeb.polish.check.images import check_raster_images
|
from calibre.ebooks.oeb.polish.check.images import check_raster_images
|
||||||
from calibre.ebooks.oeb.polish.check.links import check_links
|
from calibre.ebooks.oeb.polish.check.links import check_links
|
||||||
from calibre.ebooks.oeb.polish.check.fonts import check_fonts
|
from calibre.ebooks.oeb.polish.check.fonts import check_fonts
|
||||||
@ -37,6 +37,7 @@ def run_checks(container):
|
|||||||
items = raster_images
|
items = raster_images
|
||||||
if items is not None:
|
if items is not None:
|
||||||
items.append((name, mt, container.open(name, 'rb').read()))
|
items.append((name, mt, container.open(name, 'rb').read()))
|
||||||
|
errors.extend(run_checkers(check_html_size, html_items))
|
||||||
errors.extend(run_checkers(check_xml_parsing, xml_items))
|
errors.extend(run_checkers(check_xml_parsing, xml_items))
|
||||||
errors.extend(run_checkers(check_xml_parsing, html_items))
|
errors.extend(run_checkers(check_xml_parsing, html_items))
|
||||||
errors.extend(run_checkers(check_raster_images, raster_images))
|
errors.extend(run_checkers(check_raster_images, raster_images))
|
||||||
|
@ -11,11 +11,11 @@ import re
|
|||||||
from lxml.etree import XMLParser, fromstring, XMLSyntaxError
|
from lxml.etree import XMLParser, fromstring, XMLSyntaxError
|
||||||
import cssutils
|
import cssutils
|
||||||
|
|
||||||
from calibre import force_unicode
|
from calibre import force_unicode, human_readable
|
||||||
from calibre.ebooks.html_entities import html5_entities
|
from calibre.ebooks.html_entities import html5_entities
|
||||||
from calibre.ebooks.oeb.polish.pretty import pretty_script_or_style as fix_style_tag
|
from calibre.ebooks.oeb.polish.pretty import pretty_script_or_style as fix_style_tag
|
||||||
from calibre.ebooks.oeb.polish.utils import PositionFinder
|
from calibre.ebooks.oeb.polish.utils import PositionFinder
|
||||||
from calibre.ebooks.oeb.polish.check.base import BaseError, WARN, ERROR
|
from calibre.ebooks.oeb.polish.check.base import BaseError, WARN, ERROR, INFO
|
||||||
from calibre.ebooks.oeb.base import OEB_DOCS
|
from calibre.ebooks.oeb.base import OEB_DOCS
|
||||||
|
|
||||||
HTML_ENTITTIES = frozenset(html5_entities)
|
HTML_ENTITTIES = frozenset(html5_entities)
|
||||||
@ -61,6 +61,16 @@ class NamedEntities(BaseError):
|
|||||||
f.write(nraw.encode('utf-8'))
|
f.write(nraw.encode('utf-8'))
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
class TooLarge(BaseError):
    '''
    Informational report attached to an HTML file whose raw data is longer
    than MAX_SIZE (see check_html_size, which creates these reports).
    '''

    # Reported at INFO level rather than WARN or ERROR.
    level = INFO

    # Size threshold, compared against len() of the raw file data.
    MAX_SIZE = 260 * 1024

    # Help text, with the threshold rendered by human_readable() once, when
    # the class body is executed.
    HELP = _(
        'This HTML file is larger than %s. Too large HTML files can cause performance problems'
        ' on some ebook readers. Consider splitting this file into smaller sections.'
    ) % human_readable(MAX_SIZE)

    def __init__(self, name):
        # name: the name of the offending file, passed through to BaseError
        # together with the short message.
        BaseError.__init__(self, _('File too large'), name)
|
||||||
|
|
||||||
class BadEntity(BaseError):
|
class BadEntity(BaseError):
|
||||||
|
|
||||||
HELP = _('This is an invalid (unrecognized) entity. Replace it with whatever'
|
HELP = _('This is an invalid (unrecognized) entity. Replace it with whatever'
|
||||||
@ -103,6 +113,12 @@ class EntitityProcessor(object):
|
|||||||
self.bad_entities.append((m.start(), m.group()))
|
self.bad_entities.append((m.start(), m.group()))
|
||||||
return b' ' * len(m.group())
|
return b' ' * len(m.group())
|
||||||
|
|
||||||
|
def check_html_size(name, mt, raw):
    '''
    Check whether the raw data of an HTML file exceeds TooLarge.MAX_SIZE.

    :param name: Name of the file being checked, recorded in the report
    :param mt: Media type of the file (not used by this check)
    :param raw: Raw contents of the file; only its length is inspected
    :return: A list containing a single TooLarge report when
             len(raw) > TooLarge.MAX_SIZE, otherwise an empty list
    '''
    # Files at or below the limit produce no report.
    if len(raw) <= TooLarge.MAX_SIZE:
        return []
    return [TooLarge(name)]
|
||||||
|
|
||||||
entity_pat = re.compile(br'&(#{0,1}[a-zA-Z0-9]{1,8});')
|
entity_pat = re.compile(br'&(#{0,1}[a-zA-Z0-9]{1,8});')
|
||||||
|
|
||||||
def check_xml_parsing(name, mt, raw):
|
def check_xml_parsing(name, mt, raw):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user