mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Check Book: Add checks for empty HTML/XML/CSS/image files in the book. Fixes #1329971 [Private bug](https://bugs.launchpad.net/calibre/+bug/1329971)
This commit is contained in:
parent
5ee7a620ea
commit
82f9182f95
@ -9,6 +9,7 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
from calibre import as_unicode
|
||||
from calibre.utils.magick import Image
|
||||
from calibre.ebooks.oeb.polish.check.base import BaseError, WARN
|
||||
from calibre.ebooks.oeb.polish.check.parsing import EmptyFile
|
||||
|
||||
class InvalidImage(BaseError):
|
||||
|
||||
@ -47,6 +48,8 @@ class CMYKImage(BaseError):
|
||||
return True
|
||||
|
||||
def check_raster_images(name, mt, raw):
|
||||
if not raw:
|
||||
return [EmptyFile(name)]
|
||||
errors = []
|
||||
i = Image()
|
||||
try:
|
||||
|
@ -13,7 +13,8 @@ from calibre.ebooks.oeb.polish.utils import guess_type
|
||||
from calibre.ebooks.oeb.polish.cover import is_raster_image
|
||||
from calibre.ebooks.oeb.polish.check.base import run_checkers
|
||||
from calibre.ebooks.oeb.polish.check.parsing import (
|
||||
check_filenames, check_xml_parsing, check_css_parsing, fix_style_tag, check_html_size, check_ids)
|
||||
check_filenames, check_xml_parsing, check_css_parsing, fix_style_tag,
|
||||
check_html_size, check_ids, EmptyFile)
|
||||
from calibre.ebooks.oeb.polish.check.images import check_raster_images
|
||||
from calibre.ebooks.oeb.polish.check.links import check_links, check_mimetypes, check_link_destinations
|
||||
from calibre.ebooks.oeb.polish.check.fonts import check_fonts
|
||||
@ -46,8 +47,14 @@ def run_checks(container):
|
||||
|
||||
# cssutils is not thread safe
|
||||
for name, mt, raw in stylesheets:
|
||||
if not raw:
|
||||
errors.append(EmptyFile(name))
|
||||
continue
|
||||
errors.extend(check_css_parsing(name, raw))
|
||||
|
||||
for name, mt, raw in html_items:
|
||||
if not raw:
|
||||
continue
|
||||
root = container.parsed(name)
|
||||
for style in root.xpath('//*[local-name()="style"]'):
|
||||
if style.get('type', 'text/css') == 'text/css' and style.text:
|
||||
|
@ -25,6 +25,18 @@ ALL_ENTITIES = HTML_ENTITTIES | XML_ENTITIES
|
||||
replace_pat = re.compile('&(%s);' % '|'.join(re.escape(x) for x in sorted((HTML_ENTITTIES - XML_ENTITIES))))
|
||||
# Matches a "tag mismatch: ... line N ... line M" message and captures the
# first line number (N). Written as a raw string so that \d reaches the regex
# engine as a digit class instead of being an invalid string escape.
mismatch_pat = re.compile(r'tag mismatch:.+?line (\d+).+?line \d+')
|
||||
|
||||
class EmptyFile(BaseError):
    """Check error reported for a book file that has no content.

    Calling the instance with a container applies the fix: the file is
    removed from that container.
    """

    HELP = _('This file is empty, it contains nothing, you should probably remove it.')
    INDIVIDUAL_FIX = _('Remove this file')

    def __init__(self, name):
        """Create the error for the empty file called *name*."""
        message = _('The file %s is empty') % name
        BaseError.__init__(self, message, name)

    def __call__(self, container):
        """Remove the empty file from *container* and return True."""
        # NOTE(review): self.name is presumably set by BaseError.__init__,
        # which is defined outside this view -- confirm.
        container.remove_item(self.name)
        return True
|
||||
|
||||
class DecodeError(BaseError):
|
||||
|
||||
is_parsing_error = True
|
||||
@ -197,6 +209,8 @@ def check_html_size(name, mt, raw):
|
||||
entity_pat = re.compile(br'&(#{0,1}[a-zA-Z0-9]{1,8});')
|
||||
|
||||
def check_xml_parsing(name, mt, raw):
|
||||
if not raw:
|
||||
return [EmptyFile(name)]
|
||||
raw = raw.replace(b'\r\n', b'\n').replace(b'\r', b'\n')
|
||||
# Get rid of entities as named entities trip up the XML parser
|
||||
eproc = EntitityProcessor(mt)
|
||||
|
Loading…
x
Reference in New Issue
Block a user