Report empty files in the book checker: add an EmptyFile error whose fix removes
the file, return it from check_raster_images/check_xml_parsing when raw is empty,
and in run_checks report empty stylesheets and skip empty HTML items.

diff --git a/src/calibre/ebooks/oeb/polish/check/images.py b/src/calibre/ebooks/oeb/polish/check/images.py
index 97b583acda..4d476f0b09 100644
--- a/src/calibre/ebooks/oeb/polish/check/images.py
+++ b/src/calibre/ebooks/oeb/polish/check/images.py
@@ -9,6 +9,7 @@ __copyright__ = '2013, Kovid Goyal '
 from calibre import as_unicode
 from calibre.utils.magick import Image
 from calibre.ebooks.oeb.polish.check.base import BaseError, WARN
+from calibre.ebooks.oeb.polish.check.parsing import EmptyFile
 
 class InvalidImage(BaseError):
 
@@ -47,6 +48,8 @@ class CMYKImage(BaseError):
         return True
 
 def check_raster_images(name, mt, raw):
+    if not raw:
+        return [EmptyFile(name)]
     errors = []
     i = Image()
     try:
diff --git a/src/calibre/ebooks/oeb/polish/check/main.py b/src/calibre/ebooks/oeb/polish/check/main.py
index 466e96afd3..41fd3cd1e4 100644
--- a/src/calibre/ebooks/oeb/polish/check/main.py
+++ b/src/calibre/ebooks/oeb/polish/check/main.py
@@ -13,7 +13,8 @@ from calibre.ebooks.oeb.polish.utils import guess_type
 from calibre.ebooks.oeb.polish.cover import is_raster_image
 from calibre.ebooks.oeb.polish.check.base import run_checkers
 from calibre.ebooks.oeb.polish.check.parsing import (
-    check_filenames, check_xml_parsing, check_css_parsing, fix_style_tag, check_html_size, check_ids)
+    check_filenames, check_xml_parsing, check_css_parsing, fix_style_tag,
+    check_html_size, check_ids, EmptyFile)
 from calibre.ebooks.oeb.polish.check.images import check_raster_images
 from calibre.ebooks.oeb.polish.check.links import check_links, check_mimetypes, check_link_destinations
 from calibre.ebooks.oeb.polish.check.fonts import check_fonts
@@ -46,8 +47,14 @@ def run_checks(container):
 
     # cssutils is not thread safe
     for name, mt, raw in stylesheets:
+        if not raw:
+            errors.append(EmptyFile(name))
+            continue
         errors.extend(check_css_parsing(name, raw))
+
     for name, mt, raw in html_items:
+        if not raw:
+            continue
         root = container.parsed(name)
         for style in root.xpath('//*[local-name()="style"]'):
             if style.get('type', 'text/css') == 'text/css' and style.text:
diff --git a/src/calibre/ebooks/oeb/polish/check/parsing.py b/src/calibre/ebooks/oeb/polish/check/parsing.py
index 72e5894ebb..ee868e97c7 100644
--- a/src/calibre/ebooks/oeb/polish/check/parsing.py
+++ b/src/calibre/ebooks/oeb/polish/check/parsing.py
@@ -25,6 +25,18 @@ ALL_ENTITIES = HTML_ENTITTIES | XML_ENTITIES
 replace_pat = re.compile('&(%s);' % '|'.join(re.escape(x) for x in sorted((HTML_ENTITTIES - XML_ENTITIES))))
 mismatch_pat = re.compile('tag mismatch:.+?line (\d+).+?line \d+')
 
+class EmptyFile(BaseError):
+
+    HELP = _('This file is empty, it contains nothing, you should probably remove it.')
+    INDIVIDUAL_FIX = _('Remove this file')
+
+    def __init__(self, name):
+        BaseError.__init__(self, _('The file %s is empty') % name, name)
+
+    def __call__(self, container):
+        container.remove_item(self.name)
+        return True
+
 class DecodeError(BaseError):
 
     is_parsing_error = True
@@ -197,6 +209,8 @@ def check_html_size(name, mt, raw):
 entity_pat = re.compile(br'&(#{0,1}[a-zA-Z0-9]{1,8});')
 
 def check_xml_parsing(name, mt, raw):
+    if not raw:
+        return [EmptyFile(name)]
     raw = raw.replace(b'\r\n', b'\n').replace(b'\r', b'\n')
     # Get rid of entities as named entities trip up the XML parser
     eproc = EntitityProcessor(mt)