mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Check Book: Add checks for empty HTML/XML/CSS/image files in the book. Fixes #1329971 [Private bug](https://bugs.launchpad.net/calibre/+bug/1329971)
This commit is contained in:
parent
5ee7a620ea
commit
82f9182f95
@ -9,6 +9,7 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
|||||||
from calibre import as_unicode
|
from calibre import as_unicode
|
||||||
from calibre.utils.magick import Image
|
from calibre.utils.magick import Image
|
||||||
from calibre.ebooks.oeb.polish.check.base import BaseError, WARN
|
from calibre.ebooks.oeb.polish.check.base import BaseError, WARN
|
||||||
|
from calibre.ebooks.oeb.polish.check.parsing import EmptyFile
|
||||||
|
|
||||||
class InvalidImage(BaseError):
|
class InvalidImage(BaseError):
|
||||||
|
|
||||||
@ -47,6 +48,8 @@ class CMYKImage(BaseError):
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
def check_raster_images(name, mt, raw):
|
def check_raster_images(name, mt, raw):
|
||||||
|
if not raw:
|
||||||
|
return [EmptyFile(name)]
|
||||||
errors = []
|
errors = []
|
||||||
i = Image()
|
i = Image()
|
||||||
try:
|
try:
|
||||||
|
@ -13,7 +13,8 @@ from calibre.ebooks.oeb.polish.utils import guess_type
|
|||||||
from calibre.ebooks.oeb.polish.cover import is_raster_image
|
from calibre.ebooks.oeb.polish.cover import is_raster_image
|
||||||
from calibre.ebooks.oeb.polish.check.base import run_checkers
|
from calibre.ebooks.oeb.polish.check.base import run_checkers
|
||||||
from calibre.ebooks.oeb.polish.check.parsing import (
|
from calibre.ebooks.oeb.polish.check.parsing import (
|
||||||
check_filenames, check_xml_parsing, check_css_parsing, fix_style_tag, check_html_size, check_ids)
|
check_filenames, check_xml_parsing, check_css_parsing, fix_style_tag,
|
||||||
|
check_html_size, check_ids, EmptyFile)
|
||||||
from calibre.ebooks.oeb.polish.check.images import check_raster_images
|
from calibre.ebooks.oeb.polish.check.images import check_raster_images
|
||||||
from calibre.ebooks.oeb.polish.check.links import check_links, check_mimetypes, check_link_destinations
|
from calibre.ebooks.oeb.polish.check.links import check_links, check_mimetypes, check_link_destinations
|
||||||
from calibre.ebooks.oeb.polish.check.fonts import check_fonts
|
from calibre.ebooks.oeb.polish.check.fonts import check_fonts
|
||||||
@ -46,8 +47,14 @@ def run_checks(container):
|
|||||||
|
|
||||||
# cssutils is not thread safe
|
# cssutils is not thread safe
|
||||||
for name, mt, raw in stylesheets:
|
for name, mt, raw in stylesheets:
|
||||||
|
if not raw:
|
||||||
|
errors.append(EmptyFile(name))
|
||||||
|
continue
|
||||||
errors.extend(check_css_parsing(name, raw))
|
errors.extend(check_css_parsing(name, raw))
|
||||||
|
|
||||||
for name, mt, raw in html_items:
|
for name, mt, raw in html_items:
|
||||||
|
if not raw:
|
||||||
|
continue
|
||||||
root = container.parsed(name)
|
root = container.parsed(name)
|
||||||
for style in root.xpath('//*[local-name()="style"]'):
|
for style in root.xpath('//*[local-name()="style"]'):
|
||||||
if style.get('type', 'text/css') == 'text/css' and style.text:
|
if style.get('type', 'text/css') == 'text/css' and style.text:
|
||||||
|
@ -25,6 +25,18 @@ ALL_ENTITIES = HTML_ENTITTIES | XML_ENTITIES
|
|||||||
replace_pat = re.compile('&(%s);' % '|'.join(re.escape(x) for x in sorted((HTML_ENTITTIES - XML_ENTITIES))))
|
replace_pat = re.compile('&(%s);' % '|'.join(re.escape(x) for x in sorted((HTML_ENTITTIES - XML_ENTITIES))))
|
||||||
mismatch_pat = re.compile('tag mismatch:.+?line (\d+).+?line \d+')
|
mismatch_pat = re.compile('tag mismatch:.+?line (\d+).+?line \d+')
|
||||||
|
|
||||||
|
class EmptyFile(BaseError):
    """Check Book error reported for a book file that has no content.

    Applying the individual fix removes the file from the container.
    """

    # User-visible explanation of the problem and the label of its fix.
    HELP = _('This file is empty, it contains nothing, you should probably remove it.')
    INDIVIDUAL_FIX = _('Remove this file')

    def __init__(self, name):
        """Create the error for the file called ``name``."""
        message = _('The file %s is empty') % name
        BaseError.__init__(self, message, name)

    def __call__(self, container):
        """Apply the fix by removing this file from ``container``.

        Always returns True.
        NOTE(review): presumably True tells the caller the container was
        modified -- confirm against the BaseError contract.
        """
        # self.name is assumed to be stored by BaseError.__init__ -- TODO confirm
        container.remove_item(self.name)
        return True
|
||||||
|
|
||||||
class DecodeError(BaseError):
|
class DecodeError(BaseError):
|
||||||
|
|
||||||
is_parsing_error = True
|
is_parsing_error = True
|
||||||
@ -197,6 +209,8 @@ def check_html_size(name, mt, raw):
|
|||||||
entity_pat = re.compile(br'&(#{0,1}[a-zA-Z0-9]{1,8});')
|
entity_pat = re.compile(br'&(#{0,1}[a-zA-Z0-9]{1,8});')
|
||||||
|
|
||||||
def check_xml_parsing(name, mt, raw):
|
def check_xml_parsing(name, mt, raw):
|
||||||
|
if not raw:
|
||||||
|
return [EmptyFile(name)]
|
||||||
raw = raw.replace(b'\r\n', b'\n').replace(b'\r', b'\n')
|
raw = raw.replace(b'\r\n', b'\n').replace(b'\r', b'\n')
|
||||||
# Get rid of entities as named entities trip up the XML parser
|
# Get rid of entities as named entities trip up the XML parser
|
||||||
eproc = EntitityProcessor(mt)
|
eproc = EntitityProcessor(mt)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user