Edit Book: Check Book: Add an auto fix action to remove all links to a missing resource (such as a deleted stylesheet) automatically. Fixes #1596048 [Edit-book: After removing CSS file the program should also remove all references to that file from HTML files](https://bugs.launchpad.net/calibre/+bug/1596048)

This commit is contained in:
Kovid Goyal 2016-06-29 20:43:52 +05:30
parent 5774d52620
commit 74134dd429
4 changed files with 126 additions and 2 deletions

View File

@ -16,6 +16,7 @@ from Queue import Queue, Empty
from calibre import browser
from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES
from calibre.ebooks.oeb.polish.container import OEB_FONTS
from calibre.ebooks.oeb.polish.replace import remove_links_to
from calibre.ebooks.oeb.polish.cover import get_raster_cover_name
from calibre.ebooks.oeb.polish.utils import guess_type, actual_case_for_name, corrected_case_for_name
from calibre.ebooks.oeb.polish.check.base import BaseError, WARN, INFO
@ -149,6 +150,16 @@ class Unmanifested(BadLink):
container.add_name_to_manifest(self.name)
return True
class DanglingLink(BadLink):
    ''' A link whose target does not exist in the book. The individual fix
    for this error removes every reference to the missing target by calling
    remove_links_to() on the container. '''

    def __init__(self, text, target_name, name, lnum, col):
        BadLink.__init__(self, text, name, lnum, col)
        self.target_name = target_name
        self.INDIVIDUAL_FIX = _('Remove all references to %s from the HTML and CSS in the book') % target_name

    def __call__(self, container):
        ''' Apply the fix. Returns True iff at least one file was changed. '''
        def points_at_missing_target(name, *args):
            # Only the resolved name matters, href and fragment are ignored
            return name == self.target_name

        changed_names = remove_links_to(container, points_at_missing_target)
        return bool(changed_names)
class Bookmarks(BadLink):
HELP = _(
@ -307,7 +318,7 @@ def check_links(container):
if cname is not None:
a(CaseMismatch(href, cname, name, lnum, col))
else:
a(BadLink(_('The linked resource %s does not exist') % fl(href), name, lnum, col))
a(DanglingLink(_('The linked resource %s does not exist') % fl(href), tname, name, lnum, col))
else:
purl = urlparse(href)
if purl.scheme == 'file':

View File

@ -8,7 +8,7 @@ __copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
from functools import partial
from cssutils.css import CSSRule
from cssutils.css import CSSRule, CSSStyleDeclaration
from css_selectors import parse, SelectorSyntaxError
from calibre import force_unicode
@ -257,3 +257,28 @@ def classes_in_rule_list(css_rules):
classes |= classes_in_rule_list(rule.cssRules)
return classes
def iter_declarations(sheet_or_rule):
    ''' Yield the style declarations reachable from sheet_or_rule.

    An object with a cssRules attribute is descended into recursively, in
    order (its own style attribute, if any, is not yielded). An object with
    only a style attribute yields that style. A bare CSSStyleDeclaration
    yields itself. Anything else yields nothing. '''
    if hasattr(sheet_or_rule, 'cssRules'):
        for child in sheet_or_rule.cssRules:
            for declaration in iter_declarations(child):
                yield declaration
        return
    if hasattr(sheet_or_rule, 'style'):
        yield sheet_or_rule.style
        return
    if isinstance(sheet_or_rule, CSSStyleDeclaration):
        yield sheet_or_rule
def remove_property_value(prop, predicate):
    ''' Remove the Values that match the predicate from this property. If all
    values of the property would be removed, the property is removed from its
    parent instead. Note that this means the property must have a parent (a
    CSSStyleDeclaration).

    :param prop: The property to modify. Its propertyValue is iterated to
        get the individual values.
    :param predicate: Called with each value, must return True for the
        values that are to be removed.
    :return: True iff at least one value matched. When nothing matches, the
        property is left completely untouched. '''
    # Build a real list: len() and bool() are needed below, so this must not
    # depend on filter() returning a list (it does not with future_builtins
    # or on Python 3).
    removed_vals = [v for v in prop.propertyValue if predicate(v)]
    if not removed_vals:
        # Nothing to do. In particular, do not rewrite the value text and do
        # not delete a property that merely has no values.
        return False
    if len(removed_vals) == len(prop.propertyValue):
        prop.parent.removeProperty(prop.name)
    else:
        # Cut the serialized form of every removed value out of the
        # serialized property value and assign the remainder back.
        x = prop.propertyValue.cssText
        for v in removed_vals:
            x = x.replace(v.cssText, '').strip()
        prop.propertyValue.cssText = x
    return True

View File

@ -8,11 +8,15 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import codecs, shutil, os, posixpath
from future_builtins import map
from functools import partial
from urlparse import urlparse, urlunparse
from collections import Counter, defaultdict
from calibre import sanitize_file_name_unicode
from calibre.ebooks.chardet import strip_encoding_declarations
from calibre.ebooks.oeb.polish.css import iter_declarations, remove_property_value
from calibre.ebooks.oeb.polish.utils import extract
class LinkReplacer(object):
@ -274,3 +278,78 @@ def rationalize_folders(container, folder_type_map):
name_map[name] = new_name
new_names.add(new_name)
return name_map
def remove_links_in_sheet(href_to_name, sheet, predicate):
    ''' Remove from sheet the @import rules and the URL values in
    declarations for which predicate returns True.

    href_to_name is called with an href and its result is passed to
    predicate as the name, together with the href and a fragment of None.
    Returns True iff the sheet was changed. '''
    doomed_imports = [
        idx for idx, rule in enumerate(sheet)
        if rule.type == rule.IMPORT_RULE and predicate(href_to_name(rule.href), rule.href, None)
    ]
    # Delete from the end so that the remaining indices stay valid
    for idx in reversed(doomed_imports):
        sheet.deleteRule(idx)
    changed = bool(doomed_imports)

    for declaration in iter_declarations(sheet):
        if remove_links_in_declaration(href_to_name, declaration, predicate):
            changed = True
    return changed
def remove_links_in_declaration(href_to_name, style, predicate):
    ''' Remove from the declaration style all URI values for which predicate
    returns True.

    href_to_name is called with the URI of a value and its result is passed
    to predicate as the name, together with the URI and a fragment of None.
    Returns True iff something was removed. '''
    def is_unwanted_link(value):
        if value.type != value.URI:
            return False
        return predicate(href_to_name(value.uri), value.uri, None)

    changed = False
    # Iterate over a snapshot, as properties can be removed from style
    for prop in tuple(style.getProperties(all=True)):
        if remove_property_value(prop, is_unwanted_link):
            changed = True
    return changed
def remove_links_to(container, predicate):
    ''' Remove every link in the HTML and CSS files of the container for
    which predicate returns True.

    predicate must be a function that takes the arguments (name, href,
    fragment=None) and returns True iff the link should be removed.

    Returns the set of names of the files that were changed, container.dirty()
    is called for each of them. '''
    from calibre.ebooks.oeb.base import iterlinks, OEB_DOCS, OEB_STYLES, XPath, XHTML
    find_style_tags = XPath('//h:style')
    find_styled_elements = XPath('//*[@style]')
    # A matching link on one of these tags removes the whole element,
    # on any other tag only the linking attribute is deleted
    whole_element_tags = (XHTML('link'), XHTML('img'))
    changed = set()

    for name, mt in container.mime_map.iteritems():
        resolve = partial(container.href_to_name, base=name)
        modified = False

        if mt in OEB_DOCS:
            root = container.parsed(name)

            # Links in attributes and element text
            for el, attr, href, pos in iterlinks(root, find_links_in_css=False):
                target = container.href_to_name(href, name)
                fragment = href.partition('#')[-1]
                if not predicate(target, href, fragment):
                    continue
                if attr is None:
                    el.text = None
                elif el.tag in whole_element_tags:
                    extract(el)
                else:
                    del el.attrib[attr]
                modified = True

            # Links in <style> tags
            for tag in find_style_tags(root):
                if not tag.text:
                    continue
                if (tag.get('type') or 'text/css').lower() != 'text/css':
                    continue
                sheet = container.parse_css(tag.text)
                if remove_links_in_sheet(resolve, sheet, predicate):
                    tag.text = sheet.cssText
                    modified = True

            # Links in style attributes
            for tag in find_styled_elements(root):
                raw = tag.get('style')
                if not raw:
                    continue
                declaration = container.parse_css(raw, is_declaration=True)
                if remove_links_in_declaration(resolve, declaration, predicate):
                    tag.set('style', declaration.cssText)
                    modified = True

        elif mt in OEB_STYLES:
            modified = remove_links_in_sheet(resolve, container.parsed(name), predicate)

        if modified:
            changed.add(name)

    for changed_name in changed:
        container.dirty(changed_name)
    return changed

View File

@ -8,9 +8,12 @@ __copyright__ = '2016, Kovid Goyal <kovid at kovidgoyal.net>'
from functools import partial
from cssutils import parseStyle
from calibre.constants import iswindows
from calibre.ebooks.oeb.base import OEB_STYLES, OEB_DOCS
from calibre.ebooks.oeb.polish.cascade import iterrules, resolve_styles, DEFAULTS
from calibre.ebooks.oeb.polish.css import remove_property_value
from calibre.ebooks.oeb.polish.container import ContainerBase, href_to_name
from calibre.ebooks.oeb.polish.stats import StatsCollector, font_keys, normalize_font_properties, prepare_font_rule
from calibre.ebooks.oeb.polish.tests.base import BaseTest
@ -200,3 +203,9 @@ class CascadeTest(BaseTest):
s = get_stats('<p style="font-family: X; text-transform:uppercase">abc</p><b style="font-family: X; font-variant: small-caps">d\nef</b>')
self.assertEqual(s.font_stats, {'XB.otf':set('defDEF'), 'X.otf':set('ABC')})
def test_remove_property_value(self):
    ' Matching values are dropped, a property left with no values is removed '
    def mentions_png(val):
        return 'png' in val.cssText

    style = parseStyle('background-image: url(b.png); background: black url(a.png) fixed')
    for prop in style.getProperties(all=True):
        remove_property_value(prop, mentions_png)
    self.assertEqual('background: black fixed', style.cssText)