mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-08-30 23:00:21 -04:00
Edit Book: Check Book: Add an auto fix action to remove all links to a missing resource (such as a deleted stylesheet) automatically. Fixes #1596048 [Edit-book: After removing CSS file the program should also remove all references to that file from HTML files](https://bugs.launchpad.net/calibre/+bug/1596048)
This commit is contained in:
parent
5774d52620
commit
74134dd429
@ -16,6 +16,7 @@ from Queue import Queue, Empty
|
||||
from calibre import browser
|
||||
from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES
|
||||
from calibre.ebooks.oeb.polish.container import OEB_FONTS
|
||||
from calibre.ebooks.oeb.polish.replace import remove_links_to
|
||||
from calibre.ebooks.oeb.polish.cover import get_raster_cover_name
|
||||
from calibre.ebooks.oeb.polish.utils import guess_type, actual_case_for_name, corrected_case_for_name
|
||||
from calibre.ebooks.oeb.polish.check.base import BaseError, WARN, INFO
|
||||
@ -149,6 +150,16 @@ class Unmanifested(BadLink):
|
||||
container.add_name_to_manifest(self.name)
|
||||
return True
|
||||
|
||||
class DanglingLink(BadLink):
    ''' A link whose target resource does not exist in the book. The fix it
    offers removes every reference to that target using remove_links_to(). '''

    def __init__(self, text, target_name, name, lnum, col):
        ''' text, name, lnum and col are passed through unchanged to BadLink.
        target_name is the name of the missing resource; it is remembered for
        the fix and interpolated into the description of the fix. '''
        BadLink.__init__(self, text, name, lnum, col)
        self.target_name = target_name
        fix_description = _('Remove all references to %s from the HTML and CSS in the book')
        self.INDIVIDUAL_FIX = fix_description % target_name

    def __call__(self, container):
        ''' Apply the fix to container. Returns True iff remove_links_to()
        returned a truthy (non-empty) result. '''
        def points_at_target(name, *ignored):
            # Only the resolved name is compared; href/fragment are ignored
            return name == self.target_name

        return bool(remove_links_to(container, points_at_target))
|
||||
|
||||
class Bookmarks(BadLink):
|
||||
|
||||
HELP = _(
|
||||
@ -307,7 +318,7 @@ def check_links(container):
|
||||
if cname is not None:
|
||||
a(CaseMismatch(href, cname, name, lnum, col))
|
||||
else:
|
||||
a(BadLink(_('The linked resource %s does not exist') % fl(href), name, lnum, col))
|
||||
a(DanglingLink(_('The linked resource %s does not exist') % fl(href), tname, name, lnum, col))
|
||||
else:
|
||||
purl = urlparse(href)
|
||||
if purl.scheme == 'file':
|
||||
|
@ -8,7 +8,7 @@ __copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
from functools import partial
|
||||
|
||||
from cssutils.css import CSSRule
|
||||
from cssutils.css import CSSRule, CSSStyleDeclaration
|
||||
from css_selectors import parse, SelectorSyntaxError
|
||||
|
||||
from calibre import force_unicode
|
||||
@ -257,3 +257,28 @@ def classes_in_rule_list(css_rules):
|
||||
classes |= classes_in_rule_list(rule.cssRules)
|
||||
return classes
|
||||
|
||||
def iter_declarations(sheet_or_rule):
    ''' Yield the style declarations reachable from sheet_or_rule.

    An object with a cssRules attribute is treated as a container: every rule
    in it is visited recursively. Otherwise an object with a style attribute
    yields that style, and a bare CSSStyleDeclaration yields itself. Any other
    object yields nothing.

    Note that the checks are exclusive: for an object that has both cssRules
    and style, only the nested rules are visited. '''
    if hasattr(sheet_or_rule, 'cssRules'):
        for child in sheet_or_rule.cssRules:
            for declaration in iter_declarations(child):
                yield declaration
        return
    if hasattr(sheet_or_rule, 'style'):
        yield sheet_or_rule.style
        return
    if isinstance(sheet_or_rule, CSSStyleDeclaration):
        yield sheet_or_rule
|
||||
|
||||
def remove_property_value(prop, predicate):
    ''' Remove the Values that match the predicate from this property. If all
    values of the property would be removed, the property is removed from its
    parent instead. Note that this means the property must have a parent (a
    CSSStyleDeclaration).

    :param prop: The property to edit. Its propertyValue is iterated to find
        the values to remove.
    :param predicate: Called with each value of the property, must return
        True for the values that should be removed.
    :return: True iff at least one value matched the predicate.
    '''
    # Materialize the matches: len() is needed below and filter() returns a
    # lazy iterator when it is the Python 3 / future_builtins version.
    removed_vals = list(filter(predicate, prop.propertyValue))
    if len(removed_vals) == len(prop.propertyValue):
        # Nothing would be left, so drop the whole property from its parent
        prop.parent.removeProperty(prop.name)
    else:
        # Cut the text of every matching value out of the serialized property
        # value and assign the remainder back
        x = prop.propertyValue.cssText
        for v in removed_vals:
            x = x.replace(v.cssText, '').strip()
        prop.propertyValue.cssText = x
    return bool(removed_vals)
|
||||
|
@ -8,11 +8,15 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import codecs, shutil, os, posixpath
|
||||
from future_builtins import map
|
||||
from functools import partial
|
||||
from urlparse import urlparse, urlunparse
|
||||
from collections import Counter, defaultdict
|
||||
|
||||
from calibre import sanitize_file_name_unicode
|
||||
from calibre.ebooks.chardet import strip_encoding_declarations
|
||||
from calibre.ebooks.oeb.polish.css import iter_declarations, remove_property_value
|
||||
from calibre.ebooks.oeb.polish.utils import extract
|
||||
|
||||
class LinkReplacer(object):
|
||||
|
||||
@ -274,3 +278,78 @@ def rationalize_folders(container, folder_type_map):
|
||||
name_map[name] = new_name
|
||||
new_names.add(new_name)
|
||||
return name_map
|
||||
|
||||
|
||||
def remove_links_in_sheet(href_to_name, sheet, predicate):
    ''' Remove from sheet the @import rules and the URL values in its
    declarations for which predicate returns True.

    href_to_name is called with an href and must return the name it refers
    to. predicate is called as predicate(name, href, None). Returns True iff
    the sheet was changed. '''
    # Collect the positions first, the sheet is not modified while it is
    # being enumerated
    doomed_imports = [
        index for index, rule in enumerate(sheet)
        if rule.type == rule.IMPORT_RULE and predicate(href_to_name(rule.href), rule.href, None)
    ]
    # Delete from the end so that the remaining positions stay valid
    for index in reversed(doomed_imports):
        sheet.deleteRule(index)
    changed = bool(doomed_imports)

    for declaration in iter_declarations(sheet):
        # Every declaration must be processed, so no short-circuiting here
        if remove_links_in_declaration(href_to_name, declaration, predicate):
            changed = True
    return changed
|
||||
|
||||
|
||||
def remove_links_in_declaration(href_to_name, style, predicate):
    ''' Remove the URL values for which predicate returns True from every
    property of the declaration style, using remove_property_value().

    href_to_name is called with the URI of a value and must return the name
    it refers to. predicate is called as predicate(name, uri, None). Returns
    True iff at least one property was changed. '''

    def is_unwanted_uri(value):
        # Values that are not URIs are never removed
        if value.type != value.URI:
            return False
        return predicate(href_to_name(value.uri), value.uri, None)

    changed = False
    # Work on a snapshot of the properties, as they can be removed from the
    # declaration during the loop
    for prop in list(style.getProperties(all=True)):
        if remove_property_value(prop, is_unwanted_uri):
            changed = True
    return changed
|
||||
|
||||
|
||||
def remove_links_to(container, predicate):
    ''' Remove all links for which predicate returns True from the HTML and
    CSS files in container.

    predicate must be a function that takes the arguments (name, href,
    fragment=None) and returns True iff the link should be removed.

    In HTML files, matching <link> and <img> elements are extracted from the
    tree, for other elements only the linking attribute is deleted. Links
    inside <style> tags, style attributes and stylesheets are removed from
    the CSS.

    Returns the set of names of the files that were changed, all of which are
    also marked as dirty in the container. '''
    from calibre.ebooks.oeb.base import iterlinks, OEB_DOCS, OEB_STYLES, XPath, XHTML
    find_style_tags = XPath('//h:style')
    find_styled_elements = XPath('//*[@style]')
    changed = set()
    for name, mt in container.mime_map.iteritems():
        modified = False
        if mt in OEB_DOCS:
            root = container.parsed(name)

            # Links in the markup itself
            for el, attr, href, pos in iterlinks(root, find_links_in_css=False):
                target = container.href_to_name(href, name)
                fragment = href.partition('#')[-1]
                if not predicate(target, href, fragment):
                    continue
                modified = True
                if attr is None:
                    # The link was reported without an attribute, clear the
                    # text of the element
                    el.text = None
                elif el.tag == XHTML('link') or el.tag == XHTML('img'):
                    extract(el)
                else:
                    del el.attrib[attr]

            # Links in embedded stylesheets
            for tag in find_style_tags(root):
                if not (tag.text and (tag.get('type') or 'text/css').lower() == 'text/css'):
                    continue
                sheet = container.parse_css(tag.text)
                if remove_links_in_sheet(partial(container.href_to_name, base=name), sheet, predicate):
                    tag.text = sheet.cssText
                    modified = True

            # Links in style attributes
            for tag in find_styled_elements(root):
                inline_css = tag.get('style')
                if not inline_css:
                    continue
                declaration = container.parse_css(inline_css, is_declaration=True)
                if remove_links_in_declaration(partial(container.href_to_name, base=name), declaration, predicate):
                    tag.set('style', declaration.cssText)
                    modified = True
        elif mt in OEB_STYLES:
            modified = remove_links_in_sheet(
                partial(container.href_to_name, base=name), container.parsed(name), predicate)
        if modified:
            changed.add(name)

    # Files are marked dirty only after the loop over mime_map has finished
    for changed_name in changed:
        container.dirty(changed_name)
    return changed
|
||||
|
@ -8,9 +8,12 @@ __copyright__ = '2016, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
from functools import partial
|
||||
|
||||
from cssutils import parseStyle
|
||||
|
||||
from calibre.constants import iswindows
|
||||
from calibre.ebooks.oeb.base import OEB_STYLES, OEB_DOCS
|
||||
from calibre.ebooks.oeb.polish.cascade import iterrules, resolve_styles, DEFAULTS
|
||||
from calibre.ebooks.oeb.polish.css import remove_property_value
|
||||
from calibre.ebooks.oeb.polish.container import ContainerBase, href_to_name
|
||||
from calibre.ebooks.oeb.polish.stats import StatsCollector, font_keys, normalize_font_properties, prepare_font_rule
|
||||
from calibre.ebooks.oeb.polish.tests.base import BaseTest
|
||||
@ -200,3 +203,9 @@ class CascadeTest(BaseTest):
|
||||
|
||||
s = get_stats('<p style="font-family: X; text-transform:uppercase">abc</p><b style="font-family: X; font-variant: small-caps">d\nef</b>')
|
||||
self.assertEqual(s.font_stats, {'XB.otf':set('defDEF'), 'X.otf':set('ABC')})
|
||||
|
||||
def test_remove_property_value(self):
    ''' Removing the png URLs must drop background-image entirely (its only
    value is removed) and leave the other values of background in place. '''
    def mentions_png(val):
        return 'png' in val.cssText

    style = parseStyle('background-image: url(b.png); background: black url(a.png) fixed')
    all_properties = style.getProperties(all=True)
    for prop in all_properties:
        remove_property_value(prop, mentions_png)
    self.assertEqual('background: black fixed', style.cssText)
|
||||
|
Loading…
x
Reference in New Issue
Block a user