Ignore urls inside comments when checking links in stylesheets

This commit is contained in:
Kovid Goyal 2013-12-10 14:26:17 +05:30
parent e0cd21ee50
commit 2b6529336b
2 changed files with 21 additions and 4 deletions

View File

@ -31,7 +31,7 @@ from calibre.ebooks.oeb.base import (
rewrite_links, iterlinks, itercsslinks, urlquote, urlunquote) rewrite_links, iterlinks, itercsslinks, urlquote, urlunquote)
from calibre.ebooks.oeb.polish.errors import InvalidBook, DRMError from calibre.ebooks.oeb.polish.errors import InvalidBook, DRMError
from calibre.ebooks.oeb.polish.parsing import parse as parse_html_tweak from calibre.ebooks.oeb.polish.parsing import parse as parse_html_tweak
from calibre.ebooks.oeb.polish.utils import PositionFinder from calibre.ebooks.oeb.polish.utils import PositionFinder, CommentFinder
from calibre.ebooks.oeb.parse_utils import NotHTML, parse_html, RECOVER_PARSER from calibre.ebooks.oeb.parse_utils import NotHTML, parse_html, RECOVER_PARSER
from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile
from calibre.utils.filenames import nlinks_file, hardlink_file from calibre.utils.filenames import nlinks_file, hardlink_file
@ -292,12 +292,14 @@ class Container(object): # {{{
yield (link, el.sourceline, pos) if get_line_numbers else link yield (link, el.sourceline, pos) if get_line_numbers else link
elif media_type.lower() in OEB_STYLES: elif media_type.lower() in OEB_STYLES:
if get_line_numbers: if get_line_numbers:
with self.open(name) as f: with self.open(name, 'rb') as f:
raw = self.decode(f.read()).replace('\r\n', '\n').replace('\r', '\n') raw = self.decode(f.read()).replace('\r\n', '\n').replace('\r', '\n')
position = PositionFinder(raw) position = PositionFinder(raw)
is_in_comment = CommentFinder(raw)
for link, offset in itercsslinks(raw): for link, offset in itercsslinks(raw):
lnum, col = position(offset) if not is_in_comment(offset):
yield link, lnum, col lnum, col = position(offset)
yield link, lnum, col
else: else:
for link in getUrls(self.parsed(name)): for link in getUrls(self.parsed(name)):
yield link yield link

View File

@ -22,3 +22,18 @@ class PositionFinder(object):
except IndexError: except IndexError:
offset = pos offset = pos
return (lnum + 1, offset) return (lnum + 1, offset)
class CommentFinder(object):
    """Answer whether a character offset in some text lies inside a comment.

    Every match of *pat* in *raw* is located once, at construction time.
    Calling the instance with an offset then performs a binary search over
    the recorded comment spans.
    """

    def __init__(self, raw, pat=r'(?s)/\*.*?\*/'):
        """Record the span of every comment found in *raw*.

        :param raw: The text to scan for comments.
        :param pat: Regular expression matching a single comment. The
            default matches ``/* ... */`` comments, including ones that
            span several lines.
        """
        # re.finditer yields non-overlapping matches from left to right, so
        # both lists end up sorted; __call__ relies on that for bisect.
        self.starts, self.ends = [], []
        for m in re.finditer(pat, raw):
            start, end = m.span()
            self.starts.append(start)
            self.ends.append(end)

    def __call__(self, offset):
        """Return True if *offset* falls inside a comment, False otherwise."""
        # Index of the last comment that starts at or before offset. It is
        # -1 when there is no such comment (including when there are no
        # comments at all).
        q = bisect(self.starts, offset) - 1
        if q < 0:
            return False
        # The end index from m.span() is exclusive: it is the position of
        # the first character *after* the comment, so it must not count as
        # being inside the comment.
        return offset < self.ends[q]