Ignore URLs inside comments when checking links in stylesheets

This commit is contained in:
Kovid Goyal 2013-12-10 14:26:17 +05:30
parent e0cd21ee50
commit 2b6529336b
2 changed files with 21 additions and 4 deletions

View File

@ -31,7 +31,7 @@ from calibre.ebooks.oeb.base import (
rewrite_links, iterlinks, itercsslinks, urlquote, urlunquote)
from calibre.ebooks.oeb.polish.errors import InvalidBook, DRMError
from calibre.ebooks.oeb.polish.parsing import parse as parse_html_tweak
from calibre.ebooks.oeb.polish.utils import PositionFinder
from calibre.ebooks.oeb.polish.utils import PositionFinder, CommentFinder
from calibre.ebooks.oeb.parse_utils import NotHTML, parse_html, RECOVER_PARSER
from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile
from calibre.utils.filenames import nlinks_file, hardlink_file
@ -292,10 +292,12 @@ class Container(object): # {{{
yield (link, el.sourceline, pos) if get_line_numbers else link
elif media_type.lower() in OEB_STYLES:
if get_line_numbers:
with self.open(name) as f:
with self.open(name, 'rb') as f:
raw = self.decode(f.read()).replace('\r\n', '\n').replace('\r', '\n')
position = PositionFinder(raw)
is_in_comment = CommentFinder(raw)
for link, offset in itercsslinks(raw):
if not is_in_comment(offset):
lnum, col = position(offset)
yield link, lnum, col
else:

View File

@ -22,3 +22,18 @@ class PositionFinder(object):
except IndexError:
offset = pos
return (lnum + 1, offset)
class CommentFinder(object):
    """Answer whether a character offset in a text lies inside a comment.

    The text is scanned once at construction time; each comment matched by
    ``pat`` is recorded as a half-open ``[start, end)`` span, so later
    queries are a binary search rather than a rescan.
    """

    def __init__(self, raw, pat=r'(?s)/\*.*?\*/'):
        """Record the spans of all comments found in ``raw``.

        :param raw: The text to scan.
        :param pat: Regular expression matching one comment. The default
            matches ``/* ... */`` blocks, including ones spanning lines.
        """
        self.starts, self.ends = [], []
        # finditer yields non-overlapping matches left to right, so both
        # lists come out sorted, which the bisect in __call__ relies on.
        for m in re.finditer(pat, raw):
            start, end = m.span()
            self.starts.append(start)
            self.ends.append(end)

    def __call__(self, offset):
        """Return True if ``offset`` falls inside a recorded comment.

        :param offset: Zero-based character index into the scanned text.
        :return: ``True`` when the character at ``offset`` belongs to a
            comment, ``False`` otherwise.
        """
        if not self.starts:
            return False
        # Index of the last comment that begins at or before offset.
        q = bisect(self.starts, offset) - 1
        # The end of a match span is exclusive: the character at ends[q] is
        # the first one *after* the comment, so it must not count as inside.
        return q >= 0 and self.starts[q] <= offset < self.ends[q]