From a1126014bf1056ab181189a5a0d42e9132e0dced Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Wed, 4 Feb 2009 09:46:12 -0500 Subject: [PATCH] Be smarter about not putting @id attributes in bad places --- src/calibre/ebooks/mobi/reader.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index 760f9407cb..6811f9ccda 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -423,11 +423,13 @@ class MobiReader(object): if self.verbose: print 'Adding anchors...' positions = set([]) - link_pattern = re.compile(r'<[^<>]+filepos=[\'"]{0,1}(\d+)[^<>]*>', re.IGNORECASE) + link_pattern = re.compile(r'''<[^<>]+filepos=['"]{0,1}(\d+)[^<>]*>''', + re.IGNORECASE) for match in link_pattern.finditer(self.mobi_html): positions.add(int(match.group(1))) pos = 0 self.processed_html = '' + end_tag_re = re.compile(r'<\s*/') for end in sorted(positions): if end == 0: continue @@ -437,7 +439,8 @@ class MobiReader(object): anchor = '' if r > -1 and (r < l or l == end or l == -1): p = self.mobi_html.rfind('<', 0, end + 1) - if p > -1 and self.mobi_html[p + 1] != '/': + if pos < end and p > -1 and \ + not end_tag_re.match(self.mobi_html[p:r]): anchor = ' filepos-id="filepos%d"' end = r else: