From dfc1f39af7530b7e33e4fc4596363ded233905c5 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 15 Nov 2011 09:19:24 +0530 Subject: [PATCH] MOBI Input: Performance improvement when viewing/converting a file with a lot of links --- src/calibre/ebooks/mobi/reader.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index 5d12018121..37d4fc4125 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -929,7 +929,7 @@ class MobiReader(object): for match in link_pattern.finditer(self.mobi_html): positions.add(int(match.group(1))) pos = 0 - self.processed_html = '' + processed_html = cStringIO.StringIO() end_tag_re = re.compile(r'<\s*/') for end in sorted(positions): if end == 0: @@ -947,12 +947,14 @@ class MobiReader(object): end = r else: end = r + 1 - self.processed_html += self.mobi_html[pos:end] + (anchor % oend) + processed_html.write(self.mobi_html[pos:end] + (anchor % oend)) pos = end - self.processed_html += self.mobi_html[pos:] + processed_html.write(self.mobi_html[pos:]) + processed_html = processed_html.getvalue() + # Remove anchors placed inside entities self.processed_html = re.sub(r'&([^;]*?)(<a id="filepos\d+"></a>)([^;]*);', - r'&\1\3;\2', self.processed_html) + r'&\1\3;\2', processed_html) def extract_images(self, processed_records, output_dir):