From 2d5501cc9e59edb3e207bc7d5d79a20417ffc48a Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Tue, 6 Dec 2011 08:52:43 +0530
Subject: [PATCH] E-book viewer: Fix searching for text that is represented as entities in the underlying HTML. Fixes #899573 (Private bug)

---
 src/calibre/ebooks/oeb/iterator.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/src/calibre/ebooks/oeb/iterator.py b/src/calibre/ebooks/oeb/iterator.py
index 2e423a25a1..bfd2954cd1 100644
--- a/src/calibre/ebooks/oeb/iterator.py
+++ b/src/calibre/ebooks/oeb/iterator.py
@@ -18,7 +18,8 @@ from calibre.ebooks.chardet import xml_to_unicode
 from calibre.utils.zipfile import safe_replace
 from calibre.utils.config import DynamicConfig
 from calibre.utils.logging import Log
-from calibre import guess_type, prints, prepare_string_for_xml
+from calibre import (guess_type, prints, prepare_string_for_xml,
+        xml_replace_entities)
 from calibre.ebooks.oeb.transforms.cover import CoverManager
 from calibre.constants import filesystem_encoding
 
@@ -96,13 +97,19 @@ class EbookIterator(object):
         self.ebook_ext = ext.replace('original_', '')
 
     def search(self, text, index, backwards=False):
-        text = text.lower()
+        text = prepare_string_for_xml(text.lower())
         pmap = [(i, path) for i, path in enumerate(self.spine)]
         if backwards:
             pmap.reverse()
         for i, path in pmap:
             if (backwards and i < index) or (not backwards and i > index):
-                if text in open(path, 'rb').read().decode(path.encoding).lower():
+                with open(path, 'rb') as f:
+                    raw = f.read().decode(path.encoding)
+                try:
+                    raw = xml_replace_entities(raw)
+                except:
+                    pass
+                if text in raw.lower():
                     return i
 
     def find_missing_css_files(self):