Escape the title and author before inserting them into the EPUB title page.

* src/calibre/__init__.py: add prepare_string_for_xml(raw, attribute=False).
  It first resolves entities already present in the text through
  entity_to_unicode, then escapes the ampersand and both angle brackets;
  with attribute=True it also escapes double and single quotes.
* src/calibre/ebooks/epub/output.py: pass title and author through
  prepare_string_for_xml before they are interpolated into TITLEPAGE.
* src/calibre/ebooks/oeb/transforms/split.py: call iterdescendants() without
  the etree.Element argument and drop the unused local x.

NOTE(review): this patch was restored from a copy whose line breaks and
leading whitespace had been collapsed and whose XML escape literals had been
entity-decoded.  Indentation and the position of blank context lines are
reconstructed; confirm against the target tree before applying.

diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py
index 6e3ac55a77..3346f205b8 100644
--- a/src/calibre/__init__.py
+++ b/src/calibre/__init__.py
@@ -384,6 +384,15 @@ def entity_to_unicode(match, exceptions=[], encoding='cp1252'):
     except KeyError:
         return '&'+ent+';'
 
+_ent_pat = re.compile(r'&(\S+);')
+
+def prepare_string_for_xml(raw, attribute=False):
+    raw = _ent_pat.sub(entity_to_unicode, raw)
+    raw = raw.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
+    if attribute:
+        raw = raw.replace('"', '&quot;').replace("'", '&apos;')
+    return raw
+
 if isosx:
     fdir = os.path.expanduser('~/.fonts')
     try:
diff --git a/src/calibre/ebooks/epub/output.py b/src/calibre/ebooks/epub/output.py
index be096eece3..6e09c7e6d9 100644
--- a/src/calibre/ebooks/epub/output.py
+++ b/src/calibre/ebooks/epub/output.py
@@ -12,7 +12,7 @@ from urllib import unquote
 from calibre.customize.conversion import OutputFormatPlugin
 from calibre.ptempfile import TemporaryDirectory
 from calibre.constants import __appname__, __version__
-from calibre import strftime, guess_type
+from calibre import strftime, guess_type, prepare_string_for_xml
 from calibre.customize.conversion import OptionRecommendation
 
 from lxml import etree
@@ -210,6 +210,7 @@ class EPUBOutput(OutputFormatPlugin):
             id, href = self.oeb.manifest.generate('calibre-logo',
                     'calibre-logo.png')
             self.oeb.manifest.add(id, href, 'image/png', data=img_data)
+            title, author = map(prepare_string_for_xml, (title, author))
             html = self.TITLEPAGE%dict(title=title, author=author,
                     date=strftime('%d %b, %Y'),
                     app=__appname__ +' '+__version__,
diff --git a/src/calibre/ebooks/oeb/transforms/split.py b/src/calibre/ebooks/oeb/transforms/split.py
index d4b60e3a59..d8d750eade 100644
--- a/src/calibre/ebooks/oeb/transforms/split.py
+++ b/src/calibre/ebooks/oeb/transforms/split.py
@@ -301,30 +301,26 @@ class FlowSplitter(object):
 
         # Tree 1
         hit_split_point = False
-        for elem in list(body.iterdescendants(etree.Element)):
+        for elem in list(body.iterdescendants()):
             if elem is split_point:
                 hit_split_point = True
                 if before:
-                    x = elem.get('id', None)
                     nix_element(elem)
 
                 continue
             if hit_split_point:
-                x = elem.get('id', None)
                 nix_element(elem)
 
 
         # Tree 2
         hit_split_point = False
-        for elem in list(body2.iterdescendants(etree.Element)):
+        for elem in list(body2.iterdescendants()):
             if elem is split_point2:
                 hit_split_point = True
                 if not before:
-                    x = elem.get('id', None)
                     nix_element(elem, top=False)
                 continue
             if not hit_split_point:
-                x = elem.get('id', None)
                 nix_element(elem, top=False)
         body2.text = '\n'
 