diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index bba5b1e83b..9321e7e9c8 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from __future__ import absolute_import, division, print_function, unicode_literals __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' @@ -10,7 +11,6 @@ from calibre.customize import (FileTypePlugin, MetadataReaderPlugin, from calibre.constants import numeric_version from calibre.ebooks.metadata.archive import ArchiveExtract, get_comic_metadata from calibre.ebooks.html.to_zip import HTML2ZIP -from polyglot.builtins import unicode_type plugins = [] @@ -65,23 +65,23 @@ class TXT2TXTZ(FileTypePlugin): images = [] # Textile - for m in re.finditer(unicode_type(r'(?mu)(?:[\[{])?\!(?:\. )?(?P[^\s(!]+)\s?(?:\(([^\)]+)\))?\!(?::(\S+))?(?:[\]}]|(?=\s|$))'), txt): + for m in re.finditer(r'(?mu)(?:[\[{])?\!(?:\. )?(?P[^\s(!]+)\s?(?:\(([^\)]+)\))?\!(?::(\S+))?(?:[\]}]|(?=\s|$))', txt): path = m.group('path') if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)): images.append(path) # Markdown inline - for m in re.finditer(unicode_type(r'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\((?P[^\)]*)\)'), txt): # noqa + for m in re.finditer(r'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\((?P[^\)]*)\)', txt): # noqa path = m.group('path') if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)): images.append(path) # Markdown reference refs = {} - for m in re.finditer(unicode_type(r'(?mu)^(\ ?\ ?\ ?)\[(?P[^\]]*)\]:\s*(?P[^\s]*)$'), txt): + for m in re.finditer(r'(?mu)^(\ ?\ ?\ ?)\[(?P[^\]]*)\]:\s*(?P[^\s]*)$', txt): if m.group('id') and m.group('path'): refs[m.group('id')] = m.group('path') - for m in re.finditer(unicode_type(r'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\[(?P[^\]]*)\]'), txt): # noqa + for m in re.finditer(r'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\[(?P[^\]]*)\]', txt): # noqa path = refs.get(m.group('id'), None) if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)): images.append(path) @@ -93,7 +93,7 @@ class TXT2TXTZ(FileTypePlugin): from calibre.ebooks.metadata.opf2 import metadata_to_opf with open(path_to_ebook, 'rb') as ebf: - txt = ebf.read() + txt = ebf.read().decode('utf-8', 'replace') base_dir = os.path.dirname(path_to_ebook) images = self._get_image_references(txt, base_dir)