diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index 6f7b3f7984..c4e0a4d1f9 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -197,7 +197,7 @@ class EPUBMetadataReader(MetadataReaderPlugin): class FB2MetadataReader(MetadataReaderPlugin): name = 'Read FB2 metadata' - file_types = set(['fb2']) + file_types = {'fb2', 'fbz'} description = _('Read metadata from %s files')%'FB2' def get_metadata(self, stream, ftype): @@ -476,7 +476,7 @@ class EPUBMetadataWriter(MetadataWriterPlugin): class FB2MetadataWriter(MetadataWriterPlugin): name = 'Set FB2 metadata' - file_types = set(['fb2']) + file_types = {'fb2', 'fbz'} description = _('Set metadata in %s files')%'FB2' def set_metadata(self, stream, mi, type): diff --git a/src/calibre/ebooks/__init__.py b/src/calibre/ebooks/__init__.py index 699be348b0..2240def4b7 100644 --- a/src/calibre/ebooks/__init__.py +++ b/src/calibre/ebooks/__init__.py @@ -32,7 +32,7 @@ class ParserError(ValueError): BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'text', 'htm', 'xhtm', 'html', 'htmlz', 'xhtml', 'pdf', 'pdb', 'updb', 'pdr', 'prc', 'mobi', 'azw', 'doc', - 'epub', 'fb2', 'djv', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip', + 'epub', 'fb2', 'fbz', 'djv', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip', 'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'pmlz', 'mbp', 'tan', 'snb', 'xps', 'oxps', 'azw4', 'book', 'zbf', 'pobi', 'docx', 'docm', 'md', 'textile', 'markdown', 'ibook', 'ibooks', 'iba', 'azw3', 'ps', 'kepub', 'kfx'] diff --git a/src/calibre/ebooks/conversion/plugins/fb2_input.py b/src/calibre/ebooks/conversion/plugins/fb2_input.py index 7ab83b4922..a25ec472ac 100644 --- a/src/calibre/ebooks/conversion/plugins/fb2_input.py +++ b/src/calibre/ebooks/conversion/plugins/fb2_input.py @@ -17,8 +17,8 @@ class FB2Input(InputFormatPlugin): name = 'FB2 Input' author = 'Anatoly Shipitsin' - description = 'Convert FB2 files to HTML' - file_types = set(['fb2']) + description = 'Convert FB2 and FBZ files to HTML' + file_types = {'fb2', 'fbz'} recommendations = set([ ('level1_toc', '//h:h1', OptionRecommendation.MED), @@ -37,14 +37,15 @@ class FB2Input(InputFormatPlugin): def convert(self, stream, options, file_ext, log, accelerators): from lxml import etree - from calibre.ebooks.metadata.fb2 import ensure_namespace + from calibre.ebooks.metadata.fb2 import ensure_namespace, get_fb2_data from calibre.ebooks.metadata.opf2 import OPFCreator from calibre.ebooks.metadata.meta import get_metadata from calibre.ebooks.oeb.base import XLINK_NS, XHTML_NS, RECOVER_PARSER from calibre.ebooks.chardet import xml_to_unicode self.log = log log.debug('Parsing XML...') - raw = stream.read().replace('\0', '') + raw = get_fb2_data(stream)[0] + raw = raw.replace(b'\0', b'') raw = xml_to_unicode(raw, strip_encoding_pats=True, assume_utf8=True, resolve_entities=True)[0] try: @@ -173,5 +174,3 @@ class FB2Input(InputFormatPlugin): else: with open(fname, 'wb') as f: f.write(data) - - diff --git a/src/calibre/ebooks/metadata/fb2.py b/src/calibre/ebooks/metadata/fb2.py index abfa72741b..3a1945a20e 100644 --- a/src/calibre/ebooks/metadata/fb2.py +++ b/src/calibre/ebooks/metadata/fb2.py @@ -85,10 +85,27 @@ class Context(object): self.create_tag(parent, 'empty-line', at_start=False) +def get_fb2_data(stream): + from calibre.utils.zipfile import ZipFile, BadZipfile + pos = stream.tell() + try: + zf = ZipFile(stream) + except BadZipfile: + stream.seek(pos) + ans = stream.read() + zip_file_name = None + else: + names = zf.namelist() + names = [x for x in names if x.lower().endswith('.fb2')] or names + zip_file_name = names[0] + ans = zf.open(zip_file_name).read() + return ans, zip_file_name + + def get_metadata(stream): ''' Return fb2 metadata as a L{MetaInformation} object ''' - root = _get_fbroot(stream) + root = _get_fbroot(get_fb2_data(stream)[0]) ctx = Context(root) book_title = _parse_book_title(root, ctx) authors = _parse_authors(root, ctx) or [_('Unknown')] @@ -294,9 +311,8 @@ def _parse_language(root, mi, ctx): mi.languages = [language] -def _get_fbroot(stream): +def _get_fbroot(raw): parser = etree.XMLParser(recover=True, no_network=True) - raw = stream.read() raw = xml_to_unicode(raw, strip_encoding_pats=True)[0] root = etree.fromstring(raw, parser=parser) return ensure_namespace(root) @@ -386,7 +402,8 @@ def _set_cover(title_info, mi, ctx): def set_metadata(stream, mi, apply_null=False, update_timestamp=False): stream.seek(0) - root = _get_fbroot(stream) + raw, zip_file_name = get_fb2_data(stream) + root = _get_fbroot(raw) ctx = Context(root) desc = ctx.get_or_create(root, 'description') ti = ctx.get_or_create(desc, 'title-info') @@ -403,14 +420,20 @@ def set_metadata(stream, mi, apply_null=False, update_timestamp=False): for child in ti: child.tail = indent - stream.seek(0) - stream.truncate() # Apparently there exists FB2 reading software that chokes on the use of # single quotes in xml declaration. Sigh. See # https://www.mobileread.com/forums/showthread.php?p=2273184#post2273184 - stream.write(b'\n') - stream.write(etree.tostring(root, method='xml', encoding='utf-8', - xml_declaration=False)) + raw = b'\n' + raw += etree.tostring(root, method='xml', encoding='utf-8', xml_declaration=False) + + stream.seek(0) + stream.truncate() + if zip_file_name: + from calibre.utils.zipfile import ZipFile + with ZipFile(stream, 'w') as zf: + zf.writestr(zip_file_name, raw) + else: + stream.write(raw) def ensure_namespace(doc):