mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 18:24:30 -04:00
Add support for the FBZ format (zipped FB2)
Fixes #1762634 [Enhancement add support for .fb2.zip files](https://bugs.launchpad.net/calibre/+bug/1762634)
This commit is contained in:
parent
61ef61e3d9
commit
f7159fc0ec
@ -197,7 +197,7 @@ class EPUBMetadataReader(MetadataReaderPlugin):
|
||||
class FB2MetadataReader(MetadataReaderPlugin):
|
||||
|
||||
name = 'Read FB2 metadata'
|
||||
file_types = set(['fb2'])
|
||||
file_types = {'fb2', 'fbz'}
|
||||
description = _('Read metadata from %s files')%'FB2'
|
||||
|
||||
def get_metadata(self, stream, ftype):
|
||||
@ -476,7 +476,7 @@ class EPUBMetadataWriter(MetadataWriterPlugin):
|
||||
class FB2MetadataWriter(MetadataWriterPlugin):
|
||||
|
||||
name = 'Set FB2 metadata'
|
||||
file_types = set(['fb2'])
|
||||
file_types = {'fb2', 'fbz'}
|
||||
description = _('Set metadata in %s files')%'FB2'
|
||||
|
||||
def set_metadata(self, stream, mi, type):
|
||||
|
@ -32,7 +32,7 @@ class ParserError(ValueError):
|
||||
|
||||
BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'text', 'htm', 'xhtm',
|
||||
'html', 'htmlz', 'xhtml', 'pdf', 'pdb', 'updb', 'pdr', 'prc', 'mobi', 'azw', 'doc',
|
||||
'epub', 'fb2', 'djv', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
|
||||
'epub', 'fb2', 'fbz', 'djv', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
|
||||
'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'pmlz', 'mbp', 'tan', 'snb',
|
||||
'xps', 'oxps', 'azw4', 'book', 'zbf', 'pobi', 'docx', 'docm', 'md',
|
||||
'textile', 'markdown', 'ibook', 'ibooks', 'iba', 'azw3', 'ps', 'kepub', 'kfx']
|
||||
|
@ -17,8 +17,8 @@ class FB2Input(InputFormatPlugin):
|
||||
|
||||
name = 'FB2 Input'
|
||||
author = 'Anatoly Shipitsin'
|
||||
description = 'Convert FB2 files to HTML'
|
||||
file_types = set(['fb2'])
|
||||
description = 'Convert FB2 and FBZ files to HTML'
|
||||
file_types = {'fb2', 'fbz'}
|
||||
|
||||
recommendations = set([
|
||||
('level1_toc', '//h:h1', OptionRecommendation.MED),
|
||||
@ -37,14 +37,15 @@ class FB2Input(InputFormatPlugin):
|
||||
def convert(self, stream, options, file_ext, log,
|
||||
accelerators):
|
||||
from lxml import etree
|
||||
from calibre.ebooks.metadata.fb2 import ensure_namespace
|
||||
from calibre.ebooks.metadata.fb2 import ensure_namespace, get_fb2_data
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
from calibre.ebooks.metadata.meta import get_metadata
|
||||
from calibre.ebooks.oeb.base import XLINK_NS, XHTML_NS, RECOVER_PARSER
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
self.log = log
|
||||
log.debug('Parsing XML...')
|
||||
raw = stream.read().replace('\0', '')
|
||||
raw = get_fb2_data(stream)[0]
|
||||
raw = raw.replace(b'\0', b'')
|
||||
raw = xml_to_unicode(raw, strip_encoding_pats=True,
|
||||
assume_utf8=True, resolve_entities=True)[0]
|
||||
try:
|
||||
@ -173,5 +174,3 @@ class FB2Input(InputFormatPlugin):
|
||||
else:
|
||||
with open(fname, 'wb') as f:
|
||||
f.write(data)
|
||||
|
||||
|
||||
|
@ -85,10 +85,27 @@ class Context(object):
|
||||
self.create_tag(parent, 'empty-line', at_start=False)
|
||||
|
||||
|
||||
def get_fb2_data(stream):
    '''
    Return the raw FB2 document held in ``stream``, unpacking it first if
    the stream is a zip archive (FBZ).

    :param stream: Seekable file-like object positioned at the start of
        either a plain FB2 file or a zip archive containing one.
    :return: A tuple ``(raw, zip_file_name)``. ``raw`` is whatever
        ``read()`` returned for the document. ``zip_file_name`` is the name
        of the archive member that was read, or ``None`` when the stream
        was not a zip archive.
    :raises IndexError: If the stream is a zip archive with no members.
    '''
    from calibre.utils.zipfile import ZipFile, BadZipfile
    pos = stream.tell()
    try:
        zf = ZipFile(stream)
    except BadZipfile:
        # Not a zip archive: rewind to where we started and hand back the
        # stream contents unchanged.
        stream.seek(pos)
        return stream.read(), None

    # Close the archive object as soon as the member has been read instead
    # of leaving it open until garbage collection.
    # NOTE(review): assumes closing a ZipFile built on a caller-supplied
    # stream leaves that stream open, as the standard library's zipfile
    # does -- confirm for calibre.utils.zipfile.
    with zf:
        names = zf.namelist()
        # Prefer members named *.fb2; if there are none, fall back to
        # considering every member of the archive.
        names = [x for x in names if x.lower().endswith('.fb2')] or names
        zip_file_name = names[0]
        ans = zf.open(zip_file_name).read()
    return ans, zip_file_name
|
||||
|
||||
|
||||
def get_metadata(stream):
|
||||
''' Return fb2 metadata as a L{MetaInformation} object '''
|
||||
|
||||
root = _get_fbroot(stream)
|
||||
root = _get_fbroot(get_fb2_data(stream)[0])
|
||||
ctx = Context(root)
|
||||
book_title = _parse_book_title(root, ctx)
|
||||
authors = _parse_authors(root, ctx) or [_('Unknown')]
|
||||
@ -294,9 +311,8 @@ def _parse_language(root, mi, ctx):
|
||||
mi.languages = [language]
|
||||
|
||||
|
||||
def _get_fbroot(stream):
|
||||
def _get_fbroot(raw):
|
||||
parser = etree.XMLParser(recover=True, no_network=True)
|
||||
raw = stream.read()
|
||||
raw = xml_to_unicode(raw, strip_encoding_pats=True)[0]
|
||||
root = etree.fromstring(raw, parser=parser)
|
||||
return ensure_namespace(root)
|
||||
@ -386,7 +402,8 @@ def _set_cover(title_info, mi, ctx):
|
||||
|
||||
def set_metadata(stream, mi, apply_null=False, update_timestamp=False):
|
||||
stream.seek(0)
|
||||
root = _get_fbroot(stream)
|
||||
raw, zip_file_name = get_fb2_data(stream)
|
||||
root = _get_fbroot(raw)
|
||||
ctx = Context(root)
|
||||
desc = ctx.get_or_create(root, 'description')
|
||||
ti = ctx.get_or_create(desc, 'title-info')
|
||||
@ -403,14 +420,20 @@ def set_metadata(stream, mi, apply_null=False, update_timestamp=False):
|
||||
for child in ti:
|
||||
child.tail = indent
|
||||
|
||||
stream.seek(0)
|
||||
stream.truncate()
|
||||
# Apparently there exists FB2 reading software that chokes on the use of
|
||||
# single quotes in xml declaration. Sigh. See
|
||||
# https://www.mobileread.com/forums/showthread.php?p=2273184#post2273184
|
||||
stream.write(b'<?xml version="1.0" encoding="UTF-8"?>\n')
|
||||
stream.write(etree.tostring(root, method='xml', encoding='utf-8',
|
||||
xml_declaration=False))
|
||||
raw = b'<?xml version="1.0" encoding="UTF-8"?>\n'
|
||||
raw += etree.tostring(root, method='xml', encoding='utf-8', xml_declaration=False)
|
||||
|
||||
stream.seek(0)
|
||||
stream.truncate()
|
||||
if zip_file_name:
|
||||
from calibre.utils.zipfile import ZipFile
|
||||
with ZipFile(stream, 'w') as zf:
|
||||
zf.writestr(zip_file_name, raw)
|
||||
else:
|
||||
stream.write(raw)
|
||||
|
||||
|
||||
def ensure_namespace(doc):
|
||||
|
Loading…
x
Reference in New Issue
Block a user