This commit is contained in:
Kovid Goyal 2008-07-17 11:55:01 -07:00
parent 27a0a596dd
commit 1cf4f8661a

View File

@ -3,15 +3,13 @@ __copyright__ = '2008, Anatoly Shipitsin <norguhtar at gmail.com>'
""" """
Convert .fb2 files to .lrf Convert .fb2 files to .lrf
""" """
import os, sys, tempfile, subprocess, shutil, logging, glob import os, sys, tempfile, shutil, logging
from base64 import b64decode
from calibre.ptempfile import PersistentTemporaryFile
from calibre.ebooks.lrf import option_parser as lrf_option_parser from calibre.ebooks.lrf import option_parser as lrf_option_parser
from calibre.ebooks.metadata.meta import get_metadata from calibre.ebooks.metadata.meta import get_metadata
from calibre.ebooks import ConversionError
from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
from calibre import setup_cli_handlers, __appname__ from calibre import setup_cli_handlers, __appname__
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
from calibre.resources import fb2_xsl from calibre.resources import fb2_xsl
def option_parser(): def option_parser():
@ -22,25 +20,27 @@ _('''%prog [options] mybook.fb2
%prog converts mybook.fb2 to mybook.lrf''')) %prog converts mybook.fb2 to mybook.lrf'''))
parser.add_option('--debug-html-generation', action='store_true', default=False, parser.add_option('--debug-html-generation', action='store_true', default=False,
dest='debug_html_generation', help=_('Print generated HTML to stdout and quit.')) dest='debug_html_generation', help=_('Print generated HTML to stdout and quit.'))
parser.add_option('--keep-intermediate-files', action='store_true', default=False,
help=_('Keep generated HTML files after completing conversion to LRF.'))
return parser return parser
def extract_embedded_content(doc):
    """
    Write every base64-encoded ``<binary>`` child of ``doc`` to a file in the
    current working directory.

    Each direct child (``./*``) whose tag contains ``binary`` and which
    carries an ``id`` attribute is decoded and saved under the file name
    given by that ``id``. Children without an ``id`` and children with
    other tags are ignored.

    :param doc: Parsed FB2 document; must provide ``xpath()`` returning
                elements that expose ``tag``, ``attrib`` and ``text``.
    """
    for elem in doc.xpath('./*'):
        if 'binary' not in elem.tag or 'id' not in elem.attrib:
            continue
        # The id comes from the (untrusted) book file. Keep only its final
        # path component so that values such as '../x' or '/etc/x' cannot
        # place files outside the current working directory.
        fname = os.path.basename(elem.attrib['id'])
        if not fname:
            continue
        # An empty <binary/> element has text None; treat it as no payload
        # instead of failing with AttributeError on None.strip().
        data = b64decode((elem.text or '').strip())
        # try/finally rather than 'with' so the handle is closed promptly
        # without depending on 'with' statement support in the interpreter.
        f = open(fname, 'wb')
        try:
            f.write(data)
        finally:
            f.close()
def generate_html(fb2file, encoding, logger): def generate_html(fb2file, encoding, logger):
from lxml import etree from lxml import etree
tdir = tempfile.mkdtemp(prefix=__appname__+'_') tdir = tempfile.mkdtemp(prefix=__appname__+'_fb2_')
ofile = os.path.join(tdir, 'index.xml')
cwd = os.getcwdu() cwd = os.getcwdu()
os.chdir(tdir) os.chdir(tdir)
try: try:
logger.info('Parsing XML...') logger.info('Parsing XML...')
parser = etree.XMLParser(recover=True, no_network=True) parser = etree.XMLParser(recover=True, no_network=True)
try: doc = etree.parse(fb2file, parser)
doc = etree.parse(fb2file, parser) extract_embedded_content(doc)
except:
raise
logger.info('Parsing failed. Trying to clean up XML...')
soup = BeautifulStoneSoup(open(fb2file, 'rb').read())
doc = etree.fromstring(str(soup))
logger.info('Converting XML to HTML...') logger.info('Converting XML to HTML...')
styledoc = etree.fromstring(fb2_xsl) styledoc = etree.fromstring(fb2_xsl)
@ -72,7 +72,7 @@ def process_file(path, options, logger=None):
options.output = os.path.abspath(os.path.basename(os.path.splitext(path)[0]) + ext) options.output = os.path.abspath(os.path.basename(os.path.splitext(path)[0]) + ext)
options.output = os.path.abspath(os.path.expanduser(options.output)) options.output = os.path.abspath(os.path.expanduser(options.output))
if not mi.title: if not mi.title:
mi.title = os.path.splitext(os.path.basename(rtf))[0] mi.title = os.path.splitext(os.path.basename(fb2))[0]
if (not options.title or options.title == 'Unknown'): if (not options.title or options.title == 'Unknown'):
options.title = mi.title options.title = mi.title
if (not options.author or options.author == 'Unknown') and mi.authors: if (not options.author or options.author == 'Unknown') and mi.authors:
@ -85,7 +85,7 @@ def process_file(path, options, logger=None):
html_process_file(htmlfile, options, logger) html_process_file(htmlfile, options, logger)
finally: finally:
os.chdir(cwd) os.chdir(cwd)
if hasattr(options, 'keep_intermediate_files') and options.keep_intermediate_files: if getattr(options, 'keep_intermediate_files', False):
logger.debug('Intermediate files in '+ tdir) logger.debug('Intermediate files in '+ tdir)
else: else:
shutil.rmtree(tdir) shutil.rmtree(tdir)