mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Implemented any2epub
This commit is contained in:
parent
f0d9bded08
commit
896182b201
@ -317,7 +317,8 @@ def main():
|
|||||||
'mechanize', 'ClientForm', 'usbobserver',
|
'mechanize', 'ClientForm', 'usbobserver',
|
||||||
'genshi', 'calibre.web.feeds.recipes.*',
|
'genshi', 'calibre.web.feeds.recipes.*',
|
||||||
'calibre.ebooks.lrf.any.*', 'calibre.ebooks.lrf.feeds.*',
|
'calibre.ebooks.lrf.any.*', 'calibre.ebooks.lrf.feeds.*',
|
||||||
'keyword', 'codeop', 'pydoc', 'readline'],
|
'keyword', 'codeop', 'pydoc', 'readline',
|
||||||
|
'BeautifulSoup'],
|
||||||
'packages' : ['PIL', 'Authorization', 'lxml'],
|
'packages' : ['PIL', 'Authorization', 'lxml'],
|
||||||
'excludes' : ['IPython'],
|
'excludes' : ['IPython'],
|
||||||
'plist' : { 'CFBundleGetInfoString' : '''calibre, an E-book management application.'''
|
'plist' : { 'CFBundleGetInfoString' : '''calibre, an E-book management application.'''
|
||||||
|
@ -152,7 +152,7 @@ def main(args=sys.argv):
|
|||||||
'win32process', 'win32api', 'msvcrt',
|
'win32process', 'win32api', 'msvcrt',
|
||||||
'win32event', 'calibre.ebooks.lrf.any.*',
|
'win32event', 'calibre.ebooks.lrf.any.*',
|
||||||
'calibre.ebooks.lrf.feeds.*',
|
'calibre.ebooks.lrf.feeds.*',
|
||||||
'genshi',
|
'genshi', 'BeautifulSoup',
|
||||||
'path', 'pydoc', 'IPython.Extensions.*',
|
'path', 'pydoc', 'IPython.Extensions.*',
|
||||||
'calibre.web.feeds.recipes.*',
|
'calibre.web.feeds.recipes.*',
|
||||||
'PyQt4.QtWebKit', 'PyQt4.QtNetwork',
|
'PyQt4.QtWebKit', 'PyQt4.QtNetwork',
|
||||||
|
@ -317,6 +317,11 @@ class LoggingInterface:
|
|||||||
def log_exception(self, msg, *args):
|
def log_exception(self, msg, *args):
|
||||||
self.___log(self.__logger.exception, msg, args, {})
|
self.___log(self.__logger.exception, msg, args, {})
|
||||||
|
|
||||||
|
def walk(dir):
    '''
    Yield the full path of every file found below `dir`.

    A flat, generator based wrapper around os.walk: only files are yielded,
    never the directories that contain them.
    '''
    for dirpath, _dirnames, filenames in os.walk(dir):
        for name in filenames:
            yield os.path.join(dirpath, name)
|
||||||
|
|
||||||
def strftime(fmt, t=time.localtime()):
|
def strftime(fmt, t=time.localtime()):
|
||||||
''' A version of strtime that returns unicode strings. '''
|
''' A version of strtime that returns unicode strings. '''
|
||||||
|
@ -44,6 +44,7 @@ def config(defaults=None):
|
|||||||
c.add_opt('output', ['-o', '--output'], default=None,
|
c.add_opt('output', ['-o', '--output'], default=None,
|
||||||
help=_('The output EPUB file. If not specified, it is derived from the input file name.'))
|
help=_('The output EPUB file. If not specified, it is derived from the input file name.'))
|
||||||
|
|
||||||
|
|
||||||
structure = c.add_group('structure detection', _('Control auto-detection of document structure.'))
|
structure = c.add_group('structure detection', _('Control auto-detection of document structure.'))
|
||||||
structure('chapter', ['--chapter'], default="//*[re:match(name(), 'h[1-2]') and re:test(., 'chapter|book|section', 'i')]",
|
structure('chapter', ['--chapter'], default="//*[re:match(name(), 'h[1-2]') and re:test(., 'chapter|book|section', 'i')]",
|
||||||
help=_('''\
|
help=_('''\
|
||||||
@ -74,6 +75,16 @@ to auto-generate a Table of Contents.
|
|||||||
toc('no_chapters_in_toc', ['--no-chapters-in-toc'], default=False,
|
toc('no_chapters_in_toc', ['--no-chapters-in-toc'], default=False,
|
||||||
help=_("Don't add auto-detected chapters to the Table of Contents."))
|
help=_("Don't add auto-detected chapters to the Table of Contents."))
|
||||||
|
|
||||||
|
layout = c.add_group('page layout', _('Control page layout'))
|
||||||
|
layout('margin_top', ['--margin-top'], default=5.0,
|
||||||
|
help=_('Set the top margin in pts. Default is %default'))
|
||||||
|
layout('margin_bottom', ['--margin-bottom'], default=5.0,
|
||||||
|
help=_('Set the bottom margin in pts. Default is %default'))
|
||||||
|
layout('margin_left', ['--margin-left'], default=5.0,
|
||||||
|
help=_('Set the left margin in pts. Default is %default'))
|
||||||
|
layout('margin_right', ['--margin-right'], default=5.0,
|
||||||
|
help=_('Set the right margin in pts. Default is %default'))
|
||||||
|
|
||||||
c.add_opt('show_opf', ['--show-opf'], default=False, group='debug',
|
c.add_opt('show_opf', ['--show-opf'], default=False, group='debug',
|
||||||
help=_('Print generated OPF file to stdout'))
|
help=_('Print generated OPF file to stdout'))
|
||||||
c.add_opt('show_ncx', ['--show-ncx'], default=False, group='debug',
|
c.add_opt('show_ncx', ['--show-ncx'], default=False, group='debug',
|
||||||
|
154
src/calibre/ebooks/epub/from_any.py
Normal file
154
src/calibre/ebooks/epub/from_any.py
Normal file
@ -0,0 +1,154 @@
|
|||||||
|
from __future__ import with_statement
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
'''
|
||||||
|
Convert any ebook format to epub.
|
||||||
|
'''
|
||||||
|
|
||||||
|
import sys, os, re
|
||||||
|
from contextlib import nested
|
||||||
|
|
||||||
|
from calibre import extract, walk
|
||||||
|
from calibre.ebooks.epub import config as common_config
|
||||||
|
from calibre.ebooks.epub.from_html import convert as html2epub
|
||||||
|
from calibre.ptempfile import TemporaryDirectory
|
||||||
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
|
from calibre.ebooks.metadata.opf import OPFCreator
|
||||||
|
|
||||||
|
def lit2opf(path, tdir, opts):
|
||||||
|
from calibre.ebooks.lit.reader import LitReader
|
||||||
|
print 'Exploding LIT file:', path
|
||||||
|
reader = LitReader(path)
|
||||||
|
reader.extract_content(tdir, False)
|
||||||
|
for f in walk(tdir):
|
||||||
|
if f.lower().endswith('.opf'):
|
||||||
|
return f
|
||||||
|
|
||||||
|
def mobi2opf(path, tdir, opts):
|
||||||
|
from calibre.ebooks.mobi.reader import MobiReader
|
||||||
|
print 'Exploding MOBI file:', path
|
||||||
|
reader = MobiReader(path)
|
||||||
|
reader.extract_content(tdir)
|
||||||
|
files = list(walk(tdir))
|
||||||
|
for f in files:
|
||||||
|
if f.lower().endswith('.opf'):
|
||||||
|
return f
|
||||||
|
html_pat = re.compile(r'\.(x){0,1}htm(l){0,1}', re.IGNORECASE)
|
||||||
|
hf = [f for f in files if html_pat.match(os.path.splitext(f)[1]) is not None]
|
||||||
|
mi = MetaInformation(os.path.splitext(os.path.basename(path))[0], [_('Unknown')])
|
||||||
|
opf = OPFCreator(tdir, mi)
|
||||||
|
opf.create_manifest([(hf[0], None)])
|
||||||
|
opf.create_spine([hf[0]])
|
||||||
|
ans = os.path.join(tdir, 'metadata.opf')
|
||||||
|
opf.render(open(ans, 'wb'))
|
||||||
|
return ans
|
||||||
|
|
||||||
|
def fb22opf(path, tdir, opts):
|
||||||
|
from calibre.ebooks.lrf.fb2.convert_from import to_html
|
||||||
|
print 'Converting FB2 to HTML...'
|
||||||
|
return to_html(path, tdir)
|
||||||
|
|
||||||
|
def rtf2opf(path, tdir, opts):
    '''
    Convert the RTF file at `path` to HTML inside `tdir`.

    :param path: Path to the RTF file.
    :param tdir: Directory in which the converted files are created.
    :param opts: Conversion options. Not used by this function.
    :return: Path to metadata.opf inside `tdir`.
    '''
    from calibre.ebooks.lrf.rtf.convert_from import generate_html
    opf_path = os.path.join(tdir, 'metadata.opf')
    generate_html(path, tdir)
    return opf_path
|
||||||
|
|
||||||
|
def txt2opf(path, tdir, opts):
    '''
    Convert the text file at `path` to HTML inside `tdir`.

    :param path: Path to the text file.
    :param tdir: Directory in which the converted files are created.
    :param opts: Conversion options. opts.encoding is passed on to the
                 text to HTML converter.
    :return: Path to metadata.opf inside `tdir`.
    '''
    from calibre.ebooks.lrf.txt.convert_from import generate_html
    opf_path = os.path.join(tdir, 'metadata.opf')
    generate_html(path, opts.encoding, tdir)
    return opf_path
|
||||||
|
|
||||||
|
def pdf2opf(path, tdir, opts):
    '''
    Convert the PDF file at `path` to HTML inside `tdir`.

    :param path: Path to the PDF file.
    :param tdir: Directory in which the converted files are created.
    :param opts: Conversion options. Not used by this function.
    :return: Path to metadata.opf inside `tdir`.
    '''
    from calibre.ebooks.lrf.pdf.convert_from import generate_html
    opf_path = os.path.join(tdir, 'metadata.opf')
    generate_html(path, tdir)
    return opf_path
|
||||||
|
|
||||||
|
# Dispatch table: lowercase file extension (without the dot) -> function that
# converts a file of that type into something html2epub can process. Every
# converter is called as converter(path, tdir, opts).
MAP = dict(
    lit=lit2opf,
    mobi=mobi2opf,
    prc=mobi2opf,
    fb2=fb22opf,
    rtf=rtf2opf,
    txt=txt2opf,
    pdf=pdf2opf
)
|
||||||
|
|
||||||
|
|
||||||
|
def unarchive(path, tdir):
    '''
    Extract the archive at `path` into `tdir` and pick the ebook inside it.

    Candidates are tried in this order:

      1. An OPF file, followed by any of the formats listed in MAP.
      2. An HTML file named toc or index (compared case insensitively).
      3. The largest HTML file.

    :param path: Path to the archive.
    :param tdir: Directory into which the archive is extracted.
    :return: A tuple (path to the chosen file, its lowercase extension
             without the leading dot).
    :raises ValueError: If no candidate file is found in the archive.
    '''
    extract(path, tdir)
    files = list(walk(tdir))

    for ext in ['opf'] + list(MAP.keys()):
        for f in files:
            if f.lower().endswith('.'+ext):
                # Small txt/rtf files are skipped (presumably readme style
                # files rather than the book itself -- TODO confirm intent)
                if ext in ['txt', 'rtf'] and os.stat(f).st_size < 2048:
                    continue
                return f, ext

    html_pat = re.compile(r'\.(x){0,1}htm(l){0,1}$', re.IGNORECASE)
    html_files = [f for f in files if html_pat.search(f) is not None]
    if not html_files:
        raise ValueError(_('Could not find an ebook inside the archive'))

    # Order the HTML files by size, smallest first
    sized = [(f, os.stat(f).st_size) for f in html_files]
    sized.sort(key=lambda item: item[1])
    html_files = [item[0] for item in sized]

    for q in ('toc', 'index'):
        for f in html_files:
            # walk() yields full paths, so only the file name itself can be
            # compared against the preferred names
            name = os.path.splitext(os.path.basename(f))[0]
            if name.lower() == q:
                return f, os.path.splitext(f)[1].lower()[1:]

    largest = html_files[-1]
    return largest, os.path.splitext(largest)[1].lower()[1:]
|
||||||
|
|
||||||
|
def any2epub(opts, path, notification=None):
|
||||||
|
ext = os.path.splitext(path)[1]
|
||||||
|
if not ext:
|
||||||
|
raise ValueError('Unknown file type: '+path)
|
||||||
|
ext = ext.lower()[1:]
|
||||||
|
|
||||||
|
if opts.output is None:
|
||||||
|
opts.output = os.path.splitext(os.path.basename(path))[0]+'.epub'
|
||||||
|
|
||||||
|
with nested(TemporaryDirectory('_any2epub1'), TemporaryDirectory('_any2epub2')) as (tdir1, tdir2):
|
||||||
|
if ext in ['rar', 'zip']:
|
||||||
|
path, ext = unarchive(path, tdir1)
|
||||||
|
print 'Found %s file in archive'%(ext.upper())
|
||||||
|
|
||||||
|
if ext in MAP.keys():
|
||||||
|
path = MAP[ext](path, tdir2, opts)
|
||||||
|
ext = 'opf'
|
||||||
|
|
||||||
|
|
||||||
|
if re.match(r'((x){0,1}htm(l){0,1})|opf', ext) is None:
|
||||||
|
raise ValueError('Conversion from %s is not supported'%ext.upper())
|
||||||
|
|
||||||
|
print 'Creating EPUB file...'
|
||||||
|
html2epub(path, opts, notification=notification)
|
||||||
|
|
||||||
|
def config(defaults=None):
    '''
    Return the configuration object for any2epub. This is the common EPUB
    configuration, created with the given `defaults`.
    '''
    conf = common_config(defaults=defaults)
    return conf
|
||||||
|
|
||||||
|
|
||||||
|
def formats():
    '''
    Return the list of supported input formats: html, the archive types rar
    and zip, and every extension that has a converter in MAP.
    '''
    supported = ['html', 'rar', 'zip']
    supported.extend(MAP.keys())
    return supported
|
||||||
|
|
||||||
|
def option_parser():
    '''
    Return the command line option parser for any2epub.

    The usage message lists the supported input formats as a comma separated
    list.
    '''
    # Join the formats explicitly: interpolating the list itself would put
    # its Python repr (brackets and quotes) into the help text
    usage = _('''\
%%prog [options] filename

Convert any of a large number of ebook formats to an epub file. Supported formats are: %s
''')%', '.join(formats())
    return config().option_parser(usage=usage)
|
||||||
|
|
||||||
|
def main(args=sys.argv):
|
||||||
|
parser = option_parser()
|
||||||
|
opts, args = parser.parse_args(args)
|
||||||
|
if len(args) < 2:
|
||||||
|
parser.print_help()
|
||||||
|
print 'No input file specified.'
|
||||||
|
return 1
|
||||||
|
any2epub(opts, args[1])
|
||||||
|
return 0
|
||||||
|
|
||||||
|
# Run the converter when executed as a script; the return value of main()
# becomes the exit status of the process.
if __name__ == '__main__':
    raise SystemExit(main())
|
@ -29,7 +29,6 @@ def option_parser():
|
|||||||
def convert(opts, recipe_arg, notification=None):
|
def convert(opts, recipe_arg, notification=None):
|
||||||
opts.lrf = False
|
opts.lrf = False
|
||||||
opts.epub = True
|
opts.epub = True
|
||||||
opts.chapter_mark = 'none'
|
|
||||||
if opts.debug:
|
if opts.debug:
|
||||||
opts.verbose = 2
|
opts.verbose = 2
|
||||||
parser = option_parser()
|
parser = option_parser()
|
||||||
@ -40,6 +39,7 @@ def convert(opts, recipe_arg, notification=None):
|
|||||||
recipe_opts = c.parse_string(recipe.html2epub_options)
|
recipe_opts = c.parse_string(recipe.html2epub_options)
|
||||||
c.smart_update(recipe_opts, opts)
|
c.smart_update(recipe_opts, opts)
|
||||||
opts = recipe_opts
|
opts = recipe_opts
|
||||||
|
opts.chapter_mark = 'none'
|
||||||
opf = glob.glob(os.path.join(tdir, '*.opf'))
|
opf = glob.glob(os.path.join(tdir, '*.opf'))
|
||||||
if not opf:
|
if not opf:
|
||||||
raise Exception('Downloading of recipe: %s failed'%recipe_arg)
|
raise Exception('Downloading of recipe: %s failed'%recipe_arg)
|
||||||
|
@ -4,7 +4,12 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
import os, sys, re, shutil, cStringIO
|
import os, sys, re, shutil, cStringIO
|
||||||
|
|
||||||
from lxml.etree import XPath
|
from lxml.etree import XPath
|
||||||
|
try:
|
||||||
|
from PIL import Image as PILImage
|
||||||
|
except ImportError:
|
||||||
|
import Image as PILImage
|
||||||
|
|
||||||
from calibre.ebooks.html import Processor, get_text, merge_metadata, get_filelist,\
|
from calibre.ebooks.html import Processor, get_text, merge_metadata, get_filelist,\
|
||||||
opf_traverse, create_metadata, rebase_toc
|
opf_traverse, create_metadata, rebase_toc
|
||||||
@ -106,8 +111,8 @@ def convert(htmlfile, opts, notification=None):
|
|||||||
cover_src = opts.cover
|
cover_src = opts.cover
|
||||||
|
|
||||||
if cover_src is not None:
|
if cover_src is not None:
|
||||||
cover_dest = os.path.join(tdir, 'content', 'resources', '_cover_'+os.path.splitext(cover_src)[1])
|
cover_dest = os.path.join(tdir, 'content', 'resources', '_cover_.jpg')
|
||||||
shutil.copyfile(cover_src, cover_dest)
|
PILImage.open(cover_src).convert('RGB').save(cover_dest)
|
||||||
mi.cover = cover_dest
|
mi.cover = cover_dest
|
||||||
resources.append(cover_dest)
|
resources.append(cover_dest)
|
||||||
|
|
||||||
|
@ -23,6 +23,7 @@ from calibre.utils.config import Config, StringConfig
|
|||||||
from calibre.ebooks.metadata.opf import OPFReader, OPFCreator
|
from calibre.ebooks.metadata.opf import OPFReader, OPFCreator
|
||||||
from calibre.ebooks.metadata import MetaInformation
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
from calibre.ebooks.metadata.meta import get_metadata
|
from calibre.ebooks.metadata.meta import get_metadata
|
||||||
|
from calibre.ebooks.metadata.opf2 import OPF
|
||||||
from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile
|
from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile
|
||||||
from calibre.utils.zipfile import ZipFile
|
from calibre.utils.zipfile import ZipFile
|
||||||
|
|
||||||
@ -280,7 +281,7 @@ class PreProcessor(object):
|
|||||||
return re.search('<H2[^><]*id=BookTitle', raw) is not None
|
return re.search('<H2[^><]*id=BookTitle', raw) is not None
|
||||||
|
|
||||||
def is_pdftohtml(self, src):
|
def is_pdftohtml(self, src):
|
||||||
return src.startswith('<!-- created by calibre\'s pdftohtml -->')
|
return '<!-- created by calibre\'s pdftohtml -->' in src[:1000]
|
||||||
|
|
||||||
def preprocess(self, html):
|
def preprocess(self, html):
|
||||||
if self.is_baen(html):
|
if self.is_baen(html):
|
||||||
@ -335,6 +336,7 @@ class Parser(PreProcessor, LoggingInterface):
|
|||||||
pretty_print=self.opts.pretty_print,
|
pretty_print=self.opts.pretty_print,
|
||||||
include_meta_content_type=True)
|
include_meta_content_type=True)
|
||||||
ans = re.compile(r'<html>', re.IGNORECASE).sub('<html xmlns="http://www.w3.org/1999/xhtml">', ans)
|
ans = re.compile(r'<html>', re.IGNORECASE).sub('<html xmlns="http://www.w3.org/1999/xhtml">', ans)
|
||||||
|
ans = re.compile(r'<head[^<>]*?>', re.IGNORECASE).sub('<head>\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />\n', ans)
|
||||||
f.write(ans)
|
f.write(ans)
|
||||||
return f.name
|
return f.name
|
||||||
|
|
||||||
@ -360,6 +362,8 @@ class Parser(PreProcessor, LoggingInterface):
|
|||||||
body = self.root.xpath('//body')
|
body = self.root.xpath('//body')
|
||||||
if body:
|
if body:
|
||||||
self.body = body[0]
|
self.body = body[0]
|
||||||
|
for a in self.root.xpath('//a[@name]'):
|
||||||
|
a.set('id', a.get('name'))
|
||||||
|
|
||||||
def debug_tree(self, name):
|
def debug_tree(self, name):
|
||||||
'''
|
'''
|
||||||
@ -540,15 +544,19 @@ class Processor(Parser):
|
|||||||
css.append('#%s { %s }'%(id, setting))
|
css.append('#%s { %s }'%(id, setting))
|
||||||
|
|
||||||
for elem in self.root.xpath('//*[@style]'):
|
for elem in self.root.xpath('//*[@style]'):
|
||||||
if 'id' not in elem.keys():
|
id = get_id(elem, counter)
|
||||||
id = get_id(elem, counter)
|
counter += 1
|
||||||
counter += 1
|
|
||||||
css.append('#%s {%s}'%(id, elem.get('style')))
|
css.append('#%s {%s}'%(id, elem.get('style')))
|
||||||
elem.attrib.pop('style')
|
elem.attrib.pop('style')
|
||||||
|
|
||||||
self.raw_css = '\n\n'.join(css)
|
self.raw_css = '\n\n'.join(css)
|
||||||
self.css = unicode(self.raw_css)
|
self.css = unicode(self.raw_css)
|
||||||
# TODO: Figure out what to do about CSS imports from linked stylesheets
|
self.do_layout()
|
||||||
|
# TODO: Figure out what to do about CSS imports from linked stylesheets
|
||||||
|
|
||||||
|
def do_layout(self):
    '''
    Append the page layout rules to self.css.

    All four margins of the body element are set to zero and the margins
    from self.opts (margin_top, margin_bottom, margin_left, margin_right,
    in pts) are applied by means of an @page rule.
    '''
    # The property is spelled margin-bottom; a misspelled property name is
    # ignored by CSS parsers, which silently drops the bottom margin.
    self.css += '\nbody {margin-top: 0pt; margin-bottom: 0pt; margin-left: 0pt; margin-right: 0pt}\n'
    self.css += '@page {margin-top: %fpt; margin-bottom: %fpt; margin-left: %fpt; margin-right: %fpt}\n'%(
        self.opts.margin_top, self.opts.margin_bottom,
        self.opts.margin_left, self.opts.margin_right)
|
||||||
|
|
||||||
def config(defaults=None, config_name='html',
|
def config(defaults=None, config_name='html',
|
||||||
desc=_('Options to control the traversal of HTML')):
|
desc=_('Options to control the traversal of HTML')):
|
||||||
@ -575,6 +583,8 @@ def config(defaults=None, config_name='html',
|
|||||||
help=_('Set the title. Default is to autodetect.'))
|
help=_('Set the title. Default is to autodetect.'))
|
||||||
metadata('authors', ['-a', '--authors'], default=_('Unknown'),
|
metadata('authors', ['-a', '--authors'], default=_('Unknown'),
|
||||||
help=_('The author(s) of the ebook, as a comma separated list.'))
|
help=_('The author(s) of the ebook, as a comma separated list.'))
|
||||||
|
metadata('from_opf', ['--metadata-from'], default=None,
|
||||||
|
help=_('Load metadata from the specified OPF file'))
|
||||||
|
|
||||||
debug = c.add_group('debug', _('Options useful for debugging'))
|
debug = c.add_group('debug', _('Options useful for debugging'))
|
||||||
debug('verbose', ['-v', '--verbose'], default=0, action='count',
|
debug('verbose', ['-v', '--verbose'], default=0, action='count',
|
||||||
@ -648,7 +658,12 @@ def merge_metadata(htmlfile, opf, opts):
|
|||||||
if opf:
|
if opf:
|
||||||
mi = MetaInformation(opf)
|
mi = MetaInformation(opf)
|
||||||
else:
|
else:
|
||||||
mi = get_metadata(open(htmlfile, 'rb'), 'html')
|
try:
|
||||||
|
mi = get_metadata(open(htmlfile, 'rb'), 'html')
|
||||||
|
except:
|
||||||
|
mi = MetaInformation(None, None)
|
||||||
|
if opts.from_opf is not None and os.access(opts.from_opf, os.R_OK):
|
||||||
|
mi.smart_update(OPF(open(opts.from_opf, 'rb'), os.path.abspath(os.path.dirname(opts.from_opf))))
|
||||||
if opts.title:
|
if opts.title:
|
||||||
mi.title = opts.title
|
mi.title = opts.title
|
||||||
if opts.authors != _('Unknown'):
|
if opts.authors != _('Unknown'):
|
||||||
|
@ -1,16 +1,22 @@
|
|||||||
|
from __future__ import with_statement
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Anatoly Shipitsin <norguhtar at gmail.com>'
|
__copyright__ = '2008, Anatoly Shipitsin <norguhtar at gmail.com>'
|
||||||
"""
|
"""
|
||||||
Convert .fb2 files to .lrf
|
Convert .fb2 files to .lrf
|
||||||
"""
|
"""
|
||||||
import os, sys, tempfile, shutil, logging
|
import os, sys, shutil, logging
|
||||||
from base64 import b64decode
|
from base64 import b64decode
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
from calibre.ebooks.lrf import option_parser as lrf_option_parser
|
from calibre.ebooks.lrf import option_parser as lrf_option_parser
|
||||||
from calibre.ebooks.metadata.meta import get_metadata
|
from calibre.ebooks.metadata.meta import get_metadata
|
||||||
from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
|
from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
|
||||||
from calibre import setup_cli_handlers, __appname__
|
from calibre import setup_cli_handlers
|
||||||
from calibre.resources import fb2_xsl
|
from calibre.resources import fb2_xsl
|
||||||
|
from calibre.ptempfile import PersistentTemporaryDirectory
|
||||||
|
from calibre.ebooks.metadata.opf import OPFCreator
|
||||||
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
|
|
||||||
|
|
||||||
def option_parser():
|
def option_parser():
|
||||||
parser = lrf_option_parser(
|
parser = lrf_option_parser(
|
||||||
@ -31,29 +37,42 @@ def extract_embedded_content(doc):
|
|||||||
data = b64decode(elem.text.strip())
|
data = b64decode(elem.text.strip())
|
||||||
open(fname, 'wb').write(data)
|
open(fname, 'wb').write(data)
|
||||||
|
|
||||||
def generate_html(fb2file, encoding, logger):
|
def to_html(fb2file, tdir):
|
||||||
from lxml import etree
|
cwd = os.getcwd()
|
||||||
tdir = tempfile.mkdtemp(prefix=__appname__+'_fb2_')
|
|
||||||
cwd = os.getcwdu()
|
|
||||||
os.chdir(tdir)
|
|
||||||
try:
|
try:
|
||||||
logger.info('Parsing XML...')
|
os.chdir(tdir)
|
||||||
|
print 'Parsing XML...'
|
||||||
parser = etree.XMLParser(recover=True, no_network=True)
|
parser = etree.XMLParser(recover=True, no_network=True)
|
||||||
doc = etree.parse(fb2file, parser)
|
doc = etree.parse(fb2file, parser)
|
||||||
extract_embedded_content(doc)
|
extract_embedded_content(doc)
|
||||||
logger.info('Converting XML to HTML...')
|
print 'Converting XML to HTML...'
|
||||||
styledoc = etree.fromstring(fb2_xsl)
|
styledoc = etree.fromstring(fb2_xsl)
|
||||||
|
|
||||||
transform = etree.XSLT(styledoc)
|
transform = etree.XSLT(styledoc)
|
||||||
result = transform(doc)
|
result = transform(doc)
|
||||||
html = os.path.join(tdir, 'index.html')
|
open('index.html', 'wb').write(transform.tostring(result))
|
||||||
f = open(html, 'wb')
|
try:
|
||||||
f.write(transform.tostring(result))
|
mi = get_metadata(open(fb2file, 'rb'))
|
||||||
f.close()
|
except:
|
||||||
|
mi = MetaInformation(None, None)
|
||||||
|
if not mi.title:
|
||||||
|
mi.title = os.path.splitext(os.path.basename(fb2file))[0]
|
||||||
|
if not mi.authors:
|
||||||
|
mi.authors = [_('Unknown')]
|
||||||
|
opf = OPFCreator(tdir, mi)
|
||||||
|
opf.create_manifest([('index.html', None)])
|
||||||
|
opf.create_spine(['index.html'])
|
||||||
|
opf.render(open('metadata.opf', 'wb'))
|
||||||
|
return os.path.join(tdir, 'metadata.opf')
|
||||||
finally:
|
finally:
|
||||||
os.chdir(cwd)
|
os.chdir(cwd)
|
||||||
return html
|
|
||||||
|
|
||||||
|
def generate_html(fb2file, encoding, logger):
    '''
    Convert `fb2file` to HTML in a newly created persistent temporary
    directory.

    :param fb2file: Path to the FB2 file.
    :param encoding: Not used by this function.
    :param logger: Not used by this function.
    :return: Path to index.html inside the temporary directory.
    '''
    outdir = PersistentTemporaryDirectory('_fb22lrf')
    to_html(fb2file, outdir)
    index = os.path.join(outdir, 'index.html')
    return index
|
||||||
|
|
||||||
def process_file(path, options, logger=None):
|
def process_file(path, options, logger=None):
|
||||||
if logger is None:
|
if logger is None:
|
||||||
level = logging.DEBUG if options.verbose else logging.INFO
|
level = logging.DEBUG if options.verbose else logging.INFO
|
||||||
|
@ -9,6 +9,9 @@ from calibre.ebooks import ConversionError
|
|||||||
from calibre.ptempfile import PersistentTemporaryDirectory
|
from calibre.ptempfile import PersistentTemporaryDirectory
|
||||||
from calibre.ebooks.lrf import option_parser as lrf_option_parser
|
from calibre.ebooks.lrf import option_parser as lrf_option_parser
|
||||||
from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
|
from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
|
||||||
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
|
from calibre.ebooks.metadata.opf import OPFCreator
|
||||||
|
from calibre.ebooks.metadata.pdf import get_metadata
|
||||||
|
|
||||||
PDFTOHTML = 'pdftohtml'
|
PDFTOHTML = 'pdftohtml'
|
||||||
popen = subprocess.Popen
|
popen = subprocess.Popen
|
||||||
@ -20,7 +23,7 @@ if iswindows and hasattr(sys, 'frozen'):
|
|||||||
if islinux and getattr(sys, 'frozen_path', False):
|
if islinux and getattr(sys, 'frozen_path', False):
|
||||||
PDFTOHTML = os.path.join(getattr(sys, 'frozen_path'), 'pdftohtml')
|
PDFTOHTML = os.path.join(getattr(sys, 'frozen_path'), 'pdftohtml')
|
||||||
|
|
||||||
def generate_html(pathtopdf, logger):
|
def generate_html(pathtopdf, tdir):
|
||||||
'''
|
'''
|
||||||
Convert the pdf into html.
|
Convert the pdf into html.
|
||||||
@return: Path to a temporary file containing the HTML.
|
@return: Path to a temporary file containing the HTML.
|
||||||
@ -29,10 +32,10 @@ def generate_html(pathtopdf, logger):
|
|||||||
pathtopdf = pathtopdf.encode(sys.getfilesystemencoding())
|
pathtopdf = pathtopdf.encode(sys.getfilesystemencoding())
|
||||||
if not os.access(pathtopdf, os.R_OK):
|
if not os.access(pathtopdf, os.R_OK):
|
||||||
raise ConversionError, 'Cannot read from ' + pathtopdf
|
raise ConversionError, 'Cannot read from ' + pathtopdf
|
||||||
tdir = PersistentTemporaryDirectory('pdftohtml')
|
|
||||||
index = os.path.join(tdir, 'index.html')
|
index = os.path.join(tdir, 'index.html')
|
||||||
# This is neccessary as pdftohtml doesn't always (linux) respect absolute paths
|
# This is neccessary as pdftohtml doesn't always (linux) respect absolute paths
|
||||||
cmd = (PDFTOHTML, '-enc', 'UTF-8', '-noframes', '-p', '-nomerge', pathtopdf, os.path.basename(index))
|
pathtopdf = os.path.abspath(pathtopdf)
|
||||||
|
cmd = (PDFTOHTML, '-enc', 'UTF-8', '-noframes', '-p', '-nomerge', '-nodrm', pathtopdf, os.path.basename(index))
|
||||||
cwd = os.getcwd()
|
cwd = os.getcwd()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -44,16 +47,30 @@ def generate_html(pathtopdf, logger):
|
|||||||
raise ConversionError(_('Could not find pdftohtml, check it is in your PATH'), True)
|
raise ConversionError(_('Could not find pdftohtml, check it is in your PATH'), True)
|
||||||
else:
|
else:
|
||||||
raise
|
raise
|
||||||
logger.info(p.stdout.read())
|
print p.stdout.read()
|
||||||
ret = p.wait()
|
ret = p.wait()
|
||||||
if ret != 0:
|
if ret != 0:
|
||||||
err = p.stderr.read()
|
err = p.stderr.read()
|
||||||
raise ConversionError, err
|
raise ConversionError, err
|
||||||
if not os.path.exists(index) or os.stat(index).st_size < 100:
|
if not os.path.exists(index) or os.stat(index).st_size < 100:
|
||||||
raise ConversionError(os.path.basename(pathtopdf) + _(' does not allow copying of text.'), True)
|
raise ConversionError(os.path.basename(pathtopdf) + _(' does not allow copying of text.'), True)
|
||||||
raw = open(index).read(4000)
|
|
||||||
if not '<br' in raw:
|
raw = open(index, 'rb').read()
|
||||||
|
open(index, 'wb').write('<!-- created by calibre\'s pdftohtml -->\n'+raw)
|
||||||
|
if not '<br' in raw[:4000]:
|
||||||
raise ConversionError(os.path.basename(pathtopdf) + _(' is an image based PDF. Only conversion of text based PDFs is supported.'), True)
|
raise ConversionError(os.path.basename(pathtopdf) + _(' is an image based PDF. Only conversion of text based PDFs is supported.'), True)
|
||||||
|
try:
|
||||||
|
mi = get_metadata(open(pathtopdf, 'rb'))
|
||||||
|
except:
|
||||||
|
mi = MetaInformation(None, None)
|
||||||
|
if not mi.title:
|
||||||
|
mi.title = os.path.splitext(os.path.basename(pathtopdf))[0]
|
||||||
|
if not mi.authors:
|
||||||
|
mi.authors = [_('Unknown')]
|
||||||
|
opf = OPFCreator(tdir, mi)
|
||||||
|
opf.create_manifest([('index.html', None)])
|
||||||
|
opf.create_spine(['index.html'])
|
||||||
|
opf.render(open('metadata.opf', 'wb'))
|
||||||
finally:
|
finally:
|
||||||
os.chdir(cwd)
|
os.chdir(cwd)
|
||||||
return index
|
return index
|
||||||
@ -72,7 +89,8 @@ def process_file(path, options, logger=None):
|
|||||||
logger = logging.getLogger('pdf2lrf')
|
logger = logging.getLogger('pdf2lrf')
|
||||||
setup_cli_handlers(logger, level)
|
setup_cli_handlers(logger, level)
|
||||||
pdf = os.path.abspath(os.path.expanduser(path))
|
pdf = os.path.abspath(os.path.expanduser(path))
|
||||||
htmlfile = generate_html(pdf, logger)
|
tdir = PersistentTemporaryDirectory('_pdf2lrf')
|
||||||
|
htmlfile = generate_html(pdf, tdir)
|
||||||
if not options.output:
|
if not options.output:
|
||||||
ext = '.lrs' if options.lrs else '.lrf'
|
ext = '.lrs' if options.lrs else '.lrf'
|
||||||
options.output = os.path.abspath(os.path.basename(os.path.splitext(path)[0]) + ext)
|
options.output = os.path.abspath(os.path.basename(os.path.splitext(path)[0]) + ext)
|
||||||
|
@ -1,17 +1,20 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
import os, sys, tempfile, shutil, logging, glob
|
import os, sys, shutil, logging, glob
|
||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
from calibre.ebooks.lrf import option_parser as lrf_option_parser
|
from calibre.ebooks.lrf import option_parser as lrf_option_parser
|
||||||
from calibre.ebooks.metadata.meta import get_metadata
|
from calibre.ebooks.metadata.meta import get_metadata
|
||||||
from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
|
from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
|
||||||
from calibre import setup_cli_handlers, __appname__
|
from calibre import setup_cli_handlers
|
||||||
from calibre.libwand import convert, WandException
|
from calibre.libwand import convert, WandException
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
|
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
|
||||||
from calibre.ebooks.lrf.rtf.xsl import xhtml
|
from calibre.ebooks.lrf.rtf.xsl import xhtml
|
||||||
from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
|
from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
|
||||||
|
from calibre.ptempfile import PersistentTemporaryDirectory
|
||||||
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
|
from calibre.ebooks.metadata.opf import OPFCreator
|
||||||
|
|
||||||
def option_parser():
|
def option_parser():
|
||||||
parser = lrf_option_parser(
|
parser = lrf_option_parser(
|
||||||
@ -44,8 +47,8 @@ def process_file(path, options, logger=None):
|
|||||||
f = open(rtf, 'rb')
|
f = open(rtf, 'rb')
|
||||||
mi = get_metadata(f, 'rtf')
|
mi = get_metadata(f, 'rtf')
|
||||||
f.close()
|
f.close()
|
||||||
html = generate_html(rtf, logger)
|
tdir = PersistentTemporaryDirectory('_rtf2lrf')
|
||||||
tdir = os.path.dirname(html)
|
html = generate_html(rtf, tdir)
|
||||||
cwd = os.getcwdu()
|
cwd = os.getcwdu()
|
||||||
try:
|
try:
|
||||||
if not options.output:
|
if not options.output:
|
||||||
@ -83,12 +86,12 @@ def main(args=sys.argv, logger=None):
|
|||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
def generate_xml(rtfpath):
|
def generate_xml(rtfpath, tdir):
|
||||||
from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
|
from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
|
||||||
tdir = tempfile.mkdtemp(prefix=__appname__+'_')
|
|
||||||
ofile = os.path.join(tdir, 'index.xml')
|
ofile = os.path.join(tdir, 'index.xml')
|
||||||
cwd = os.getcwdu()
|
cwd = os.getcwdu()
|
||||||
os.chdir(tdir)
|
os.chdir(tdir)
|
||||||
|
rtfpath = os.path.abspath(rtfpath)
|
||||||
try:
|
try:
|
||||||
parser = ParseRtf(
|
parser = ParseRtf(
|
||||||
in_file = rtfpath,
|
in_file = rtfpath,
|
||||||
@ -134,26 +137,27 @@ def generate_xml(rtfpath):
|
|||||||
return ofile
|
return ofile
|
||||||
|
|
||||||
|
|
||||||
def generate_html(rtfpath, logger):
|
def generate_html(rtfpath, tdir):
|
||||||
logger.info('Converting RTF to XML...')
|
print 'Converting RTF to XML...'
|
||||||
|
rtfpath = os.path.abspath(rtfpath)
|
||||||
try:
|
try:
|
||||||
xml = generate_xml(rtfpath)
|
xml = generate_xml(rtfpath, tdir)
|
||||||
except RtfInvalidCodeException:
|
except RtfInvalidCodeException:
|
||||||
raise Exception(_('This RTF file has a feature calibre does not support. Convert it to HTML and then convert it.'))
|
raise Exception(_('This RTF file has a feature calibre does not support. Convert it to HTML and then convert it.'))
|
||||||
tdir = os.path.dirname(xml)
|
tdir = os.path.dirname(xml)
|
||||||
cwd = os.getcwdu()
|
cwd = os.getcwdu()
|
||||||
os.chdir(tdir)
|
os.chdir(tdir)
|
||||||
try:
|
try:
|
||||||
logger.info('Parsing XML...')
|
print 'Parsing XML...'
|
||||||
parser = etree.XMLParser(recover=True, no_network=True)
|
parser = etree.XMLParser(recover=True, no_network=True)
|
||||||
try:
|
try:
|
||||||
doc = etree.parse(xml, parser)
|
doc = etree.parse(xml, parser)
|
||||||
except:
|
except:
|
||||||
raise
|
raise
|
||||||
logger.info('Parsing failed. Trying to clean up XML...')
|
print 'Parsing failed. Trying to clean up XML...'
|
||||||
soup = BeautifulStoneSoup(open(xml, 'rb').read())
|
soup = BeautifulStoneSoup(open(xml, 'rb').read())
|
||||||
doc = etree.fromstring(str(soup))
|
doc = etree.fromstring(str(soup))
|
||||||
logger.info('Converting XML to HTML...')
|
print 'Converting XML to HTML...'
|
||||||
styledoc = etree.fromstring(xhtml)
|
styledoc = etree.fromstring(xhtml)
|
||||||
|
|
||||||
transform = etree.XSLT(styledoc)
|
transform = etree.XSLT(styledoc)
|
||||||
@ -161,8 +165,22 @@ def generate_html(rtfpath, logger):
|
|||||||
tdir = os.path.dirname(xml)
|
tdir = os.path.dirname(xml)
|
||||||
html = os.path.join(tdir, 'index.html')
|
html = os.path.join(tdir, 'index.html')
|
||||||
f = open(html, 'wb')
|
f = open(html, 'wb')
|
||||||
f.write(transform.tostring(result))
|
res = transform.tostring(result)
|
||||||
|
res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
|
||||||
|
f.write(res)
|
||||||
f.close()
|
f.close()
|
||||||
|
try:
|
||||||
|
mi = get_metadata(open(rtfpath, 'rb'))
|
||||||
|
except:
|
||||||
|
mi = MetaInformation(None, None)
|
||||||
|
if not mi.title:
|
||||||
|
mi.title = os.path.splitext(os.path.basename(rtfpath))[0]
|
||||||
|
if not mi.authors:
|
||||||
|
mi.authors = [_('Unknown')]
|
||||||
|
opf = OPFCreator(tdir, mi)
|
||||||
|
opf.create_manifest([('index.html', None)])
|
||||||
|
opf.create_spine(['index.html'])
|
||||||
|
opf.render(open('metadata.opf', 'wb'))
|
||||||
finally:
|
finally:
|
||||||
os.chdir(cwd)
|
os.chdir(cwd)
|
||||||
return html
|
return html
|
||||||
|
@ -5,12 +5,14 @@ Convert .txt files to .lrf
|
|||||||
"""
|
"""
|
||||||
import os, sys, codecs, logging
|
import os, sys, codecs, logging
|
||||||
|
|
||||||
from calibre.ptempfile import PersistentTemporaryFile
|
from calibre.ptempfile import PersistentTemporaryDirectory
|
||||||
from calibre.ebooks.lrf import option_parser as lrf_option_parser
|
from calibre.ebooks.lrf import option_parser as lrf_option_parser
|
||||||
from calibre.ebooks import ConversionError
|
from calibre.ebooks import ConversionError
|
||||||
from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
|
from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
|
||||||
from calibre.ebooks.markdown import markdown
|
from calibre.ebooks.markdown import markdown
|
||||||
from calibre import setup_cli_handlers
|
from calibre import setup_cli_handlers
|
||||||
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
|
from calibre.ebooks.metadata.opf import OPFCreator
|
||||||
|
|
||||||
def option_parser():
|
def option_parser():
|
||||||
parser = lrf_option_parser(
|
parser = lrf_option_parser(
|
||||||
@ -23,7 +25,7 @@ _('''%prog [options] mybook.txt
|
|||||||
return parser
|
return parser
|
||||||
|
|
||||||
|
|
||||||
def generate_html(txtfile, encoding, logger):
|
def generate_html(txtfile, encoding, tdir):
|
||||||
'''
|
'''
|
||||||
Convert txtfile to html and return a PersistentTemporaryFile object pointing
|
Convert txtfile to html and return a PersistentTemporaryFile object pointing
|
||||||
to the file with the HTML.
|
to the file with the HTML.
|
||||||
@ -44,15 +46,19 @@ def generate_html(txtfile, encoding, logger):
|
|||||||
else:
|
else:
|
||||||
txt = codecs.open(txtfile, 'rb', enc).read()
|
txt = codecs.open(txtfile, 'rb', enc).read()
|
||||||
|
|
||||||
logger.info('Converting text to HTML...')
|
print 'Converting text to HTML...'
|
||||||
md = markdown.Markdown(
|
md = markdown.Markdown(
|
||||||
extensions=['footnotes', 'tables', 'toc'],
|
extensions=['footnotes', 'tables', 'toc'],
|
||||||
safe_mode=False,
|
safe_mode=False,
|
||||||
)
|
)
|
||||||
html = md.convert(txt)
|
html = '<html><body>'+md.convert(txt)+'</body></html>'
|
||||||
p = PersistentTemporaryFile('.html', dir=os.path.dirname(txtfile))
|
p = os.path.join(tdir, 'index.html')
|
||||||
p.close()
|
open(p, 'wb').write(html.encode('utf-8'))
|
||||||
codecs.open(p.name, 'wb', 'utf8').write(html)
|
mi = MetaInformation(os.path.splitext(os.path.basename(txtfile))[0], [_('Unknown')])
|
||||||
|
opf = OPFCreator(tdir, mi)
|
||||||
|
opf.create_manifest([(os.path.join(tdir, 'index.html'), None)])
|
||||||
|
opf.create_spine([os.path.join(tdir, 'index.html')])
|
||||||
|
opf.render(open(os.path.join(tdir, 'metadata.opf'), 'wb'))
|
||||||
return p
|
return p
|
||||||
|
|
||||||
def process_file(path, options, logger=None):
|
def process_file(path, options, logger=None):
|
||||||
@ -63,7 +69,8 @@ def process_file(path, options, logger=None):
|
|||||||
txt = os.path.abspath(os.path.expanduser(path))
|
txt = os.path.abspath(os.path.expanduser(path))
|
||||||
if not hasattr(options, 'debug_html_generation'):
|
if not hasattr(options, 'debug_html_generation'):
|
||||||
options.debug_html_generation = False
|
options.debug_html_generation = False
|
||||||
htmlfile = generate_html(txt, options.encoding, logger)
|
tdir = PersistentTemporaryDirectory('_txt2lrf')
|
||||||
|
htmlfile = generate_html(txt, options.encoding, tdir)
|
||||||
options.encoding = 'utf-8'
|
options.encoding = 'utf-8'
|
||||||
if not options.debug_html_generation:
|
if not options.debug_html_generation:
|
||||||
options.force_page_break = 'h2'
|
options.force_page_break = 'h2'
|
||||||
@ -73,9 +80,9 @@ def process_file(path, options, logger=None):
|
|||||||
options.output = os.path.abspath(os.path.expanduser(options.output))
|
options.output = os.path.abspath(os.path.expanduser(options.output))
|
||||||
if not options.title:
|
if not options.title:
|
||||||
options.title = os.path.splitext(os.path.basename(path))[0]
|
options.title = os.path.splitext(os.path.basename(path))[0]
|
||||||
html_process_file(htmlfile.name, options, logger)
|
html_process_file(htmlfile, options, logger)
|
||||||
else:
|
else:
|
||||||
print open(htmlfile.name, 'rb').read()
|
print open(htmlfile, 'rb').read()
|
||||||
|
|
||||||
def main(args=sys.argv, logger=None):
|
def main(args=sys.argv, logger=None):
|
||||||
parser = option_parser()
|
parser = option_parser()
|
||||||
|
@ -200,10 +200,10 @@ class MetaInformation(object):
|
|||||||
Merge the information in C{mi} into self. In case of conflicts, the information
|
Merge the information in C{mi} into self. In case of conflicts, the information
|
||||||
in C{mi} takes precedence, unless the information in mi is NULL.
|
in C{mi} takes precedence, unless the information in mi is NULL.
|
||||||
'''
|
'''
|
||||||
if mi.title and mi.title.lower() != 'unknown':
|
if mi.title and mi.title != _('Unknown'):
|
||||||
self.title = mi.title
|
self.title = mi.title
|
||||||
|
|
||||||
if mi.authors and mi.authors[0].lower() != 'unknown':
|
if mi.authors and mi.authors[0] != _('Unknown'):
|
||||||
self.authors = mi.authors
|
self.authors = mi.authors
|
||||||
|
|
||||||
for attr in ('author_sort', 'title_sort', 'comments', 'category',
|
for attr in ('author_sort', 'title_sort', 'comments', 'category',
|
||||||
|
@ -12,7 +12,7 @@ try:
|
|||||||
except ImportError:
|
except ImportError:
|
||||||
import Image as PILImage
|
import Image as PILImage
|
||||||
|
|
||||||
from calibre import __appname__
|
from calibre import __appname__, entity_to_unicode
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||||
from calibre.ebooks.mobi import MobiError
|
from calibre.ebooks.mobi import MobiError
|
||||||
from calibre.ebooks.mobi.huffcdic import HuffReader
|
from calibre.ebooks.mobi.huffcdic import HuffReader
|
||||||
@ -263,17 +263,19 @@ class MobiReader(object):
|
|||||||
if ref.type.lower() == 'toc':
|
if ref.type.lower() == 'toc':
|
||||||
toc = ref.href()
|
toc = ref.href()
|
||||||
if toc:
|
if toc:
|
||||||
index = self.processed_html.find('<a name="%s"'%toc.partition('#')[-1])
|
index = self.processed_html.find('<a id="%s" name="%s"'%(toc.partition('#')[-1], toc.partition('#')[-1]))
|
||||||
tocobj = None
|
tocobj = None
|
||||||
|
ent_pat = re.compile(r'&(\S+?);')
|
||||||
if index > -1:
|
if index > -1:
|
||||||
raw = '<html><body>'+self.processed_html[index:]
|
raw = '<html><body>'+self.processed_html[index:]
|
||||||
soup = BeautifulSoup(raw)
|
soup = BeautifulSoup(raw)
|
||||||
tocobj = TOC()
|
tocobj = TOC()
|
||||||
for a in soup.findAll('a', href=True):
|
for a in soup.findAll('a', href=True):
|
||||||
try:
|
try:
|
||||||
text = ''.join(a.findAll(text=True)).strip()
|
text = u''.join(a.findAll(text=True)).strip()
|
||||||
except:
|
except:
|
||||||
text = ''
|
text = ''
|
||||||
|
text = ent_pat.sub(entity_to_unicode, text)
|
||||||
tocobj.add_item(toc.partition('#')[0], a['href'][1:], text)
|
tocobj.add_item(toc.partition('#')[0], a['href'][1:], text)
|
||||||
if tocobj is not None:
|
if tocobj is not None:
|
||||||
opf.set_toc(tocobj)
|
opf.set_toc(tocobj)
|
||||||
@ -353,7 +355,7 @@ class MobiReader(object):
|
|||||||
r = self.mobi_html.find('>', end)
|
r = self.mobi_html.find('>', end)
|
||||||
if r > -1 and r < l: # Move out of tag
|
if r > -1 and r < l: # Move out of tag
|
||||||
end = r+1
|
end = r+1
|
||||||
self.processed_html += self.mobi_html[pos:end] + '<a name="filepos%d"></a>'%oend
|
self.processed_html += self.mobi_html[pos:end] + '<a id="filepos%d" name="filepos%d"></a>'%(oend, oend)
|
||||||
pos = end
|
pos = end
|
||||||
|
|
||||||
self.processed_html += self.mobi_html[pos:]
|
self.processed_html += self.mobi_html[pos:]
|
||||||
|
@ -43,6 +43,7 @@ entry_points = {
|
|||||||
'fb22lrf = calibre.ebooks.lrf.fb2.convert_from:main',
|
'fb22lrf = calibre.ebooks.lrf.fb2.convert_from:main',
|
||||||
'fb2-meta = calibre.ebooks.metadata.fb2:main',
|
'fb2-meta = calibre.ebooks.metadata.fb2:main',
|
||||||
'any2lrf = calibre.ebooks.lrf.any.convert_from:main',
|
'any2lrf = calibre.ebooks.lrf.any.convert_from:main',
|
||||||
|
'any2epub = calibre.ebooks.epub.from_any:main',
|
||||||
'lrf2lrs = calibre.ebooks.lrf.lrfparser:main',
|
'lrf2lrs = calibre.ebooks.lrf.lrfparser:main',
|
||||||
'lrs2lrf = calibre.ebooks.lrf.lrs.convert_from:main',
|
'lrs2lrf = calibre.ebooks.lrf.lrs.convert_from:main',
|
||||||
'pdfreflow = calibre.ebooks.lrf.pdf.reflow:main',
|
'pdfreflow = calibre.ebooks.lrf.pdf.reflow:main',
|
||||||
@ -174,8 +175,10 @@ def setup_completion(fatal_errors):
|
|||||||
from calibre.ebooks.lrf.comic.convert_from import option_parser as comicop
|
from calibre.ebooks.lrf.comic.convert_from import option_parser as comicop
|
||||||
from calibre.ebooks.epub.from_html import option_parser as html2epub
|
from calibre.ebooks.epub.from_html import option_parser as html2epub
|
||||||
from calibre.ebooks.html import option_parser as html2oeb
|
from calibre.ebooks.html import option_parser as html2oeb
|
||||||
from calibre.ebooks.epub.from_feeds import option_parser as feeds2epub
|
from calibre.ebooks.epub.from_feeds import option_parser as feeds2epub
|
||||||
|
from calibre.ebooks.epub.from_any import option_parser as any2epub
|
||||||
|
any_formats = ['epub', 'htm', 'html', 'xhtml', 'xhtm', 'rar', 'zip',
|
||||||
|
'txt', 'lit', 'rtf', 'pdf', 'prc', 'mobi', 'fb2']
|
||||||
f = open_file('/etc/bash_completion.d/libprs500')
|
f = open_file('/etc/bash_completion.d/libprs500')
|
||||||
f.close()
|
f.close()
|
||||||
os.remove(f.name)
|
os.remove(f.name)
|
||||||
@ -193,9 +196,8 @@ def setup_completion(fatal_errors):
|
|||||||
f.write(opts_and_exts('mobi2lrf', htmlop, ['mobi', 'prc']))
|
f.write(opts_and_exts('mobi2lrf', htmlop, ['mobi', 'prc']))
|
||||||
f.write(opts_and_exts('fb22lrf', htmlop, ['fb2']))
|
f.write(opts_and_exts('fb22lrf', htmlop, ['fb2']))
|
||||||
f.write(opts_and_exts('pdf2lrf', htmlop, ['pdf']))
|
f.write(opts_and_exts('pdf2lrf', htmlop, ['pdf']))
|
||||||
f.write(opts_and_exts('any2lrf', htmlop,
|
f.write(opts_and_exts('any2lrf', htmlop, any_formats))
|
||||||
['epub', 'htm', 'html', 'xhtml', 'xhtm', 'rar', 'zip',
|
f.write(opts_and_exts('any2lrf', any2epub, any_formats))
|
||||||
'txt', 'lit', 'rtf', 'pdf', 'prc', 'mobi', 'fb2']))
|
|
||||||
f.write(opts_and_exts('lrf2lrs', lrf2lrsop, ['lrf']))
|
f.write(opts_and_exts('lrf2lrs', lrf2lrsop, ['lrf']))
|
||||||
f.write(opts_and_exts('lrf-meta', metaop, ['lrf']))
|
f.write(opts_and_exts('lrf-meta', metaop, ['lrf']))
|
||||||
f.write(opts_and_exts('rtf-meta', metaop, ['rtf']))
|
f.write(opts_and_exts('rtf-meta', metaop, ['rtf']))
|
||||||
|
@ -177,6 +177,12 @@ class Option(object):
|
|||||||
|
|
||||||
def __eq__(self, other):
|
def __eq__(self, other):
|
||||||
return self.name == getattr(other, 'name', other)
|
return self.name == getattr(other, 'name', other)
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return 'Option: '+self.name
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return repr(self)
|
||||||
|
|
||||||
class OptionValues(object):
|
class OptionValues(object):
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user