diff --git a/installer/osx/freeze.py b/installer/osx/freeze.py
index 11aa858b02..f1e7c1bb33 100644
--- a/installer/osx/freeze.py
+++ b/installer/osx/freeze.py
@@ -317,7 +317,8 @@ def main():
'mechanize', 'ClientForm', 'usbobserver',
'genshi', 'calibre.web.feeds.recipes.*',
'calibre.ebooks.lrf.any.*', 'calibre.ebooks.lrf.feeds.*',
- 'keyword', 'codeop', 'pydoc', 'readline'],
+ 'keyword', 'codeop', 'pydoc', 'readline',
+ 'BeautifulSoup'],
'packages' : ['PIL', 'Authorization', 'lxml'],
'excludes' : ['IPython'],
'plist' : { 'CFBundleGetInfoString' : '''calibre, an E-book management application.'''
diff --git a/installer/windows/freeze.py b/installer/windows/freeze.py
index a181ba0228..70f15676c2 100644
--- a/installer/windows/freeze.py
+++ b/installer/windows/freeze.py
@@ -152,7 +152,7 @@ def main(args=sys.argv):
'win32process', 'win32api', 'msvcrt',
'win32event', 'calibre.ebooks.lrf.any.*',
'calibre.ebooks.lrf.feeds.*',
- 'genshi',
+ 'genshi', 'BeautifulSoup',
'path', 'pydoc', 'IPython.Extensions.*',
'calibre.web.feeds.recipes.*',
'PyQt4.QtWebKit', 'PyQt4.QtNetwork',
diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py
index 5e7a2df8eb..1a70a6969c 100644
--- a/src/calibre/__init__.py
+++ b/src/calibre/__init__.py
@@ -317,6 +317,11 @@ class LoggingInterface:
def log_exception(self, msg, *args):
self.___log(self.__logger.exception, msg, args, {})
+def walk(dir):
+    ''' Yield the full path of every file found below ``dir``, recursing via os.walk. '''
+    for dirpath, _subdirs, filenames in os.walk(dir):
+        for name in filenames:
+            yield os.path.join(dirpath, name)
def strftime(fmt, t=time.localtime()):
''' A version of strtime that returns unicode strings. '''
diff --git a/src/calibre/ebooks/epub/__init__.py b/src/calibre/ebooks/epub/__init__.py
index 5bb2f0fe7c..2de4879b0f 100644
--- a/src/calibre/ebooks/epub/__init__.py
+++ b/src/calibre/ebooks/epub/__init__.py
@@ -44,6 +44,7 @@ def config(defaults=None):
c.add_opt('output', ['-o', '--output'], default=None,
help=_('The output EPUB file. If not specified, it is derived from the input file name.'))
+
structure = c.add_group('structure detection', _('Control auto-detection of document structure.'))
structure('chapter', ['--chapter'], default="//*[re:match(name(), 'h[1-2]') and re:test(., 'chapter|book|section', 'i')]",
help=_('''\
@@ -74,6 +75,16 @@ to auto-generate a Table of Contents.
toc('no_chapters_in_toc', ['--no-chapters-in-toc'], default=False,
help=_("Don't add auto-detected chapters to the Table of Contents."))
+ layout = c.add_group('page layout', _('Control page layout'))
+ layout('margin_top', ['--margin-top'], default=5.0,
+ help=_('Set the top margin in pts. Default is %default'))
+ layout('margin_bottom', ['--margin-bottom'], default=5.0,
+ help=_('Set the bottom margin in pts. Default is %default'))
+ layout('margin_left', ['--margin-left'], default=5.0,
+ help=_('Set the left margin in pts. Default is %default'))
+ layout('margin_right', ['--margin-right'], default=5.0,
+ help=_('Set the right margin in pts. Default is %default'))
+
c.add_opt('show_opf', ['--show-opf'], default=False, group='debug',
help=_('Print generated OPF file to stdout'))
c.add_opt('show_ncx', ['--show-ncx'], default=False, group='debug',
diff --git a/src/calibre/ebooks/epub/from_any.py b/src/calibre/ebooks/epub/from_any.py
new file mode 100644
index 0000000000..be1d69dba8
--- /dev/null
+++ b/src/calibre/ebooks/epub/from_any.py
@@ -0,0 +1,154 @@
+from __future__ import with_statement
+__license__ = 'GPL v3'
+__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
+__docformat__ = 'restructuredtext en'
+
+'''
+Convert any ebook format to epub.
+'''
+
+import sys, os, re
+from contextlib import nested
+
+from calibre import extract, walk
+from calibre.ebooks.epub import config as common_config
+from calibre.ebooks.epub.from_html import convert as html2epub
+from calibre.ptempfile import TemporaryDirectory
+from calibre.ebooks.metadata import MetaInformation
+from calibre.ebooks.metadata.opf import OPFCreator
+
+def lit2opf(path, tdir, opts):
+    '''
+    Explode the LIT file at ``path`` into ``tdir`` and return the path of the
+    first OPF file found there. Returns None if no OPF file is found.
+    ``opts`` is not used.
+    '''
+    from calibre.ebooks.lit.reader import LitReader
+    print 'Exploding LIT file:', path
+    LitReader(path).extract_content(tdir, False)
+    for candidate in walk(tdir):
+        if candidate.lower().endswith('.opf'):
+            return candidate
+    return None
+
+def mobi2opf(path, tdir, opts):
+    '''
+    Explode the MOBI file at ``path`` into ``tdir`` and return the path to an
+    OPF file describing it.
+
+    If the extraction produced an OPF file, the first one found is returned.
+    Otherwise a minimal metadata.opf is created in ``tdir`` around the first
+    extracted HTML file, using the file name of ``path`` as the title.
+    ``opts`` is not used.
+
+    Raises ValueError if neither an OPF nor an HTML file was extracted.
+    '''
+    from calibre.ebooks.mobi.reader import MobiReader
+    print 'Exploding MOBI file:', path
+    reader = MobiReader(path)
+    reader.extract_content(tdir)
+    files = list(walk(tdir))
+    for f in files:
+        if f.lower().endswith('.opf'):
+            return f
+    # Anchored so that only .htm, .html, .xhtm and .xhtml extensions qualify
+    html_pat = re.compile(r'\.(x){0,1}htm(l){0,1}$', re.IGNORECASE)
+    hf = [f for f in files if html_pat.match(os.path.splitext(f)[1]) is not None]
+    if not hf:
+        # Fail with a clear message instead of an IndexError on hf[0]
+        raise ValueError('Could not find any HTML content in: '+path)
+    mi = MetaInformation(os.path.splitext(os.path.basename(path))[0], [_('Unknown')])
+    opf = OPFCreator(tdir, mi)
+    opf.create_manifest([(hf[0], None)])
+    opf.create_spine([hf[0]])
+    ans = os.path.join(tdir, 'metadata.opf')
+    # Close the file explicitly so the OPF is fully written before it is read
+    with open(ans, 'wb') as stream:
+        opf.render(stream)
+    return ans
+
+def fb22opf(path, tdir, opts):
+    '''
+    Convert the FB2 file at ``path`` by calling to_html(path, tdir) and
+    return its result. ``opts`` is not used.
+    '''
+    from calibre.ebooks.lrf.fb2 import convert_from as fb2
+    print 'Converting FB2 to HTML...'
+    opf_path = fb2.to_html(path, tdir)
+    return opf_path
+
+def rtf2opf(path, tdir, opts):
+    '''
+    Call the RTF generate_html(path, tdir) and return the path
+    ``tdir``/metadata.opf (presumably written there by generate_html).
+    ``opts`` is not used.
+    '''
+    from calibre.ebooks.lrf.rtf import convert_from as rtf
+    rtf.generate_html(path, tdir)
+    opf_path = os.path.join(tdir, 'metadata.opf')
+    return opf_path
+
+def txt2opf(path, tdir, opts):
+    '''
+    Call the TXT generate_html(path, opts.encoding, tdir) and return the path
+    ``tdir``/metadata.opf.
+    '''
+    from calibre.ebooks.lrf.txt import convert_from as txt
+    txt.generate_html(path, opts.encoding, tdir)
+    opf_path = os.path.join(tdir, 'metadata.opf')
+    return opf_path
+
+def pdf2opf(path, tdir, opts):
+    '''
+    Call the PDF generate_html(path, tdir) and return the path
+    ``tdir``/metadata.opf (presumably written there by generate_html).
+    ``opts`` is not used.
+    '''
+    from calibre.ebooks.lrf.pdf import convert_from as pdf
+    pdf.generate_html(path, tdir)
+    opf_path = os.path.join(tdir, 'metadata.opf')
+    return opf_path
+
+# Maps a lower case file extension (without the dot) to the function that
+# turns a file of that type into an OPF based HTML book. Each function is
+# called as f(path, tdir, opts) and its return value is used as the path of
+# the OPF file. unarchive() also iterates over these keys when it looks for
+# an ebook inside an archive.
+MAP = {
+ 'lit' : lit2opf,
+ 'mobi' : mobi2opf,
+ 'prc' : mobi2opf,
+ 'fb2' : fb22opf,
+ 'rtf' : rtf2opf,
+ 'txt' : txt2opf,
+ 'pdf' : pdf2opf,
+ }
+
+
+def unarchive(path, tdir):
+    '''
+    Extract the archive at ``path`` into ``tdir`` and choose the file to convert.
+
+    An OPF file, or a file in one of the formats listed in MAP, is preferred
+    (TXT and RTF files smaller than 2048 bytes are skipped). Failing that, an
+    HTML file named toc or index is used and, as a last resort, the largest
+    HTML file in the archive.
+
+    Returns the tuple (path to the chosen file, its lower case extension
+    without the dot). Raises ValueError if no suitable file is found.
+    '''
+    extract(path, tdir)
+    files = list(walk(tdir))
+
+    for ext in ['opf'] + list(MAP.keys()):
+        for f in files:
+            if f.lower().endswith('.'+ext):
+                if ext in ['txt', 'rtf'] and os.stat(f).st_size < 2048:
+                    continue
+                return f, ext
+    html_pat = re.compile(r'\.(x){0,1}htm(l){0,1}$', re.IGNORECASE)
+    html_files = [f for f in files if html_pat.search(f) is not None]
+    if not html_files:
+        raise ValueError(_('Could not find an ebook inside the archive'))
+    # Order by size, smallest first, so that the largest file ends up last
+    html_files.sort(key=lambda f: os.stat(f).st_size)
+    for q in ('toc', 'index'):
+        for f in html_files:
+            # walk() yields full paths, so only the file name may be compared
+            if os.path.splitext(os.path.basename(f))[0].lower() == q:
+                return f, os.path.splitext(f)[1].lower()[1:]
+    return html_files[-1], os.path.splitext(html_files[-1])[1].lower()[1:]
+
+def any2epub(opts, path, notification=None):
+    '''
+    Convert the ebook at ``path`` to EPUB.
+
+    RAR and ZIP archives are unpacked first and the ebook found inside is
+    used. Files in a format listed in MAP are converted to an OPF based HTML
+    book in a temporary directory. The resulting HTML or OPF file is handed to
+    html2epub together with ``opts`` and ``notification``. If ``opts.output``
+    is None it is set to the base name of ``path`` with the extension .epub.
+
+    Raises ValueError if ``path`` has no extension or its format is not
+    supported.
+    '''
+    ext = os.path.splitext(path)[1]
+    if not ext:
+        raise ValueError('Unknown file type: '+path)
+    ext = ext.lower()[1:]
+
+    if opts.output is None:
+        opts.output = os.path.splitext(os.path.basename(path))[0]+'.epub'
+
+    with nested(TemporaryDirectory('_any2epub1'), TemporaryDirectory('_any2epub2')) as (tdir1, tdir2):
+        if ext in ['rar', 'zip']:
+            path, ext = unarchive(path, tdir1)
+            print 'Found %s file in archive'%(ext.upper())
+
+        if ext in MAP:
+            path = MAP[ext](path, tdir2, opts)
+            ext = 'opf'
+
+        # The pattern is anchored at the end: re.match() only anchors at the
+        # start, so extensions that merely begin with htm/xhtm/opf would
+        # otherwise be accepted as well
+        if re.match(r'((x){0,1}htm(l){0,1}|opf)$', ext) is None:
+            raise ValueError('Conversion from %s is not supported'%ext.upper())
+
+        print 'Creating EPUB file...'
+        html2epub(path, opts, notification=notification)
+
+def config(defaults=None):
+    ''' Return the object created by calibre.ebooks.epub.config(), passing ``defaults`` through. '''
+    conf = common_config(defaults=defaults)
+    return conf
+
+
+def formats():
+    ''' Return a list with html, rar and zip followed by every key of MAP. '''
+    ans = ['html', 'rar', 'zip']
+    ans.extend(MAP.keys())
+    return ans
+
+def option_parser():
+    '''
+    Return the command line option parser built from config(), with a usage
+    message that lists the formats returned by formats().
+    '''
+    # Join the names so the usage text reads "html, rar, ..." rather than
+    # showing the repr() of a Python list
+    return config().option_parser(usage=_('''\
+%%prog [options] filename
+
+Convert any of a large number of ebook formats to an epub file. Supported formats are: %s
+''')%', '.join(formats())
+)
+
+def main(args=sys.argv):
+    '''
+    Command line entry point. Converts the file named by the first positional
+    argument and returns 0; prints the help and returns 1 if it is missing.
+    '''
+    parser = option_parser()
+    opts, args = parser.parse_args(args)
+    if len(args) >= 2:
+        any2epub(opts, args[1])
+        return 0
+    parser.print_help()
+    print 'No input file specified.'
+    return 1
+
+if __name__ == '__main__':
+ sys.exit(main())
\ No newline at end of file
diff --git a/src/calibre/ebooks/epub/from_feeds.py b/src/calibre/ebooks/epub/from_feeds.py
index ce72388066..9e90b85469 100644
--- a/src/calibre/ebooks/epub/from_feeds.py
+++ b/src/calibre/ebooks/epub/from_feeds.py
@@ -29,7 +29,6 @@ def option_parser():
def convert(opts, recipe_arg, notification=None):
opts.lrf = False
opts.epub = True
- opts.chapter_mark = 'none'
if opts.debug:
opts.verbose = 2
parser = option_parser()
@@ -40,6 +39,7 @@ def convert(opts, recipe_arg, notification=None):
recipe_opts = c.parse_string(recipe.html2epub_options)
c.smart_update(recipe_opts, opts)
opts = recipe_opts
+ opts.chapter_mark = 'none'
opf = glob.glob(os.path.join(tdir, '*.opf'))
if not opf:
raise Exception('Downloading of recipe: %s failed'%recipe_arg)
diff --git a/src/calibre/ebooks/epub/from_html.py b/src/calibre/ebooks/epub/from_html.py
index ae5124d031..1824c83f41 100644
--- a/src/calibre/ebooks/epub/from_html.py
+++ b/src/calibre/ebooks/epub/from_html.py
@@ -4,7 +4,12 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
import os, sys, re, shutil, cStringIO
+
from lxml.etree import XPath
+try:
+ from PIL import Image as PILImage
+except ImportError:
+ import Image as PILImage
from calibre.ebooks.html import Processor, get_text, merge_metadata, get_filelist,\
opf_traverse, create_metadata, rebase_toc
@@ -106,8 +111,8 @@ def convert(htmlfile, opts, notification=None):
cover_src = opts.cover
if cover_src is not None:
- cover_dest = os.path.join(tdir, 'content', 'resources', '_cover_'+os.path.splitext(cover_src)[1])
- shutil.copyfile(cover_src, cover_dest)
+ cover_dest = os.path.join(tdir, 'content', 'resources', '_cover_.jpg')
+ PILImage.open(cover_src).convert('RGB').save(cover_dest)
mi.cover = cover_dest
resources.append(cover_dest)
diff --git a/src/calibre/ebooks/html.py b/src/calibre/ebooks/html.py
index c414a97f37..9c9c6b7274 100644
--- a/src/calibre/ebooks/html.py
+++ b/src/calibre/ebooks/html.py
@@ -23,6 +23,7 @@ from calibre.utils.config import Config, StringConfig
from calibre.ebooks.metadata.opf import OPFReader, OPFCreator
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.meta import get_metadata
+from calibre.ebooks.metadata.opf2 import OPF
from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile
from calibre.utils.zipfile import ZipFile
@@ -280,7 +281,7 @@ class PreProcessor(object):
return re.search('
<]*id=BookTitle', raw) is not None
def is_pdftohtml(self, src):
- return src.startswith('')
+ return '' in src[:1000]
def preprocess(self, html):
if self.is_baen(html):
@@ -335,6 +336,7 @@ class Parser(PreProcessor, LoggingInterface):
pretty_print=self.opts.pretty_print,
include_meta_content_type=True)
ans = re.compile(r'', re.IGNORECASE).sub('', ans)
+ ans = re.compile(r']*?>', re.IGNORECASE).sub('\n\n', ans)
f.write(ans)
return f.name
@@ -360,6 +362,8 @@ class Parser(PreProcessor, LoggingInterface):
body = self.root.xpath('//body')
if body:
self.body = body[0]
+ for a in self.root.xpath('//a[@name]'):
+ a.set('id', a.get('name'))
def debug_tree(self, name):
'''
@@ -540,15 +544,19 @@ class Processor(Parser):
css.append('#%s { %s }'%(id, setting))
for elem in self.root.xpath('//*[@style]'):
- if 'id' not in elem.keys():
- id = get_id(elem, counter)
- counter += 1
+ id = get_id(elem, counter)
+ counter += 1
css.append('#%s {%s}'%(id, elem.get('style')))
elem.attrib.pop('style')
self.raw_css = '\n\n'.join(css)
self.css = unicode(self.raw_css)
- # TODO: Figure out what to do about CSS imports from linked stylesheets
+ self.do_layout()
+ # TODO: Figure out what to do about CSS imports from linked stylesheets
+
+    def do_layout(self):
+        '''
+        Append the page layout rules to self.css: every body margin is set to
+        0pt and the @page margins are taken from the margin_top,
+        margin_bottom, margin_left and margin_right options.
+        '''
+        # The property name is margin-bottom; a misspelt name is ignored by CSS parsers
+        self.css += '\nbody {margin-top: 0pt; margin-bottom: 0pt; margin-left: 0pt; margin-right: 0pt}\n'
+        self.css += '@page {margin-top: %fpt; margin-bottom: %fpt; margin-left: %fpt; margin-right: %fpt}\n'%(self.opts.margin_top, self.opts.margin_bottom, self.opts.margin_left, self.opts.margin_right)
def config(defaults=None, config_name='html',
desc=_('Options to control the traversal of HTML')):
@@ -575,6 +583,8 @@ def config(defaults=None, config_name='html',
help=_('Set the title. Default is to autodetect.'))
metadata('authors', ['-a', '--authors'], default=_('Unknown'),
help=_('The author(s) of the ebook, as a comma separated list.'))
+ metadata('from_opf', ['--metadata-from'], default=None,
+ help=_('Load metadata from the specified OPF file'))
debug = c.add_group('debug', _('Options useful for debugging'))
debug('verbose', ['-v', '--verbose'], default=0, action='count',
@@ -648,7 +658,12 @@ def merge_metadata(htmlfile, opf, opts):
if opf:
mi = MetaInformation(opf)
else:
- mi = get_metadata(open(htmlfile, 'rb'), 'html')
+ try:
+ mi = get_metadata(open(htmlfile, 'rb'), 'html')
+ except:
+ mi = MetaInformation(None, None)
+ if opts.from_opf is not None and os.access(opts.from_opf, os.R_OK):
+ mi.smart_update(OPF(open(opts.from_opf, 'rb'), os.path.abspath(os.path.dirname(opts.from_opf))))
if opts.title:
mi.title = opts.title
if opts.authors != _('Unknown'):
diff --git a/src/calibre/ebooks/lrf/fb2/convert_from.py b/src/calibre/ebooks/lrf/fb2/convert_from.py
index 27c55757be..dde1ce78e0 100644
--- a/src/calibre/ebooks/lrf/fb2/convert_from.py
+++ b/src/calibre/ebooks/lrf/fb2/convert_from.py
@@ -1,16 +1,22 @@
+from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Anatoly Shipitsin '
"""
Convert .fb2 files to .lrf
"""
-import os, sys, tempfile, shutil, logging
+import os, sys, shutil, logging
from base64 import b64decode
-
+from lxml import etree
+
from calibre.ebooks.lrf import option_parser as lrf_option_parser
from calibre.ebooks.metadata.meta import get_metadata
from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
-from calibre import setup_cli_handlers, __appname__
+from calibre import setup_cli_handlers
from calibre.resources import fb2_xsl
+from calibre.ptempfile import PersistentTemporaryDirectory
+from calibre.ebooks.metadata.opf import OPFCreator
+from calibre.ebooks.metadata import MetaInformation
+
def option_parser():
parser = lrf_option_parser(
@@ -31,29 +37,42 @@ def extract_embedded_content(doc):
data = b64decode(elem.text.strip())
open(fname, 'wb').write(data)
-def generate_html(fb2file, encoding, logger):
- from lxml import etree
- tdir = tempfile.mkdtemp(prefix=__appname__+'_fb2_')
- cwd = os.getcwdu()
- os.chdir(tdir)
+def to_html(fb2file, tdir):
+    '''
+    Convert the FB2 file ``fb2file`` to HTML inside the directory ``tdir``.
+
+    Writes index.html and metadata.opf into ``tdir`` and returns the path to
+    metadata.opf. If no metadata can be read, the title falls back to the file
+    name of ``fb2file`` and the author to Unknown. The working directory is
+    changed to ``tdir`` during the conversion and restored afterwards.
+    '''
+    # Resolve the input path before the chdir() below; a relative path would
+    # otherwise be looked up inside tdir
+    fb2file = os.path.abspath(fb2file)
+    cwd = os.getcwd()
     try:
-        logger.info('Parsing XML...')
+        os.chdir(tdir)
+        print 'Parsing XML...'
         parser = etree.XMLParser(recover=True, no_network=True)
         doc = etree.parse(fb2file, parser)
         extract_embedded_content(doc)
-        logger.info('Converting XML to HTML...')
+        print 'Converting XML to HTML...'
         styledoc = etree.fromstring(fb2_xsl)
-
+
         transform = etree.XSLT(styledoc)
         result = transform(doc)
-        html = os.path.join(tdir, 'index.html')
-        f = open(html, 'wb')
-        f.write(transform.tostring(result))
-        f.close()
+        open('index.html', 'wb').write(transform.tostring(result))
+        try:
+            # NOTE(review): the stream type is passed explicitly, as other
+            # callers of get_metadata do; confirm 'fb2' is the registered name
+            mi = get_metadata(open(fb2file, 'rb'), 'fb2')
+        except:
+            mi = MetaInformation(None, None)
+        if not mi.title:
+            mi.title = os.path.splitext(os.path.basename(fb2file))[0]
+        if not mi.authors:
+            mi.authors = [_('Unknown')]
+        opf = OPFCreator(tdir, mi)
+        opf.create_manifest([('index.html', None)])
+        opf.create_spine(['index.html'])
+        opf.render(open('metadata.opf', 'wb'))
+        return os.path.join(tdir, 'metadata.opf')
     finally:
         os.chdir(cwd)
- return html
-
+
+
+def generate_html(fb2file, encoding, logger):
+    '''
+    Convert ``fb2file`` with to_html() into a directory created by
+    PersistentTemporaryDirectory and return the path to index.html in it.
+    ``encoding`` and ``logger`` are not used.
+    '''
+    dest = PersistentTemporaryDirectory('_fb22lrf')
+    to_html(fb2file, dest)
+    html = os.path.join(dest, 'index.html')
+    return html
+
def process_file(path, options, logger=None):
if logger is None:
level = logging.DEBUG if options.verbose else logging.INFO
diff --git a/src/calibre/ebooks/lrf/pdf/convert_from.py b/src/calibre/ebooks/lrf/pdf/convert_from.py
index 1d1260de2c..5bec2265b7 100644
--- a/src/calibre/ebooks/lrf/pdf/convert_from.py
+++ b/src/calibre/ebooks/lrf/pdf/convert_from.py
@@ -9,6 +9,9 @@ from calibre.ebooks import ConversionError
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.ebooks.lrf import option_parser as lrf_option_parser
from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
+from calibre.ebooks.metadata import MetaInformation
+from calibre.ebooks.metadata.opf import OPFCreator
+from calibre.ebooks.metadata.pdf import get_metadata
PDFTOHTML = 'pdftohtml'
popen = subprocess.Popen
@@ -20,7 +23,7 @@ if iswindows and hasattr(sys, 'frozen'):
if islinux and getattr(sys, 'frozen_path', False):
PDFTOHTML = os.path.join(getattr(sys, 'frozen_path'), 'pdftohtml')
-def generate_html(pathtopdf, logger):
+def generate_html(pathtopdf, tdir):
'''
Convert the pdf into html.
@return: Path to a temporary file containing the HTML.
@@ -29,10 +32,10 @@ def generate_html(pathtopdf, logger):
pathtopdf = pathtopdf.encode(sys.getfilesystemencoding())
if not os.access(pathtopdf, os.R_OK):
raise ConversionError, 'Cannot read from ' + pathtopdf
- tdir = PersistentTemporaryDirectory('pdftohtml')
index = os.path.join(tdir, 'index.html')
# This is neccessary as pdftohtml doesn't always (linux) respect absolute paths
- cmd = (PDFTOHTML, '-enc', 'UTF-8', '-noframes', '-p', '-nomerge', pathtopdf, os.path.basename(index))
+ pathtopdf = os.path.abspath(pathtopdf)
+ cmd = (PDFTOHTML, '-enc', 'UTF-8', '-noframes', '-p', '-nomerge', '-nodrm', pathtopdf, os.path.basename(index))
cwd = os.getcwd()
try:
@@ -44,16 +47,30 @@ def generate_html(pathtopdf, logger):
raise ConversionError(_('Could not find pdftohtml, check it is in your PATH'), True)
else:
raise
- logger.info(p.stdout.read())
+ print p.stdout.read()
ret = p.wait()
if ret != 0:
err = p.stderr.read()
raise ConversionError, err
if not os.path.exists(index) or os.stat(index).st_size < 100:
raise ConversionError(os.path.basename(pathtopdf) + _(' does not allow copying of text.'), True)
- raw = open(index).read(4000)
- if not '
\n'+raw)
+ if not '
'
+ p = os.path.join(tdir, 'index.html')
+ open(p, 'wb').write(html.encode('utf-8'))
+ mi = MetaInformation(os.path.splitext(os.path.basename(txtfile))[0], [_('Unknown')])
+ opf = OPFCreator(tdir, mi)
+ opf.create_manifest([(os.path.join(tdir, 'index.html'), None)])
+ opf.create_spine([os.path.join(tdir, 'index.html')])
+ opf.render(open(os.path.join(tdir, 'metadata.opf'), 'wb'))
return p
def process_file(path, options, logger=None):
@@ -63,7 +69,8 @@ def process_file(path, options, logger=None):
txt = os.path.abspath(os.path.expanduser(path))
if not hasattr(options, 'debug_html_generation'):
options.debug_html_generation = False
- htmlfile = generate_html(txt, options.encoding, logger)
+ tdir = PersistentTemporaryDirectory('_txt2lrf')
+ htmlfile = generate_html(txt, options.encoding, tdir)
options.encoding = 'utf-8'
if not options.debug_html_generation:
options.force_page_break = 'h2'
@@ -73,9 +80,9 @@ def process_file(path, options, logger=None):
options.output = os.path.abspath(os.path.expanduser(options.output))
if not options.title:
options.title = os.path.splitext(os.path.basename(path))[0]
- html_process_file(htmlfile.name, options, logger)
+ html_process_file(htmlfile, options, logger)
else:
- print open(htmlfile.name, 'rb').read()
+ print open(htmlfile, 'rb').read()
def main(args=sys.argv, logger=None):
parser = option_parser()
diff --git a/src/calibre/ebooks/metadata/__init__.py b/src/calibre/ebooks/metadata/__init__.py
index 934617a416..33fea3b3ab 100644
--- a/src/calibre/ebooks/metadata/__init__.py
+++ b/src/calibre/ebooks/metadata/__init__.py
@@ -200,10 +200,10 @@ class MetaInformation(object):
Merge the information in C{mi} into self. In case of conflicts, the information
in C{mi} takes precedence, unless the information in mi is NULL.
'''
- if mi.title and mi.title.lower() != 'unknown':
+ if mi.title and mi.title != _('Unknown'):
self.title = mi.title
- if mi.authors and mi.authors[0].lower() != 'unknown':
+ if mi.authors and mi.authors[0] != _('Unknown'):
self.authors = mi.authors
for attr in ('author_sort', 'title_sort', 'comments', 'category',
diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py
index c51e2b93b3..55c6be0ae9 100644
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@@ -12,7 +12,7 @@ try:
except ImportError:
import Image as PILImage
-from calibre import __appname__
+from calibre import __appname__, entity_to_unicode
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
from calibre.ebooks.mobi import MobiError
from calibre.ebooks.mobi.huffcdic import HuffReader
@@ -263,17 +263,19 @@ class MobiReader(object):
if ref.type.lower() == 'toc':
toc = ref.href()
if toc:
- index = self.processed_html.find(' -1:
raw = ''+self.processed_html[index:]
soup = BeautifulSoup(raw)
tocobj = TOC()
for a in soup.findAll('a', href=True):
try:
- text = ''.join(a.findAll(text=True)).strip()
+ text = u''.join(a.findAll(text=True)).strip()
except:
text = ''
+ text = ent_pat.sub(entity_to_unicode, text)
tocobj.add_item(toc.partition('#')[0], a['href'][1:], text)
if tocobj is not None:
opf.set_toc(tocobj)
@@ -353,7 +355,7 @@ class MobiReader(object):
r = self.mobi_html.find('>', end)
if r > -1 and r < l: # Move out of tag
end = r+1
- self.processed_html += self.mobi_html[pos:end] + ''%oend
+ self.processed_html += self.mobi_html[pos:end] + ''%(oend, oend)
pos = end
self.processed_html += self.mobi_html[pos:]
diff --git a/src/calibre/linux.py b/src/calibre/linux.py
index 04fcab70ba..7a9c869507 100644
--- a/src/calibre/linux.py
+++ b/src/calibre/linux.py
@@ -43,6 +43,7 @@ entry_points = {
'fb22lrf = calibre.ebooks.lrf.fb2.convert_from:main',
'fb2-meta = calibre.ebooks.metadata.fb2:main',
'any2lrf = calibre.ebooks.lrf.any.convert_from:main',
+ 'any2epub = calibre.ebooks.epub.from_any:main',
'lrf2lrs = calibre.ebooks.lrf.lrfparser:main',
'lrs2lrf = calibre.ebooks.lrf.lrs.convert_from:main',
'pdfreflow = calibre.ebooks.lrf.pdf.reflow:main',
@@ -174,8 +175,10 @@ def setup_completion(fatal_errors):
from calibre.ebooks.lrf.comic.convert_from import option_parser as comicop
from calibre.ebooks.epub.from_html import option_parser as html2epub
from calibre.ebooks.html import option_parser as html2oeb
- from calibre.ebooks.epub.from_feeds import option_parser as feeds2epub
-
+ from calibre.ebooks.epub.from_feeds import option_parser as feeds2epub
+ from calibre.ebooks.epub.from_any import option_parser as any2epub
+ any_formats = ['epub', 'htm', 'html', 'xhtml', 'xhtm', 'rar', 'zip',
+ 'txt', 'lit', 'rtf', 'pdf', 'prc', 'mobi', 'fb2']
f = open_file('/etc/bash_completion.d/libprs500')
f.close()
os.remove(f.name)
@@ -193,9 +196,8 @@ def setup_completion(fatal_errors):
f.write(opts_and_exts('mobi2lrf', htmlop, ['mobi', 'prc']))
f.write(opts_and_exts('fb22lrf', htmlop, ['fb2']))
f.write(opts_and_exts('pdf2lrf', htmlop, ['pdf']))
- f.write(opts_and_exts('any2lrf', htmlop,
- ['epub', 'htm', 'html', 'xhtml', 'xhtm', 'rar', 'zip',
- 'txt', 'lit', 'rtf', 'pdf', 'prc', 'mobi', 'fb2']))
+ f.write(opts_and_exts('any2lrf', htmlop, any_formats))
+ f.write(opts_and_exts('any2epub', any2epub, any_formats))
f.write(opts_and_exts('lrf2lrs', lrf2lrsop, ['lrf']))
f.write(opts_and_exts('lrf-meta', metaop, ['lrf']))
f.write(opts_and_exts('rtf-meta', metaop, ['rtf']))
diff --git a/src/calibre/utils/config.py b/src/calibre/utils/config.py
index 6037f9ab2e..9aef6b1f66 100644
--- a/src/calibre/utils/config.py
+++ b/src/calibre/utils/config.py
@@ -177,6 +177,12 @@ class Option(object):
def __eq__(self, other):
return self.name == getattr(other, 'name', other)
+
+ def __repr__(self):
+ # The literal prefix 'Option: ' followed by this option's name
+ return 'Option: '+self.name
+
+ def __str__(self):
+ # str() yields the same text as repr()
+ return repr(self)
class OptionValues(object):