Implemented any2epub

2025-08-30 23:00:21 -04:00 · 2008-09-18 21:48:08 -07:00 · 2008-09-18 21:48:08 -07:00 · 896182b201
commit 896182b201
parent f0d9bded08
16 changed files with 332 additions and 69 deletions
--- a/installer/osx/freeze.py
+++ b/installer/osx/freeze.py
@ -317,7 +317,8 @@ def main():
                                       'mechanize', 'ClientForm', 'usbobserver',
                                       'genshi', 'calibre.web.feeds.recipes.*',
                                       'calibre.ebooks.lrf.any.*', 'calibre.ebooks.lrf.feeds.*',
-                                       'keyword', 'codeop', 'pydoc', 'readline'],
+                                       'keyword', 'codeop', 'pydoc', 'readline',
+                                       'BeautifulSoup'],
                         'packages' : ['PIL', 'Authorization', 'lxml'],
                         'excludes' : ['IPython'],
                         'plist'    : { 'CFBundleGetInfoString' : '''calibre, an E-book management application.'''
--- a/installer/windows/freeze.py
+++ b/installer/windows/freeze.py
@ -152,7 +152,7 @@ def main(args=sys.argv):
                                             'win32process', 'win32api', 'msvcrt',
                                             'win32event', 'calibre.ebooks.lrf.any.*',
                                             'calibre.ebooks.lrf.feeds.*',
-                                             'genshi',
+                                             'genshi', 'BeautifulSoup',
                                             'path', 'pydoc', 'IPython.Extensions.*',
                                             'calibre.web.feeds.recipes.*',
                                             'PyQt4.QtWebKit', 'PyQt4.QtNetwork',
--- a/src/calibre/init.py
+++ b/src/calibre/init.py
@ -317,6 +317,11 @@ class LoggingInterface:
    def log_exception(self, msg, *args):
        self.___log(self.__logger.exception, msg, args, {})

+def walk(dir):
+    ''' Yield the full path of every file found below `dir`, using os.walk. '''
+    for dirpath, _subdirs, filenames in os.walk(dir):
+        for name in filenames:
+            yield os.path.join(dirpath, name)

 def strftime(fmt, t=time.localtime()):
    ''' A version of strtime that returns unicode strings. '''
--- a/src/calibre/ebooks/epub/init.py
+++ b/src/calibre/ebooks/epub/init.py
@ -44,6 +44,7 @@ def config(defaults=None):
    c.add_opt('output', ['-o', '--output'], default=None,
             help=_('The output EPUB file. If not specified, it is derived from the input file name.'))
    
+    
    structure = c.add_group('structure detection', _('Control auto-detection of document structure.'))
    structure('chapter', ['--chapter'], default="//*[re:match(name(), 'h[1-2]') and re:test(., 'chapter|book|section', 'i')]",
            help=_('''\
@ -74,6 +75,16 @@ to auto-generate a Table of Contents.
    toc('no_chapters_in_toc', ['--no-chapters-in-toc'], default=False,
        help=_("Don't add auto-detected chapters to the Table of Contents."))
    
+    layout = c.add_group('page layout', _('Control page layout'))
+    layout('margin_top', ['--margin-top'], default=5.0, 
+           help=_('Set the top margin in pts. Default is %default'))
+    layout('margin_bottom', ['--margin-bottom'], default=5.0, 
+           help=_('Set the bottom margin in pts. Default is %default'))
+    layout('margin_left', ['--margin-left'], default=5.0, 
+           help=_('Set the left margin in pts. Default is %default'))
+    layout('margin_right', ['--margin-right'], default=5.0, 
+           help=_('Set the right margin in pts. Default is %default'))
+    
    c.add_opt('show_opf', ['--show-opf'], default=False, group='debug',
              help=_('Print generated OPF file to stdout'))
    c.add_opt('show_ncx', ['--show-ncx'], default=False, group='debug',
--- a/src/calibre/ebooks/epub/from_any.py
+++ b/src/calibre/ebooks/epub/from_any.py
@ -0,0 +1,154 @@
+from __future__ import with_statement
+__license__   = 'GPL v3'
+__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
+__docformat__ = 'restructuredtext en'
+
+'''
+Convert any ebook format to epub.
+'''
+
+import sys, os, re
+from contextlib import nested
+
+from calibre import extract, walk
+from calibre.ebooks.epub import config as common_config
+from calibre.ebooks.epub.from_html import convert as html2epub
+from calibre.ptempfile import TemporaryDirectory
+from calibre.ebooks.metadata import MetaInformation
+from calibre.ebooks.metadata.opf import OPFCreator
+
+def lit2opf(path, tdir, opts):
+    ''' Explode the LIT file at `path` into `tdir` and return the path of the
+    first .opf file found there. Falls through (returning None) if there is
+    none. `opts` is not used. '''
+    from calibre.ebooks.lit.reader import LitReader
+    print 'Exploding LIT file:', path
+    LitReader(path).extract_content(tdir, False)
+    for candidate in walk(tdir):
+        if candidate.lower().endswith('.opf'):
+            return candidate
+
+def mobi2opf(path, tdir, opts):
+    '''
+    Explode the MOBI file at `path` into `tdir` and return the path to an OPF file.
+
+    If the extracted content contains an OPF file, the first one found is
+    returned. Otherwise a minimal metadata.opf is created in `tdir`. Its
+    manifest and spine contain only the first HTML file found, its title is the
+    input file name without extension and its author is Unknown.
+    `opts` is not used.
+
+    :raises ValueError: If neither an OPF nor an HTML file was extracted.
+    '''
+    from calibre.ebooks.mobi.reader import MobiReader
+    print 'Exploding MOBI file:', path
+    reader = MobiReader(path)
+    reader.extract_content(tdir)
+    files = list(walk(tdir))
+    for f in files:
+        if f.lower().endswith('.opf'):
+            return f
+    # Anchored so that only .htm, .html, .xhtm and .xhtml extensions qualify
+    html_pat = re.compile(r'\.(x){0,1}htm(l){0,1}$', re.IGNORECASE)
+    hf = [f for f in files if html_pat.match(os.path.splitext(f)[1]) is not None]
+    if not hf:
+        # Fail with a clear message instead of an IndexError on hf[0] below
+        raise ValueError(_('Could not find any HTML content in the MOBI file'))
+    mi = MetaInformation(os.path.splitext(os.path.basename(path))[0], [_('Unknown')])
+    opf = OPFCreator(tdir, mi)
+    opf.create_manifest([(hf[0], None)])
+    opf.create_spine([hf[0]])
+    ans = os.path.join(tdir, 'metadata.opf')
+    # Close the file deterministically so the OPF is flushed before it is read
+    with open(ans, 'wb') as stream:
+        opf.render(stream)
+    return ans
+
+def fb22opf(path, tdir, opts):
+    ''' Convert the FB2 file at `path` to HTML inside `tdir` and return whatever
+    to_html returns. `opts` is not used. '''
+    from calibre.ebooks.lrf.fb2.convert_from import to_html as fb2_to_html
+    print 'Converting FB2 to HTML...'
+    result = fb2_to_html(path, tdir)
+    return result
+    
+def rtf2opf(path, tdir, opts):
+    ''' Run the RTF to HTML conversion of `path` with `tdir` as the output
+    directory and return the path <tdir>/metadata.opf. `opts` is not used. '''
+    from calibre.ebooks.lrf.rtf.convert_from import generate_html as rtf2html
+    rtf2html(path, tdir)
+    opf_path = os.path.join(tdir, 'metadata.opf')
+    return opf_path
+
+def txt2opf(path, tdir, opts):
+    ''' Run the text to HTML conversion of `path` (read using opts.encoding)
+    with `tdir` as the output directory and return the path <tdir>/metadata.opf. '''
+    from calibre.ebooks.lrf.txt.convert_from import generate_html as txt2html
+    txt2html(path, opts.encoding, tdir)
+    opf_path = os.path.join(tdir, 'metadata.opf')
+    return opf_path
+
+def pdf2opf(path, tdir, opts):
+    ''' Run the PDF to HTML conversion of `path` with `tdir` as the output
+    directory and return the path <tdir>/metadata.opf. `opts` is not used. '''
+    from calibre.ebooks.lrf.pdf.convert_from import generate_html as pdf2html
+    pdf2html(path, tdir)
+    opf_path = os.path.join(tdir, 'metadata.opf')
+    return opf_path
+
+# Maps a lowercase input file extension to the function that converts a file of
+# that type. Every function is called as func(path, tdir, opts) by any2epub,
+# which then treats the returned path as an OPF file.
+MAP = {
+       'lit'  : lit2opf,
+       'mobi' : mobi2opf,
+       'prc'  : mobi2opf,
+       'fb2'  : fb22opf,
+       'rtf'  : rtf2opf,
+       'txt'  : txt2opf,
+       'pdf'  : pdf2opf,
+       }
+    
+
+def unarchive(path, tdir):
+    '''
+    Extract the archive at `path` into `tdir` and locate the ebook inside it.
+
+    OPF files and the formats in MAP are looked for first. Failing that, an
+    HTML file is chosen: one named toc or index if present, otherwise the
+    largest one.
+
+    :return: The tuple (path to the file found, its lowercase extension without the dot)
+    :raises ValueError: If no candidate file is found in the archive.
+    '''
+    extract(path, tdir)
+    files = list(walk(tdir))
+
+    for ext in ['opf'] + list(MAP.keys()):
+        for f in files:
+            if f.lower().endswith('.'+ext):
+                # Skip txt/rtf files under 2KB; presumably these are readmes
+                # rather than the book itself
+                if ext in ['txt', 'rtf'] and os.stat(f).st_size < 2048:
+                    continue
+                return f, ext
+    html_pat = re.compile(r'\.(x){0,1}htm(l){0,1}$', re.IGNORECASE)
+    html_files = [f for f in files if html_pat.search(f) is not None]
+    if not html_files:
+        raise ValueError(_('Could not find an ebook inside the archive'))
+    # Ascending by size, so that the largest HTML file ends up last
+    html_files.sort(key=lambda f: os.stat(f).st_size)
+    for q in ('toc', 'index'):
+        for f in html_files:
+            # f is a full path below tdir, so only its base name can equal q
+            if os.path.splitext(os.path.basename(f))[0].lower() == q:
+                return f, os.path.splitext(f)[1].lower()[1:]
+    return html_files[-1], os.path.splitext(html_files[-1])[1].lower()[1:]
+
+def any2epub(opts, path, notification=None):
+    '''
+    Convert the ebook at `path` to EPUB using the settings in `opts`.
+
+    rar/zip archives are unpacked first and files whose extension is in MAP are
+    converted by the matching function. The resulting HTML or OPF file is then
+    handed to html2epub. If opts.output is None it is set to the input file
+    name with a .epub extension.
+
+    :param notification: Passed through unchanged to html2epub.
+    :raises ValueError: If `path` has no extension or its format is not supported.
+    '''
+    ext = os.path.splitext(path)[1]
+    if not ext:
+        raise ValueError('Unknown file type: '+path)
+    ext = ext.lower()[1:]
+
+    if opts.output is None:
+        opts.output = os.path.splitext(os.path.basename(path))[0]+'.epub'
+
+    with nested(TemporaryDirectory('_any2epub1'), TemporaryDirectory('_any2epub2')) as (tdir1, tdir2):
+        if ext in ['rar', 'zip']:
+            path, ext = unarchive(path, tdir1)
+            print 'Found %s file in archive'%(ext.upper())
+
+        if ext in MAP:
+            path = MAP[ext](path, tdir2, opts)
+            ext = 'opf'
+
+        # Anchored so that only htm, html, xhtm, xhtml and opf are accepted
+        if re.match(r'((x){0,1}htm(l){0,1}|opf)$', ext) is None:
+            raise ValueError('Conversion from %s is not supported'%ext.upper())
+
+        print 'Creating EPUB file...'
+        html2epub(path, opts, notification=notification)
+
+def config(defaults=None):
+    ''' Return the result of the common EPUB config(), called with `defaults`. '''
+    conf = common_config(defaults=defaults)
+    return conf
+
+
+def formats():
+    ''' Return the list html, rar, zip followed by every extension in MAP. '''
+    supported = ['html', 'rar', 'zip']
+    supported.extend(MAP.keys())
+    return supported
+
+def option_parser():
+    ''' Build the option parser from config(), with a usage message that lists
+    the supported input formats as a comma separated string. '''
+    return config().option_parser(usage=_('''\
+%%prog [options] filename
+
+Convert any of a large number of ebook formats to an epub file. Supported formats are: %s
+''')%', '.join(formats())
+)
+
+def main(args=sys.argv):
+    ''' Command line entry point: convert args[1] to EPUB. Returns 0 after the
+    conversion, or 1 (after printing the help) if no input file was given. '''
+    parser = option_parser()
+    opts, args = parser.parse_args(args)
+    if len(args) >= 2:
+        any2epub(opts, args[1])
+        return 0
+    parser.print_help()
+    print 'No input file specified.'
+    return 1
+
+if __name__ == '__main__':
+    sys.exit(main())
--- a/src/calibre/ebooks/epub/from_feeds.py
+++ b/src/calibre/ebooks/epub/from_feeds.py
@ -29,7 +29,6 @@ def option_parser():
 def convert(opts, recipe_arg, notification=None):
    opts.lrf  = False
    opts.epub = True
-    opts.chapter_mark = 'none'
    if opts.debug:
        opts.verbose = 2
    parser = option_parser()
@ -40,6 +39,7 @@ def convert(opts, recipe_arg, notification=None):
        recipe_opts = c.parse_string(recipe.html2epub_options)
        c.smart_update(recipe_opts, opts)
        opts = recipe_opts
+        opts.chapter_mark = 'none'
        opf = glob.glob(os.path.join(tdir, '*.opf'))
        if not opf:
            raise Exception('Downloading of recipe: %s failed'%recipe_arg)
--- a/src/calibre/ebooks/epub/from_html.py
+++ b/src/calibre/ebooks/epub/from_html.py
@ -4,7 +4,12 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 import os, sys, re, shutil, cStringIO
+
 from lxml.etree import XPath
+try:
+    from PIL import Image as PILImage
+except ImportError:
+    import Image as PILImage

 from calibre.ebooks.html import Processor, get_text, merge_metadata, get_filelist,\
    opf_traverse, create_metadata, rebase_toc
@ -106,8 +111,8 @@ def convert(htmlfile, opts, notification=None):
            cover_src = opts.cover
        
        if cover_src is not None:
-            cover_dest = os.path.join(tdir, 'content', 'resources', '_cover_'+os.path.splitext(cover_src)[1])
-            shutil.copyfile(cover_src, cover_dest)
+            cover_dest = os.path.join(tdir, 'content', 'resources', '_cover_.jpg')
+            PILImage.open(cover_src).convert('RGB').save(cover_dest)
            mi.cover = cover_dest
            resources.append(cover_dest)
            
--- a/src/calibre/ebooks/html.py
+++ b/src/calibre/ebooks/html.py
@ -23,6 +23,7 @@ from calibre.utils.config import Config, StringConfig
 from calibre.ebooks.metadata.opf import OPFReader, OPFCreator
 from calibre.ebooks.metadata import MetaInformation
 from calibre.ebooks.metadata.meta import get_metadata
+from calibre.ebooks.metadata.opf2 import OPF
 from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile
 from calibre.utils.zipfile import ZipFile

@ -280,7 +281,7 @@ class PreProcessor(object):
        return re.search('<H2[^><]*id=BookTitle', raw) is not None
    
    def is_pdftohtml(self, src):
-        return src.startswith('<!-- created by calibre\'s pdftohtml -->')
+        return '<!-- created by calibre\'s pdftohtml -->' in src[:1000]
                          
    def preprocess(self, html):
        if self.is_baen(html):
@ -335,6 +336,7 @@ class Parser(PreProcessor, LoggingInterface):
                                pretty_print=self.opts.pretty_print,
                                include_meta_content_type=True)
            ans = re.compile(r'<html>', re.IGNORECASE).sub('<html xmlns="http://www.w3.org/1999/xhtml">', ans)
+            ans = re.compile(r'<head[^<>]*?>', re.IGNORECASE).sub('<head>\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />\n', ans)
            f.write(ans)
            return f.name

@ -360,6 +362,8 @@ class Parser(PreProcessor, LoggingInterface):
        body = self.root.xpath('//body')
        if body:
            self.body = body[0]
+        for a in self.root.xpath('//a[@name]'):
+            a.set('id', a.get('name'))
    
    def debug_tree(self, name):
        '''
@ -540,7 +544,6 @@ class Processor(Parser):
            css.append('#%s { %s }'%(id, setting))
            
        for elem in self.root.xpath('//*[@style]'):
-            if 'id' not in elem.keys():
            id = get_id(elem, counter)
            counter += 1
            css.append('#%s {%s}'%(id, elem.get('style')))
@ -548,8 +551,13 @@ class Processor(Parser):
            
        self.raw_css = '\n\n'.join(css)
        self.css = unicode(self.raw_css)
+        self.do_layout()
        # TODO: Figure out what to do about CSS imports from linked stylesheets
        
+    def do_layout(self):
+        '''
+        Append page layout rules to self.css: the body margins are set to zero
+        and the @page margins (in pts) are taken from the margin_top,
+        margin_bottom, margin_left and margin_right options in self.opts.
+        '''
+        self.css += '\nbody {margin-top: 0pt; margin-bottom: 0pt; margin-left: 0pt; margin-right: 0pt}\n'
+        self.css += '@page {margin-top: %fpt; margin-bottom: %fpt; margin-left: %fpt; margin-right: %fpt}\n'%(
+            self.opts.margin_top, self.opts.margin_bottom, self.opts.margin_left, self.opts.margin_right)
+
 def config(defaults=None, config_name='html', 
           desc=_('Options to control the traversal of HTML')):
    if defaults is None:
@ -575,6 +583,8 @@ def config(defaults=None, config_name='html',
             help=_('Set the title. Default is to autodetect.'))
    metadata('authors', ['-a', '--authors'], default=_('Unknown'),
             help=_('The author(s) of the ebook, as a comma separated list.'))
+    metadata('from_opf', ['--metadata-from'], default=None,
+              help=_('Load metadata from the specified OPF file'))
        
    debug = c.add_group('debug', _('Options useful for debugging'))
    debug('verbose', ['-v', '--verbose'], default=0, action='count',
@ -648,7 +658,12 @@ def merge_metadata(htmlfile, opf, opts):
    if opf:
        mi = MetaInformation(opf)
    else:
+        try:
            mi =  get_metadata(open(htmlfile, 'rb'), 'html')
+        except:
+            mi = MetaInformation(None, None)
+    if opts.from_opf is not None and os.access(opts.from_opf, os.R_OK):
+        mi.smart_update(OPF(open(opts.from_opf, 'rb'), os.path.abspath(os.path.dirname(opts.from_opf))))
    if opts.title:
        mi.title = opts.title
    if opts.authors != _('Unknown'):
--- a/src/calibre/ebooks/lrf/fb2/convert_from.py
+++ b/src/calibre/ebooks/lrf/fb2/convert_from.py
@ -1,16 +1,22 @@
+from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2008, Anatoly Shipitsin <norguhtar at gmail.com>'
 """
 Convert .fb2 files to .lrf
 """
-import os, sys, tempfile, shutil, logging
+import os, sys, shutil, logging
 from base64 import b64decode
+from lxml import etree
    
 from calibre.ebooks.lrf import option_parser as lrf_option_parser
 from calibre.ebooks.metadata.meta import get_metadata
 from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
-from calibre import setup_cli_handlers, __appname__
+from calibre import setup_cli_handlers
 from calibre.resources import fb2_xsl
+from calibre.ptempfile import PersistentTemporaryDirectory
+from calibre.ebooks.metadata.opf import OPFCreator
+from calibre.ebooks.metadata import MetaInformation
+

 def option_parser():
    parser = lrf_option_parser(
@ -31,28 +37,41 @@ def extract_embedded_content(doc):
            data = b64decode(elem.text.strip())
            open(fname, 'wb').write(data)

-def generate_html(fb2file, encoding, logger):
-    from lxml import etree
-    tdir = tempfile.mkdtemp(prefix=__appname__+'_fb2_')
-    cwd = os.getcwdu()
-    os.chdir(tdir)
+def to_html(fb2file, tdir):
+    cwd = os.getcwd()
    try:
-        logger.info('Parsing XML...')
+        os.chdir(tdir)
+        print 'Parsing XML...'
        parser = etree.XMLParser(recover=True, no_network=True)
        doc = etree.parse(fb2file, parser)
        extract_embedded_content(doc)
-        logger.info('Converting XML to HTML...')
+        print 'Converting XML to HTML...'
        styledoc = etree.fromstring(fb2_xsl)
    
        transform = etree.XSLT(styledoc)
        result = transform(doc)
-        html = os.path.join(tdir, 'index.html')
-        f = open(html, 'wb')
-        f.write(transform.tostring(result))
-        f.close()
+        open('index.html', 'wb').write(transform.tostring(result))
+        try:
+            mi = get_metadata(open(fb2file, 'rb'))
+        except:
+            mi = MetaInformation(None, None)
+        if not mi.title:
+            mi.title = os.path.splitext(os.path.basename(fb2file))[0]
+        if not mi.authors:
+            mi.authors = [_('Unknown')]
+        opf = OPFCreator(tdir, mi)
+        opf.create_manifest([('index.html', None)])
+        opf.create_spine(['index.html'])
+        opf.render(open('metadata.opf', 'wb'))
+        return os.path.join(tdir, 'metadata.opf')
    finally:
        os.chdir(cwd)
-    return html
+
+    
+def generate_html(fb2file, encoding, logger):
+    ''' Convert `fb2file` with to_html into a new persistent temporary directory
+    and return the path to index.html in that directory. `encoding` and `logger`
+    are accepted but not used. '''
+    workdir = PersistentTemporaryDirectory('_fb22lrf')
+    to_html(fb2file, workdir)
+    index = os.path.join(workdir, 'index.html')
+    return index
    
 def process_file(path, options, logger=None):
    if logger is None:
--- a/src/calibre/ebooks/lrf/pdf/convert_from.py
+++ b/src/calibre/ebooks/lrf/pdf/convert_from.py
@ -9,6 +9,9 @@ from calibre.ebooks import ConversionError
 from calibre.ptempfile import PersistentTemporaryDirectory
 from calibre.ebooks.lrf import option_parser as lrf_option_parser
 from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
+from calibre.ebooks.metadata import MetaInformation
+from calibre.ebooks.metadata.opf import OPFCreator
+from calibre.ebooks.metadata.pdf import get_metadata

 PDFTOHTML = 'pdftohtml'
 popen = subprocess.Popen
@ -20,7 +23,7 @@ if iswindows and hasattr(sys, 'frozen'):
 if islinux and getattr(sys, 'frozen_path', False):
    PDFTOHTML = os.path.join(getattr(sys, 'frozen_path'), 'pdftohtml')

-def generate_html(pathtopdf, logger):
+def generate_html(pathtopdf, tdir):
    '''
    Convert the pdf into html.
    @return: Path to a temporary file containing the HTML.
@ -29,10 +32,10 @@ def generate_html(pathtopdf, logger):
        pathtopdf = pathtopdf.encode(sys.getfilesystemencoding())
    if not os.access(pathtopdf, os.R_OK):
        raise ConversionError, 'Cannot read from ' + pathtopdf
-    tdir = PersistentTemporaryDirectory('pdftohtml')
    index = os.path.join(tdir, 'index.html')
    # This is neccessary as pdftohtml doesn't always (linux) respect absolute paths
-    cmd = (PDFTOHTML, '-enc', 'UTF-8',  '-noframes',  '-p',  '-nomerge',  pathtopdf, os.path.basename(index))
+    pathtopdf = os.path.abspath(pathtopdf)
+    cmd = (PDFTOHTML, '-enc', 'UTF-8',  '-noframes',  '-p',  '-nomerge',  '-nodrm', pathtopdf, os.path.basename(index))
    cwd = os.getcwd()
    
    try:
@ -44,16 +47,30 @@ def generate_html(pathtopdf, logger):
                raise ConversionError(_('Could not find pdftohtml, check it is in your PATH'), True)
            else:
                raise
-        logger.info(p.stdout.read())
+        print p.stdout.read()
        ret = p.wait()
        if ret != 0:
            err = p.stderr.read()
            raise ConversionError, err
        if not os.path.exists(index) or os.stat(index).st_size < 100:
            raise ConversionError(os.path.basename(pathtopdf) + _(' does not allow copying of text.'), True)
-        raw = open(index).read(4000)
-        if not '<br' in raw:
+        
+        raw = open(index, 'rb').read()
+        open(index, 'wb').write('<!-- created by calibre\'s pdftohtml -->\n'+raw)
+        if not '<br' in raw[:4000]:
            raise ConversionError(os.path.basename(pathtopdf) + _(' is an image based PDF. Only conversion of text based PDFs is supported.'), True)
+        try:
+            mi = get_metadata(open(pathtopdf, 'rb'))
+        except:
+            mi = MetaInformation(None, None)
+        if not mi.title:
+            mi.title = os.path.splitext(os.path.basename(pathtopdf))[0]
+        if not mi.authors:
+            mi.authors = [_('Unknown')]
+        opf = OPFCreator(tdir, mi)
+        opf.create_manifest([('index.html', None)])
+        opf.create_spine(['index.html'])
+        opf.render(open('metadata.opf', 'wb'))
    finally:
        os.chdir(cwd)
    return index
@ -72,7 +89,8 @@ def process_file(path, options, logger=None):
        logger = logging.getLogger('pdf2lrf')
        setup_cli_handlers(logger, level)
    pdf = os.path.abspath(os.path.expanduser(path))
-    htmlfile = generate_html(pdf, logger)
+    tdir = PersistentTemporaryDirectory('_pdf2lrf')
+    htmlfile = generate_html(pdf, tdir)
    if not options.output:
        ext = '.lrs' if options.lrs else '.lrf'        
        options.output = os.path.abspath(os.path.basename(os.path.splitext(path)[0]) + ext)
--- a/src/calibre/ebooks/lrf/rtf/convert_from.py
+++ b/src/calibre/ebooks/lrf/rtf/convert_from.py
@ -1,17 +1,20 @@
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
-import os, sys, tempfile, shutil, logging, glob
+import os, sys, shutil, logging, glob

 from lxml import etree

 from calibre.ebooks.lrf import option_parser as lrf_option_parser
 from calibre.ebooks.metadata.meta import get_metadata
 from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
-from calibre import setup_cli_handlers, __appname__
+from calibre import setup_cli_handlers
 from calibre.libwand import convert, WandException
 from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
 from calibre.ebooks.lrf.rtf.xsl import xhtml
 from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
+from calibre.ptempfile import PersistentTemporaryDirectory
+from calibre.ebooks.metadata import MetaInformation
+from calibre.ebooks.metadata.opf import OPFCreator 

 def option_parser():
    parser = lrf_option_parser(
@ -44,8 +47,8 @@ def process_file(path, options, logger=None):
    f = open(rtf, 'rb')
    mi = get_metadata(f, 'rtf')
    f.close()
-    html = generate_html(rtf, logger)
-    tdir = os.path.dirname(html)
+    tdir = PersistentTemporaryDirectory('_rtf2lrf')
+    html = generate_html(rtf, tdir)
    cwd = os.getcwdu()
    try:
        if not options.output:
@ -83,12 +86,12 @@ def main(args=sys.argv, logger=None):
    return 0
    

-def generate_xml(rtfpath):
+def generate_xml(rtfpath, tdir):
    from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
-    tdir = tempfile.mkdtemp(prefix=__appname__+'_')
    ofile = os.path.join(tdir, 'index.xml')
    cwd = os.getcwdu()
    os.chdir(tdir)
+    rtfpath = os.path.abspath(rtfpath)
    try:
        parser = ParseRtf(
            in_file    = rtfpath,
@ -134,26 +137,27 @@ def generate_xml(rtfpath):
    return ofile


-def generate_html(rtfpath, logger):
-    logger.info('Converting RTF to XML...')
+def generate_html(rtfpath, tdir):
+    print 'Converting RTF to XML...'
+    rtfpath = os.path.abspath(rtfpath)
    try:
-        xml = generate_xml(rtfpath)
+        xml = generate_xml(rtfpath, tdir)
    except RtfInvalidCodeException:
        raise Exception(_('This RTF file has a feature calibre does not support. Convert it to HTML and then convert it.'))
    tdir = os.path.dirname(xml)
    cwd = os.getcwdu()
    os.chdir(tdir)
    try:
-        logger.info('Parsing XML...')
+        print 'Parsing XML...'
        parser = etree.XMLParser(recover=True, no_network=True)
        try:
            doc = etree.parse(xml, parser)
        except:
            raise
-            logger.info('Parsing failed. Trying to clean up XML...')
+            print 'Parsing failed. Trying to clean up XML...'
            soup = BeautifulStoneSoup(open(xml, 'rb').read())
            doc = etree.fromstring(str(soup))
-        logger.info('Converting XML to HTML...')
+        print 'Converting XML to HTML...'
        styledoc = etree.fromstring(xhtml)
        
        transform = etree.XSLT(styledoc)
@ -161,8 +165,22 @@ def generate_html(rtfpath, logger):
        tdir = os.path.dirname(xml)
        html = os.path.join(tdir, 'index.html')
        f = open(html, 'wb')
-        f.write(transform.tostring(result))
+        res = transform.tostring(result)
+        res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
+        f.write(res)
        f.close()
+        try:
+            mi = get_metadata(open(rtfpath, 'rb'))
+        except:
+            mi = MetaInformation(None, None)
+        if not mi.title:
+            mi.title = os.path.splitext(os.path.basename(rtfpath))[0]
+        if not mi.authors:
+            mi.authors = [_('Unknown')]
+        opf = OPFCreator(tdir, mi)
+        opf.create_manifest([('index.html', None)])
+        opf.create_spine(['index.html'])
+        opf.render(open('metadata.opf', 'wb'))
    finally:
        os.chdir(cwd)
    return html
--- a/src/calibre/ebooks/lrf/txt/convert_from.py
+++ b/src/calibre/ebooks/lrf/txt/convert_from.py
@ -5,12 +5,14 @@ Convert .txt files to .lrf
 """
 import os, sys, codecs, logging

-from calibre.ptempfile import PersistentTemporaryFile
+from calibre.ptempfile import PersistentTemporaryDirectory
 from calibre.ebooks.lrf import option_parser as lrf_option_parser
 from calibre.ebooks import ConversionError
 from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
 from calibre.ebooks.markdown import markdown
 from calibre import setup_cli_handlers
+from calibre.ebooks.metadata import MetaInformation
+from calibre.ebooks.metadata.opf import OPFCreator

 def option_parser():
    parser = lrf_option_parser(
@ -23,7 +25,7 @@ _('''%prog [options] mybook.txt
    return parser
    

-def generate_html(txtfile, encoding, logger):
+def generate_html(txtfile, encoding, tdir):
    '''
    Convert txtfile to html and return a PersistentTemporaryFile object pointing
    to the file with the HTML.
@ -44,15 +46,19 @@ def generate_html(txtfile, encoding, logger):
    else:
        txt = codecs.open(txtfile, 'rb', enc).read()
    
-    logger.info('Converting text to HTML...')
+    print 'Converting text to HTML...'
    md = markdown.Markdown(
                       extensions=['footnotes', 'tables', 'toc'],
                       safe_mode=False,
                       )
-    html = md.convert(txt)
-    p = PersistentTemporaryFile('.html', dir=os.path.dirname(txtfile))
-    p.close()
-    codecs.open(p.name, 'wb', 'utf8').write(html)
+    html = '<html><body>'+md.convert(txt)+'</body></html>'
+    p = os.path.join(tdir, 'index.html')
+    open(p, 'wb').write(html.encode('utf-8'))
+    mi = MetaInformation(os.path.splitext(os.path.basename(txtfile))[0], [_('Unknown')])
+    opf = OPFCreator(tdir, mi)
+    opf.create_manifest([(os.path.join(tdir, 'index.html'), None)])
+    opf.create_spine([os.path.join(tdir, 'index.html')])
+    opf.render(open(os.path.join(tdir, 'metadata.opf'), 'wb'))
    return p
        
 def process_file(path, options, logger=None):
@ -63,7 +69,8 @@ def process_file(path, options, logger=None):
    txt = os.path.abspath(os.path.expanduser(path))
    if not hasattr(options, 'debug_html_generation'):
        options.debug_html_generation = False
-    htmlfile = generate_html(txt, options.encoding, logger)
+    tdir = PersistentTemporaryDirectory('_txt2lrf')
+    htmlfile = generate_html(txt, options.encoding, tdir)
    options.encoding = 'utf-8'
    if not options.debug_html_generation:
        options.force_page_break = 'h2'
@ -73,9 +80,9 @@ def process_file(path, options, logger=None):
        options.output = os.path.abspath(os.path.expanduser(options.output))
        if not options.title:
            options.title = os.path.splitext(os.path.basename(path))[0]
-        html_process_file(htmlfile.name, options, logger)
+        html_process_file(htmlfile, options, logger)
    else:
-        print open(htmlfile.name, 'rb').read()        
+        print open(htmlfile, 'rb').read()        

 def main(args=sys.argv, logger=None):
    parser = option_parser()    
--- a/src/calibre/ebooks/metadata/init.py
+++ b/src/calibre/ebooks/metadata/init.py
@ -200,10 +200,10 @@ class MetaInformation(object):
        Merge the information in C{mi} into self. In case of conflicts, the information
        in C{mi} takes precedence, unless the information in mi is NULL.
        '''
-        if mi.title and mi.title.lower() != 'unknown':
+        if mi.title and mi.title != _('Unknown'):
            self.title = mi.title
            
-        if mi.authors and mi.authors[0].lower() != 'unknown':
+        if mi.authors and mi.authors[0] != _('Unknown'):
            self.authors = mi.authors
            
        for attr in ('author_sort', 'title_sort', 'comments', 'category',
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@ -12,7 +12,7 @@ try:
 except ImportError:
    import Image as PILImage

-from calibre import __appname__
+from calibre import __appname__, entity_to_unicode
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
 from calibre.ebooks.mobi import MobiError
 from calibre.ebooks.mobi.huffcdic import HuffReader
@ -263,17 +263,19 @@ class MobiReader(object):
                if ref.type.lower() == 'toc':
                    toc = ref.href()
        if toc:
-            index = self.processed_html.find('<a name="%s"'%toc.partition('#')[-1])
+            index = self.processed_html.find('<a id="%s" name="%s"'%(toc.partition('#')[-1], toc.partition('#')[-1]))
            tocobj = None
+            ent_pat = re.compile(r'&(\S+?);')
            if index > -1:
                raw = '<html><body>'+self.processed_html[index:]
                soup = BeautifulSoup(raw)
                tocobj = TOC()
                for a in soup.findAll('a', href=True):
                    try:
-                        text = ''.join(a.findAll(text=True)).strip()
+                        text = u''.join(a.findAll(text=True)).strip()
                    except:
                        text = ''
+                    text = ent_pat.sub(entity_to_unicode, text)
                    tocobj.add_item(toc.partition('#')[0], a['href'][1:], text)
            if tocobj is not None:
                opf.set_toc(tocobj)
@ -353,7 +355,7 @@ class MobiReader(object):
            r = self.mobi_html.find('>', end)
            if r > -1 and r < l: # Move out of tag
                end = r+1
-            self.processed_html += self.mobi_html[pos:end] + '<a name="filepos%d"></a>'%oend 
+            self.processed_html += self.mobi_html[pos:end] + '<a id="filepos%d" name="filepos%d"></a>'%(oend, oend) 
            pos = end
            
        self.processed_html += self.mobi_html[pos:]
--- a/src/calibre/linux.py
+++ b/src/calibre/linux.py
@ -43,6 +43,7 @@ entry_points = {
                             'fb22lrf   = calibre.ebooks.lrf.fb2.convert_from:main',
                             'fb2-meta  = calibre.ebooks.metadata.fb2:main',
                             'any2lrf   = calibre.ebooks.lrf.any.convert_from:main',
+                             'any2epub  = calibre.ebooks.epub.from_any:main',
                             'lrf2lrs   = calibre.ebooks.lrf.lrfparser:main',
                             'lrs2lrf   = calibre.ebooks.lrf.lrs.convert_from:main',
                             'pdfreflow = calibre.ebooks.lrf.pdf.reflow:main',
@ -175,7 +176,9 @@ def setup_completion(fatal_errors):
        from calibre.ebooks.epub.from_html import option_parser as html2epub
        from calibre.ebooks.html import option_parser as html2oeb
        from calibre.ebooks.epub.from_feeds import option_parser as feeds2epub
-
+        from calibre.ebooks.epub.from_any import option_parser as any2epub 
+        any_formats = ['epub', 'htm', 'html', 'xhtml', 'xhtm', 'rar', 'zip',
+             'txt', 'lit', 'rtf', 'pdf', 'prc', 'mobi', 'fb2'] 
        f = open_file('/etc/bash_completion.d/libprs500')
        f.close()
        os.remove(f.name)
@ -193,9 +196,8 @@ def setup_completion(fatal_errors):
        f.write(opts_and_exts('mobi2lrf', htmlop, ['mobi', 'prc']))
        f.write(opts_and_exts('fb22lrf', htmlop, ['fb2']))
        f.write(opts_and_exts('pdf2lrf', htmlop, ['pdf']))
-        f.write(opts_and_exts('any2lrf', htmlop,
-            ['epub', 'htm', 'html', 'xhtml', 'xhtm', 'rar', 'zip',
-             'txt', 'lit', 'rtf', 'pdf', 'prc', 'mobi', 'fb2']))
+        f.write(opts_and_exts('any2lrf', htmlop, any_formats))
+        f.write(opts_and_exts('any2lrf', any2epub, any_formats))
        f.write(opts_and_exts('lrf2lrs', lrf2lrsop, ['lrf']))
        f.write(opts_and_exts('lrf-meta', metaop, ['lrf']))
        f.write(opts_and_exts('rtf-meta', metaop, ['rtf']))
--- a/src/calibre/utils/config.py
+++ b/src/calibre/utils/config.py
@ -178,6 +178,12 @@ class Option(object):
    def __eq__(self, other):
        return self.name == getattr(other, 'name', other)
    
+    def __repr__(self):
+        ''' Identify the option by its name. '''
+        prefix = 'Option: '
+        return prefix + self.name
+    
+    def __str__(self):
+        ''' Same text as repr(). '''
+        text = repr(self)
+        return text
+        
 class OptionValues(object):
    
    def copy(self):