Implemented any2epub

2025-07-09 03:04:10 -04:00 · 2008-09-18 21:48:08 -07:00 · 2008-09-18 21:48:08 -07:00 · 896182b201
commit 896182b201
parent f0d9bded08
16 changed files with 332 additions and 69 deletions
--- a/installer/osx/freeze.py
+++ b/installer/osx/freeze.py
@ -317,7 +317,8 @@ def main():
                                       'mechanize', 'ClientForm', 'usbobserver',
                                       'genshi', 'calibre.web.feeds.recipes.*',
                                       'calibre.ebooks.lrf.any.*', 'calibre.ebooks.lrf.feeds.*',
-                                       'keyword', 'codeop', 'pydoc', 'readline'],
+                                       'keyword', 'codeop', 'pydoc', 'readline',
                                       'BeautifulSoup'],
                         'packages' : ['PIL', 'Authorization', 'lxml'],
                         'excludes' : ['IPython'],
                         'plist'    : { 'CFBundleGetInfoString' : '''calibre, an E-book management application.'''
--- a/installer/windows/freeze.py
+++ b/installer/windows/freeze.py
@ -152,7 +152,7 @@ def main(args=sys.argv):
                                             'win32process', 'win32api', 'msvcrt',
                                             'win32event', 'calibre.ebooks.lrf.any.*',
                                             'calibre.ebooks.lrf.feeds.*',
-                                             'genshi',
+                                             'genshi', 'BeautifulSoup',
                                             'path', 'pydoc', 'IPython.Extensions.*',
                                             'calibre.web.feeds.recipes.*',
                                             'PyQt4.QtWebKit', 'PyQt4.QtNetwork',
--- a/src/calibre/init.py
+++ b/src/calibre/init.py
@ -317,6 +317,11 @@ class LoggingInterface:
    def log_exception(self, msg, *args):
        '''
        Log ``msg`` through the ``exception`` method of this object's logger.
        The positional ``args`` are handed on, together with an empty keyword
        dictionary, to the private ``___log`` helper.
        '''
        self.___log(self.__logger.exception, msg, args, {})
 def walk(dir):
    '''
    Generator yielding the path of every file found below ``dir``.
    A flat interface to os.walk: each yielded path is the containing
    directory (as reported by os.walk) joined with the file name.
    Directories themselves are never yielded.
    '''
    for dirpath, _subdirs, filenames in os.walk(dir):
        for name in filenames:
            yield os.path.join(dirpath, name)
 def strftime(fmt, t=time.localtime()):
    ''' A version of strtime that returns unicode strings. '''
--- a/src/calibre/ebooks/epub/init.py
+++ b/src/calibre/ebooks/epub/init.py
@ -44,6 +44,7 @@ def config(defaults=None):
    c.add_opt('output', ['-o', '--output'], default=None,
             help=_('The output EPUB file. If not specified, it is derived from the input file name.'))
    structure = c.add_group('structure detection', _('Control auto-detection of document structure.'))
    structure('chapter', ['--chapter'], default="//*[re:match(name(), 'h[1-2]') and re:test(., 'chapter|book|section', 'i')]",
            help=_('''\
@ -74,6 +75,16 @@ to auto-generate a Table of Contents.
    toc('no_chapters_in_toc', ['--no-chapters-in-toc'], default=False,
        help=_("Don't add auto-detected chapters to the Table of Contents."))
    layout = c.add_group('page layout', _('Control page layout'))
    layout('margin_top', ['--margin-top'], default=5.0, 
           help=_('Set the top margin in pts. Default is %default'))
    layout('margin_bottom', ['--margin-bottom'], default=5.0, 
           help=_('Set the bottom margin in pts. Default is %default'))
    layout('margin_left', ['--margin-left'], default=5.0, 
           help=_('Set the left margin in pts. Default is %default'))
    layout('margin_right', ['--margin-right'], default=5.0, 
           help=_('Set the right margin in pts. Default is %default'))
    c.add_opt('show_opf', ['--show-opf'], default=False, group='debug',
              help=_('Print generated OPF file to stdout'))
    c.add_opt('show_ncx', ['--show-ncx'], default=False, group='debug',
--- a/src/calibre/ebooks/epub/from_any.py
+++ b/src/calibre/ebooks/epub/from_any.py
@ -0,0 +1,154 @@
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 '''
 Convert any ebook format to epub.
 '''
 import sys, os, re
 from contextlib import nested
 from calibre import extract, walk
 from calibre.ebooks.epub import config as common_config
 from calibre.ebooks.epub.from_html import convert as html2epub
 from calibre.ptempfile import TemporaryDirectory
 from calibre.ebooks.metadata import MetaInformation
 from calibre.ebooks.metadata.opf import OPFCreator
 def lit2opf(path, tdir, opts):
    from calibre.ebooks.lit.reader import LitReader
    print 'Exploding LIT file:', path
    reader = LitReader(path)
    reader.extract_content(tdir, False)
    for f in walk(tdir):
        if f.lower().endswith('.opf'):
            return f
 def mobi2opf(path, tdir, opts):
    from calibre.ebooks.mobi.reader import MobiReader
    print 'Exploding MOBI file:', path
    reader = MobiReader(path)
    reader.extract_content(tdir)
    files = list(walk(tdir))
    for f in files:
        if f.lower().endswith('.opf'):
            return f
    html_pat = re.compile(r'\.(x){0,1}htm(l){0,1}', re.IGNORECASE)
    hf = [f for f in files if html_pat.match(os.path.splitext(f)[1]) is not None]
    mi = MetaInformation(os.path.splitext(os.path.basename(path))[0], [_('Unknown')])
    opf = OPFCreator(tdir, mi)
    opf.create_manifest([(hf[0], None)])
    opf.create_spine([hf[0]])
    ans = os.path.join(tdir, 'metadata.opf')
    opf.render(open(ans, 'wb'))
    return ans
 def fb22opf(path, tdir, opts):
    '''
    Convert the FB2 file at ``path`` via ``to_html`` working in ``tdir`` and
    return whatever ``to_html`` returns. ``opts`` is not used.
    '''
    from calibre.ebooks.lrf.fb2.convert_from import to_html
    print 'Converting FB2 to HTML...'
    return to_html(path, tdir)
 def rtf2opf(path, tdir, opts):
    '''
    Run the RTF to HTML converter on ``path`` with ``tdir`` as the working
    directory and return the path ``tdir``/metadata.opf. ``opts`` is not used.
    '''
    from calibre.ebooks.lrf.rtf.convert_from import generate_html
    generate_html(path, tdir)
    # NOTE(review): assumes generate_html wrote metadata.opf into tdir - confirm
    return os.path.join(tdir, 'metadata.opf')
 def txt2opf(path, tdir, opts):
    '''
    Run the TXT to HTML converter on ``path``, passing ``opts.encoding`` and
    ``tdir``, and return the path ``tdir``/metadata.opf.
    '''
    from calibre.ebooks.lrf.txt.convert_from import generate_html
    generate_html(path, opts.encoding, tdir)
    # NOTE(review): assumes generate_html wrote metadata.opf into tdir - confirm
    return os.path.join(tdir, 'metadata.opf')
 def pdf2opf(path, tdir, opts):
    '''
    Run the PDF to HTML converter on ``path`` with ``tdir`` as the working
    directory and return the path ``tdir``/metadata.opf. ``opts`` is not used.
    '''
    from calibre.ebooks.lrf.pdf.convert_from import generate_html
    generate_html(path, tdir)
    # NOTE(review): assumes generate_html wrote metadata.opf into tdir - confirm
    return os.path.join(tdir, 'metadata.opf')
 # Maps a lower case file extension (without the dot) to the function that
 # converts a file of that type into an OPF based tree. Every converter is
 # called as converter(path, tdir, opts).
 MAP = {
       'lit'  : lit2opf,
       'mobi' : mobi2opf,
       'prc'  : mobi2opf,
       'fb2'  : fb22opf,
       'rtf'  : rtf2opf,
       'txt'  : txt2opf,
       'pdf'  : pdf2opf,
       }
 def unarchive(path, tdir):
    '''
    Extract the archive at ``path`` into ``tdir`` and pick the file that most
    likely is the ebook.
    Candidates are tried in this order: an OPF file, then any file whose
    extension is a key of MAP (TXT and RTF files smaller than 2048 bytes are
    skipped), then HTML files. Among HTML files one named ``toc`` or
    ``index`` is preferred, otherwise the largest one is used.
    Returns a tuple ``(path_to_file, ext)`` where ``ext`` is the lower case
    extension without the leading dot.
    Raises ValueError if no candidate is found in the archive.
    '''
    extract(path, tdir)
    files = list(walk(tdir))
    for ext in ['opf'] + list(MAP.keys()):
        for f in files:
            if f.lower().endswith('.'+ext):
                if ext in ['txt', 'rtf'] and os.stat(f).st_size < 2048:
                    continue
                return f, ext
    html_pat = re.compile(r'\.(x){0,1}htm(l){0,1}$', re.IGNORECASE)
    html_files = [f for f in files if html_pat.search(f) is not None]
    if not html_files:
        raise ValueError(_('Could not find an ebook inside the archive'))
    # Smallest first, so that the last entry is the largest HTML file
    html_files.sort(key=lambda f: os.stat(f).st_size)
    for q in ('toc', 'index'):
        for f in html_files:
            # walk() yields full paths, so only the file name may be compared;
            # comparing the whole path stem could never equal 'toc'/'index'.
            if os.path.splitext(os.path.basename(f))[0].lower() == q:
                return f, os.path.splitext(f)[1].lower()[1:]
    return html_files[-1], os.path.splitext(html_files[-1])[1].lower()[1:]
 def any2epub(opts, path, notification=None):
    ext = os.path.splitext(path)[1]
    if not ext:
        raise ValueError('Unknown file type: '+path)
    ext = ext.lower()[1:]
    if opts.output is None:
        opts.output = os.path.splitext(os.path.basename(path))[0]+'.epub'
    with nested(TemporaryDirectory('_any2epub1'), TemporaryDirectory('_any2epub2')) as (tdir1, tdir2):
        if ext in ['rar', 'zip']:
            path, ext = unarchive(path, tdir1)
            print 'Found %s file in archive'%(ext.upper())
        if ext in MAP.keys():
            path = MAP[ext](path, tdir2, opts)
            ext = 'opf'
        if re.match(r'((x){0,1}htm(l){0,1})|opf', ext) is None:
            raise ValueError('Conversion from %s is not supported'%ext.upper())
        print 'Creating EPUB file...'
        html2epub(path, opts, notification=notification)
 def config(defaults=None):
    '''
    Return the configuration shared by the EPUB converters, built by
    calibre.ebooks.epub.config with ``defaults`` passed through.
    '''
    return common_config(defaults=defaults)
 def formats():
    '''
    Return the list of accepted input extensions: html, rar and zip followed
    by every key of MAP.
    '''
    return ['html', 'rar', 'zip']+list(MAP.keys())
 def option_parser():
    '''
    Build the command line option parser for any2epub from config().
    The usage text lists the supported input formats, as returned by
    formats(), as a comma separated list.
    '''
    usage = _('''\
 %%prog [options] filename
 Convert any of a large number of ebook formats to an epub file. Supported formats are: %s
 ''')
    # Join the formats so the help shows "html, rar, ..." rather than the
    # repr of a Python list.
    return config().option_parser(usage=usage%', '.join(formats()))
 def main(args=sys.argv):
    parser = option_parser()
    opts, args = parser.parse_args(args)
    if len(args) < 2:
        parser.print_help()
        print 'No input file specified.'
        return 1
    any2epub(opts, args[1])
    return 0
 # When run as a script, use the return value of main() as the exit status.
 if __name__ == '__main__':
    sys.exit(main())
--- a/src/calibre/ebooks/epub/from_feeds.py
+++ b/src/calibre/ebooks/epub/from_feeds.py
@ -29,7 +29,6 @@ def option_parser():
 def convert(opts, recipe_arg, notification=None):
    opts.lrf  = False
    opts.epub = True
    opts.chapter_mark = 'none'
    if opts.debug:
        opts.verbose = 2
    parser = option_parser()
@ -40,6 +39,7 @@ def convert(opts, recipe_arg, notification=None):
        recipe_opts = c.parse_string(recipe.html2epub_options)
        c.smart_update(recipe_opts, opts)
        opts = recipe_opts
        opts.chapter_mark = 'none'
        opf = glob.glob(os.path.join(tdir, '*.opf'))
        if not opf:
            raise Exception('Downloading of recipe: %s failed'%recipe_arg)
--- a/src/calibre/ebooks/epub/from_html.py
+++ b/src/calibre/ebooks/epub/from_html.py
@ -4,7 +4,12 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 import os, sys, re, shutil, cStringIO
 from lxml.etree import XPath
 try:
    from PIL import Image as PILImage
 except ImportError:
    import Image as PILImage
 from calibre.ebooks.html import Processor, get_text, merge_metadata, get_filelist,\
    opf_traverse, create_metadata, rebase_toc
@ -106,8 +111,8 @@ def convert(htmlfile, opts, notification=None):
            cover_src = opts.cover
        if cover_src is not None:
-            cover_dest = os.path.join(tdir, 'content', 'resources', '_cover_'+os.path.splitext(cover_src)[1])
+            cover_dest = os.path.join(tdir, 'content', 'resources', '_cover_.jpg')
-            shutil.copyfile(cover_src, cover_dest)
+            PILImage.open(cover_src).convert('RGB').save(cover_dest)
            mi.cover = cover_dest
            resources.append(cover_dest)
--- a/src/calibre/ebooks/html.py
+++ b/src/calibre/ebooks/html.py
@ -23,6 +23,7 @@ from calibre.utils.config import Config, StringConfig
 from calibre.ebooks.metadata.opf import OPFReader, OPFCreator
 from calibre.ebooks.metadata import MetaInformation
 from calibre.ebooks.metadata.meta import get_metadata
 from calibre.ebooks.metadata.opf2 import OPF
 from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile
 from calibre.utils.zipfile import ZipFile
@ -280,7 +281,7 @@ class PreProcessor(object):
        return re.search('<H2[^><]*id=BookTitle', raw) is not None
    def is_pdftohtml(self, src):
-        return src.startswith('<!-- created by calibre\'s pdftohtml -->')
+        return '<!-- created by calibre\'s pdftohtml -->' in src[:1000]
    def preprocess(self, html):
        if self.is_baen(html):
@ -335,6 +336,7 @@ class Parser(PreProcessor, LoggingInterface):
                                pretty_print=self.opts.pretty_print,
                                include_meta_content_type=True)
            ans = re.compile(r'<html>', re.IGNORECASE).sub('<html xmlns="http://www.w3.org/1999/xhtml">', ans)
            ans = re.compile(r'<head[^<>]*?>', re.IGNORECASE).sub('<head>\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />\n', ans)
            f.write(ans)
            return f.name
@ -360,6 +362,8 @@ class Parser(PreProcessor, LoggingInterface):
        body = self.root.xpath('//body')
        if body:
            self.body = body[0]
        for a in self.root.xpath('//a[@name]'):
            a.set('id', a.get('name'))
    def debug_tree(self, name):
        '''
@ -540,15 +544,19 @@ class Processor(Parser):
            css.append('#%s { %s }'%(id, setting))
        for elem in self.root.xpath('//*[@style]'):
-            if 'id' not in elem.keys():
+            id = get_id(elem, counter)
-                id = get_id(elem, counter)
+            counter += 1
                counter += 1 
            css.append('#%s {%s}'%(id, elem.get('style')))
            elem.attrib.pop('style')
        self.raw_css = '\n\n'.join(css)
        self.css = unicode(self.raw_css)
-        # TODO: Figure out what to do about CSS imports from linked stylesheets    
+        self.do_layout()
        # TODO: Figure out what to do about CSS imports from linked stylesheets
    def do_layout(self):
        '''
        Append the page layout rules to ``self.css``: all body margins are
        reset to 0pt and the ``@page`` margins are set, in pts, from the
        margin_top, margin_bottom, margin_left and margin_right options.
        '''
        # The property is margin-bottom; the misspelt "margin-botton" made
        # both bottom margin declarations invalid CSS.
        self.css += '\nbody {margin-top: 0pt; margin-bottom: 0pt; margin-left: 0pt; margin-right: 0pt}\n'
        self.css += '@page {margin-top: %fpt; margin-bottom: %fpt; margin-left: %fpt; margin-right: %fpt}\n'%(self.opts.margin_top, self.opts.margin_bottom, self.opts.margin_left, self.opts.margin_right)
 def config(defaults=None, config_name='html', 
           desc=_('Options to control the traversal of HTML')):
@ -575,6 +583,8 @@ def config(defaults=None, config_name='html',
             help=_('Set the title. Default is to autodetect.'))
    metadata('authors', ['-a', '--authors'], default=_('Unknown'),
             help=_('The author(s) of the ebook, as a comma separated list.'))
    metadata('from_opf', ['--metadata-from'], default=None,
              help=_('Load metadata from the specified OPF file'))
    debug = c.add_group('debug', _('Options useful for debugging'))
    debug('verbose', ['-v', '--verbose'], default=0, action='count',
@ -648,7 +658,12 @@ def merge_metadata(htmlfile, opf, opts):
    if opf:
        mi = MetaInformation(opf)
    else:
-        mi =  get_metadata(open(htmlfile, 'rb'), 'html')
+        try:
            mi =  get_metadata(open(htmlfile, 'rb'), 'html')
        except:
            mi = MetaInformation(None, None)
    if opts.from_opf is not None and os.access(opts.from_opf, os.R_OK):
        mi.smart_update(OPF(open(opts.from_opf, 'rb'), os.path.abspath(os.path.dirname(opts.from_opf))))
    if opts.title:
        mi.title = opts.title
    if opts.authors != _('Unknown'):
--- a/src/calibre/ebooks/lrf/fb2/convert_from.py
+++ b/src/calibre/ebooks/lrf/fb2/convert_from.py
@ -1,16 +1,22 @@
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2008, Anatoly Shipitsin <norguhtar at gmail.com>'
 """
 Convert .fb2 files to .lrf
 """
-import os, sys, tempfile, shutil, logging
+import os, sys, shutil, logging
 from base64 import b64decode
-
+from lxml import etree
 from calibre.ebooks.lrf import option_parser as lrf_option_parser
 from calibre.ebooks.metadata.meta import get_metadata
 from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
-from calibre import setup_cli_handlers, __appname__
+from calibre import setup_cli_handlers
 from calibre.resources import fb2_xsl
 from calibre.ptempfile import PersistentTemporaryDirectory
 from calibre.ebooks.metadata.opf import OPFCreator
 from calibre.ebooks.metadata import MetaInformation
 def option_parser():
    parser = lrf_option_parser(
@ -31,29 +37,42 @@ def extract_embedded_content(doc):
            data = b64decode(elem.text.strip())
            open(fname, 'wb').write(data)
-def generate_html(fb2file, encoding, logger):
+def to_html(fb2file, tdir):
-    from lxml import etree
+    cwd = os.getcwd()
    tdir = tempfile.mkdtemp(prefix=__appname__+'_fb2_')
    cwd = os.getcwdu()
    os.chdir(tdir)
    try:
-        logger.info('Parsing XML...')
+        os.chdir(tdir)
        print 'Parsing XML...'
        parser = etree.XMLParser(recover=True, no_network=True)
        doc = etree.parse(fb2file, parser)
        extract_embedded_content(doc)
-        logger.info('Converting XML to HTML...')
+        print 'Converting XML to HTML...'
        styledoc = etree.fromstring(fb2_xsl)
-
+    
        transform = etree.XSLT(styledoc)
        result = transform(doc)
-        html = os.path.join(tdir, 'index.html')
+        open('index.html', 'wb').write(transform.tostring(result))
-        f = open(html, 'wb')
+        try:
-        f.write(transform.tostring(result))
+            mi = get_metadata(open(fb2file, 'rb'))
-        f.close()
+        except:
            mi = MetaInformation(None, None)
        if not mi.title:
            mi.title = os.path.splitext(os.path.basename(fb2file))[0]
        if not mi.authors:
            mi.authors = [_('Unknown')]
        opf = OPFCreator(tdir, mi)
        opf.create_manifest([('index.html', None)])
        opf.create_spine(['index.html'])
        opf.render(open('metadata.opf', 'wb'))
        return os.path.join(tdir, 'metadata.opf')
    finally:
        os.chdir(cwd)
-    return html
+
-        
+    
 def generate_html(fb2file, encoding, logger):
    '''
    Convert ``fb2file`` with to_html() inside a new persistent temporary
    directory and return the path ``index.html`` in that directory.
    ``encoding`` and ``logger`` are accepted but not used here.
    '''
    tdir = PersistentTemporaryDirectory('_fb22lrf')
    to_html(fb2file, tdir)
    return os.path.join(tdir, 'index.html')
 def process_file(path, options, logger=None):
    if logger is None:
        level = logging.DEBUG if options.verbose else logging.INFO
--- a/src/calibre/ebooks/lrf/pdf/convert_from.py
+++ b/src/calibre/ebooks/lrf/pdf/convert_from.py
@ -9,6 +9,9 @@ from calibre.ebooks import ConversionError
 from calibre.ptempfile import PersistentTemporaryDirectory
 from calibre.ebooks.lrf import option_parser as lrf_option_parser
 from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
 from calibre.ebooks.metadata import MetaInformation
 from calibre.ebooks.metadata.opf import OPFCreator
 from calibre.ebooks.metadata.pdf import get_metadata
 PDFTOHTML = 'pdftohtml'
 popen = subprocess.Popen
@ -20,7 +23,7 @@ if iswindows and hasattr(sys, 'frozen'):
 if islinux and getattr(sys, 'frozen_path', False):
    PDFTOHTML = os.path.join(getattr(sys, 'frozen_path'), 'pdftohtml')
-def generate_html(pathtopdf, logger):
+def generate_html(pathtopdf, tdir):
    '''
    Convert the pdf into html.
    @return: Path to a temporary file containing the HTML.
@ -29,10 +32,10 @@ def generate_html(pathtopdf, logger):
        pathtopdf = pathtopdf.encode(sys.getfilesystemencoding())
    if not os.access(pathtopdf, os.R_OK):
        raise ConversionError, 'Cannot read from ' + pathtopdf
    tdir = PersistentTemporaryDirectory('pdftohtml')
    index = os.path.join(tdir, 'index.html')
    # This is necessary as pdftohtml doesn't always (linux) respect absolute paths
-    cmd = (PDFTOHTML, '-enc', 'UTF-8',  '-noframes',  '-p',  '-nomerge',  pathtopdf, os.path.basename(index))
+    pathtopdf = os.path.abspath(pathtopdf)
    cmd = (PDFTOHTML, '-enc', 'UTF-8',  '-noframes',  '-p',  '-nomerge',  '-nodrm', pathtopdf, os.path.basename(index))
    cwd = os.getcwd()
    try:
@ -44,16 +47,30 @@ def generate_html(pathtopdf, logger):
                raise ConversionError(_('Could not find pdftohtml, check it is in your PATH'), True)
            else:
                raise
-        logger.info(p.stdout.read())
+        print p.stdout.read()
        ret = p.wait()
        if ret != 0:
            err = p.stderr.read()
            raise ConversionError, err
        if not os.path.exists(index) or os.stat(index).st_size < 100:
            raise ConversionError(os.path.basename(pathtopdf) + _(' does not allow copying of text.'), True)
-        raw = open(index).read(4000)
+        
-        if not '<br' in raw:
+        raw = open(index, 'rb').read()
        open(index, 'wb').write('<!-- created by calibre\'s pdftohtml -->\n'+raw)
        if not '<br' in raw[:4000]:
            raise ConversionError(os.path.basename(pathtopdf) + _(' is an image based PDF. Only conversion of text based PDFs is supported.'), True)
        try:
            mi = get_metadata(open(pathtopdf, 'rb'))
        except:
            mi = MetaInformation(None, None)
        if not mi.title:
            mi.title = os.path.splitext(os.path.basename(pathtopdf))[0]
        if not mi.authors:
            mi.authors = [_('Unknown')]
        opf = OPFCreator(tdir, mi)
        opf.create_manifest([('index.html', None)])
        opf.create_spine(['index.html'])
        opf.render(open('metadata.opf', 'wb'))
    finally:
        os.chdir(cwd)
    return index
@ -72,7 +89,8 @@ def process_file(path, options, logger=None):
        logger = logging.getLogger('pdf2lrf')
        setup_cli_handlers(logger, level)
    pdf = os.path.abspath(os.path.expanduser(path))
-    htmlfile = generate_html(pdf, logger)
+    tdir = PersistentTemporaryDirectory('_pdf2lrf')
    htmlfile = generate_html(pdf, tdir)
    if not options.output:
        ext = '.lrs' if options.lrs else '.lrf'        
        options.output = os.path.abspath(os.path.basename(os.path.splitext(path)[0]) + ext)
--- a/src/calibre/ebooks/lrf/rtf/convert_from.py
+++ b/src/calibre/ebooks/lrf/rtf/convert_from.py
@ -1,17 +1,20 @@
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
-import os, sys, tempfile, shutil, logging, glob
+import os, sys, shutil, logging, glob
 from lxml import etree
 from calibre.ebooks.lrf import option_parser as lrf_option_parser
 from calibre.ebooks.metadata.meta import get_metadata
 from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
-from calibre import setup_cli_handlers, __appname__
+from calibre import setup_cli_handlers
 from calibre.libwand import convert, WandException
 from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
 from calibre.ebooks.lrf.rtf.xsl import xhtml
 from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
 from calibre.ptempfile import PersistentTemporaryDirectory
 from calibre.ebooks.metadata import MetaInformation
 from calibre.ebooks.metadata.opf import OPFCreator 
 def option_parser():
    parser = lrf_option_parser(
@ -44,8 +47,8 @@ def process_file(path, options, logger=None):
    f = open(rtf, 'rb')
    mi = get_metadata(f, 'rtf')
    f.close()
-    html = generate_html(rtf, logger)
+    tdir = PersistentTemporaryDirectory('_rtf2lrf')
-    tdir = os.path.dirname(html)
+    html = generate_html(rtf, tdir)
    cwd = os.getcwdu()
    try:
        if not options.output:
@ -83,12 +86,12 @@ def main(args=sys.argv, logger=None):
    return 0
-def generate_xml(rtfpath):
+def generate_xml(rtfpath, tdir):
    from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
    tdir = tempfile.mkdtemp(prefix=__appname__+'_')
    ofile = os.path.join(tdir, 'index.xml')
    cwd = os.getcwdu()
    os.chdir(tdir)
    rtfpath = os.path.abspath(rtfpath)
    try:
        parser = ParseRtf(
            in_file    = rtfpath,
@ -134,26 +137,27 @@ def generate_xml(rtfpath):
    return ofile
-def generate_html(rtfpath, logger):
+def generate_html(rtfpath, tdir):
-    logger.info('Converting RTF to XML...')
+    print 'Converting RTF to XML...'
    rtfpath = os.path.abspath(rtfpath)
    try:
-        xml = generate_xml(rtfpath)
+        xml = generate_xml(rtfpath, tdir)
    except RtfInvalidCodeException:
        raise Exception(_('This RTF file has a feature calibre does not support. Convert it to HTML and then convert it.'))
    tdir = os.path.dirname(xml)
    cwd = os.getcwdu()
    os.chdir(tdir)
    try:
-        logger.info('Parsing XML...')
+        print 'Parsing XML...'
        parser = etree.XMLParser(recover=True, no_network=True)
        try:
            doc = etree.parse(xml, parser)
        except:
            raise
-            logger.info('Parsing failed. Trying to clean up XML...')
+            print 'Parsing failed. Trying to clean up XML...'
            soup = BeautifulStoneSoup(open(xml, 'rb').read())
            doc = etree.fromstring(str(soup))
-        logger.info('Converting XML to HTML...')
+        print 'Converting XML to HTML...'
        styledoc = etree.fromstring(xhtml)
        transform = etree.XSLT(styledoc)
@ -161,8 +165,22 @@ def generate_html(rtfpath, logger):
        tdir = os.path.dirname(xml)
        html = os.path.join(tdir, 'index.html')
        f = open(html, 'wb')
-        f.write(transform.tostring(result))
+        res = transform.tostring(result)
        res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
        f.write(res)
        f.close()
        try:
            mi = get_metadata(open(rtfpath, 'rb'))
        except:
            mi = MetaInformation(None, None)
        if not mi.title:
            mi.title = os.path.splitext(os.path.basename(rtfpath))[0]
        if not mi.authors:
            mi.authors = [_('Unknown')]
        opf = OPFCreator(tdir, mi)
        opf.create_manifest([('index.html', None)])
        opf.create_spine(['index.html'])
        opf.render(open('metadata.opf', 'wb'))
    finally:
        os.chdir(cwd)
    return html
--- a/src/calibre/ebooks/lrf/txt/convert_from.py
+++ b/src/calibre/ebooks/lrf/txt/convert_from.py
@ -5,12 +5,14 @@ Convert .txt files to .lrf
 """
 import os, sys, codecs, logging
-from calibre.ptempfile import PersistentTemporaryFile
+from calibre.ptempfile import PersistentTemporaryDirectory
 from calibre.ebooks.lrf import option_parser as lrf_option_parser
 from calibre.ebooks import ConversionError
 from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
 from calibre.ebooks.markdown import markdown
 from calibre import setup_cli_handlers
 from calibre.ebooks.metadata import MetaInformation
 from calibre.ebooks.metadata.opf import OPFCreator
 def option_parser():
    parser = lrf_option_parser(
@ -23,7 +25,7 @@ _('''%prog [options] mybook.txt
    return parser
-def generate_html(txtfile, encoding, logger):
+def generate_html(txtfile, encoding, tdir):
    '''
    Convert txtfile to html and return a PersistentTemporaryFile object pointing
    to the file with the HTML.
@ -44,15 +46,19 @@ def generate_html(txtfile, encoding, logger):
    else:
        txt = codecs.open(txtfile, 'rb', enc).read()
-    logger.info('Converting text to HTML...')
+    print 'Converting text to HTML...'
    md = markdown.Markdown(
                       extensions=['footnotes', 'tables', 'toc'],
                       safe_mode=False,
                       )
-    html = md.convert(txt)
+    html = '<html><body>'+md.convert(txt)+'</body></html>'
-    p = PersistentTemporaryFile('.html', dir=os.path.dirname(txtfile))
+    p = os.path.join(tdir, 'index.html')
-    p.close()
+    open(p, 'wb').write(html.encode('utf-8'))
-    codecs.open(p.name, 'wb', 'utf8').write(html)
+    mi = MetaInformation(os.path.splitext(os.path.basename(txtfile))[0], [_('Unknown')])
    opf = OPFCreator(tdir, mi)
    opf.create_manifest([(os.path.join(tdir, 'index.html'), None)])
    opf.create_spine([os.path.join(tdir, 'index.html')])
    opf.render(open(os.path.join(tdir, 'metadata.opf'), 'wb'))
    return p
 def process_file(path, options, logger=None):
@ -63,7 +69,8 @@ def process_file(path, options, logger=None):
    txt = os.path.abspath(os.path.expanduser(path))
    if not hasattr(options, 'debug_html_generation'):
        options.debug_html_generation = False
-    htmlfile = generate_html(txt, options.encoding, logger)
+    tdir = PersistentTemporaryDirectory('_txt2lrf')
    htmlfile = generate_html(txt, options.encoding, tdir)
    options.encoding = 'utf-8'
    if not options.debug_html_generation:
        options.force_page_break = 'h2'
@ -73,9 +80,9 @@ def process_file(path, options, logger=None):
        options.output = os.path.abspath(os.path.expanduser(options.output))
        if not options.title:
            options.title = os.path.splitext(os.path.basename(path))[0]
-        html_process_file(htmlfile.name, options, logger)
+        html_process_file(htmlfile, options, logger)
    else:
-        print open(htmlfile.name, 'rb').read()        
+        print open(htmlfile, 'rb').read()        
 def main(args=sys.argv, logger=None):
    parser = option_parser()    
--- a/src/calibre/ebooks/metadata/init.py
+++ b/src/calibre/ebooks/metadata/init.py
@ -200,10 +200,10 @@ class MetaInformation(object):
        Merge the information in C{mi} into self. In case of conflicts, the information
        in C{mi} takes precedence, unless the information in mi is NULL.
        '''
-        if mi.title and mi.title.lower() != 'unknown':
+        if mi.title and mi.title != _('Unknown'):
            self.title = mi.title
-        if mi.authors and mi.authors[0].lower() != 'unknown':
+        if mi.authors and mi.authors[0] != _('Unknown'):
            self.authors = mi.authors
        for attr in ('author_sort', 'title_sort', 'comments', 'category',
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@ -12,7 +12,7 @@ try:
 except ImportError:
    import Image as PILImage
-from calibre import __appname__
+from calibre import __appname__, entity_to_unicode
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
 from calibre.ebooks.mobi import MobiError
 from calibre.ebooks.mobi.huffcdic import HuffReader
@ -263,17 +263,19 @@ class MobiReader(object):
                if ref.type.lower() == 'toc':
                    toc = ref.href()
        if toc:
-            index = self.processed_html.find('<a name="%s"'%toc.partition('#')[-1])
+            index = self.processed_html.find('<a id="%s" name="%s"'%(toc.partition('#')[-1], toc.partition('#')[-1]))
            tocobj = None
            ent_pat = re.compile(r'&(\S+?);')
            if index > -1:
                raw = '<html><body>'+self.processed_html[index:]
                soup = BeautifulSoup(raw)
                tocobj = TOC()
                for a in soup.findAll('a', href=True):
                    try:
-                        text = ''.join(a.findAll(text=True)).strip()
+                        text = u''.join(a.findAll(text=True)).strip()
                    except:
                        text = ''
                    text = ent_pat.sub(entity_to_unicode, text)
                    tocobj.add_item(toc.partition('#')[0], a['href'][1:], text)
            if tocobj is not None:
                opf.set_toc(tocobj)
@ -353,7 +355,7 @@ class MobiReader(object):
            r = self.mobi_html.find('>', end)
            if r > -1 and r < l: # Move out of tag
                end = r+1
-            self.processed_html += self.mobi_html[pos:end] + '<a name="filepos%d"></a>'%oend 
+            self.processed_html += self.mobi_html[pos:end] + '<a id="filepos%d" name="filepos%d"></a>'%(oend, oend) 
            pos = end
        self.processed_html += self.mobi_html[pos:]
--- a/src/calibre/linux.py
+++ b/src/calibre/linux.py
@ -43,6 +43,7 @@ entry_points = {
                             'fb22lrf   = calibre.ebooks.lrf.fb2.convert_from:main',
                             'fb2-meta  = calibre.ebooks.metadata.fb2:main',
                             'any2lrf   = calibre.ebooks.lrf.any.convert_from:main',
                             'any2epub  = calibre.ebooks.epub.from_any:main',
                             'lrf2lrs   = calibre.ebooks.lrf.lrfparser:main',
                             'lrs2lrf   = calibre.ebooks.lrf.lrs.convert_from:main',
                             'pdfreflow = calibre.ebooks.lrf.pdf.reflow:main',
@ -174,8 +175,10 @@ def setup_completion(fatal_errors):
        from calibre.ebooks.lrf.comic.convert_from import option_parser as comicop
        from calibre.ebooks.epub.from_html import option_parser as html2epub
        from calibre.ebooks.html import option_parser as html2oeb
-        from calibre.ebooks.epub.from_feeds import option_parser as feeds2epub 
+        from calibre.ebooks.epub.from_feeds import option_parser as feeds2epub
-
+        from calibre.ebooks.epub.from_any import option_parser as any2epub 
        any_formats = ['epub', 'htm', 'html', 'xhtml', 'xhtm', 'rar', 'zip',
             'txt', 'lit', 'rtf', 'pdf', 'prc', 'mobi', 'fb2'] 
        f = open_file('/etc/bash_completion.d/libprs500')
        f.close()
        os.remove(f.name)
@ -193,9 +196,8 @@ def setup_completion(fatal_errors):
        f.write(opts_and_exts('mobi2lrf', htmlop, ['mobi', 'prc']))
        f.write(opts_and_exts('fb22lrf', htmlop, ['fb2']))
        f.write(opts_and_exts('pdf2lrf', htmlop, ['pdf']))
-        f.write(opts_and_exts('any2lrf', htmlop,
+        f.write(opts_and_exts('any2lrf', htmlop, any_formats))
-            ['epub', 'htm', 'html', 'xhtml', 'xhtm', 'rar', 'zip',
+        f.write(opts_and_exts('any2lrf', any2epub, any_formats))
             'txt', 'lit', 'rtf', 'pdf', 'prc', 'mobi', 'fb2']))
        f.write(opts_and_exts('lrf2lrs', lrf2lrsop, ['lrf']))
        f.write(opts_and_exts('lrf-meta', metaop, ['lrf']))
        f.write(opts_and_exts('rtf-meta', metaop, ['rtf']))
--- a/src/calibre/utils/config.py
+++ b/src/calibre/utils/config.py
@ -177,6 +177,12 @@ class Option(object):
    def __eq__(self, other):
        '''
        Compare by name: ``self.name`` is compared with ``other.name`` or,
        when ``other`` has no ``name`` attribute, with ``other`` itself.
        '''
        return self.name == getattr(other, 'name', other)
    def __repr__(self):
        ''' Return the string ``Option: `` followed by the option name. '''
        return 'Option: '+self.name
    def __str__(self):
        ''' Same text as repr(self). '''
        return repr(self)
 class OptionValues(object):