From 5c5a4d867662e088c42fc75a8e54b397479215f7 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 19 Apr 2009 18:20:26 -0700
Subject: [PATCH] Plugin for RTF input

---
 src/calibre/customize/builtins.py            |   3 +-
 src/calibre/ebooks/epub/from_any.py          |  10 -
 src/calibre/ebooks/lrf/rtf/convert_from.py   | 190 -------------------
 src/calibre/ebooks/{lrf => }/rtf/__init__.py |   0
 src/calibre/ebooks/rtf/input.py              | 101 ++++++++++
 src/calibre/ebooks/{lrf => }/rtf/xsl.py      |   0
 src/calibre/ebooks/rtf2xml/ParseRtf.py       |   6 +-
 src/calibre/ebooks/rtf2xml/pict.py           |   6 +-
 8 files changed, 110 insertions(+), 206 deletions(-)
 delete mode 100644 src/calibre/ebooks/lrf/rtf/convert_from.py
 rename src/calibre/ebooks/{lrf => }/rtf/__init__.py (100%)
 create mode 100644 src/calibre/ebooks/rtf/input.py
 rename src/calibre/ebooks/{lrf => }/rtf/xsl.py (100%)
diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index a67224872b..51a0e4c75f 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -283,6 +283,7 @@ from calibre.ebooks.txt.input import TXTInput
 from calibre.ebooks.lit.input import LITInput
 from calibre.ebooks.fb2.input import FB2Input
 from calibre.ebooks.odt.input import ODTInput
+from calibre.ebooks.rtf.input import RTFInput
 from calibre.ebooks.html.input import HTMLInput
 from calibre.ebooks.oeb.output import OEBOutput
 from calibre.ebooks.txt.output import TXTOutput
@@ -291,7 +292,7 @@ from calibre.customize.profiles import input_profiles, output_profiles
 
 plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDFInput, HTMLInput,
         TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput,
-        FB2Input, ODTInput]
+        FB2Input, ODTInput, RTFInput]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
                                         x.__name__.endswith('MetadataReader')]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
diff --git a/src/calibre/ebooks/epub/from_any.py b/src/calibre/ebooks/epub/from_any.py
index 196ed59646..68112592d2 100644
--- a/src/calibre/ebooks/epub/from_any.py
+++ b/src/calibre/ebooks/epub/from_any.py
@@ -19,11 +19,6 @@ from calibre.utils.zipfile import ZipFile
 from calibre.customize.ui import run_plugins_on_preprocess
 
 
-def rtf2opf(path, tdir, opts):
-    from calibre.ebooks.lrf.rtf.convert_from import generate_html
-    generate_html(path, tdir)
-    return os.path.join(tdir, 'metadata.opf')
-
 def epub2opf(path, tdir, opts):
     zf = ZipFile(path)
     zf.extractall(tdir)
@@ -42,11 +37,6 @@ def epub2opf(path, tdir, opts):
         raise ValueError('%s is not a valid EPUB file'%path)
     return opf
 
-def odt2epub(path, tdir, opts):
-    from calibre.ebooks.odt.to_oeb import Extract
-    opts.encoding = 'utf-8'
-    return Extract()(path, tdir)
-
 SOURCE_FORMATS = ['lit', 'mobi', 'prc', 'azw', 'fb2', 'odt', 'rtf',
                   'txt', 'pdf', 'rar', 'zip', 'oebzip', 'htm', 'html', 'epub']
 
diff --git a/src/calibre/ebooks/lrf/rtf/convert_from.py b/src/calibre/ebooks/lrf/rtf/convert_from.py
deleted file mode 100644
index e4dd153d2a..0000000000
--- a/src/calibre/ebooks/lrf/rtf/convert_from.py
+++ /dev/null
@@ -1,190 +0,0 @@
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
-import os, sys, shutil, logging, glob
-
-from lxml import etree
-
-from calibre.ebooks.lrf import option_parser as lrf_option_parser
-from calibre.ebooks.metadata.meta import get_metadata
-from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
-from calibre import setup_cli_handlers
-from calibre.libwand import convert, WandException
-from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
-from calibre.ebooks.lrf.rtf.xsl import xhtml
-from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
-from calibre.ptempfile import PersistentTemporaryDirectory
-from calibre.ebooks.metadata import MetaInformation
-from calibre.ebooks.metadata.opf import OPFCreator 
-
-def option_parser():
-    parser = lrf_option_parser(
-_('''%prog [options] mybook.rtf
-
-
-%prog converts mybook.rtf to mybook.lrf''')
-        )
-    parser.add_option('--keep-intermediate-files', action='store_true', default=False)
-    return parser
-
-def convert_images(html, logger):
-    wmfs = glob.glob('*.wmf') + glob.glob('*.WMF')
-    for wmf in wmfs:
-        target = os.path.join(os.path.dirname(wmf), os.path.splitext(os.path.basename(wmf))[0]+'.jpg')
-        try:
-            convert(wmf, target)
-            html = html.replace(os.path.basename(wmf), os.path.basename(target))
-        except WandException, err:
-            logger.warning(u'Unable to convert image %s with error: %s'%(wmf, unicode(err)))
-            continue
-    return html
-
-def process_file(path, options, logger=None):
-    if logger is None:
-        level = logging.DEBUG if options.verbose else logging.INFO
-        logger = logging.getLogger('rtf2lrf')
-        setup_cli_handlers(logger, level)
-    rtf = os.path.abspath(os.path.expanduser(path))
-    f = open(rtf, 'rb')
-    mi = get_metadata(f, 'rtf')
-    f.close()
-    tdir = PersistentTemporaryDirectory('_rtf2lrf')
-    html = generate_html(rtf, tdir)
-    cwd = os.getcwdu()
-    try:
-        if not options.output:
-            ext = '.lrs' if options.lrs else '.lrf'
-            options.output = os.path.abspath(os.path.basename(os.path.splitext(path)[0]) + ext)
-        options.output = os.path.abspath(os.path.expanduser(options.output))
-        if not mi.title:
-            mi.title = os.path.splitext(os.path.basename(rtf))[0]
-        if (not options.title or options.title == 'Unknown'):
-            options.title = mi.title
-        if (not options.author or options.author == 'Unknown') and mi.author:
-            options.author = mi.author
-        if (not options.category or options.category == 'Unknown') and mi.category:
-            options.category = mi.category
-        if (not options.freetext or options.freetext == 'Unknown') and mi.comments:
-            options.freetext = mi.comments
-        os.chdir(tdir)
-        html_process_file(html, options, logger)
-    finally:
-        os.chdir(cwd)
-        if hasattr(options, 'keep_intermediate_files') and options.keep_intermediate_files:
-            logger.debug('Intermediate files in '+ tdir)
-        else:
-            shutil.rmtree(tdir)
-
-def main(args=sys.argv, logger=None):
-    parser = option_parser()
-    options, args = parser.parse_args(args)
-    if len(args) != 2:
-        parser.print_help()
-        print
-        print 'No rtf file specified'
-        return 1
-    process_file(args[1], options, logger)
-    return 0
-    
-
-def generate_xml(rtfpath, tdir):
-    from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
-    ofile = os.path.join(tdir, 'index.xml')
-    cwd = os.getcwdu()
-    os.chdir(tdir)
-    rtfpath = os.path.abspath(rtfpath)
-    try:
-        parser = ParseRtf(
-            in_file    = rtfpath,
-            out_file   = ofile,
-            # Convert symbol fonts to unicode equivelents. Default
-            # is 1
-            convert_symbol = 1,
-    
-            # Convert Zapf fonts to unicode equivelents. Default
-            # is 1.
-            convert_zapf = 1,
-    
-            # Convert Wingding fonts to unicode equivelents.
-            # Default is 1.
-            convert_wingdings = 1,
-    
-            # Convert RTF caps to real caps.
-            # Default is 1.
-            convert_caps = 1,
-    
-            # Indent resulting XML.
-            # Default is 0 (no indent).
-            indent = 1,
-    
-            # Form lists from RTF. Default is 1.
-            form_lists = 1,
-    
-            # Convert headings to sections. Default is 0.
-            headings_to_sections = 1,
-    
-            # Group paragraphs with the same style name. Default is 1.
-            group_styles = 1,
-    
-            # Group borders. Default is 1.
-            group_borders = 1,
-    
-            # Write or do not write paragraphs. Default is 0.
-            empty_paragraphs = 0,
-        )
-        parser.parse_rtf()
-    finally:
-        os.chdir(cwd)
-    return ofile
-
-
-def generate_html(rtfpath, tdir):
-    print 'Converting RTF to XML...'
-    rtfpath = os.path.abspath(rtfpath)
-    try:
-        xml = generate_xml(rtfpath, tdir)
-    except RtfInvalidCodeException:
-        raise Exception(_('This RTF file has a feature calibre does not support. Convert it to HTML and then convert it.'))
-    tdir = os.path.dirname(xml)
-    cwd = os.getcwdu()
-    os.chdir(tdir)
-    try:
-        print 'Parsing XML...'
-        parser = etree.XMLParser(recover=True, no_network=True)
-        try:
-            doc = etree.parse(xml, parser)
-        except:
-            raise
-            print 'Parsing failed. Trying to clean up XML...'
-            soup = BeautifulStoneSoup(open(xml, 'rb').read())
-            doc = etree.fromstring(str(soup))
-        print 'Converting XML to HTML...'
-        styledoc = etree.fromstring(xhtml)
-        
-        transform = etree.XSLT(styledoc)
-        result = transform(doc)
-        tdir = os.path.dirname(xml)
-        html = os.path.join(tdir, 'index.html')
-        f = open(html, 'wb')
-        res = transform.tostring(result)
-        res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
-        f.write(res)
-        f.close()
-        try:
-            mi = get_metadata(open(rtfpath, 'rb'), 'rtf')
-        except:
-            mi = MetaInformation(None, None)
-        if not mi.title:
-            mi.title = os.path.splitext(os.path.basename(rtfpath))[0]
-        if not mi.authors:
-            mi.authors = [_('Unknown')]
-        opf = OPFCreator(tdir, mi)
-        opf.create_manifest([('index.html', None)])
-        opf.create_spine(['index.html'])
-        opf.render(open('metadata.opf', 'wb'))
-    finally:
-        os.chdir(cwd)
-    return html
-            
-if __name__ == '__main__':
-    sys.exit(main())    
-        
\ No newline at end of file
diff --git a/src/calibre/ebooks/lrf/rtf/__init__.py b/src/calibre/ebooks/rtf/__init__.py
similarity index 100%
rename from src/calibre/ebooks/lrf/rtf/__init__.py
rename to src/calibre/ebooks/rtf/__init__.py
diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py
new file mode 100644
index 0000000000..764d47ff41
--- /dev/null
+++ b/src/calibre/ebooks/rtf/input.py
@@ -0,0 +1,101 @@
+from __future__ import with_statement
+__license__   = 'GPL v3'
+__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
+
+import os
+
+from lxml import etree
+
+from calibre.customize.conversion import InputFormatPlugin
+
+class RTFInput(InputFormatPlugin):
+    '''Input plugin that turns an RTF stream into XHTML plus an OPF.'''
+
+    name        = 'RTF Input'
+    author      = 'Kovid Goyal'
+    description = 'Convert RTF files to HTML'
+    file_types  = set(['rtf'])
+
+    def generate_xml(self, stream):
+        '''
+        Run rtf2xml on `stream` and return the XML it writes, as a string.
+        The scratch file out.xml (in the current working directory) is
+        removed again even when parsing or reading fails.
+        '''
+        from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
+        ofile = 'out.xml'
+        parser = ParseRtf(
+            in_file    = stream,
+            out_file   = ofile,
+            # Convert symbol fonts to unicode equivalents. Default is 1.
+            convert_symbol = 1,
+            # Convert Zapf fonts to unicode equivalents. Default is 1.
+            convert_zapf = 1,
+            # Convert Wingding fonts to unicode equivalents. Default is 1.
+            convert_wingdings = 1,
+            # Convert RTF caps to real caps. Default is 1.
+            convert_caps = 1,
+            # Indent resulting XML. Default is 0 (no indent).
+            indent = 1,
+            # Form lists from RTF. Default is 1.
+            form_lists = 1,
+            # Convert headings to sections. Default is 0.
+            headings_to_sections = 1,
+            # Group paragraphs with the same style name. Default is 1.
+            group_styles = 1,
+            # Group borders. Default is 1.
+            group_borders = 1,
+            # Write or do not write paragraphs. Default is 0.
+            empty_paragraphs = 0,
+        )
+        try:
+            parser.parse_rtf()
+            with open(ofile) as f:
+                return f.read()
+        finally:
+            # Never leave the scratch file behind, on success or failure.
+            if os.path.exists(ofile):
+                os.remove(ofile)
+
+    def convert(self, stream, options, file_ext, log, accelerators):
+        '''
+        Convert the RTF in `stream` to index.xhtml and metadata.opf, both
+        written to the current working directory, and return the absolute
+        path of the OPF. Raises ValueError if rtf2xml raises
+        RtfInvalidCodeException.
+        '''
+        from calibre.ebooks.rtf.xsl import xhtml
+        from calibre.ebooks.metadata.meta import get_metadata
+        from calibre.ebooks.metadata.opf import OPFCreator
+        from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
+        self.log = log
+        self.log('Converting RTF to XML...')
+        try:
+            xml = self.generate_xml(stream)
+        except RtfInvalidCodeException:
+            raise ValueError(_('This RTF file has a feature calibre does not '
+            'support. Convert it to HTML first and then try it.'))
+        self.log('Parsing XML...')
+        parser = etree.XMLParser(recover=True, no_network=True)
+        doc = etree.fromstring(xml, parser=parser)
+        self.log('Converting XML to HTML...')
+        transform = etree.XSLT(etree.fromstring(xhtml))
+        res = transform.tostring(transform(doc))
+        # Only the first 100 characters are searched for 'xmlns:html'.
+        res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
+        with open('index.xhtml', 'wb') as f:
+            f.write(res)
+        # The stream was handed to the RTF parser above; rewind it.
+        stream.seek(0)
+        mi = get_metadata(stream, 'rtf')
+        if not mi.title:
+            mi.title = _('Unknown')
+        if not mi.authors:
+            mi.authors = [_('Unknown')]
+        opf = OPFCreator(os.getcwd(), mi)
+        opf.create_manifest([('index.xhtml', None)])
+        opf.create_spine(['index.xhtml'])
+        with open('metadata.opf', 'wb') as f:
+            opf.render(f)
+        return os.path.abspath('metadata.opf')
+
diff --git a/src/calibre/ebooks/lrf/rtf/xsl.py b/src/calibre/ebooks/rtf/xsl.py
similarity index 100%
rename from src/calibre/ebooks/lrf/rtf/xsl.py
rename to src/calibre/ebooks/rtf/xsl.py
diff --git a/src/calibre/ebooks/rtf2xml/ParseRtf.py b/src/calibre/ebooks/rtf2xml/ParseRtf.py
index 5b008df615..cba0f900db 100755
--- a/src/calibre/ebooks/rtf2xml/ParseRtf.py
+++ b/src/calibre/ebooks/rtf2xml/ParseRtf.py
@@ -149,9 +149,10 @@ class ParseRtf:
         self.__group_borders = group_borders
         self.__empty_paragraphs = empty_paragraphs
         self.__no_dtd = no_dtd
-        
+
     def __check_file(self, the_file, type):
         """Check to see if files exist"""
+        if hasattr(the_file, 'read'): return
         if the_file == None:
             if type == "file_to_parse":
                 message = "You must provide a file for the script to work"
@@ -545,13 +546,17 @@ class ParseRtf:
     def __make_temp_file(self,file):
         """Make a temporary file to parse"""
         write_file="rtf_write_file"
-        read_obj = open(file,'r')
+        # `file` may be a path or an already-open file-like object. Only a
+        # handle opened here is closed here; a caller's stream stays open.
+        opened_here = not hasattr(file, 'read')
+        read_obj = open(file,'r') if opened_here else file
         write_obj = open(write_file, 'w')
         line = "dummy"
         while line:
             line = read_obj.read(1000)
             write_obj.write(line )
-        read_obj.close()
+        if opened_here:
+            read_obj.close()
         write_obj.close()
         return write_file
     """
diff --git a/src/calibre/ebooks/rtf2xml/pict.py b/src/calibre/ebooks/rtf2xml/pict.py
index b1931b8c2e..6c88dd54e4 100755
--- a/src/calibre/ebooks/rtf2xml/pict.py
+++ b/src/calibre/ebooks/rtf2xml/pict.py
@@ -58,10 +58,12 @@ class Pict:
         return line[18:]
     def __make_dir(self):
         """ Make a dirctory to put the image data in"""
-        base_name = os.path.basename(self.__orig_file)
+        base_name = os.path.basename(getattr(self.__orig_file, 'name',
+            self.__orig_file))
         base_name = os.path.splitext(base_name)[0]
         if self.__out_file:
-            dir_name = os.path.dirname(self.__out_file)
+            dir_name = os.path.dirname(getattr(self.__out_file, 'name',
+                self.__out_file))
         else:
             dir_name = os.path.dirname(self.__orig_file)
         # self.__output_to_file_func()