From f526555572a2cca0bfc08f498664a546a6f4c4a4 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Mon, 25 May 2009 10:43:29 -0400
Subject: [PATCH 01/10] Tweak line length factor for pdf line wrapping.

---
 src/calibre/ebooks/conversion/preprocess.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py
index 76fc36708e..2dc404e586 100644
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ -176,7 +176,7 @@ class HTMLPreProcessor(object):
         elif self.is_pdftohtml(html):
             line_length_rules = [
                 # Un wrap using punctuation
-                (re.compile(r'(?<=.{%i}[a-z,;:-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>)\s*(?=(<(i|b|u)>)?[\w\d])' % line_length(html, .4), re.UNICODE), wrap_lines),
+                (re.compile(r'(?<=.{%i}[a-z,;:-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>)\s*(?=(<(i|b|u)>)?[\w\d])' % line_length(html, .3), re.UNICODE), wrap_lines),
             ]
 
             rules = self.PDFTOHTML + line_length_rules

From b92c2dc002927626a01cfd53066e3e8b4dd469be Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Mon, 25 May 2009 21:31:51 -0400
Subject: [PATCH 02/10] Basic RTF output.

---
 src/calibre/customize/builtins.py |   2 +
 src/calibre/ebooks/pml/output.py  |   3 +-
 src/calibre/ebooks/rtf/output.py  |  36 +++++++
 src/calibre/ebooks/rtf/rtfml.py   | 171 ++++++++++++++++++++++++++++++
 src/calibre/ebooks/txt/output.py  |   1 -
 5 files changed, 211 insertions(+), 2 deletions(-)
 create mode 100644 src/calibre/ebooks/rtf/output.py
 create mode 100644 src/calibre/ebooks/rtf/rtfml.py

diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index ab9460d3be..d107413e38 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -337,6 +337,7 @@ from calibre.ebooks.pdb.output import PDBOutput
 from calibre.ebooks.pdf.output import PDFOutput
 from calibre.ebooks.pml.output import PMLOutput
 from calibre.ebooks.rb.output import RBOutput
+from calibre.ebooks.rtf.output import RTFOutput
 from calibre.ebooks.txt.output import TXTOutput
 
 from calibre.customize.profiles import input_profiles, output_profiles
@@ -382,6 +383,7 @@ plugins += [
     PDFOutput,
     PMLOutput,
     RBOutput,
+    RTFOutput,
     TXTOutput,
 ]
 plugins += [
diff --git a/src/calibre/ebooks/pml/output.py b/src/calibre/ebooks/pml/output.py
index 9d07718654..8be8cc18ee 100644
--- a/src/calibre/ebooks/pml/output.py
+++ b/src/calibre/ebooks/pml/output.py
@@ -6,7 +6,8 @@ __docformat__ = 'restructuredtext en'
 
 import os
 
-import Image, cStringIO
+import Image
+import cStringIO
 
 from calibre.customize.conversion import OutputFormatPlugin
 from calibre.ptempfile import TemporaryDirectory
diff --git a/src/calibre/ebooks/rtf/output.py b/src/calibre/ebooks/rtf/output.py
new file mode 100644
index 0000000000..fab7ecad5d
--- /dev/null
+++ b/src/calibre/ebooks/rtf/output.py
@@ -0,0 +1,36 @@
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL 3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+import os
+
+from calibre.ebooks.rtf.rtfml import RTFMLizer
+from calibre.customize.conversion import OutputFormatPlugin
+
+class RTFOutput(OutputFormatPlugin):
+
+    name = 'RTF Output'
+    author = 'John Schember'
+    file_type = 'rtf'
+
+    def convert(self, oeb_book, output_path, input_plugin, opts, log):
+        rtfmlitzer = RTFMLizer(ignore_tables=opts.linearize_tables)
+        content = rtfmlitzer.extract_content(oeb_book, opts)
+
+        close = False
+        if not hasattr(output_path, 'write'):
+            close = True
+            if not os.path.exists(os.path.dirname(output_path)) and os.path.dirname(output_path) != '':
+                os.makedirs(os.path.dirname(output_path))
+            out_stream = open(output_path, 'wb')
+        else:
+            out_stream = output_path
+
+        out_stream.seek(0)
+        out_stream.truncate()
+        out_stream.write(content.encode('cp1252', 'replace'))
+
+        if close:
+            out_stream.close()
diff --git a/src/calibre/ebooks/rtf/rtfml.py b/src/calibre/ebooks/rtf/rtfml.py
new file mode 100644
index 0000000000..ade9291558
--- /dev/null
+++ b/src/calibre/ebooks/rtf/rtfml.py
@@ -0,0 +1,171 @@
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL 3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+'''
+Transform OEB content into RTF markup
+'''
+
+import os
+import re
+
+from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
+from calibre.ebooks.oeb.stylizer import Stylizer
+
+TAGS = {
+    'b': '\\b',
+    'del': '\\deleted',
+    'h1': '\\b \\par \\pard \\hyphpar \\keep',
+    'h2': '\\b \\par \\pard \\hyphpar \\keep',
+    'h3': '\\b \\par \\pard \\hyphpar \\keep',
+    'h4': '\\b \\par \\pard \\hyphpar \\keep',
+    'h5': '\\b \\par \\pard \\hyphpar \\keep',
+    'h6': '\\b \\par \\pard \\hyphpar \\keep',
+    'li': '\\par \\pard \\hyphpar \\keep \t',
+    'p': '\\par \\pard \\hyphpar \\keep \t',
+    #'ol': '\\pn \\pnrestart \\pnlvlblt',
+    'sub': '\\sub',
+    'sup': '\\super',
+    'u': '\\ul',
+    #'ul': '\\pn \\pnrestart \\pndec',
+}
+
+SINGLE_TAGS = {
+    'br': '{\\line }',
+    'div': '{\\line }',
+}
+
+STYLES = [
+    ('display', {'block': '\\par \\pard \\hyphpar \\keep'}),
+    ('font-weight', {'bold': '\\b', 'bolder': '\\b'}),
+    ('font-style', {'italic': '\\i'}),
+#    ('page-break-before', {'always': '\\pagebb '}),
+    ('text-align', {'center': '\\qc', 'left': '\\ql', 'right': '\\qr', 'justify': '\\qj'}),
+    ('text-decoration', {'line-through': '\\strike', 'underline': '\\ul'}),
+]
+
+BLOCK_TAGS = [
+    'p',
+    'h1',
+    'h2',
+    'h3',
+    'h4',
+    'h5',
+    'h6',
+    'li',
+]
+
+BLOCK_STYLES = [
+    'block'
+]
+
+'''
+TODO:
+    * Tables
+    * Images
+    * Fonts
+'''
+class RTFMLizer(object):
+    
+    def __init__(self, ignore_tables=False):
+        self.ignore_tables = ignore_tables
+
+    def extract_content(self, oeb_book, opts):
+        oeb_book.logger.info('Converting XHTML to RTF markup...')
+        self.oeb_book = oeb_book
+        self.opts = opts
+        return self.mlize_spine()
+
+    def mlize_spine(self):
+        output = self.header()
+        for item in self.oeb_book.spine:
+            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
+            output += self.dump_text(item.data.find(XHTML('body')), stylizer)
+        output += self.footer()
+        output = self.clean_text(output)
+
+        return output
+
+    def header(self):
+        return u'{\\rtf1\\ansi\\ansicpg1252\\deff0\\deflang1033'
+
+    def footer(self):
+        return ' }'
+    
+    def clean_text(self, text):
+        # Remove excess spaces at beginning and end of lines
+        text = re.sub('(?m)^[ ]+', '', text)
+        text = re.sub('(?m)[ ]+$', '', text)
+
+        # Remove excessive newlines
+        #text = re.sub('%s{1,1}' % os.linesep, '%s%s' % (os.linesep, os.linesep), text)
+        text = re.sub('%s{3,}' % os.linesep, '%s%s' % (os.linesep, os.linesep), text)
+
+        # Remove excessive spaces
+        text = re.sub('[ ]{2,}', ' ', text)
+
+        text = re.sub(r'(\{\\line \}){3,}', r'{\\line }{\\line }', text)
+        text = re.sub(r'(\{\\line \})+\{\\par', r'{\\par', text)
+
+        return text
+
+    def dump_text(self, elem, stylizer, tag_stack=[]):
+        if not isinstance(elem.tag, basestring) \
+           or namespace(elem.tag) != XHTML_NS:
+            return u''
+
+        text = u''
+        style = stylizer.style(elem)
+
+        if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
+           or style['visibility'] == 'hidden':
+            return u''
+
+        tag = barename(elem.tag)
+        tag_count = 0
+        
+        # Are we in a paragraph block?
+        if tag in BLOCK_TAGS or style['display'] in BLOCK_STYLES:
+            if 'block' not in tag_stack:
+                tag_count += 1
+                tag_stack.append('block')
+
+        single_tag = SINGLE_TAGS.get(tag, None)
+        if single_tag:
+            text += single_tag
+
+        rtf_tag = TAGS.get(tag, None)
+        if rtf_tag and rtf_tag not in tag_stack:
+            tag_count += 1
+            text += '{%s\n' % rtf_tag
+            tag_stack.append(rtf_tag)
+
+        # Processes style information
+        for s in STYLES:
+            style_tag = s[1].get(style[s[0]], None)
+            if style_tag and style_tag not in tag_stack:
+                tag_count += 1
+                text += '{%s\n' % style_tag
+                tag_stack.append(style_tag)
+
+        # Process tags that contain text.
+        if hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '':
+            text += '%s' % elem.text
+
+        for item in elem:
+            text += self.dump_text(item, stylizer, tag_stack)
+
+        for i in range(0, tag_count):
+            end_tag =  tag_stack.pop()
+            if end_tag != 'block':
+                text += u'}'
+
+        if hasattr(elem, 'tail') and elem.tail != None and elem.tail.strip() != '':
+            if 'block' in tag_stack:
+                text += '%s ' % elem.tail
+            else:
+                text += '{\\par \\pard \\hyphpar \\keep %s}' % elem.tail
+     
+        return text
diff --git a/src/calibre/ebooks/txt/output.py b/src/calibre/ebooks/txt/output.py
index adf357181c..6afc5452b2 100644
--- a/src/calibre/ebooks/txt/output.py
+++ b/src/calibre/ebooks/txt/output.py
@@ -9,7 +9,6 @@ import os
 from calibre.customize.conversion import OutputFormatPlugin, \
     OptionRecommendation
 from calibre.ebooks.txt.writer import TxtWriter, TxtNewlines
-from calibre.ebooks.metadata import authors_to_string
 
 class TXTOutput(OutputFormatPlugin):
 

From 1fd7c704d22efe67d50b5288131af74666df638b Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Tue, 26 May 2009 17:41:46 -0400
Subject: [PATCH 03/10] Fix bug in ml classes where some tags were missed.

---
 src/calibre/ebooks/fb2/fb2ml.py | 30 ++++++++--------
 src/calibre/ebooks/pml/pmlml.py | 62 ++++++++++++++++-----------------
 2 files changed, 47 insertions(+), 45 deletions(-)

diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py
index f10cf95e87..81600b9624 100644
--- a/src/calibre/ebooks/fb2/fb2ml.py
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@@ -98,25 +98,27 @@ class FB2MLizer(object):
             return u''
         
         tag = barename(elem.tag)
+        tag_count = 0
+
         if tag == 'img':
             fb2_text += '<image xlink:herf="#%s" />' % os.path.basename(elem.attrib['src'])
         
-        tag_count = 0
-        if hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '':
-            fb2_tag = TAG_MAP.get(tag, 'p')
-            if fb2_tag and fb2_tag not in tag_stack:
+
+        fb2_tag = TAG_MAP.get(tag, 'p')
+        if fb2_tag and fb2_tag not in tag_stack:
+            tag_count += 1
+            fb2_text += '<%s>' % fb2_tag
+            tag_stack.append(fb2_tag)
+
+        # Processes style information
+        for s in STYLES:
+            style_tag = s[1].get(style[s[0]], None)
+            if style_tag:
                 tag_count += 1
-                fb2_text += '<%s>' % fb2_tag
-                tag_stack.append(fb2_tag)
-
-            # Processes style information
-            for s in STYLES:
-                style_tag = s[1].get(style[s[0]], None)
-                if style_tag:
-                    tag_count += 1
-                    fb2_text += '<%s>' % style_tag
-                    tag_stack.append(style_tag)
+                fb2_text += '<%s>' % style_tag
+                tag_stack.append(style_tag)
 
+        if hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '':
             fb2_text += elem.text
         
         for item in elem:
diff --git a/src/calibre/ebooks/pml/pmlml.py b/src/calibre/ebooks/pml/pmlml.py
index 01f777caae..2f2feeb981 100644
--- a/src/calibre/ebooks/pml/pmlml.py
+++ b/src/calibre/ebooks/pml/pmlml.py
@@ -153,39 +153,39 @@ class PMLMLizer(object):
         #if style['page-break-before'] == 'always':
         #    text += '\\p'
         
+        pml_tag = TAG_MAP.get(tag, None)
+        if pml_tag and pml_tag not in tag_stack:
+            tag_count += 1
+            text += '\\%s' % pml_tag
+            tag_stack.append(pml_tag)
+
+        # Special processing of tags that require an argument.
+        # Anchors links
+        if tag in LINK_TAGS and 'q' not in tag_stack:
+            href = elem.get('href')
+            if href and '://' not in href:
+                if '#' in href:
+                    href = href.partition('#')[2]
+                href = os.path.splitext(os.path.basename(href))[0]
+                tag_count += 1
+                text += '\\q="#%s"' % href
+                tag_stack.append('q')
+        # Anchor ids
+        id_name = elem.get('id')
+        if id_name:
+            text += '\\Q="%s"' % os.path.splitext(id_name)[0]
+
+        # Processes style information
+        for s in STYLES:
+            style_tag = s[1].get(style[s[0]], None)
+            if style_tag and style_tag not in tag_stack:
+                tag_count += 1
+                text += '\\%s' % style_tag
+                tag_stack.append(style_tag)
+        # margin
+
         # Proccess tags that contain text.
         if hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '':
-            pml_tag = TAG_MAP.get(tag, None)
-            if pml_tag and pml_tag not in tag_stack:
-                tag_count += 1
-                text += '\\%s' % pml_tag
-                tag_stack.append(pml_tag)
-                
-            # Special processing of tags that require an argument.
-            # Anchors links
-            if tag in LINK_TAGS and 'q' not in tag_stack:
-                href = elem.get('href')
-                if href and '://' not in href:
-                    if '#' in href:
-                        href = href.partition('#')[2]
-                    href = os.path.splitext(os.path.basename(href))[0]
-                    tag_count += 1
-                    text += '\\q="#%s"' % href
-                    tag_stack.append('q')
-            # Anchor ids
-            id_name = elem.get('id')
-            if id_name:
-                text += '\\Q="%s"' % os.path.splitext(id_name)[0]
-
-            # Processes style information
-            for s in STYLES:
-                style_tag = s[1].get(style[s[0]], None)
-                if style_tag and style_tag not in tag_stack:
-                    tag_count += 1
-                    text += '\\%s' % style_tag
-                    tag_stack.append(style_tag)
-            # margin
-
             text += self.elem_text(elem, tag_stack)
             
         for item in elem:

From 1a42ba554d1d515deaa6931fd12ce8271c53d6e7 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Tue, 26 May 2009 20:15:20 -0400
Subject: [PATCH 04/10] RTF Output: Image support.

---
 src/calibre/ebooks/rtf/rtfml.py | 52 ++++++++++++++++++++++++++++++---
 1 file changed, 48 insertions(+), 4 deletions(-)

diff --git a/src/calibre/ebooks/rtf/rtfml.py b/src/calibre/ebooks/rtf/rtfml.py
index ade9291558..3a14c44f8d 100644
--- a/src/calibre/ebooks/rtf/rtfml.py
+++ b/src/calibre/ebooks/rtf/rtfml.py
@@ -11,7 +11,11 @@ Transform OEB content into RTF markup
 import os
 import re
 
-from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
+import Image
+import cStringIO
+
+from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace, \
+    OEB_IMAGES
 from calibre.ebooks.oeb.stylizer import Stylizer
 
 TAGS = {
@@ -64,7 +68,6 @@ BLOCK_STYLES = [
 '''
 TODO:
     * Tables
-    * Images
     * Fonts
 '''
 class RTFMLizer(object):
@@ -84,6 +87,7 @@ class RTFMLizer(object):
             stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
             output += self.dump_text(item.data.find(XHTML('body')), stylizer)
         output += self.footer()
+        output = self.insert_images(output)
         output = self.clean_text(output)
 
         return output
@@ -93,7 +97,36 @@ class RTFMLizer(object):
 
     def footer(self):
         return ' }'
-    
+
+    def insert_images(self, text):
+        for item in self.oeb_book.manifest:
+            if item.media_type in OEB_IMAGES:
+                src = os.path.basename(item.href)
+                data, width, height = self.image_to_hexstring(item.data)
+                text = text.replace('SPECIAL_IMAGE-%s-REPLACE_ME' % src, '\n\n{\\*\\shppict{\\pict\\picw%i\\pich%i\\jpegblip \n%s}}\n\n' % (width, height, data))
+        return text
+
+    def image_to_hexstring(self, data):
+        im = Image.open(cStringIO.StringIO(data))
+        data = cStringIO.StringIO()
+        im.save(data, 'JPEG')
+        data = data.getvalue()
+        
+        raw_hex = ''
+        for char in data:
+            raw_hex += hex(ord(char)).replace('0x', '').rjust(2, '0')
+
+        hex_string = ''
+        col = 1
+        for char in raw_hex:
+            if col == 129:
+                hex_string += '\n'
+                col = 1
+            col += 1
+            hex_string += char
+
+        return (hex_string, im.size[0], im.size[1])
+
     def clean_text(self, text):
         # Remove excess spaces at beginning and end of lines
         text = re.sub('(?m)^[ ]+', '', text)
@@ -125,13 +158,24 @@ class RTFMLizer(object):
 
         tag = barename(elem.tag)
         tag_count = 0
-        
+
         # Are we in a paragraph block?
         if tag in BLOCK_TAGS or style['display'] in BLOCK_STYLES:
             if 'block' not in tag_stack:
                 tag_count += 1
                 tag_stack.append('block')
 
+        # Process tags that need special processing and that do not have inner
+        # text. Usually these require an argument
+        if tag == 'img':
+            src = os.path.basename(elem.get('src'))
+            block_start = ''
+            block_end = ''
+            if 'block' not in tag_stack:
+                block_start = '{\\par \\pard \\hyphpar \\keep '
+                block_end = '}'
+            text += '%s SPECIAL_IMAGE-%s-REPLACE_ME %s' % (block_start, src, block_end)
+
         single_tag = SINGLE_TAGS.get(tag, None)
         if single_tag:
             text += single_tag

From 458f02e6fd2392e8cbb7303e0f7c628b46d770b6 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Wed, 27 May 2009 18:17:09 -0400
Subject: [PATCH 05/10] Comments.

---
 src/calibre/ebooks/rtf/rtfml.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/calibre/ebooks/rtf/rtfml.py b/src/calibre/ebooks/rtf/rtfml.py
index 3a14c44f8d..be05938f82 100644
--- a/src/calibre/ebooks/rtf/rtfml.py
+++ b/src/calibre/ebooks/rtf/rtfml.py
@@ -116,6 +116,8 @@ class RTFMLizer(object):
         for char in data:
             raw_hex += hex(ord(char)).replace('0x', '').rjust(2, '0')
 
+        # The hex data must be broken up so that no line is longer than 128
+        # chars (a newline is inserted before every 129th character).
         hex_string = ''
         col = 1
         for char in raw_hex:

From 9f5e16cc128b53fc682d1d8e688412b19dccd94b Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Wed, 27 May 2009 21:24:45 -0400
Subject: [PATCH 06/10] RTF Output: Rendering tweaks.

---
 src/calibre/ebooks/rtf/rtfml.py | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/src/calibre/ebooks/rtf/rtfml.py b/src/calibre/ebooks/rtf/rtfml.py
index be05938f82..89ec4ea980 100644
--- a/src/calibre/ebooks/rtf/rtfml.py
+++ b/src/calibre/ebooks/rtf/rtfml.py
@@ -37,16 +37,19 @@ TAGS = {
 }
 
 SINGLE_TAGS = {
-    'br': '{\\line }',
-    'div': '{\\line }',
+    'br': '\n{\\line }\n',
+    'div': '\n{\\line }\n',
+}
+
+SINGLE_TAGS_END = {
+    'div': '\n{\\line }\n',
 }
 
 STYLES = [
     ('display', {'block': '\\par \\pard \\hyphpar \\keep'}),
     ('font-weight', {'bold': '\\b', 'bolder': '\\b'}),
     ('font-style', {'italic': '\\i'}),
-#    ('page-break-before', {'always': '\\pagebb '}),
-    ('text-align', {'center': '\\qc', 'left': '\\ql', 'right': '\\qr', 'justify': '\\qj'}),
+    ('text-align', {'center': '\\qc', 'left': '\\ql', 'right': '\\qr'}),
     ('text-decoration', {'line-through': '\\strike', 'underline': '\\ul'}),
 ]
 
@@ -141,8 +144,11 @@ class RTFMLizer(object):
         # Remove excessive spaces
         text = re.sub('[ ]{2,}', ' ', text)
 
-        text = re.sub(r'(\{\\line \}){3,}', r'{\\line }{\\line }', text)
-        text = re.sub(r'(\{\\line \})+\{\\par', r'{\\par', text)
+        text = re.sub(r'(\{\\line \}\s*){3,}', r'{\\line }{\\line }', text)
+        text = re.sub(r'(\{\\line \}\s*)+\{\\par', r'{\\par', text)
+
+        # Remove non-breaking spaces
+        text = text.replace(u'\xa0', ' ')
 
         return text
 
@@ -208,6 +214,10 @@ class RTFMLizer(object):
             if end_tag != 'block':
                 text += u'}'
 
+        single_tag_end = SINGLE_TAGS_END.get(tag, None)
+        if single_tag_end:
+            text += single_tag_end
+
         if hasattr(elem, 'tail') and elem.tail != None and elem.tail.strip() != '':
             if 'block' in tag_stack:
                 text += '%s ' % elem.tail

From 3d4ae1920acb29a2cf4768182c9eebd56dd4b76e Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Thu, 28 May 2009 07:12:45 -0400
Subject: [PATCH 07/10] RTF Output: Metadata and more render tweaks.

---
 src/calibre/ebooks/rtf/rtfml.py | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/src/calibre/ebooks/rtf/rtfml.py b/src/calibre/ebooks/rtf/rtfml.py
index 89ec4ea980..78bd96d2d9 100644
--- a/src/calibre/ebooks/rtf/rtfml.py
+++ b/src/calibre/ebooks/rtf/rtfml.py
@@ -17,23 +17,22 @@ import cStringIO
 from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace, \
     OEB_IMAGES
 from calibre.ebooks.oeb.stylizer import Stylizer
+from calibre.ebooks.metadata import authors_to_string
 
 TAGS = {
     'b': '\\b',
     'del': '\\deleted',
-    'h1': '\\b \\par \\pard \\hyphpar \\keep',
-    'h2': '\\b \\par \\pard \\hyphpar \\keep',
-    'h3': '\\b \\par \\pard \\hyphpar \\keep',
-    'h4': '\\b \\par \\pard \\hyphpar \\keep',
-    'h5': '\\b \\par \\pard \\hyphpar \\keep',
-    'h6': '\\b \\par \\pard \\hyphpar \\keep',
-    'li': '\\par \\pard \\hyphpar \\keep \t',
-    'p': '\\par \\pard \\hyphpar \\keep \t',
-    #'ol': '\\pn \\pnrestart \\pnlvlblt',
+    'h1': '\\b \\par \\pard \\hyphpar',
+    'h2': '\\b \\par \\pard \\hyphpar',
+    'h3': '\\b \\par \\pard \\hyphpar',
+    'h4': '\\b \\par \\pard \\hyphpar',
+    'h5': '\\b \\par \\pard \\hyphpar',
+    'h6': '\\b \\par \\pard \\hyphpar',
+    'li': '\\par \\pard \\hyphpar \t',
+    'p': '\\par \\pard \\hyphpar \t',
     'sub': '\\sub',
     'sup': '\\super',
     'u': '\\ul',
-    #'ul': '\\pn \\pnrestart \\pndec',
 }
 
 SINGLE_TAGS = {
@@ -46,7 +45,7 @@ SINGLE_TAGS_END = {
 }
 
 STYLES = [
-    ('display', {'block': '\\par \\pard \\hyphpar \\keep'}),
+    ('display', {'block': '\\par \\pard \\hyphpar'}),
     ('font-weight', {'bold': '\\b', 'bolder': '\\b'}),
     ('font-style', {'italic': '\\i'}),
     ('text-align', {'center': '\\qc', 'left': '\\ql', 'right': '\\qr'}),
@@ -96,7 +95,7 @@ class RTFMLizer(object):
         return output
 
     def header(self):
-        return u'{\\rtf1\\ansi\\ansicpg1252\\deff0\\deflang1033'
+        return u'{\\rtf1{\\info{\\title %s}{\\author %s}}\\ansi\\ansicpg1252\\deff0\\deflang1033' % (self.oeb_book.metadata.title[0].value, authors_to_string([x.value for x in self.oeb_book.metadata.creator]))
 
     def footer(self):
         return ' }'
@@ -145,7 +144,7 @@ class RTFMLizer(object):
         text = re.sub('[ ]{2,}', ' ', text)
 
         text = re.sub(r'(\{\\line \}\s*){3,}', r'{\\line }{\\line }', text)
-        text = re.sub(r'(\{\\line \}\s*)+\{\\par', r'{\\par', text)
+        #text = re.compile(r'(\{\\line \}\s*)+(?P<brackets>}*)\s*\{\\par').sub(lambda mo: r'%s{\\par' % mo.group('brackets'), text)
 
         # Remove non-breaking spaces
         text = text.replace(u'\xa0', ' ')
@@ -180,7 +179,7 @@ class RTFMLizer(object):
             block_start = ''
             block_end = ''
             if 'block' not in tag_stack:
-                block_start = '{\\par \\pard \\hyphpar \\keep '
+                block_start = '{\\par \\pard \\hyphpar '
                 block_end = '}'
             text += '%s SPECIAL_IMAGE-%s-REPLACE_ME %s' % (block_start, src, block_end)
 
@@ -222,6 +221,6 @@ class RTFMLizer(object):
             if 'block' in tag_stack:
                 text += '%s ' % elem.tail
             else:
-                text += '{\\par \\pard \\hyphpar \\keep %s}' % elem.tail
+                text += '{\\par \\pard \\hyphpar %s}' % elem.tail
      
         return text

From 1c2f1b0f006a14695a3cf277f609a2f44c7c1a67 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Thu, 28 May 2009 08:12:50 -0400
Subject: [PATCH 08/10] RTF Input: Fix bug preventing it from running. RTF
 Output: produce files that can be read by RTF input.

---
 src/calibre/ebooks/rtf/input.py  | 2 +-
 src/calibre/ebooks/rtf/output.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py
index fce73668a2..22bb5263d5 100644
--- a/src/calibre/ebooks/rtf/input.py
+++ b/src/calibre/ebooks/rtf/input.py
@@ -70,7 +70,7 @@ class RTFInput(InputFormatPlugin):
         self.log = log
         self.log('Converting RTF to XML...')
         try:
-            xml = self.generate_xml(stream)
+            xml = self.generate_xml(stream.name)
         except RtfInvalidCodeException:
             raise ValueError(_('This RTF file has a feature calibre does not '
             'support. Convert it to HTML first and then try it.'))
diff --git a/src/calibre/ebooks/rtf/output.py b/src/calibre/ebooks/rtf/output.py
index fab7ecad5d..4ef7706762 100644
--- a/src/calibre/ebooks/rtf/output.py
+++ b/src/calibre/ebooks/rtf/output.py
@@ -30,7 +30,7 @@ class RTFOutput(OutputFormatPlugin):
 
         out_stream.seek(0)
         out_stream.truncate()
-        out_stream.write(content.encode('cp1252', 'replace'))
+        out_stream.write(content.encode('ascii', 'replace'))
 
         if close:
             out_stream.close()

From da140445a0a8f61e752065897b8e18a37a52db30 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Thu, 28 May 2009 08:52:53 -0400
Subject: [PATCH 09/10] ml classes: include cover page if present.

---
 src/calibre/ebooks/fb2/fb2ml.py | 6 ++++++
 src/calibre/ebooks/pml/pmlml.py | 6 ++++++
 src/calibre/ebooks/rb/rbml.py   | 6 ++++++
 src/calibre/ebooks/rtf/rtfml.py | 7 +++++++
 4 files changed, 25 insertions(+)

diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py
index 81600b9624..3a5806b143 100644
--- a/src/calibre/ebooks/fb2/fb2ml.py
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@@ -44,6 +44,12 @@ class FB2MLizer(object):
         
     def fb2mlize_spine(self):
         output = self.fb2_header()
+        if 'titlepage' in self.oeb_book.guide:
+            href = self.oeb_book.guide['titlepage'].href
+            item = self.oeb_book.manifest.hrefs[href]
+            if item.spine_position is None:
+                stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
+                output += self.dump_text(item.data.find(XHTML('body')), stylizer)
         for item in self.oeb_book.spine:
             stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
             output += self.dump_text(item.data.find(XHTML('body')), stylizer)
diff --git a/src/calibre/ebooks/pml/pmlml.py b/src/calibre/ebooks/pml/pmlml.py
index 2f2feeb981..ef735a56b1 100644
--- a/src/calibre/ebooks/pml/pmlml.py
+++ b/src/calibre/ebooks/pml/pmlml.py
@@ -78,6 +78,12 @@ class PMLMLizer(object):
         
     def pmlmlize_spine(self):
         output = u''
+        if 'titlepage' in self.oeb_book.guide:
+            href = self.oeb_book.guide['titlepage'].href
+            item = self.oeb_book.manifest.hrefs[href]
+            if item.spine_position is None:
+                stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
+                output += self.dump_text(item.data.find(XHTML('body')), stylizer)
         for item in self.oeb_book.spine:
             stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
             output += self.add_page_anchor(item.href)
diff --git a/src/calibre/ebooks/rb/rbml.py b/src/calibre/ebooks/rb/rbml.py
index f18803e8d0..3563ba2538 100644
--- a/src/calibre/ebooks/rb/rbml.py
+++ b/src/calibre/ebooks/rb/rbml.py
@@ -65,6 +65,12 @@ class RBMLizer(object):
 
     def mlize_spine(self):
         output = u'<HTML><HEAD><TITLE></TITLE></HEAD><BODY>'
+        if 'titlepage' in self.oeb_book.guide:
+            href = self.oeb_book.guide['titlepage'].href
+            item = self.oeb_book.manifest.hrefs[href]
+            if item.spine_position is None:
+                stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
+                output += self.dump_text(item.data.find(XHTML('body')), stylizer)
         for item in self.oeb_book.spine:
             stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
             output += self.add_page_anchor(item.href)
diff --git a/src/calibre/ebooks/rtf/rtfml.py b/src/calibre/ebooks/rtf/rtfml.py
index 78bd96d2d9..3ed855adb8 100644
--- a/src/calibre/ebooks/rtf/rtfml.py
+++ b/src/calibre/ebooks/rtf/rtfml.py
@@ -85,6 +85,13 @@ class RTFMLizer(object):
 
     def mlize_spine(self):
         output = self.header()
+        if 'titlepage' in self.oeb_book.guide:
+            href = self.oeb_book.guide['titlepage'].href
+            item = self.oeb_book.manifest.hrefs[href]
+            if item.spine_position is None:
+                stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
+                output += self.dump_text(item.data.find(XHTML('body')), stylizer)
+                output += '{\\page } '
         for item in self.oeb_book.spine:
             stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
             output += self.dump_text(item.data.find(XHTML('body')), stylizer)

From 08af3996bf071701c96ae6427b14b2d7b6ca33b6 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Thu, 28 May 2009 08:57:29 -0400
Subject: [PATCH 10/10] RTF Output: ensure proper line breaks.

---
 src/calibre/ebooks/rtf/rtfml.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/rtf/rtfml.py b/src/calibre/ebooks/rtf/rtfml.py
index 3ed855adb8..cb8e9af883 100644
--- a/src/calibre/ebooks/rtf/rtfml.py
+++ b/src/calibre/ebooks/rtf/rtfml.py
@@ -112,7 +112,7 @@ class RTFMLizer(object):
             if item.media_type in OEB_IMAGES:
                 src = os.path.basename(item.href)
                 data, width, height = self.image_to_hexstring(item.data)
-                text = text.replace('SPECIAL_IMAGE-%s-REPLACE_ME' % src, '\n\n{\\*\\shppict{\\pict\\picw%i\\pich%i\\jpegblip \n%s}}\n\n' % (width, height, data))
+                text = text.replace('SPECIAL_IMAGE-%s-REPLACE_ME' % src, '\n\n{\\*\\shppict{\\pict\\picw%i\\pich%i\\jpegblip \n%s\n}}\n\n' % (width, height, data))
         return text
 
     def image_to_hexstring(self, data):
@@ -155,6 +155,7 @@ class RTFMLizer(object):
 
         # Remove non-breaking spaces
         text = text.replace(u'\xa0', ' ')
+        text = text.replace('\n\r', '\n')
 
         return text