RTF Input: Add option to ignore WMF images

RTF Input: Add option to ignore WMF images instead of replacing them
with a placeholder. Fixes #1213599 [RTF input conversion creates too large an image when failing to convert .wmf file](https://bugs.launchpad.net/calibre/+bug/1213599)
This commit is contained in:
Kovid Goyal 2013-08-19 09:43:17 +05:30
parent 6c7ff4e4e6
commit 079c685f1a
3 changed files with 101 additions and 19 deletions

View File

@ -4,7 +4,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import os, glob, re, textwrap
from calibre.customize.conversion import InputFormatPlugin
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
border_style_map = {
'single' : 'solid',
@ -45,6 +45,11 @@ class RTFInput(InputFormatPlugin):
description = 'Convert RTF files to HTML'
file_types = set(['rtf'])
options = {
OptionRecommendation(name='ignore_wmf', recommended_value=False,
help=_('Ignore WMF images instead of replacing them with a placeholder image.')),
}
def generate_xml(self, stream):
from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
ofile = u'dataxml.xml'
@ -59,51 +64,51 @@ class RTFInput(InputFormatPlugin):
except:
self.log.warn('Impossible to run RTFParser in debug mode')
parser = ParseRtf(
in_file = stream,
out_file = ofile,
in_file=stream,
out_file=ofile,
# Convert symbol fonts to unicode equivalents. Default
# is 1
convert_symbol = 1,
convert_symbol=1,
# Convert Zapf fonts to unicode equivalents. Default
# is 1.
convert_zapf = 1,
convert_zapf=1,
# Convert Wingding fonts to unicode equivalents.
# Default is 1.
convert_wingdings = 1,
convert_wingdings=1,
# Convert RTF caps to real caps.
# Default is 1.
convert_caps = 1,
convert_caps=1,
# Indent resulting XML.
# Default is 0 (no indent).
indent = indent_out,
indent=indent_out,
# Form lists from RTF. Default is 1.
form_lists = 1,
form_lists=1,
# Convert headings to sections. Default is 0.
headings_to_sections = 1,
headings_to_sections=1,
# Group paragraphs with the same style name. Default is 1.
group_styles = 1,
group_styles=1,
# Group borders. Default is 1.
group_borders = 1,
group_borders=1,
# Write or do not write paragraphs. Default is 0.
empty_paragraphs = 1,
empty_paragraphs=1,
# Debug
deb_dir = debug_dir,
deb_dir=debug_dir,
# Default encoding
default_encoding = getattr(self.opts, 'input_encoding', 'cp1252') or 'cp1252',
default_encoding=getattr(self.opts, 'input_encoding', 'cp1252') or 'cp1252',
# Run level
run_level = run_lev,
run_level=run_lev,
)
parser.parse_rtf()
with open(ofile, 'rb') as f:
@ -156,6 +161,9 @@ class RTFInput(InputFormatPlugin):
return self.replace_wmf(name)
def replace_wmf(self, name):
if self.opts.ignore_wmf:
os.remove(name)
return '__REMOVE_ME__'
from calibre.ebooks import calibre_cover
if self.default_img is None:
self.default_img = calibre_cover('Conversion of WMF images is not supported',
@ -177,7 +185,6 @@ class RTFInput(InputFormatPlugin):
f.write(data)
return name
def write_inline_css(self, ic, border_styles):
font_size_classes = ['span.fs%d { font-size: %spt }'%(i, x) for i, x in
enumerate(ic.font_sizes)]
@ -273,14 +280,14 @@ class RTFInput(InputFormatPlugin):
self.log('Converting XML to HTML...')
inline_class = InlineClass(self.log)
styledoc = etree.fromstring(P('templates/rtf.xsl', data=True))
extensions = { ('calibre', 'inline-class') : inline_class }
extensions = {('calibre', 'inline-class') : inline_class}
transform = etree.XSLT(styledoc, extensions=extensions)
result = transform(doc)
html = u'index.xhtml'
with open(html, 'wb') as f:
res = transform.tostring(result)
# res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
#clean multiple \n
# clean multiple \n
res = re.sub('\n+', '\n', res)
# Replace newlines inserted by the 'empty_paragraphs' option in rtf2xml with html blank lines
# res = re.sub('\s*<body>', '<body>', res)
@ -300,4 +307,15 @@ class RTFInput(InputFormatPlugin):
opf.render(open(u'metadata.opf', 'wb'))
return os.path.abspath(u'metadata.opf')
def postprocess_book(self, oeb, opts, log):
    '''
    Remove every <img> whose src is the ``__REMOVE_ME__`` marker (the value
    replace_wmf() returns when the ignore_wmf option is set) from all spine
    items of ``oeb``.

    Any tail text carried by a removed image is kept: it is appended to the
    tail of the preceding sibling, or to the parent's text when the image
    was the first child. ``opts`` and ``log`` are not used here.
    '''
    marker_query = '//*[local-name()="img" and @src="__REMOVE_ME__"]'
    for spine_item in oeb.spine:
        for placeholder in spine_item.data.xpath(marker_query):
            parent = placeholder.getparent()
            # The position must be read before the element is detached.
            position = parent.index(placeholder)
            parent.remove(placeholder)
            trailing = placeholder.tail
            if not trailing:
                continue
            if position == 0:
                # No earlier sibling: the text joins the parent's own text.
                parent.text = (parent.text or '') + trailing
            else:
                # After removal, position - 1 is the former previous sibling.
                previous = parent[position - 1]
                previous.tail = (previous.tail or '') + trailing

View File

@ -0,0 +1,23 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
from calibre.gui2.convert.rtf_input_ui import Ui_Form
from calibre.gui2.convert import Widget
class PluginWidget(Widget, Ui_Form):
    '''
    GUI widget for the options specific to RTF input.
    '''

    # Descriptive class attributes: title, help text, commit name and icon.
    TITLE = _('RTF Input')
    HELP = _('Options specific to') + ' RTF ' + _('input')
    COMMIT_NAME = 'rtf_input'
    ICON = I('mimetypes/rtf.png')

    def __init__(self, parent, get_option, get_help, db=None, book_id=None):
        '''
        Initialize the base Widget with the single ``ignore_wmf`` option
        and then load the option values via initialize_options().
        '''
        option_names = ['ignore_wmf']
        Widget.__init__(self, parent, option_names)
        self.initialize_options(get_option, get_help, db, book_id)

View File

@ -0,0 +1,41 @@
<?xml version="1.0" encoding="UTF-8"?>
<ui version="4.0">
<class>Form</class>
<widget class="QWidget" name="Form">
<property name="geometry">
<rect>
<x>0</x>
<y>0</y>
<width>518</width>
<height>353</height>
</rect>
</property>
<property name="windowTitle">
<string>Form</string>
</property>
<layout class="QVBoxLayout" name="verticalLayout_3">
<item>
<widget class="QCheckBox" name="opt_ignore_wmf">
<property name="text">
<string>Ignore &amp;WMF images in the RTF file</string>
</property>
</widget>
</item>
<item>
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>213</height>
</size>
</property>
</spacer>
</item>
</layout>
</widget>
<resources/>
<connections/>
</ui>