mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Plugin for RTF input
This commit is contained in:
parent
c7498b0d50
commit
5c5a4d8676
@ -283,6 +283,7 @@ from calibre.ebooks.txt.input import TXTInput
|
||||
from calibre.ebooks.lit.input import LITInput
|
||||
from calibre.ebooks.fb2.input import FB2Input
|
||||
from calibre.ebooks.odt.input import ODTInput
|
||||
from calibre.ebooks.rtf.input import RTFInput
|
||||
from calibre.ebooks.html.input import HTMLInput
|
||||
from calibre.ebooks.oeb.output import OEBOutput
|
||||
from calibre.ebooks.txt.output import TXTOutput
|
||||
@ -291,7 +292,7 @@ from calibre.customize.profiles import input_profiles, output_profiles
|
||||
|
||||
# Builtin plugins that are registered explicitly. RTFInput is the entry added
# for RTF input; the stale pre-change list terminator has been dropped so the
# list literal is closed exactly once.
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDFInput, HTMLInput,
        TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput,
        FB2Input, ODTInput, RTFInput]
# Also register every class visible in this namespace whose name ends in
# 'MetadataReader'. The values are copied into a list before iterating.
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
                                        x.__name__.endswith('MetadataReader')]
|
||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||
|
@ -19,11 +19,6 @@ from calibre.utils.zipfile import ZipFile
|
||||
from calibre.customize.ui import run_plugins_on_preprocess
|
||||
|
||||
|
||||
def rtf2opf(path, tdir, opts):
    """Convert the RTF file at ``path`` inside ``tdir`` and return an OPF path.

    The conversion itself is delegated to ``generate_html``; ``opts`` is
    accepted but not used here.

    :return: ``tdir`` joined with ``'metadata.opf'``.
    """
    # Imported lazily, exactly as before, so the RTF machinery is only loaded
    # when an RTF file is actually converted.
    from calibre.ebooks.lrf.rtf.convert_from import generate_html as rtf_to_html
    rtf_to_html(path, tdir)
    opf_path = os.path.join(tdir, 'metadata.opf')
    return opf_path
|
||||
|
||||
def epub2opf(path, tdir, opts):
|
||||
zf = ZipFile(path)
|
||||
zf.extractall(tdir)
|
||||
@ -42,11 +37,6 @@ def epub2opf(path, tdir, opts):
|
||||
raise ValueError('%s is not a valid EPUB file'%path)
|
||||
return opf
|
||||
|
||||
def odt2epub(path, tdir, opts):
    """Extract the ODT file at ``path`` into ``tdir``.

    Sets ``opts.encoding`` to ``'utf-8'`` as a side effect and returns
    whatever the ``Extract`` callable returns.
    """
    from calibre.ebooks.odt.to_oeb import Extract
    opts.encoding = 'utf-8'
    extractor = Extract()
    return extractor(path, tdir)
|
||||
|
||||
# Lower-case file extensions, without the leading dot.
# NOTE(review): presumably the set of accepted source formats -- confirm
# against the code that consumes this list.
SOURCE_FORMATS = [
    'lit', 'mobi', 'prc', 'azw', 'fb2', 'odt', 'rtf',
    'txt', 'pdf', 'rar', 'zip', 'oebzip', 'htm', 'html', 'epub',
]
|
||||
|
||||
|
@ -1,190 +0,0 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
import os, sys, shutil, logging, glob
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from calibre.ebooks.lrf import option_parser as lrf_option_parser
|
||||
from calibre.ebooks.metadata.meta import get_metadata
|
||||
from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
|
||||
from calibre import setup_cli_handlers
|
||||
from calibre.libwand import convert, WandException
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
|
||||
from calibre.ebooks.lrf.rtf.xsl import xhtml
|
||||
from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
|
||||
from calibre.ptempfile import PersistentTemporaryDirectory
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
from calibre.ebooks.metadata.opf import OPFCreator
|
||||
|
||||
def option_parser():
    """Build the command line parser for the RTF to LRF converter.

    Starts from the generic LRF option parser, given an RTF specific usage
    message, and adds the ``--keep-intermediate-files`` flag (off by default).
    """
    usage = _('''%prog [options] mybook.rtf


%prog converts mybook.rtf to mybook.lrf''')
    parser = lrf_option_parser(usage)
    parser.add_option('--keep-intermediate-files', action='store_true',
                      default=False)
    return parser
|
||||
|
||||
def convert_images(html, logger):
|
||||
wmfs = glob.glob('*.wmf') + glob.glob('*.WMF')
|
||||
for wmf in wmfs:
|
||||
target = os.path.join(os.path.dirname(wmf), os.path.splitext(os.path.basename(wmf))[0]+'.jpg')
|
||||
try:
|
||||
convert(wmf, target)
|
||||
html = html.replace(os.path.basename(wmf), os.path.basename(target))
|
||||
except WandException, err:
|
||||
logger.warning(u'Unable to convert image %s with error: %s'%(wmf, unicode(err)))
|
||||
continue
|
||||
return html
|
||||
|
||||
def process_file(path, options, logger=None):
    """Convert the RTF file at ``path`` to LRF/LRS via an HTML intermediate.

    The RTF is first turned into HTML inside a fresh temporary directory and
    the result is handed to the HTML to LRF converter. Metadata read from the
    RTF fills in ``options.title``, ``options.author``, ``options.category``
    and ``options.freetext`` when those are empty or ``'Unknown'``.

    :param path: path of the RTF file; ``~`` is expanded.
    :param options: parsed options object; it is modified in place. When
        ``options.output`` is empty it defaults to the input's base name with
        a ``.lrs`` or ``.lrf`` extension in the current directory.
    :param logger: logger to use; when ``None`` an ``'rtf2lrf'`` logger is
        configured from ``options.verbose``.

    The temporary directory is removed afterwards unless
    ``options.keep_intermediate_files`` is set, and the working directory is
    always restored.
    """
    if logger is None:
        level = logging.DEBUG if options.verbose else logging.INFO
        logger = logging.getLogger('rtf2lrf')
        setup_cli_handlers(logger, level)
    rtf = os.path.abspath(os.path.expanduser(path))
    f = open(rtf, 'rb')
    try:
        mi = get_metadata(f, 'rtf')
    finally:
        # Release the handle even when metadata extraction fails.
        f.close()
    tdir = PersistentTemporaryDirectory('_rtf2lrf')
    cwd = os.getcwdu()
    try:
        # Run the RTF -> HTML step inside the try block so that a failed
        # conversion still goes through the cleanup below instead of leaving
        # the temporary directory behind.
        html = generate_html(rtf, tdir)
        if not options.output:
            ext = '.lrs' if options.lrs else '.lrf'
            options.output = os.path.abspath(os.path.basename(os.path.splitext(path)[0]) + ext)
        options.output = os.path.abspath(os.path.expanduser(options.output))
        if not mi.title:
            mi.title = os.path.splitext(os.path.basename(rtf))[0]
        if (not options.title or options.title == 'Unknown'):
            options.title = mi.title
        if (not options.author or options.author == 'Unknown') and mi.author:
            options.author = mi.author
        if (not options.category or options.category == 'Unknown') and mi.category:
            options.category = mi.category
        if (not options.freetext or options.freetext == 'Unknown') and mi.comments:
            options.freetext = mi.comments
        os.chdir(tdir)
        html_process_file(html, options, logger)
    finally:
        os.chdir(cwd)
        if hasattr(options, 'keep_intermediate_files') and options.keep_intermediate_files:
            logger.debug('Intermediate files in '+ tdir)
        else:
            shutil.rmtree(tdir)
|
||||
|
||||
def main(args=sys.argv, logger=None):
|
||||
parser = option_parser()
|
||||
options, args = parser.parse_args(args)
|
||||
if len(args) != 2:
|
||||
parser.print_help()
|
||||
print
|
||||
print 'No rtf file specified'
|
||||
return 1
|
||||
process_file(args[1], options, logger)
|
||||
return 0
|
||||
|
||||
|
||||
def generate_xml(rtfpath, tdir):
    """Run rtf2xml on ``rtfpath`` and write the result to ``tdir``/index.xml.

    The parser is run with ``tdir`` as the working directory; the previous
    working directory is always restored.

    :param rtfpath: path of the RTF file to parse.
    :param tdir: existing directory that receives ``index.xml``.
    :return: absolute path of the generated ``index.xml``.
    """
    from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
    # Resolve both paths *before* changing directory. Making them absolute
    # after os.chdir(tdir) would interpret a relative path against tdir
    # instead of the caller's working directory.
    rtfpath = os.path.abspath(rtfpath)
    ofile = os.path.abspath(os.path.join(tdir, 'index.xml'))
    cwd = os.getcwdu()
    os.chdir(tdir)
    try:
        parser = ParseRtf(
            in_file    = rtfpath,
            out_file   = ofile,
            # Convert symbol fonts to unicode equivalents. Default
            # is 1
            convert_symbol = 1,

            # Convert Zapf fonts to unicode equivalents. Default
            # is 1.
            convert_zapf = 1,

            # Convert Wingding fonts to unicode equivalents.
            # Default is 1.
            convert_wingdings = 1,

            # Convert RTF caps to real caps.
            # Default is 1.
            convert_caps = 1,

            # Indent resulting XML.
            # Default is 0 (no indent).
            indent = 1,

            # Form lists from RTF. Default is 1.
            form_lists = 1,

            # Convert headings to sections. Default is 0.
            headings_to_sections = 1,

            # Group paragraphs with the same style name. Default is 1.
            group_styles = 1,

            # Group borders. Default is 1.
            group_borders = 1,

            # Write or do not write paragraphs. Default is 0.
            empty_paragraphs = 0,
        )
        parser.parse_rtf()
    finally:
        os.chdir(cwd)
    return ofile
|
||||
|
||||
|
||||
def generate_html(rtfpath, tdir):
|
||||
print 'Converting RTF to XML...'
|
||||
rtfpath = os.path.abspath(rtfpath)
|
||||
try:
|
||||
xml = generate_xml(rtfpath, tdir)
|
||||
except RtfInvalidCodeException:
|
||||
raise Exception(_('This RTF file has a feature calibre does not support. Convert it to HTML and then convert it.'))
|
||||
tdir = os.path.dirname(xml)
|
||||
cwd = os.getcwdu()
|
||||
os.chdir(tdir)
|
||||
try:
|
||||
print 'Parsing XML...'
|
||||
parser = etree.XMLParser(recover=True, no_network=True)
|
||||
try:
|
||||
doc = etree.parse(xml, parser)
|
||||
except:
|
||||
raise
|
||||
print 'Parsing failed. Trying to clean up XML...'
|
||||
soup = BeautifulStoneSoup(open(xml, 'rb').read())
|
||||
doc = etree.fromstring(str(soup))
|
||||
print 'Converting XML to HTML...'
|
||||
styledoc = etree.fromstring(xhtml)
|
||||
|
||||
transform = etree.XSLT(styledoc)
|
||||
result = transform(doc)
|
||||
tdir = os.path.dirname(xml)
|
||||
html = os.path.join(tdir, 'index.html')
|
||||
f = open(html, 'wb')
|
||||
res = transform.tostring(result)
|
||||
res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
|
||||
f.write(res)
|
||||
f.close()
|
||||
try:
|
||||
mi = get_metadata(open(rtfpath, 'rb'), 'rtf')
|
||||
except:
|
||||
mi = MetaInformation(None, None)
|
||||
if not mi.title:
|
||||
mi.title = os.path.splitext(os.path.basename(rtfpath))[0]
|
||||
if not mi.authors:
|
||||
mi.authors = [_('Unknown')]
|
||||
opf = OPFCreator(tdir, mi)
|
||||
opf.create_manifest([('index.html', None)])
|
||||
opf.create_spine(['index.html'])
|
||||
opf.render(open('metadata.opf', 'wb'))
|
||||
finally:
|
||||
os.chdir(cwd)
|
||||
return html
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
||||
|
101
src/calibre/ebooks/rtf/input.py
Normal file
101
src/calibre/ebooks/rtf/input.py
Normal file
@ -0,0 +1,101 @@
|
||||
from __future__ import with_statement
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import os
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from calibre.customize.conversion import InputFormatPlugin
|
||||
|
||||
class RTFInput(InputFormatPlugin):
    """Input plugin that converts RTF to XHTML plus an OPF.

    The RTF is parsed to XML with rtf2xml, transformed to XHTML with an XSLT
    stylesheet, and described by a ``metadata.opf``. All files are written to
    the current working directory.
    """

    name        = 'RTF Input'
    author      = 'Kovid Goyal'
    description = 'Convert RTF files to HTML'
    file_types  = set(['rtf'])

    def generate_xml(self, stream):
        """Parse the RTF in ``stream`` and return the resulting XML text.

        The parser writes to a scratch file named ``out.xml`` in the current
        working directory; the file is read back and then deleted.
        """
        from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
        ofile = 'out.xml'
        parser = ParseRtf(
            in_file    = stream,
            out_file   = ofile,
            # Convert symbol fonts to unicode equivalents. Default
            # is 1
            convert_symbol = 1,

            # Convert Zapf fonts to unicode equivalents. Default
            # is 1.
            convert_zapf = 1,

            # Convert Wingding fonts to unicode equivalents.
            # Default is 1.
            convert_wingdings = 1,

            # Convert RTF caps to real caps.
            # Default is 1.
            convert_caps = 1,

            # Indent resulting XML.
            # Default is 0 (no indent).
            indent = 1,

            # Form lists from RTF. Default is 1.
            form_lists = 1,

            # Convert headings to sections. Default is 0.
            headings_to_sections = 1,

            # Group paragraphs with the same style name. Default is 1.
            group_styles = 1,

            # Group borders. Default is 1.
            group_borders = 1,

            # Write or do not write paragraphs. Default is 0.
            empty_paragraphs = 0,
        )
        parser.parse_rtf()
        # Close the scratch file deterministically before deleting it rather
        # than relying on garbage collection of the file object.
        with open(ofile) as f:
            ans = f.read()
        os.remove(ofile)
        return ans

    def convert(self, stream, options, file_ext, log,
                accelerators):
        """Convert the RTF in ``stream`` and return the path of the OPF.

        Writes ``index.xhtml`` and ``metadata.opf`` to the current working
        directory. Missing title or authors are replaced by ``'Unknown'``.

        :param stream: seekable file-like object containing the RTF; it is
            rewound with ``seek(0)`` before the metadata is read.
        :param log: callable used for progress messages; also stored on
            ``self.log``.
        :return: absolute path of ``metadata.opf``.
        :raises ValueError: when the RTF uses a feature the parser rejects
            (``RtfInvalidCodeException``).

        ``options``, ``file_ext`` and ``accelerators`` are not used here.
        """
        from calibre.ebooks.rtf.xsl import xhtml
        from calibre.ebooks.metadata.meta import get_metadata
        from calibre.ebooks.metadata.opf import OPFCreator
        from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
        self.log = log
        self.log('Converting RTF to XML...')
        try:
            xml = self.generate_xml(stream)
        except RtfInvalidCodeException:
            raise ValueError(_('This RTF file has a feature calibre does not '
            'support. Convert it to HTML first and then try it.'))
        self.log('Parsing XML...')
        parser = etree.XMLParser(recover=True, no_network=True)
        doc = etree.fromstring(xml, parser=parser)
        self.log('Converting XML to HTML...')
        styledoc = etree.fromstring(xhtml)

        transform = etree.XSLT(styledoc)
        result = transform(doc)
        html = 'index.xhtml'
        with open(html, 'wb') as f:
            res = transform.tostring(result)
            # Only the first 100 characters (where the root element's
            # namespace declarations are expected) are rewritten, turning
            # the prefixed xmlns:html declaration into a default namespace.
            res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
            f.write(res)
        stream.seek(0)
        mi = get_metadata(stream, 'rtf')
        if not mi.title:
            mi.title = _('Unknown')
        if not mi.authors:
            mi.authors = [_('Unknown')]
        opf = OPFCreator(os.getcwd(), mi)
        opf.create_manifest([('index.xhtml', None)])
        opf.create_spine(['index.xhtml'])
        # Make sure the OPF is flushed and closed before its path is handed
        # back to the caller.
        with open('metadata.opf', 'wb') as opf_file:
            opf.render(opf_file)
        return os.path.abspath('metadata.opf')
|
||||
|
@ -149,9 +149,10 @@ class ParseRtf:
|
||||
self.__group_borders = group_borders
|
||||
self.__empty_paragraphs = empty_paragraphs
|
||||
self.__no_dtd = no_dtd
|
||||
|
||||
|
||||
def __check_file(self, the_file, type):
|
||||
"""Check to see if files exist"""
|
||||
if hasattr(the_file, 'read'): return
|
||||
if the_file == None:
|
||||
if type == "file_to_parse":
|
||||
message = "You must provide a file for the script to work"
|
||||
@ -545,13 +546,12 @@ class ParseRtf:
|
||||
def __make_temp_file(self,file):
    """Make a temporary file to parse

    Copies ``file`` -- either a path or an object with a ``read`` method --
    into ``rtf_write_file`` in the current working directory and returns that
    name. A stream supplied by the caller is left open (and is not rewound);
    only a file opened here is closed.
    """
    write_file="rtf_write_file"
    opened_here = not hasattr(file, 'read')
    read_obj = open(file,'r') if opened_here else file
    try:
        write_obj = open(write_file, 'w')
        try:
            line = "dummy"
            while line:
                line = read_obj.read(1000)
                write_obj.write(line )
        finally:
            write_obj.close()
    finally:
        # The caller owns a stream it passed in and may still need it (for
        # example to seek back and read it again), so never close it here.
        if opened_here:
            read_obj.close()
    return write_file
|
||||
"""
|
||||
|
@ -58,10 +58,12 @@ class Pict:
|
||||
return line[18:]
|
||||
def __make_dir(self):
|
||||
""" Make a dirctory to put the image data in"""
|
||||
base_name = os.path.basename(self.__orig_file)
|
||||
base_name = os.path.basename(getattr(self.__orig_file, 'name',
|
||||
self.__orig_file))
|
||||
base_name = os.path.splitext(base_name)[0]
|
||||
if self.__out_file:
|
||||
dir_name = os.path.dirname(self.__out_file)
|
||||
dir_name = os.path.dirname(getattr(self.__out_file, 'name',
|
||||
self.__out_file))
|
||||
else:
|
||||
dir_name = os.path.dirname(self.__orig_file)
|
||||
# self.__output_to_file_func()
|
||||
|
Loading…
x
Reference in New Issue
Block a user