mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Plugin for RTF input
This commit is contained in:
parent
c7498b0d50
commit
5c5a4d8676
@ -283,6 +283,7 @@ from calibre.ebooks.txt.input import TXTInput
|
|||||||
from calibre.ebooks.lit.input import LITInput
|
from calibre.ebooks.lit.input import LITInput
|
||||||
from calibre.ebooks.fb2.input import FB2Input
|
from calibre.ebooks.fb2.input import FB2Input
|
||||||
from calibre.ebooks.odt.input import ODTInput
|
from calibre.ebooks.odt.input import ODTInput
|
||||||
|
from calibre.ebooks.rtf.input import RTFInput
|
||||||
from calibre.ebooks.html.input import HTMLInput
|
from calibre.ebooks.html.input import HTMLInput
|
||||||
from calibre.ebooks.oeb.output import OEBOutput
|
from calibre.ebooks.oeb.output import OEBOutput
|
||||||
from calibre.ebooks.txt.output import TXTOutput
|
from calibre.ebooks.txt.output import TXTOutput
|
||||||
@ -291,7 +292,7 @@ from calibre.customize.profiles import input_profiles, output_profiles
|
|||||||
|
|
||||||
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDFInput, HTMLInput,
|
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDFInput, HTMLInput,
|
||||||
TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput,
|
TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput,
|
||||||
FB2Input, ODTInput]
|
FB2Input, ODTInput, RTFInput]
|
||||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||||
x.__name__.endswith('MetadataReader')]
|
x.__name__.endswith('MetadataReader')]
|
||||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||||
|
@ -19,11 +19,6 @@ from calibre.utils.zipfile import ZipFile
|
|||||||
from calibre.customize.ui import run_plugins_on_preprocess
|
from calibre.customize.ui import run_plugins_on_preprocess
|
||||||
|
|
||||||
|
|
||||||
def rtf2opf(path, tdir, opts):
    """Convert the RTF file at ``path`` to HTML and an OPF inside ``tdir``.

    ``opts`` is accepted but not used by this helper.

    :return: the path ``<tdir>/metadata.opf``
    """
    # Function-scope import, as in the original helper.
    from calibre.ebooks.lrf.rtf import convert_from
    convert_from.generate_html(path, tdir)
    opf_path = os.path.join(tdir, 'metadata.opf')
    return opf_path
|
|
||||||
|
|
||||||
def epub2opf(path, tdir, opts):
|
def epub2opf(path, tdir, opts):
|
||||||
zf = ZipFile(path)
|
zf = ZipFile(path)
|
||||||
zf.extractall(tdir)
|
zf.extractall(tdir)
|
||||||
@ -42,11 +37,6 @@ def epub2opf(path, tdir, opts):
|
|||||||
raise ValueError('%s is not a valid EPUB file'%path)
|
raise ValueError('%s is not a valid EPUB file'%path)
|
||||||
return opf
|
return opf
|
||||||
|
|
||||||
def odt2epub(path, tdir, opts):
    """Run the ODT extractor on ``path`` with ``tdir`` as its second argument.

    Side effect: ``opts.encoding`` is forced to ``'utf-8'`` before the
    extractor is created.

    :return: whatever the ``Extract`` callable returns
    """
    from calibre.ebooks.odt import to_oeb
    setattr(opts, 'encoding', 'utf-8')
    extractor = to_oeb.Extract()
    return extractor(path, tdir)
|
|
||||||
|
|
||||||
SOURCE_FORMATS = ['lit', 'mobi', 'prc', 'azw', 'fb2', 'odt', 'rtf',
|
SOURCE_FORMATS = ['lit', 'mobi', 'prc', 'azw', 'fb2', 'odt', 'rtf',
|
||||||
'txt', 'pdf', 'rar', 'zip', 'oebzip', 'htm', 'html', 'epub']
|
'txt', 'pdf', 'rar', 'zip', 'oebzip', 'htm', 'html', 'epub']
|
||||||
|
|
||||||
|
@ -1,190 +0,0 @@
|
|||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
|
||||||
import os, sys, shutil, logging, glob
|
|
||||||
|
|
||||||
from lxml import etree
|
|
||||||
|
|
||||||
from calibre.ebooks.lrf import option_parser as lrf_option_parser
|
|
||||||
from calibre.ebooks.metadata.meta import get_metadata
|
|
||||||
from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
|
|
||||||
from calibre import setup_cli_handlers
|
|
||||||
from calibre.libwand import convert, WandException
|
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
|
|
||||||
from calibre.ebooks.lrf.rtf.xsl import xhtml
|
|
||||||
from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
|
|
||||||
from calibre.ptempfile import PersistentTemporaryDirectory
|
|
||||||
from calibre.ebooks.metadata import MetaInformation
|
|
||||||
from calibre.ebooks.metadata.opf import OPFCreator
|
|
||||||
|
|
||||||
def option_parser():
    """Build the command line option parser for the RTF to LRF converter.

    Starts from the shared LRF option parser (given a translated usage
    string) and adds the boolean ``--keep-intermediate-files`` flag, which
    is off by default.
    """
    usage = _('''%prog [options] mybook.rtf


%prog converts mybook.rtf to mybook.lrf''')
    rtf_parser = lrf_option_parser(usage)
    rtf_parser.add_option('--keep-intermediate-files', default=False,
                          action='store_true')
    return rtf_parser
|
|
||||||
|
|
||||||
def convert_images(html, logger):
    """Convert the WMF images in the current directory to JPEG.

    Every ``*.wmf``/``*.WMF`` file in the current working directory is
    converted to a ``.jpg`` with the same base name, and each occurrence of
    the WMF file name in ``html`` is replaced by the JPEG file name.  An
    image that fails to convert is reported through ``logger.warning`` and
    its references in ``html`` are left untouched.

    :param html: markup (a string) referring to the images by file name
    :param logger: object providing a ``warning`` method
    :return: ``html`` with the names of the converted images substituted
    """
    # On case-insensitive file systems both patterns match the same files;
    # de-duplicate so that no image is converted twice.
    wmfs = sorted(set(glob.glob('*.wmf') + glob.glob('*.WMF')))
    for wmf in wmfs:
        wmf_name = os.path.basename(wmf)
        jpg_name = os.path.splitext(wmf_name)[0] + '.jpg'
        target = os.path.join(os.path.dirname(wmf), jpg_name)
        try:
            convert(wmf, target)
        except WandException as err:
            logger.warning(u'Unable to convert image %s with error: %s'%(wmf, unicode(err)))
            continue
        # Only rewrite references once the JPEG actually exists.
        html = html.replace(wmf_name, jpg_name)
    return html
|
|
||||||
|
|
||||||
def process_file(path, options, logger=None):
    """Convert the RTF file at ``path`` to LRF/LRS.

    The RTF is first turned into HTML inside a fresh temporary directory and
    the HTML is then handed to the HTML converter.  Missing or ``'Unknown'``
    title/author/category/freetext options are filled in from the metadata
    read from the RTF file.  ``options.output`` defaults to the input's base
    name with an ``.lrs`` or ``.lrf`` extension in the current directory.

    :param path: path of the RTF file (``~`` is expanded)
    :param options: parsed options object; it is modified in place
    :param logger: logger to use; when ``None`` an ``rtf2lrf`` logger is set
        up at DEBUG (``options.verbose``) or INFO level
    """
    if logger is None:
        level = logging.DEBUG if options.verbose else logging.INFO
        logger = logging.getLogger('rtf2lrf')
        setup_cli_handlers(logger, level)
    rtf = os.path.abspath(os.path.expanduser(path))
    f = open(rtf, 'rb')
    try:
        # Close the handle even when metadata extraction fails.
        mi = get_metadata(f, 'rtf')
    finally:
        f.close()
    tdir = PersistentTemporaryDirectory('_rtf2lrf')
    cwd = os.getcwdu()
    try:
        # Inside the try block so the temporary directory is cleaned up
        # when the RTF -> HTML step itself fails.
        html = generate_html(rtf, tdir)
        if not options.output:
            ext = '.lrs' if options.lrs else '.lrf'
            options.output = os.path.abspath(os.path.basename(os.path.splitext(path)[0]) + ext)
        options.output = os.path.abspath(os.path.expanduser(options.output))
        if not mi.title:
            mi.title = os.path.splitext(os.path.basename(rtf))[0]
        if (not options.title or options.title == 'Unknown'):
            options.title = mi.title
        if (not options.author or options.author == 'Unknown') and mi.author:
            options.author = mi.author
        if (not options.category or options.category == 'Unknown') and mi.category:
            options.category = mi.category
        if (not options.freetext or options.freetext == 'Unknown') and mi.comments:
            options.freetext = mi.comments
        # The HTML converter is run from inside the temporary directory.
        os.chdir(tdir)
        html_process_file(html, options, logger)
    finally:
        os.chdir(cwd)
        if getattr(options, 'keep_intermediate_files', False):
            logger.debug('Intermediate files in '+ tdir)
        else:
            shutil.rmtree(tdir)
|
|
||||||
|
|
||||||
def main(args=sys.argv, logger=None):
    """Command line entry point for the RTF converter.

    ``args`` is the full argument vector, program name first.  When exactly
    one positional argument (the RTF file) follows the program name the file
    is converted and 0 is returned; otherwise the help text and an error
    line are printed and 1 is returned.
    """
    parser = option_parser()
    options, positional = parser.parse_args(args)
    if len(positional) == 2:
        process_file(positional[1], options, logger)
        return 0
    parser.print_help()
    print('')
    print('No rtf file specified')
    return 1
|
|
||||||
|
|
||||||
|
|
||||||
def generate_xml(rtfpath, tdir):
    """Convert the RTF file ``rtfpath`` to XML using rtf2xml.

    The parser is run with ``tdir`` as the working directory; the previous
    working directory is restored afterwards, even on failure.

    :param rtfpath: path of the RTF file to convert
    :param tdir: directory in which ``index.xml`` is written
    :return: absolute path of the generated ``index.xml``
    """
    from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
    # Resolve both paths BEFORE changing directory: once inside tdir a
    # relative path would be interpreted against the wrong directory.
    rtfpath = os.path.abspath(rtfpath)
    tdir = os.path.abspath(tdir)
    ofile = os.path.join(tdir, 'index.xml')
    cwd = os.getcwdu()
    os.chdir(tdir)
    try:
        parser = ParseRtf(
            in_file = rtfpath,
            out_file = ofile,
            # Convert symbol fonts to unicode equivalents. Default
            # is 1
            convert_symbol = 1,

            # Convert Zapf fonts to unicode equivalents. Default
            # is 1.
            convert_zapf = 1,

            # Convert Wingding fonts to unicode equivalents.
            # Default is 1.
            convert_wingdings = 1,

            # Convert RTF caps to real caps.
            # Default is 1.
            convert_caps = 1,

            # Indent resulting XML.
            # Default is 0 (no indent).
            indent = 1,

            # Form lists from RTF. Default is 1.
            form_lists = 1,

            # Convert headings to sections. Default is 0.
            headings_to_sections = 1,

            # Group paragraphs with the same style name. Default is 1.
            group_styles = 1,

            # Group borders. Default is 1.
            group_borders = 1,

            # Write or do not write paragraphs. Default is 0.
            empty_paragraphs = 0,
        )
        parser.parse_rtf()
    finally:
        os.chdir(cwd)
    return ofile
|
|
||||||
|
|
||||||
|
|
||||||
def generate_html(rtfpath, tdir):
    """Convert the RTF file ``rtfpath`` to ``index.html`` plus ``metadata.opf``.

    The RTF is converted to XML with ``generate_xml``, the XML is run
    through the ``xhtml`` XSLT stylesheet, and the result is written to
    ``index.html`` in the directory holding the XML.  An OPF listing that
    single HTML file is written next to it as ``metadata.opf``.  The working
    directory is changed for the duration of the conversion and restored
    afterwards.

    :param rtfpath: path of the RTF file
    :param tdir: directory handed to ``generate_xml`` for its output
    :return: path of the generated ``index.html``
    :raises Exception: when the RTF parser reports an unsupported feature;
        XML parse errors propagate unchanged
    """
    print('Converting RTF to XML...')
    rtfpath = os.path.abspath(rtfpath)
    try:
        xml = generate_xml(rtfpath, tdir)
    except RtfInvalidCodeException:
        raise Exception(_('This RTF file has a feature calibre does not support. Convert it to HTML and then convert it.'))
    tdir = os.path.dirname(xml)
    cwd = os.getcwdu()
    os.chdir(tdir)
    try:
        print('Parsing XML...')
        parser = etree.XMLParser(recover=True, no_network=True)
        # Parse failures propagate to the caller.  The original code had a
        # clean-up fallback here that could never run because it followed
        # an unconditional re-raise.
        doc = etree.parse(xml, parser)
        print('Converting XML to HTML...')
        styledoc = etree.fromstring(xhtml)

        transform = etree.XSLT(styledoc)
        result = transform(doc)
        html = os.path.join(tdir, 'index.html')
        res = transform.tostring(result)
        # Rewrite the prefixed namespace declaration near the start of the
        # document into a default namespace declaration.
        res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
        f = open(html, 'wb')
        try:
            f.write(res)
        finally:
            f.close()
        try:
            stream = open(rtfpath, 'rb')
            try:
                mi = get_metadata(stream, 'rtf')
            finally:
                stream.close()
        except Exception:
            # Metadata is best effort; fall back to an empty record.
            mi = MetaInformation(None, None)
        if not mi.title:
            mi.title = os.path.splitext(os.path.basename(rtfpath))[0]
        if not mi.authors:
            mi.authors = [_('Unknown')]
        opf = OPFCreator(tdir, mi)
        opf.create_manifest([('index.html', None)])
        opf.create_spine(['index.html'])
        # Relative name: the working directory is tdir at this point.
        opf_file = open('metadata.opf', 'wb')
        try:
            opf.render(opf_file)
        finally:
            opf_file.close()
    finally:
        os.chdir(cwd)
    return html
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
sys.exit(main())
|
|
||||||
|
|
101
src/calibre/ebooks/rtf/input.py
Normal file
101
src/calibre/ebooks/rtf/input.py
Normal file
@ -0,0 +1,101 @@
|
|||||||
|
from __future__ import with_statement
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
|
from calibre.customize.conversion import InputFormatPlugin
|
||||||
|
|
||||||
|
class RTFInput(InputFormatPlugin):
    """Input plugin that converts RTF files to XHTML plus an OPF.

    All output files (``index.xhtml``, ``metadata.opf`` and the transient
    ``out.xml``) are written relative to the current working directory.
    """

    name        = 'RTF Input'
    author      = 'Kovid Goyal'
    description = 'Convert RTF files to HTML'
    file_types  = set(['rtf'])

    def generate_xml(self, stream):
        """Run rtf2xml over ``stream`` and return the produced XML text.

        The parser writes to a temporary ``out.xml`` in the current
        directory; that file is removed again before returning, also when
        parsing fails.
        """
        from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
        ofile = 'out.xml'
        parser = ParseRtf(
            in_file = stream,
            out_file = ofile,
            # Convert symbol fonts to unicode equivalents. Default
            # is 1
            convert_symbol = 1,

            # Convert Zapf fonts to unicode equivalents. Default
            # is 1.
            convert_zapf = 1,

            # Convert Wingding fonts to unicode equivalents.
            # Default is 1.
            convert_wingdings = 1,

            # Convert RTF caps to real caps.
            # Default is 1.
            convert_caps = 1,

            # Indent resulting XML.
            # Default is 0 (no indent).
            indent = 1,

            # Form lists from RTF. Default is 1.
            form_lists = 1,

            # Convert headings to sections. Default is 0.
            headings_to_sections = 1,

            # Group paragraphs with the same style name. Default is 1.
            group_styles = 1,

            # Group borders. Default is 1.
            group_borders = 1,

            # Write or do not write paragraphs. Default is 0.
            empty_paragraphs = 0,
        )
        try:
            parser.parse_rtf()
            with open(ofile) as f:
                return f.read()
        finally:
            # Never leave the intermediate file behind; it may not exist
            # if the parser failed before writing anything.
            if os.path.exists(ofile):
                os.remove(ofile)

    def convert(self, stream, options, file_ext, log,
                accelerators):
        """Convert the RTF in ``stream`` to ``index.xhtml`` and an OPF.

        :param stream: seekable file-like object containing the RTF data
        :param log: callable used for progress messages
        :return: absolute path of the written ``metadata.opf``
        :raises ValueError: when the RTF parser reports an unsupported
            feature

        ``options``, ``file_ext`` and ``accelerators`` are not used here.
        """
        from calibre.ebooks.rtf.xsl import xhtml
        from calibre.ebooks.metadata.meta import get_metadata
        from calibre.ebooks.metadata.opf import OPFCreator
        from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
        self.log = log
        self.log('Converting RTF to XML...')
        try:
            xml = self.generate_xml(stream)
        except RtfInvalidCodeException:
            raise ValueError(_('This RTF file has a feature calibre does not '
            'support. Convert it to HTML first and then try it.'))
        self.log('Parsing XML...')
        parser = etree.XMLParser(recover=True, no_network=True)
        doc = etree.fromstring(xml, parser=parser)
        self.log('Converting XML to HTML...')
        styledoc = etree.fromstring(xhtml)

        transform = etree.XSLT(styledoc)
        result = transform(doc)
        html = 'index.xhtml'
        with open(html, 'wb') as f:
            res = transform.tostring(result)
            # Rewrite the prefixed namespace declaration near the start of
            # the document into a default namespace declaration.
            res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
            f.write(res)
        # The stream was consumed by the RTF parser; rewind for metadata.
        stream.seek(0)
        mi = get_metadata(stream, 'rtf')
        if not mi.title:
            mi.title = _('Unknown')
        if not mi.authors:
            mi.authors = [_('Unknown')]
        opf = OPFCreator(os.getcwd(), mi)
        opf.create_manifest([('index.xhtml', None)])
        opf.create_spine(['index.xhtml'])
        with open('metadata.opf', 'wb') as opf_file:
            opf.render(opf_file)
        return os.path.abspath('metadata.opf')
|
||||||
|
|
@ -152,6 +152,7 @@ class ParseRtf:
|
|||||||
|
|
||||||
def __check_file(self, the_file, type):
|
def __check_file(self, the_file, type):
|
||||||
"""Check to see if files exist"""
|
"""Check to see if files exist"""
|
||||||
|
if hasattr(the_file, 'read'): return
|
||||||
if the_file == None:
|
if the_file == None:
|
||||||
if type == "file_to_parse":
|
if type == "file_to_parse":
|
||||||
message = "You must provide a file for the script to work"
|
message = "You must provide a file for the script to work"
|
||||||
@ -545,13 +546,12 @@ class ParseRtf:
|
|||||||
def __make_temp_file(self,file):
    """Make a temporary file to parse

    ``file`` is either a path or an object with a ``read`` method.  Its
    contents are copied in 1000-character chunks to ``rtf_write_file`` in
    the current directory, and that file name is returned.

    A stream supplied by the caller is left open; a file opened here from a
    path is closed again, and the output file is closed even on error.
    """
    write_file="rtf_write_file"
    opened_here = not hasattr(file, 'read')
    read_obj = open(file,'r') if opened_here else file
    try:
        write_obj = open(write_file, 'w')
        try:
            while True:
                chunk = read_obj.read(1000)
                if not chunk:
                    break
                write_obj.write(chunk)
        finally:
            write_obj.close()
    finally:
        # Only close what this method opened itself.
        if opened_here:
            read_obj.close()
    return write_file
|
||||||
"""
|
"""
|
||||||
|
@ -58,10 +58,12 @@ class Pict:
|
|||||||
return line[18:]
|
return line[18:]
|
||||||
def __make_dir(self):
|
def __make_dir(self):
|
||||||
""" Make a dirctory to put the image data in"""
|
""" Make a dirctory to put the image data in"""
|
||||||
base_name = os.path.basename(self.__orig_file)
|
base_name = os.path.basename(getattr(self.__orig_file, 'name',
|
||||||
|
self.__orig_file))
|
||||||
base_name = os.path.splitext(base_name)[0]
|
base_name = os.path.splitext(base_name)[0]
|
||||||
if self.__out_file:
|
if self.__out_file:
|
||||||
dir_name = os.path.dirname(self.__out_file)
|
dir_name = os.path.dirname(getattr(self.__out_file, 'name',
|
||||||
|
self.__out_file))
|
||||||
else:
|
else:
|
||||||
dir_name = os.path.dirname(self.__orig_file)
|
dir_name = os.path.dirname(self.__orig_file)
|
||||||
# self.__output_to_file_func()
|
# self.__output_to_file_func()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user