def osx_version():
    """Return the version of OS X this process is running on.

    Returns a ``(major, minor, patch)`` tuple of ints, e.g. ``(10, 5, 1)``.
    Returns ``None`` when not running on OS X (``isosx`` is false) or when
    the string reported by ``platform.mac_ver()`` does not start with a
    dotted version number.

    The patch component is optional in the reported string (``'10.5'`` is a
    legal value); it is reported as ``0`` when missing, so callers that
    slice the result (``osx_version()[:2]``) always get a tuple to work on.
    """
    if not isosx:
        return None
    # Imported locally so the function does not depend on module-level
    # imports that may not exist in this file.
    import platform
    import re
    src = platform.mac_ver()[0]
    # major.minor are required, .patch is optional.
    m = re.match(r'(\d+)\.(\d+)(?:\.(\d+))?', src)
    if m is None:
        return None
    return int(m.group(1)), int(m.group(2)), int(m.group(3) or 0)
def generate_xml(rtfpath):
    """Convert the RTF file at ``rtfpath`` to XML using rtf2xml.

    A fresh temporary directory (prefixed with the application name) is
    created and the XML is written to ``index.xml`` inside it. The working
    directory is switched to that directory while rtf2xml runs and is
    always restored afterwards.

    Returns the path of the generated ``index.xml``. The caller owns the
    temporary directory and is responsible for removing it.
    """
    from rtf2xml.ParseRtf import ParseRtf
    tdir = tempfile.mkdtemp(prefix=__appname__+'_')
    ofile = os.path.join(tdir, 'index.xml')
    cwd = os.getcwdu()
    os.chdir(tdir)
    try:
        parser = ParseRtf(
            in_file = rtfpath,
            out_file = ofile,
            # Convert symbol fonts to unicode equivalents. Default is 1.
            convert_symbol = 1,
            # Convert Zapf fonts to unicode equivalents. Default is 1.
            convert_zapf = 1,
            # Convert Wingding fonts to unicode equivalents. Default is 1.
            convert_wingdings = 1,
            # Convert RTF caps to real caps. Default is 1.
            convert_caps = 1,
            # Indent resulting XML. Default is 0 (no indent).
            indent = 1,
            # Form lists from RTF. Default is 1.
            form_lists = 1,
            # Convert headings to sections. Default is 0.
            headings_to_sections = 1,
            # Group paragraphs with the same style name. Default is 1.
            group_styles = 1,
            # Group borders. Default is 1.
            group_borders = 1,
            # Write or do not write empty paragraphs. Default is 0.
            empty_paragraphs = 0,
        )
        parser.parse_rtf()
    finally:
        os.chdir(cwd)
    return ofile


def generate_html2(rtfpath, logger):
    """Convert the RTF file at ``rtfpath`` to HTML via rtf2xml and XSLT.

    The RTF is first converted to XML by ``generate_xml``; the XML is then
    parsed with lxml and transformed with the ``xhtml`` stylesheet. The
    result is written to ``index.html`` next to the intermediate XML.

    ``logger`` receives progress messages at INFO level.

    Returns the path of the generated ``index.html``. Its directory is the
    temporary directory created by ``generate_xml``; the caller is
    responsible for removing it.
    """
    from lxml import etree
    logger.info('Converting RTF to XML...')
    xml = generate_xml(rtfpath)
    tdir = os.path.dirname(xml)
    cwd = os.getcwdu()
    os.chdir(tdir)
    try:
        logger.info('Parsing XML...')
        parser = etree.XMLParser(recover=True, no_network=True)
        try:
            doc = etree.parse(xml, parser)
        except etree.XMLSyntaxError:
            # Even the recovering parser gave up: run the markup through
            # BeautifulStoneSoup and retry. If this still fails the error
            # propagates to the caller.
            logger.info('Parsing failed. Trying to clean up XML...')
            src = open(xml, 'rb')
            try:
                soup = BeautifulStoneSoup(src.read())
            finally:
                src.close()
            doc = etree.fromstring(str(soup))
        logger.info('Converting XML to HTML...')
        styledoc = etree.fromstring(xhtml)

        transform = etree.XSLT(styledoc)
        result = transform(doc)
        html = os.path.join(tdir, 'index.html')
        f = open(html, 'wb')
        try:
            # str() serialises the result tree according to the
            # stylesheet's output settings (replaces the deprecated
            # XSLT.tostring()).
            f.write(str(result))
        finally:
            f.close()
    finally:
        os.chdir(cwd)
    return html
# +# # +# You should have received a copy of the GNU General Public License # +# along with this program; if not, write to the Free Software # +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA # +# 02111-1307 USA # +# # +# # +######################################################################### + +xhtml = '''\ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + generator + + + http://rtf2xml.sourceforge.net/ + + + + + + + + + + + + + + + + + + + + unamed + + + + + + + + + + + + + + + + + + + + + + + - + + - + + + + + + + + + + + + + + + + + unnamed + + + + + + + + div. + + { + + } + + + span.italic{font-style:italic} + span.no-italic{font-style:normal} + span.bold{font-weight:bold} + span.no-bold{font-weight:normal} + span.underline{text-decoration:underline} + span.no-underline{text-decoration:none} + span.italic-bold{font-style:italic;font-weight:bold} + span.italic-underline{font-style:italic;text-decoration:underline} + span.bold-underline{font-weight:bold;text-decoration:underline} + + + + + + + + position:relative; + + padding-top: + + pt; + + + padding-bottom: + + pt; + + + padding-left: + + pt; + + + padding-right: + + pt; + + + text-indent: + + pt; + + + font-weight: + + ; + + + font-style: + + ; + + + text-decoration:underline + ; + + + line-height: + + pt; + + + + + + + + + + + + + + span. 
+ + { + + font-style:italic; + + + font-style:normal; + + + font-weight:bold; + + + font-weight:normal; + + + text-decoration:underline; + + + text-decoration:none; + + + text-decoration:line-through; + + + text-decoration:none; + + + font-size: + + pt; + + } + + + + + + + + + [ + + ] + + + + + + + italic + + + no-italic + + + bold + + + bold + + + no-bold + + + + + underline + + + no-underline + + + + + italic-bold + + + italic-underline + + + bold-underline + + + + + + + + + + + + + + + + + + + footnote + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true + true + false + + + + + + + + + + + + + + + + + + + + page-break-after:always + + + + + + + + + +
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + no match for element: " + + " + + + + +
+''' \ No newline at end of file diff --git a/src/libprs500/gui2/dialogs/lrf_single.py b/src/libprs500/gui2/dialogs/lrf_single.py index f88048993d..11bee76307 100644 --- a/src/libprs500/gui2/dialogs/lrf_single.py +++ b/src/libprs500/gui2/dialogs/lrf_single.py @@ -25,7 +25,7 @@ from libprs500.gui2 import qstring_to_unicode, error_dialog, \ from libprs500.gui2.widgets import FontFamilyModel from libprs500.ebooks.lrf import option_parser from libprs500.ptempfile import PersistentTemporaryFile -from libprs500 import __appname__ +from libprs500 import __appname__, osx_version font_family_model = None @@ -101,6 +101,16 @@ class LRFSingleDialog(QDialog, Ui_LRFSingleDialog): if self.selected_format: self.setWindowTitle(_('Convert %s to LRF')%(self.selected_format,)) + if self.selected_format == 'RTF': + try: + major, minor = osx_version()[:2] + if False and (major == 10 and minor > 4) or major > 10: + self.selected_format = None + d = error_dialog(self, _('RTF conversion not supported'), + _('Conversion of RTF files is not supported on OS X Leopard and higher. This is because unrtf, the underlying program does not work. 
If you are willing to port unrtf to Leopard, contact me.')) + d.exec_() + except: + pass else: self.setWindowTitle(_('Set conversion defaults')) diff --git a/src/libprs500/trac/download/download.py b/src/libprs500/trac/download/download.py index 74497be040..75cac83ff3 100644 --- a/src/libprs500/trac/download/download.py +++ b/src/libprs500/trac/download/download.py @@ -92,7 +92,7 @@ class Distribution(object): self.command = cmd.strip() if os == 'debian': self.command += '\n'+prefix + 'cp -R /usr/share/pycentral/fonttools/site-packages/FontTools* /usr/lib/python2.5/site-packages/' - self.command += '\n'+prefix+'easy_install -U TTFQuery libprs500 \nlibprs500_postinstall' + self.command += '\n'+prefix+'easy_install -U TTFQuery lxml libprs500 \n'+prefix+'easy_install -f http://sourceforge.net/project/showfiles.php?group_id=68617 rtf2xml\n'+prefix+'libprs500_postinstall' try: self.manual = Markup(self.MANUAL_MAP[os]) except KeyError: diff --git a/windows_installer.py b/windows_installer.py index 5dcff72413..2b23806138 100644 --- a/windows_installer.py +++ b/windows_installer.py @@ -457,7 +457,8 @@ setup( 'dist_dir' : PY2EXE_DIR, 'includes' : ['sip', 'pkg_resources', 'PyQt4.QtSvg', 'mechanize', 'ClientForm', 'wmi', - 'win32file', 'pythoncom'], + 'win32file', 'pythoncom', 'rtf2xml', + 'lxml', 'lxml._elementpath'], 'packages' : ['PIL'], 'excludes' : ["Tkconstants", "Tkinter", "tcl", "_imagingtk", "ImageTk", "FixTk",