From 474f2cadbb385b134d3018a3f79ba8261660f9c1 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 18 Jun 2008 09:44:49 -0700 Subject: [PATCH] Add option to render tables as images (available in the Page options section). This should be helpful for HTML files with complex tables. --- osx_installer.py | 13 ++- src/calibre/ebooks/lrf/__init__.py | 6 +- src/calibre/ebooks/lrf/html/convert_from.py | 55 ++++++++--- src/calibre/ebooks/lrf/html/table_as_image.py | 99 +++++++++++++++++++ src/calibre/gui2/dialogs/fetch_metadata.ui | 7 +- src/calibre/gui2/dialogs/lrf_single.py | 1 + src/calibre/gui2/dialogs/lrf_single.ui | 99 ++++++++++++++----- src/calibre/parallel.py | 44 ++++++--- windows_installer.py | 12 ++- 9 files changed, 273 insertions(+), 63 deletions(-) create mode 100644 src/calibre/ebooks/lrf/html/table_as_image.py diff --git a/osx_installer.py b/osx_installer.py index cccb46ad93..6fb8b3a4e8 100644 --- a/osx_installer.py +++ b/osx_installer.py @@ -51,6 +51,7 @@ def _check_symlinks_prescript(): import os scripts = %(sp)s links = %(sp)s +fonts_conf = %(sp)s os.setuid(0) for s, l in zip(scripts, links): if os.path.lexists(l): @@ -59,6 +60,11 @@ for s, l in zip(scripts, links): omask = os.umask(022) os.symlink(s, l) os.umask(omask) +if not os.path.exists('/etc/fonts/fonts.conf'): + print 'Creating default fonts.conf' + if not os.path.exists('/etc/fonts'): + os.makedirs('/etc/fonts') + os.link(fonts_conf, '/etc/fonts/fonts.conf') """ dest_path = %(dest_path)s @@ -66,6 +72,7 @@ for s, l in zip(scripts, links): scripts = %(scripts)s links = [os.path.join(dest_path, i) for i in scripts] scripts = [os.path.join(resources_path, 'loaders', i) for i in scripts] + fonts_conf = os.path.join(resources_path, 'fonts.conf') bad = False for s, l in zip(scripts, links): @@ -76,7 +83,7 @@ for s, l in zip(scripts, links): if bad: auth = Authorization(destroyflags=(kAuthorizationFlagDestroyRights,)) fd, name = tempfile.mkstemp('.py') - os.write(fd, AUTHTOOL %(pp)s (sys.executable, 
repr(scripts), repr(links))) + os.write(fd, AUTHTOOL %(pp)s (sys.executable, repr(scripts), repr(links), repr(fonts_conf))) os.close(fd) os.chmod(name, 0700) try: @@ -276,10 +283,12 @@ sys.frameworks_dir = os.path.join(os.path.dirname(os.environ['RESOURCEPATH']), ' f.write('src/calibre/gui2/main.py', 'calibre/gui2/main.py') f.close() print + print 'Adding default fonts.conf' + open(os.path.join(self.dist_dir, APPNAME+'.app', 'Contents', 'Resources', 'fonts.conf'), 'wb').write(open('/etc/fonts/fonts.conf').read()) + print print 'Building disk image' BuildAPP.makedmg(os.path.join(self.dist_dir, APPNAME+'.app'), APPNAME+'-'+VERSION) - def main(): sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src')) sys.argv[1:2] = ['py2app'] diff --git a/src/calibre/ebooks/lrf/__init__.py b/src/calibre/ebooks/lrf/__init__.py index 02882312a8..af93bca6cb 100644 --- a/src/calibre/ebooks/lrf/__init__.py +++ b/src/calibre/ebooks/lrf/__init__.py @@ -120,7 +120,6 @@ def option_parser(usage, gui_mode=False): dest='font_delta') laf.add_option('--ignore-colors', action='store_true', default=False, dest='ignore_colors', help=_('Render all content as black on white instead of the colors specified by the HTML or CSS.')) - page = parser.add_option_group('PAGE OPTIONS') profiles = profile_map.keys() @@ -139,6 +138,11 @@ def option_parser(usage, gui_mode=False): help=_('''Top margin of page. Default is %default px.''')) page.add_option('--bottom-margin', default=0, dest='bottom_margin', type='int', help=_('''Bottom margin of page. Default is %default px.''')) + page.add_option('--render-tables-as-images', default=False, action='store_true', + help=_('Render tables in the HTML as images (useful if the document has large or complex tables)')) + page.add_option('--text-size-multiplier-for-rendered-tables', type='float', default=1.0, + help=_('Multiply the size of text in rendered tables by this factor. 
Default is %default')) + link = parser.add_option_group('LINK PROCESSING OPTIONS') link.add_option('--link-levels', action='store', type='int', default=sys.maxint, \ dest='link_levels', diff --git a/src/calibre/ebooks/lrf/html/convert_from.py b/src/calibre/ebooks/lrf/html/convert_from.py index 10a7137f34..6dc268e9eb 100644 --- a/src/calibre/ebooks/lrf/html/convert_from.py +++ b/src/calibre/ebooks/lrf/html/convert_from.py @@ -383,7 +383,8 @@ class HTMLConverter(object, LoggingInterface): self.target_prefix = path self.previous_text = '\n' self.tops[path] = self.parse_file(soup) - self.processed_files.append(path) + self.processed_files.append(path) + def parse_css(self, style): @@ -494,7 +495,9 @@ class HTMLConverter(object, LoggingInterface): top = self.current_block self.current_block.must_append = True + self.soup = soup self.process_children(soup, {}, {}) + self.soup = None if self.current_para and self.current_block: self.current_para.append_to(self.current_block) @@ -1680,18 +1683,44 @@ class HTMLConverter(object, LoggingInterface): self.previous_text = ' ' self.process_children(tag, tag_css, tag_pseudo_css) elif tagname == 'table' and not self.ignore_tables and not self.in_table: - tag_css = self.tag_css(tag)[0] # Table should not inherit CSS - try: - self.process_table(tag, tag_css) - except Exception, err: - self.log_warning(_('An error occurred while processing a table: %s. Ignoring table markup.'), str(err)) - self.log_debug('', exc_info=True) - self.log_debug(_('Bad table:\n%s'), str(tag)[:300]) - self.in_table = False - self.process_children(tag, tag_css, tag_pseudo_css) - finally: - if self.minimize_memory_usage: - tag.extract() + if self.render_tables_as_images: + print 'Rendering table...' 
+ from calibre.ebooks.lrf.html.table_as_image import render_table + pheight = int(self.current_page.pageStyle.attrs['textheight']) + pwidth = int(self.current_page.pageStyle.attrs['textwidth']) + images = render_table(self.soup, tag, tag_css, + os.path.dirname(self.target_prefix), + pwidth, pheight, self.profile.dpi, + self.text_size_multiplier_for_rendered_tables) + for path, width, height in images: + stream = ImageStream(path, encoding='PNG') + im = Image(stream, x0=0, y0=0, x1=width, y1=height,\ + xsize=width, ysize=height) + pb = self.current_block + self.end_current_para() + self.process_alignment(tag_css) + self.current_para.append(Plot(im, xsize=width*720./self.profile.dpi, + ysize=height*720./self.profile.dpi)) + self.current_block.append(self.current_para) + self.current_page.append(self.current_block) + self.current_block = self.book.create_text_block( + textStyle=pb.textStyle, + blockStyle=pb.blockStyle) + self.current_para = Paragraph() + + else: + tag_css = self.tag_css(tag)[0] # Table should not inherit CSS + try: + self.process_table(tag, tag_css) + except Exception, err: + self.log_warning(_('An error occurred while processing a table: %s. Ignoring table markup.'), str(err)) + self.log_debug('', exc_info=True) + self.log_debug(_('Bad table:\n%s'), str(tag)[:300]) + self.in_table = False + self.process_children(tag, tag_css, tag_pseudo_css) + finally: + if self.minimize_memory_usage: + tag.extract() else: self.process_children(tag, tag_css, tag_pseudo_css) finally: diff --git a/src/calibre/ebooks/lrf/html/table_as_image.py b/src/calibre/ebooks/lrf/html/table_as_image.py new file mode 100644 index 0000000000..501a049832 --- /dev/null +++ b/src/calibre/ebooks/lrf/html/table_as_image.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python +__license__ = 'GPL v3' +__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' +__docformat__ = 'restructuredtext en' + +''' +Render HTML tables as images. 
+''' +import os, tempfile, atexit, shutil +from PyQt4.Qt import QWebPage, QUrl, QApplication, QSize, \ + SIGNAL, QPainter, QImage, QObject, Qt + +__app = None + +class HTMLTableRenderer(QObject): + + def __init__(self, html, base_dir, width, height, dpi, factor): + ''' + `width, height`: page width and height in pixels + `base_dir`: The directory in which the HTML file that contains the table resides + ''' + QObject.__init__(self) + + self.app = None + self.width, self.height, self.dpi = width, height, dpi + self.base_dir = base_dir + self.page = QWebPage() + self.connect(self.page, SIGNAL('loadFinished(bool)'), self.render_html) + self.page.mainFrame().setTextSizeMultiplier(factor) + self.page.mainFrame().setHtml(html, + QUrl('file:'+os.path.abspath(self.base_dir))) + self.images = [] + self.tdir = tempfile.mkdtemp(prefix='calibre_render_table') + + def render_html(self, ok): + try: + if not ok: + return + cwidth, cheight = self.page.mainFrame().contentsSize().width(), self.page.mainFrame().contentsSize().height() + self.page.setViewportSize(QSize(cwidth, cheight)) + factor = float(self.width)/cwidth if cwidth > self.width else 1 + cutoff_height = int(self.height/factor)-3 + image = QImage(self.page.viewportSize(), QImage.Format_ARGB32) + image.setDotsPerMeterX(self.dpi*(100/2.54)) + image.setDotsPerMeterX(self.dpi*(100/2.54)) + painter = QPainter(image) + self.page.mainFrame().render(painter) + painter.end() + pos = 0 + while pos < cheight: + img = image.copy(0, pos, cwidth, cutoff_height) + pos += cutoff_height-20 + if cwidth > self.width: + img = img.scaledToWidth(self.width, Qt.SmoothTransform) + f = os.path.join(self.tdir, '%d.png'%pos) + img.save(f) + self.images.append((f, img.width(), img.height())) + finally: + QApplication.quit() + +def render_table(soup, table, css, base_dir, width, height, dpi, factor=1.0): + head = '' + for e in soup.findAll(['link', 'style']): + head += unicode(e)+'\n\n' + style = '' + for key, val in css.items(): + style += key + 
':%s;'%val + html = u'''\ + + + %s + + + + %s + + + '''%(head, width-10, style, unicode(table)) + from calibre.parallel import Server + s = Server() + result, exception, traceback, log = s.run(1, 'render_table', qapp=True, report_progress=False, + args=[html, base_dir, width, height, dpi, factor]) + if exception: + print 'Failed to render table' + print traceback + print log + images, tdir = result + atexit.register(shutil.rmtree, tdir) + return images + +def do_render(html, base_dir, width, height, dpi, factor): + app = QApplication.instance() + if app is None: + app = QApplication([]) + tr = HTMLTableRenderer(html, base_dir, width, height, dpi, factor) + app.exec_() + return tr.images, tr.tdir \ No newline at end of file diff --git a/src/calibre/gui2/dialogs/fetch_metadata.ui b/src/calibre/gui2/dialogs/fetch_metadata.ui index e8b4252d8d..8e5747778f 100644 --- a/src/calibre/gui2/dialogs/fetch_metadata.ui +++ b/src/calibre/gui2/dialogs/fetch_metadata.ui @@ -9,14 +9,15 @@ 0 0 830 - 700 + 642 Fetch metadata - :/images/metadata.svg + + :/images/metadata.svg:/images/metadata.svg @@ -107,7 +108,7 @@ - QDialogButtonBox::Cancel|QDialogButtonBox::NoButton|QDialogButtonBox::Ok + QDialogButtonBox::Cancel|QDialogButtonBox::Ok diff --git a/src/calibre/gui2/dialogs/lrf_single.py b/src/calibre/gui2/dialogs/lrf_single.py index e5df43a673..bc5bdcf06b 100644 --- a/src/calibre/gui2/dialogs/lrf_single.py +++ b/src/calibre/gui2/dialogs/lrf_single.py @@ -382,6 +382,7 @@ class LRFSingleDialog(QDialog, Ui_LRFSingleDialog): self.cmdline = [unicode(i) for i in cmdline] else: Settings().set('LRF conversion defaults', cmdline) + print self.cmdline QDialog.accept(self) class LRFBulkDialog(LRFSingleDialog): diff --git a/src/calibre/gui2/dialogs/lrf_single.ui b/src/calibre/gui2/dialogs/lrf_single.ui index 9fd3bee155..080970b96f 100644 --- a/src/calibre/gui2/dialogs/lrf_single.ui +++ b/src/calibre/gui2/dialogs/lrf_single.ui @@ -115,7 +115,7 @@ - 0 + 2 @@ -818,6 +818,39 @@ + + + + &Convert tables 
to images (good for large/complex tables) + + + + + + + &Multiplier for text size in rendered tables: + + + gui_text_size_multiplier_for_rendered_tables + + + + + + + false + + + 2 + + + 0.100000000000000 + + + 1.000000000000000 + + + @@ -1048,8 +1081,8 @@ p, li { white-space: pre-wrap; } setCurrentIndex(int) - 191 - 236 + 184 + 279 368 @@ -1064,8 +1097,8 @@ p, li { white-space: pre-wrap; } setDisabled(bool) - 428 - 89 + 650 + 122 788 @@ -1073,22 +1106,6 @@ p, li { white-space: pre-wrap; } - - gui_header - toggled(bool) - gui_headerformat - setEnabled(bool) - - - 348 - 340 - - - 823 - 372 - - - gui_disable_chapter_detection toggled(bool) @@ -1096,12 +1113,44 @@ p, li { white-space: pre-wrap; } setDisabled(bool) - 321 - 78 + 543 + 122 - 322 - 172 + 544 + 211 + + + + + gui_render_tables_as_images + toggled(bool) + gui_text_size_multiplier_for_rendered_tables + setEnabled(bool) + + + 298 + 398 + + + 660 + 435 + + + + + gui_header + toggled(bool) + gui_headerformat + setEnabled(bool) + + + 330 + 367 + + + 823 + 372 diff --git a/src/calibre/parallel.py b/src/calibre/parallel.py index 3d169f4822..a9490f4922 100644 --- a/src/calibre/parallel.py +++ b/src/calibre/parallel.py @@ -3,7 +3,7 @@ __copyright__ = '2008, Kovid Goyal ' ''' Used to run jobs in parallel in separate processes. 
''' -import re, sys, tempfile, os, cPickle, traceback, atexit, binascii, time, subprocess +import sys, tempfile, os, cPickle, traceback, atexit, binascii, time, subprocess from functools import partial @@ -11,6 +11,7 @@ from calibre.ebooks.lrf.any.convert_from import main as any2lrf from calibre.ebooks.lrf.web.convert_from import main as web2lrf from calibre.ebooks.lrf.feeds.convert_from import main as feeds2lrf from calibre.gui2.lrf_renderer.main import main as lrfviewer +from calibre.ebooks.lrf.html.table_as_image import do_render as render_table from calibre import iswindows, __appname__, islinux try: from calibre.utils.single_qt_application import SingleApplication @@ -31,6 +32,7 @@ PARALLEL_FUNCS = { 'web2lrf' : web2lrf, 'lrfviewer' : lrfviewer, 'feeds2lrf' : partial(feeds2lrf, notification=report_progress), + 'render_table': render_table, } python = sys.executable @@ -88,7 +90,8 @@ class Server(object): - def run(self, job_id, func, args=[], kwdargs={}, monitor=True): + def run(self, job_id, func, args=[], kwdargs={}, monitor=True, + report_progress=True, qapp=True): ''' Run a job in a separate process. @param job_id: A unique (per server) identifier @@ -96,6 +99,8 @@ class Server(object): @param args: A list of arguments to pass of C{func} @param kwdargs: A dictionary of keyword arguments to pass to C{func} @param monitor: If False launch the child process and return. Do not monitor/communicate with it. + @param report_progess: If True progress is reported to the GUI + @param qapp: If True, A QApplication is created. If False, progress reporting will also be disabled. @return: (result, exception, formatted_traceback, log) where log is the combined stdout + stderr of the child process; or None if monitor is True. 
If a job is killed by a call to L{kill()} then result will be L{KILL_RESULT} @@ -107,14 +112,15 @@ class Server(object): os.mkdir(job_dir) job_data = os.path.join(job_dir, 'job_data.pickle') - cPickle.dump((job_id, func, args, kwdargs), open(job_data, 'wb'), -1) + cPickle.dump((job_id, func, args, kwdargs, report_progress, qapp), + open(job_data, 'wb'), -1) prefix = '' if hasattr(sys, 'frameworks_dir'): fd = getattr(sys, 'frameworks_dir') prefix = 'import sys; sys.frameworks_dir = "%s"; sys.frozen = "macosx_app"; '%fd if fd not in os.environ['PATH']: os.environ['PATH'] += ':'+fd - cmd = prefix + 'from calibre.parallel import run_job; run_job(\'%s\')'%binascii.hexlify(job_data) + cmd = prefix + 'from calibre.parallel import main; main(\'%s\')'%binascii.hexlify(job_data) if not monitor: popen([python, '-c', cmd], stdout=subprocess.PIPE, stdin=subprocess.PIPE, @@ -145,14 +151,12 @@ class Server(object): return result, exception, traceback, log -def run_job(job_data): - global sa, job_id - if SingleApplication is not None: - sa = SingleApplication('calibre GUI') - job_data = binascii.unhexlify(job_data) - base = os.path.dirname(job_data) +def run_job(base, id, func, args, kwdargs): + global job_id + job_id = id + job_result = os.path.join(base, 'job_result.pickle') - job_id, func, args, kwdargs = cPickle.load(open(job_data, 'rb')) + func = PARALLEL_FUNCS[func] exception, tb = None, None try: @@ -165,14 +169,22 @@ def run_job(job_data): if os.path.exists(os.path.dirname(job_result)): cPickle.dump((result, exception, tb), open(job_result, 'wb')) -def main(): - src = sys.argv[2] - job_data = re.search(r'run_job\(\'([a-f0-9A-F]+)\'\)', src).group(1) - run_job(job_data) +def main(src): + from PyQt4.QtGui import QApplication + job_data = binascii.unhexlify(src) + global sa + job_id, func, args, kwdargs, rp, qapp = cPickle.load(open(job_data, 'rb')) + + if qapp and QApplication.instance() is None: + QApplication([]) + if SingleApplication is not None and rp and 
QApplication.instance() is not None: + sa = SingleApplication('calibre GUI') + + run_job(os.path.dirname(job_data), job_id, func, args, kwdargs) return 0 if __name__ == '__main__': - sys.exit(main()) + sys.exit(main(sys.argv[2])) diff --git a/windows_installer.py b/windows_installer.py index c38ee3487b..06fa64d8ce 100644 --- a/windows_installer.py +++ b/windows_installer.py @@ -514,6 +514,12 @@ class BuildEXE(build_exe): f.write('src\\calibre\\gui2\\main.py', 'calibre\\gui2\\main.py') f.close() + print + print 'Doing DLL redirection' # See http://msdn.microsoft.com/en-us/library/ms682600(VS.85).aspx + for f in glob.glob(os.path.join('build', 'py2exe', '*.exe')): + open(f + '.local', 'wb').write('\n') + + print print print 'Building Installer' @@ -558,12 +564,12 @@ def main(): 'win32file', 'pythoncom', 'rtf2xml', 'lxml', 'lxml._elementpath', 'genshi', 'path', 'pydoc', 'IPython.Extensions.*', - 'calibre.web.feeds.recipes.*', 'pydoc', + 'calibre.web.feeds.recipes.*', ], 'packages' : ['PIL'], 'excludes' : ["Tkconstants", "Tkinter", "tcl", - "_imagingtk", "ImageTk", "FixTk", - 'pydoc'], + "_imagingtk", "ImageTk", "FixTk" + ], 'dll_excludes' : ['mswsock.dll'], }, },