Add option to render tables as images (available in the Page options section). This should be helpful for HTML files with complex tables.

This commit is contained in:
Kovid Goyal 2008-06-18 09:44:49 -07:00
parent ecb18a05ea
commit 474f2cadbb
9 changed files with 273 additions and 63 deletions

View File

@ -51,6 +51,7 @@ def _check_symlinks_prescript():
import os
scripts = %(sp)s
links = %(sp)s
fonts_conf = %(sp)s
os.setuid(0)
for s, l in zip(scripts, links):
if os.path.lexists(l):
@ -59,6 +60,11 @@ for s, l in zip(scripts, links):
omask = os.umask(022)
os.symlink(s, l)
os.umask(omask)
if not os.path.exists('/etc/fonts/fonts.conf'):
print 'Creating default fonts.conf'
if not os.path.exists('/etc/fonts'):
os.makedirs('/etc/fonts')
os.link(fonts_conf, '/etc/fonts/fonts.conf')
"""
dest_path = %(dest_path)s
@ -66,6 +72,7 @@ for s, l in zip(scripts, links):
scripts = %(scripts)s
links = [os.path.join(dest_path, i) for i in scripts]
scripts = [os.path.join(resources_path, 'loaders', i) for i in scripts]
fonts_conf = os.path.join(resources_path, 'fonts.conf')
bad = False
for s, l in zip(scripts, links):
@ -76,7 +83,7 @@ for s, l in zip(scripts, links):
if bad:
auth = Authorization(destroyflags=(kAuthorizationFlagDestroyRights,))
fd, name = tempfile.mkstemp('.py')
os.write(fd, AUTHTOOL %(pp)s (sys.executable, repr(scripts), repr(links)))
os.write(fd, AUTHTOOL %(pp)s (sys.executable, repr(scripts), repr(links), repr(fonts_conf)))
os.close(fd)
os.chmod(name, 0700)
try:
@ -276,10 +283,12 @@ sys.frameworks_dir = os.path.join(os.path.dirname(os.environ['RESOURCEPATH']), '
f.write('src/calibre/gui2/main.py', 'calibre/gui2/main.py')
f.close()
print
print 'Adding default fonts.conf'
open(os.path.join(self.dist_dir, APPNAME+'.app', 'Contents', 'Resources', 'fonts.conf'), 'wb').write(open('/etc/fonts/fonts.conf').read())
print
print 'Building disk image'
BuildAPP.makedmg(os.path.join(self.dist_dir, APPNAME+'.app'), APPNAME+'-'+VERSION)
def main():
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
sys.argv[1:2] = ['py2app']

View File

@ -120,7 +120,6 @@ def option_parser(usage, gui_mode=False):
dest='font_delta')
laf.add_option('--ignore-colors', action='store_true', default=False, dest='ignore_colors',
help=_('Render all content as black on white instead of the colors specified by the HTML or CSS.'))
page = parser.add_option_group('PAGE OPTIONS')
profiles = profile_map.keys()
@ -139,6 +138,11 @@ def option_parser(usage, gui_mode=False):
help=_('''Top margin of page. Default is %default px.'''))
page.add_option('--bottom-margin', default=0, dest='bottom_margin', type='int',
help=_('''Bottom margin of page. Default is %default px.'''))
page.add_option('--render-tables-as-images', default=False, action='store_true',
help=_('Render tables in the HTML as images (useful if the document has large or complex tables)'))
page.add_option('--text-size-multiplier-for-rendered-tables', type='float', default=1.0,
help=_('Multiply the size of text in rendered tables by this factor. Default is %default'))
link = parser.add_option_group('LINK PROCESSING OPTIONS')
link.add_option('--link-levels', action='store', type='int', default=sys.maxint, \
dest='link_levels',

View File

@ -383,7 +383,8 @@ class HTMLConverter(object, LoggingInterface):
self.target_prefix = path
self.previous_text = '\n'
self.tops[path] = self.parse_file(soup)
self.processed_files.append(path)
self.processed_files.append(path)
def parse_css(self, style):
@ -494,7 +495,9 @@ class HTMLConverter(object, LoggingInterface):
top = self.current_block
self.current_block.must_append = True
self.soup = soup
self.process_children(soup, {}, {})
self.soup = None
if self.current_para and self.current_block:
self.current_para.append_to(self.current_block)
@ -1680,18 +1683,44 @@ class HTMLConverter(object, LoggingInterface):
self.previous_text = ' '
self.process_children(tag, tag_css, tag_pseudo_css)
elif tagname == 'table' and not self.ignore_tables and not self.in_table:
tag_css = self.tag_css(tag)[0] # Table should not inherit CSS
try:
self.process_table(tag, tag_css)
except Exception, err:
self.log_warning(_('An error occurred while processing a table: %s. Ignoring table markup.'), str(err))
self.log_debug('', exc_info=True)
self.log_debug(_('Bad table:\n%s'), str(tag)[:300])
self.in_table = False
self.process_children(tag, tag_css, tag_pseudo_css)
finally:
if self.minimize_memory_usage:
tag.extract()
if self.render_tables_as_images:
print 'Rendering table...'
from calibre.ebooks.lrf.html.table_as_image import render_table
pheight = int(self.current_page.pageStyle.attrs['textheight'])
pwidth = int(self.current_page.pageStyle.attrs['textwidth'])
images = render_table(self.soup, tag, tag_css,
os.path.dirname(self.target_prefix),
pwidth, pheight, self.profile.dpi,
self.text_size_multiplier_for_rendered_tables)
for path, width, height in images:
stream = ImageStream(path, encoding='PNG')
im = Image(stream, x0=0, y0=0, x1=width, y1=height,\
xsize=width, ysize=height)
pb = self.current_block
self.end_current_para()
self.process_alignment(tag_css)
self.current_para.append(Plot(im, xsize=width*720./self.profile.dpi,
ysize=height*720./self.profile.dpi))
self.current_block.append(self.current_para)
self.current_page.append(self.current_block)
self.current_block = self.book.create_text_block(
textStyle=pb.textStyle,
blockStyle=pb.blockStyle)
self.current_para = Paragraph()
else:
tag_css = self.tag_css(tag)[0] # Table should not inherit CSS
try:
self.process_table(tag, tag_css)
except Exception, err:
self.log_warning(_('An error occurred while processing a table: %s. Ignoring table markup.'), str(err))
self.log_debug('', exc_info=True)
self.log_debug(_('Bad table:\n%s'), str(tag)[:300])
self.in_table = False
self.process_children(tag, tag_css, tag_pseudo_css)
finally:
if self.minimize_memory_usage:
tag.extract()
else:
self.process_children(tag, tag_css, tag_pseudo_css)
finally:

View File

@ -0,0 +1,99 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
Render HTML tables as images.
'''
import os, tempfile, atexit, shutil
from PyQt4.Qt import QWebPage, QUrl, QApplication, QSize, \
SIGNAL, QPainter, QImage, QObject, Qt
__app = None
class HTMLTableRenderer(QObject):
    '''
    Render an HTML document off-screen in a QWebPage and cut the painted
    result into PNG slices that are at most one page high.

    Rendering is triggered by the page's loadFinished(bool) signal. The
    slices are written into the temporary directory `self.tdir` and recorded
    in `self.images` as (path, width, height) tuples. QApplication.quit() is
    called once rendering has finished, whether or not it succeeded.
    '''

    def __init__(self, html, base_dir, width, height, dpi, factor):
        '''
        `html`: The markup to render
        `width, height`: page width and height in pixels
        `base_dir`: The directory in which the HTML file that contains the table resides
        `dpi`: Resolution stored in the generated images
        `factor`: Text size multiplier applied to the frame before loading
        '''
        QObject.__init__(self)
        self.app = None
        self.width, self.height, self.dpi = width, height, dpi
        self.base_dir = base_dir
        # The result state has to exist before the load is started, because
        # render_html() uses it and runs as soon as loadFinished is emitted.
        self.images = []
        self.tdir = tempfile.mkdtemp(prefix='calibre_render_table')
        self.page = QWebPage()
        self.connect(self.page, SIGNAL('loadFinished(bool)'), self.render_html)
        frame = self.page.mainFrame()
        frame.setTextSizeMultiplier(factor)
        frame.setHtml(html, QUrl('file:'+os.path.abspath(self.base_dir)))

    def render_html(self, ok):
        '''
        Slot for loadFinished(bool). Paints the whole page into a single
        image, slices it vertically and saves every slice as a PNG file in
        `self.tdir`. Nothing is rendered when `ok` is false.
        '''
        try:
            if not ok:
                return
            size = self.page.mainFrame().contentsSize()
            cwidth, cheight = size.width(), size.height()
            self.page.setViewportSize(QSize(cwidth, cheight))
            # Content wider than the page is scaled down afterwards, so a
            # slice may be proportionally taller before scaling.
            factor = float(self.width)/cwidth if cwidth > self.width else 1
            cutoff_height = int(self.height/factor)-3
            image = QImage(self.page.viewportSize(), QImage.Format_ARGB32)
            dots_per_meter = int(self.dpi*(100/2.54))
            image.setDotsPerMeterX(dots_per_meter)
            # The vertical resolution must be set as well (the X setter used
            # to be called twice, leaving Y untouched).
            image.setDotsPerMeterY(dots_per_meter)
            painter = QPainter(image)
            self.page.mainFrame().render(painter)
            painter.end()
            # Consecutive slices overlap by 20 rows. Never step by less than
            # one row, otherwise the loop would not terminate for tiny pages.
            step = max(1, cutoff_height-20)
            pos = 0
            while pos < cheight:
                # Clamp the last slice to the image so it is not padded with
                # empty rows below the rendered content.
                slice_height = max(1, min(cutoff_height, cheight-pos))
                img = image.copy(0, pos, cwidth, slice_height)
                pos += step
                if cwidth > self.width:
                    img = img.scaledToWidth(self.width, Qt.SmoothTransform)
                f = os.path.join(self.tdir, '%d.png'%pos)
                img.save(f)
                self.images.append((f, img.width(), img.height()))
        finally:
            QApplication.quit()
def render_table(soup, table, css, base_dir, width, height, dpi, factor=1.0):
    '''
    Render `table` as a list of images by running the 'render_table' job
    through a calibre.parallel Server.

    `soup`: The parsed document; its <link> and <style> elements are copied into the rendered page
    `table`: The table element to render
    `css`: Mapping of CSS property names to values, emitted as a rule for `table`
    `base_dir`: The directory in which the HTML file that contains the table resides
    `width, height`: page width and height in pixels
    `dpi`: Resolution passed on to the renderer
    `factor`: Text size multiplier passed on to the renderer

    Returns a list of (path, width, height) tuples. If the job reports an
    exception the diagnostics are printed and an empty list is returned.
    '''
    head = ''.join(unicode(e)+'\n\n' for e in soup.findAll(['link', 'style']))
    style = ''.join(key + ':%s;'%val for key, val in css.items())
    html = u'''\
<html>
<head>
%s
</head>
<body style="width: %dpx">
<style type="text/css">
table {%s}
</style>
%s
</body>
</html>
'''%(head, width-10, style, unicode(table))
    from calibre.parallel import Server
    s = Server()
    result, exception, traceback, log = s.run(1, 'render_table', qapp=True, report_progress=False,
                                              args=[html, base_dir, width, height, dpi, factor])
    if exception:
        print('Failed to render table')
        print(traceback)
        print(log)
        # The job failed, so `result` cannot be relied upon to hold
        # (images, tdir); unpacking it would only hide the real error
        # behind a second one.
        return []
    images, tdir = result
    # The image files are needed until the conversion is done; remove the
    # temporary directory only when the interpreter exits.
    atexit.register(shutil.rmtree, tdir)
    return images
def do_render(html, base_dir, width, height, dpi, factor):
    '''
    Render `html` with an HTMLTableRenderer and block in the Qt event loop
    until it returns.

    An existing QApplication instance is reused; one is created only when
    none exists yet. Returns the tuple (images, tdir) taken from the renderer.
    '''
    if QApplication.instance() is None:
        application = QApplication([])
    else:
        application = QApplication.instance()
    renderer = HTMLTableRenderer(html, base_dir, width, height, dpi, factor)
    application.exec_()
    return renderer.images, renderer.tdir

View File

@ -9,14 +9,15 @@
<x>0</x>
<y>0</y>
<width>830</width>
<height>700</height>
<height>642</height>
</rect>
</property>
<property name="windowTitle" >
<string>Fetch metadata</string>
</property>
<property name="windowIcon" >
<iconset resource="../images.qrc" >:/images/metadata.svg</iconset>
<iconset resource="../images.qrc" >
<normaloff>:/images/metadata.svg</normaloff>:/images/metadata.svg</iconset>
</property>
<layout class="QVBoxLayout" >
<item>
@ -107,7 +108,7 @@
<item>
<widget class="QDialogButtonBox" name="buttonBox" >
<property name="standardButtons" >
<set>QDialogButtonBox::Cancel|QDialogButtonBox::NoButton|QDialogButtonBox::Ok</set>
<set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set>
</property>
</widget>
</item>

View File

@ -382,6 +382,7 @@ class LRFSingleDialog(QDialog, Ui_LRFSingleDialog):
self.cmdline = [unicode(i) for i in cmdline]
else:
Settings().set('LRF conversion defaults', cmdline)
print self.cmdline
QDialog.accept(self)
class LRFBulkDialog(LRFSingleDialog):

View File

@ -115,7 +115,7 @@
<item row="0" column="0" >
<widget class="QStackedWidget" name="stack" >
<property name="currentIndex" >
<number>0</number>
<number>2</number>
</property>
<widget class="QWidget" name="metadata_page" >
<property name="geometry" >
@ -818,6 +818,39 @@
</property>
</widget>
</item>
<item row="5" column="0" >
<widget class="QCheckBox" name="gui_render_tables_as_images" >
<property name="text" >
<string>&amp;Convert tables to images (good for large/complex tables)</string>
</property>
</widget>
</item>
<item row="6" column="0" >
<widget class="QLabel" name="label_27" >
<property name="text" >
<string>&amp;Multiplier for text size in rendered tables:</string>
</property>
<property name="buddy" >
<cstring>gui_text_size_multiplier_for_rendered_tables</cstring>
</property>
</widget>
</item>
<item row="6" column="1" >
<widget class="QDoubleSpinBox" name="gui_text_size_multiplier_for_rendered_tables" >
<property name="enabled" >
<bool>false</bool>
</property>
<property name="decimals" >
<number>2</number>
</property>
<property name="minimum" >
<double>0.100000000000000</double>
</property>
<property name="value" >
<double>1.000000000000000</double>
</property>
</widget>
</item>
</layout>
</widget>
<widget class="QWidget" name="chapterdetection_page" >
@ -1048,8 +1081,8 @@ p, li { white-space: pre-wrap; }
<slot>setCurrentIndex(int)</slot>
<hints>
<hint type="sourcelabel" >
<x>191</x>
<y>236</y>
<x>184</x>
<y>279</y>
</hint>
<hint type="destinationlabel" >
<x>368</x>
@ -1064,8 +1097,8 @@ p, li { white-space: pre-wrap; }
<slot>setDisabled(bool)</slot>
<hints>
<hint type="sourcelabel" >
<x>428</x>
<y>89</y>
<x>650</x>
<y>122</y>
</hint>
<hint type="destinationlabel" >
<x>788</x>
@ -1073,22 +1106,6 @@ p, li { white-space: pre-wrap; }
</hint>
</hints>
</connection>
<connection>
<sender>gui_header</sender>
<signal>toggled(bool)</signal>
<receiver>gui_headerformat</receiver>
<slot>setEnabled(bool)</slot>
<hints>
<hint type="sourcelabel" >
<x>348</x>
<y>340</y>
</hint>
<hint type="destinationlabel" >
<x>823</x>
<y>372</y>
</hint>
</hints>
</connection>
<connection>
<sender>gui_disable_chapter_detection</sender>
<signal>toggled(bool)</signal>
@ -1096,12 +1113,44 @@ p, li { white-space: pre-wrap; }
<slot>setDisabled(bool)</slot>
<hints>
<hint type="sourcelabel" >
<x>321</x>
<y>78</y>
<x>543</x>
<y>122</y>
</hint>
<hint type="destinationlabel" >
<x>322</x>
<y>172</y>
<x>544</x>
<y>211</y>
</hint>
</hints>
</connection>
<connection>
<sender>gui_render_tables_as_images</sender>
<signal>toggled(bool)</signal>
<receiver>gui_text_size_multiplier_for_rendered_tables</receiver>
<slot>setEnabled(bool)</slot>
<hints>
<hint type="sourcelabel" >
<x>298</x>
<y>398</y>
</hint>
<hint type="destinationlabel" >
<x>660</x>
<y>435</y>
</hint>
</hints>
</connection>
<connection>
<sender>gui_header</sender>
<signal>toggled(bool)</signal>
<receiver>gui_headerformat</receiver>
<slot>setEnabled(bool)</slot>
<hints>
<hint type="sourcelabel" >
<x>330</x>
<y>367</y>
</hint>
<hint type="destinationlabel" >
<x>823</x>
<y>372</y>
</hint>
</hints>
</connection>

View File

@ -3,7 +3,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
Used to run jobs in parallel in separate processes.
'''
import re, sys, tempfile, os, cPickle, traceback, atexit, binascii, time, subprocess
import sys, tempfile, os, cPickle, traceback, atexit, binascii, time, subprocess
from functools import partial
@ -11,6 +11,7 @@ from calibre.ebooks.lrf.any.convert_from import main as any2lrf
from calibre.ebooks.lrf.web.convert_from import main as web2lrf
from calibre.ebooks.lrf.feeds.convert_from import main as feeds2lrf
from calibre.gui2.lrf_renderer.main import main as lrfviewer
from calibre.ebooks.lrf.html.table_as_image import do_render as render_table
from calibre import iswindows, __appname__, islinux
try:
from calibre.utils.single_qt_application import SingleApplication
@ -31,6 +32,7 @@ PARALLEL_FUNCS = {
'web2lrf' : web2lrf,
'lrfviewer' : lrfviewer,
'feeds2lrf' : partial(feeds2lrf, notification=report_progress),
'render_table': render_table,
}
python = sys.executable
@ -88,7 +90,8 @@ class Server(object):
def run(self, job_id, func, args=[], kwdargs={}, monitor=True):
def run(self, job_id, func, args=[], kwdargs={}, monitor=True,
report_progress=True, qapp=True):
'''
Run a job in a separate process.
@param job_id: A unique (per server) identifier
@ -96,6 +99,8 @@ class Server(object):
@param args: A list of arguments to pass to C{func}
@param kwdargs: A dictionary of keyword arguments to pass to C{func}
@param monitor: If False launch the child process and return. Do not monitor/communicate with it.
@param report_progress: If True progress is reported to the GUI
@param qapp: If True, A QApplication is created. If False, progress reporting will also be disabled.
@return: (result, exception, formatted_traceback, log) where log is the combined
stdout + stderr of the child process; or None if monitor is True. If a job is killed
by a call to L{kill()} then result will be L{KILL_RESULT}
@ -107,14 +112,15 @@ class Server(object):
os.mkdir(job_dir)
job_data = os.path.join(job_dir, 'job_data.pickle')
cPickle.dump((job_id, func, args, kwdargs), open(job_data, 'wb'), -1)
cPickle.dump((job_id, func, args, kwdargs, report_progress, qapp),
open(job_data, 'wb'), -1)
prefix = ''
if hasattr(sys, 'frameworks_dir'):
fd = getattr(sys, 'frameworks_dir')
prefix = 'import sys; sys.frameworks_dir = "%s"; sys.frozen = "macosx_app"; '%fd
if fd not in os.environ['PATH']:
os.environ['PATH'] += ':'+fd
cmd = prefix + 'from calibre.parallel import run_job; run_job(\'%s\')'%binascii.hexlify(job_data)
cmd = prefix + 'from calibre.parallel import main; main(\'%s\')'%binascii.hexlify(job_data)
if not monitor:
popen([python, '-c', cmd], stdout=subprocess.PIPE, stdin=subprocess.PIPE,
@ -145,14 +151,12 @@ class Server(object):
return result, exception, traceback, log
def run_job(job_data):
global sa, job_id
if SingleApplication is not None:
sa = SingleApplication('calibre GUI')
job_data = binascii.unhexlify(job_data)
base = os.path.dirname(job_data)
def run_job(base, id, func, args, kwdargs):
global job_id
job_id = id
job_result = os.path.join(base, 'job_result.pickle')
job_id, func, args, kwdargs = cPickle.load(open(job_data, 'rb'))
func = PARALLEL_FUNCS[func]
exception, tb = None, None
try:
@ -165,14 +169,22 @@ def run_job(job_data):
if os.path.exists(os.path.dirname(job_result)):
cPickle.dump((result, exception, tb), open(job_result, 'wb'))
def main():
src = sys.argv[2]
job_data = re.search(r'run_job\(\'([a-f0-9A-F]+)\'\)', src).group(1)
run_job(job_data)
def main(src):
from PyQt4.QtGui import QApplication
job_data = binascii.unhexlify(src)
global sa
job_id, func, args, kwdargs, rp, qapp = cPickle.load(open(job_data, 'rb'))
if qapp and QApplication.instance() is None:
QApplication([])
if SingleApplication is not None and rp and QApplication.instance() is not None:
sa = SingleApplication('calibre GUI')
run_job(os.path.dirname(job_data), job_id, func, args, kwdargs)
return 0
if __name__ == '__main__':
sys.exit(main())
sys.exit(main(sys.argv[2]))

View File

@ -514,6 +514,12 @@ class BuildEXE(build_exe):
f.write('src\\calibre\\gui2\\main.py', 'calibre\\gui2\\main.py')
f.close()
print
print 'Doing DLL redirection' # See http://msdn.microsoft.com/en-us/library/ms682600(VS.85).aspx
for f in glob.glob(os.path.join('build', 'py2exe', '*.exe')):
open(f + '.local', 'wb').write('\n')
print
print
print 'Building Installer'
@ -558,12 +564,12 @@ def main():
'win32file', 'pythoncom', 'rtf2xml',
'lxml', 'lxml._elementpath', 'genshi',
'path', 'pydoc', 'IPython.Extensions.*',
'calibre.web.feeds.recipes.*', 'pydoc',
'calibre.web.feeds.recipes.*',
],
'packages' : ['PIL'],
'excludes' : ["Tkconstants", "Tkinter", "tcl",
"_imagingtk", "ImageTk", "FixTk",
'pydoc'],
"_imagingtk", "ImageTk", "FixTk"
],
'dll_excludes' : ['mswsock.dll'],
},
},