mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Added support for conversion of HTML tables.
Added support for common encodings to txt2lrf.
This commit is contained in:
parent
b26adb541e
commit
806aba6f80
@ -33,7 +33,7 @@ You may have to adjust the GROUP and the location of the rules file to
|
||||
suit your distribution.
|
||||
"""
|
||||
|
||||
__version__ = "0.3.32"
|
||||
__version__ = "0.3.33"
|
||||
__docformat__ = "epytext"
|
||||
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
|
||||
|
||||
|
@ -352,7 +352,7 @@ class SetTime(Command):
|
||||
self.day = t[2]
|
||||
self.hour = t[3]
|
||||
self.minute = t[4]
|
||||
# Hack you should actually update the entire time tree is
|
||||
# Hack you should actually update the entire time tree if
|
||||
# second is > 59
|
||||
self.second = t[5] if t[5] < 60 else 59
|
||||
|
||||
|
@ -13,7 +13,11 @@
|
||||
## with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
import pkg_resources
|
||||
from PIL import ImageFont
|
||||
try:
|
||||
from PIL import ImageFont
|
||||
except ImportError:
|
||||
import ImageFont
|
||||
|
||||
'''
|
||||
Default fonts used in the PRS500
|
||||
'''
|
||||
@ -26,7 +30,8 @@ FONT_MAP = {
|
||||
def get_font(name, size, encoding='unic'):
|
||||
'''
|
||||
Get an ImageFont object by name.
|
||||
@param size: Size in pts
|
||||
@param size: Font height in pixels. To convert from pts:
|
||||
sz in pixels = (dpi/72) * size in pts
|
||||
@param encoding: Font encoding to use. E.g. 'unic', 'symbol', 'ADOB', 'ADBE', 'aprm'
|
||||
'''
|
||||
if name in FONT_MAP.keys():
|
||||
|
@ -39,6 +39,7 @@ from libprs500.ebooks.lrf.pylrs.pylrs import Paragraph, CR, Italic, ImageStream,
|
||||
Plot, Image, BlockSpace, RuledLine, BookSetting, Canvas
|
||||
from libprs500.ebooks.lrf.pylrs.pylrs import Span as _Span
|
||||
from libprs500.ebooks.lrf import ConversionError, option_parser, Book, PRS500_PROFILE
|
||||
from libprs500.ebooks.lrf.html.table import Table
|
||||
from libprs500 import extract, filename_to_utf8
|
||||
from libprs500.ptempfile import PersistentTemporaryFile
|
||||
|
||||
@ -303,6 +304,7 @@ class HTMLConverter(object):
|
||||
self.chapter_regex = chapter_regex #: Regex used to search for chapter titles
|
||||
self.link_exclude = link_exclude #: Ignore matching hrefs
|
||||
self.scaled_images = {} #: Temporary files with scaled version of images
|
||||
self.rotated_images = {} #: Temporary files with rotated version of images
|
||||
self.max_link_levels = max_link_levels #: Number of link levels to process recursively
|
||||
self.link_level = link_level #: Current link level
|
||||
self.blockquote_style = book.create_block_style(sidemargin=60,
|
||||
@ -317,6 +319,9 @@ class HTMLConverter(object):
|
||||
self.files = {} #: links that point to other files
|
||||
self.links_processed = False #: Whether links_processed has been called on this object
|
||||
self.font_delta = font_delta
|
||||
# Set by table processing code so that any <a name> within the table
|
||||
# point to the previous element
|
||||
self.anchor_to_previous = None
|
||||
self.cover = cover
|
||||
self.memory = [] #: Used to ensure that duplicate CSS unhandled erros are not reported
|
||||
self.in_ol = False #: Flag indicating we're in an <ol> element
|
||||
@ -478,6 +483,15 @@ class HTMLConverter(object):
|
||||
return text
|
||||
|
||||
def process_links(self):
|
||||
def add_toc_entry(text, target):
|
||||
# TextBlocks in Canvases have a None parent or an Objects Parent
|
||||
if target.parent != None and \
|
||||
hasattr(target.parent, 'objId'):
|
||||
self.book.addTocEntry(ascii_text, tb)
|
||||
elif self.verbose:
|
||||
print "Cannot add link", ascii_text, "to TOC"
|
||||
|
||||
|
||||
def get_target_block(fragment, targets):
|
||||
'''Return the correct block for the <a name> element'''
|
||||
bs = targets[fragment]
|
||||
@ -535,7 +549,7 @@ class HTMLConverter(object):
|
||||
if fragment in self.targets.keys():
|
||||
tb = get_target_block(fragment, self.targets)
|
||||
if self.is_root:
|
||||
self.book.addTocEntry(ascii_text, tb)
|
||||
add_toc_entry(ascii_text, tb)
|
||||
sys.stdout.flush()
|
||||
jb = JumpButton(tb)
|
||||
self.book.append(jb)
|
||||
@ -580,7 +594,7 @@ class HTMLConverter(object):
|
||||
else:
|
||||
tb = conv.top
|
||||
if self.is_root:
|
||||
self.book.addTocEntry(ascii_text, tb)
|
||||
add_toc_entry(ascii_text, tb)
|
||||
jb = JumpButton(tb)
|
||||
self.book.append(jb)
|
||||
cb = CharButton(jb, text=text)
|
||||
@ -727,22 +741,32 @@ class HTMLConverter(object):
|
||||
blockStyle=self.current_block.blockStyle)
|
||||
|
||||
def process_image(self, path, tag_css, width=None, height=None):
|
||||
if self.rotated_images.has_key(path):
|
||||
path = self.rotated_images[path].name
|
||||
if self.scaled_images.has_key(path):
|
||||
path = self.scaled_images[path].name
|
||||
|
||||
im = PILImage.open(path)
|
||||
|
||||
if width == None or height == None:
|
||||
width, height = im.size
|
||||
|
||||
def scale_image(width, height):
|
||||
pt = PersistentTemporaryFile(suffix='.jpeg')
|
||||
im.resize((int(width), int(height)), PILImage.ANTIALIAS).convert('RGB').save(pt, 'JPEG')
|
||||
pt.close()
|
||||
self.scaled_images[path] = pt
|
||||
return pt.name
|
||||
|
||||
if self.scaled_images.has_key(path):
|
||||
path = self.scaled_images[path].name
|
||||
|
||||
im = PILImage.open(path)
|
||||
if width == None or height == None:
|
||||
width, height = im.size
|
||||
if width > height:
|
||||
|
||||
if width > self.profile.page_width and width > height:
|
||||
pt = PersistentTemporaryFile(suffix='.jpeg')
|
||||
im = im.rotate(-90)
|
||||
im.convert('RGB').save(pt, 'JPEG')
|
||||
path = pt.name
|
||||
pt.close()
|
||||
self.rotated_images[path] = pt
|
||||
width, height = im.size
|
||||
|
||||
if height > self.profile.page_height:
|
||||
corrf = self.profile.page_height/(1.*height)
|
||||
width, height = floor(corrf*width), self.profile.page_height-1
|
||||
@ -788,7 +812,7 @@ class HTMLConverter(object):
|
||||
self.end_page()
|
||||
self.current_page.append(Canvas(width=self.profile.page_width,
|
||||
height=height))
|
||||
left = int(floor((self.profile.page_width - width)/2.))
|
||||
left = int(floor((self.profile.page_width - width)/2.))
|
||||
self.current_page.contents[0].put_object(ImageBlock(self.images[path]),
|
||||
left, 0)
|
||||
|
||||
@ -824,6 +848,18 @@ class HTMLConverter(object):
|
||||
pass
|
||||
elif tagname == 'a' and self.max_link_levels >= 0:
|
||||
if tag.has_key('name'):
|
||||
if self.anchor_to_previous:
|
||||
self.process_children(tag, tag_css)
|
||||
return
|
||||
for c in self.anchor_to_previous.contents:
|
||||
if isinstance(c, (TextBlock, ImageBlock)):
|
||||
self.targets[tag['name']] = c
|
||||
return
|
||||
tb = self.book.create_text_block()
|
||||
tb.Paragraph(" ")
|
||||
self.anchor_to_previous.append(tb)
|
||||
self.targets[tag['name']] = tb
|
||||
return
|
||||
previous = self.current_block
|
||||
self.process_children(tag, tag_css)
|
||||
target = None
|
||||
@ -867,7 +903,7 @@ class HTMLConverter(object):
|
||||
['png', 'jpg', 'bmp', 'jpeg']:
|
||||
self.process_image(path, tag_css)
|
||||
else:
|
||||
self.add_text('Link: ' + tag['href'], tag_css)
|
||||
self.add_text(self.get_text(tag), tag_css)
|
||||
self.links.append(HTMLConverter.Link(self.current_para.contents[-1], tag))
|
||||
elif tagname == 'img':
|
||||
if tag.has_key('src') and os.access(unquote(tag['src']), os.R_OK):
|
||||
@ -1010,30 +1046,45 @@ class HTMLConverter(object):
|
||||
if tag.has_key('face'):
|
||||
tag_css['font-family'] = tag['face']
|
||||
self.process_children(tag, tag_css)
|
||||
elif tagname in ['br', 'tr']:
|
||||
elif tagname in ['br']:
|
||||
self.current_para.append(CR())
|
||||
self.process_children(tag, tag_css)
|
||||
elif tagname in ['td']:
|
||||
self.current_para.append(' ')
|
||||
self.process_children(tag, tag_css)
|
||||
elif tagname == 'hr':
|
||||
self.end_current_para()
|
||||
self.current_block.append(CR())
|
||||
self.end_current_block()
|
||||
self.current_page.RuledLine(linelength=self.profile.page_width)
|
||||
elif tagname == 'table':
|
||||
tag_css = self.tag_css(tag) # Table should not inherit CSS
|
||||
self.process_table(tag, tag_css)
|
||||
else:
|
||||
self.process_children(tag, tag_css)
|
||||
|
||||
self.process_children(tag, tag_css)
|
||||
if end_page:
|
||||
self.end_page()
|
||||
|
||||
def process_table(self, tag, tag_css):
    '''
    Convert the C{<table>} element C{tag}. Every row of the table becomes
    one fixed C{Canvas} that is as wide as the page; the text blocks of the
    cells are placed on the canvas of their row.

    @param tag: The C{<table>} tag
    @param tag_css: CSS that applies to the table
    '''
    self.end_current_block()
    rowpad, colpad = 10, 10
    table = Table(self, tag, tag_css, rowpad=rowpad, colpad=colpad)
    canvases = []
    for block, xpos, ypos, delta in table.blocks(self.profile.page_width):
        if not block:
            # Start of a new row: ypos carries the height of the row
            canvases.append(Canvas(self.profile.page_width, ypos+rowpad,
                                   blockrule='block-fixed'))
        else:
            # delta is the unused horizontal space, half of it is used as
            # left margin to center the table. ypos is the vertical offset
            # of the block inside its cell; it must be honoured as otherwise
            # all the text blocks of a cell are drawn on top of each other.
            canvases[-1].put_object(block, xpos + int(delta/2.), ypos)

    for canvas in canvases:
        self.current_page.append(canvas)
    self.end_current_block()
|
||||
|
||||
|
||||
def writeto(self, path, lrs=False):
    '''
    Render the converted book to C{path}.

    @param path: Destination passed on to the renderer
    @param lrs: If True the book is rendered with C{renderLrs}, otherwise
                with C{renderLrf}
    '''
    if lrs:
        self.book.renderLrs(path)
    else:
        self.book.renderLrf(path)
|
||||
|
||||
def cleanup(self):
|
||||
for _file in self.scaled_images.values():
|
||||
for _file in self.scaled_images.values() + self.rotated_images.values():
|
||||
_file.__del__()
|
||||
|
||||
|
||||
def process_file(path, options):
|
||||
cwd = os.getcwd()
|
||||
dirpath = None
|
||||
@ -1070,7 +1121,7 @@ def process_file(path, options):
|
||||
tim.save(tf.name)
|
||||
tpath = tf.name
|
||||
else:
|
||||
raise ConversionError, 'Cannot read from: %s', (options.cover,)
|
||||
raise ConversionError, 'Cannot read from: %s'% (options.cover,)
|
||||
|
||||
|
||||
if not options.title:
|
||||
|
@ -2,20 +2,23 @@
|
||||
<head>
|
||||
<style type='text/css'>
|
||||
.toc { page-break-after: always; text-indent: 0em; }
|
||||
.tocpn {text-align: right; }
|
||||
.tocchr {text-align: right; font-variant: small-caps;}
|
||||
</style>
|
||||
</head>
|
||||
<h1>Demo of <span style='font-family:monospace'>html2lrf</span></h1>
|
||||
<p>
|
||||
This file contains a demonstration of the capabilities of <span style='font-family:monospace'>html2lrf,</span> the HTML to LRF converter from <em>libprs500.</em> To obtain libprs500 visit <span style='font:sans-serif'>https://libprs500.kovidgoyal.net</span>
|
||||
This file contains a demonstration of the capabilities of <span style='font-family:monospace'>html2lrf,</span> the HTML to LRF converter from <em>libprs500.</em> To obtain libprs500 visit<br/><span style='font:sans-serif'>https://libprs500.kovidgoyal.net</span>
|
||||
</p>
|
||||
<br/>
|
||||
<h2><a name='toc'>Table of Contents</a></h2>
|
||||
<ul style='page-break-after:always'>
|
||||
<li><a href='#lists'>Demonstration of Lists</a></li>
|
||||
<li><a href='#lists'>Lists</a></li>
|
||||
<li><a href='#tables'>Tables</a></li>
|
||||
<li><a href='#text'>Text formatting and ruled lines</a></li>
|
||||
<li><a href='#images'>Inline images</a></li>
|
||||
<li><a href='#recursive'>Recursive link following</a></li>
|
||||
<li><a href='demo_ext.html'>The HTML used to create this file</a>
|
||||
<!--<li><a href='demo_ext.html'>The HTML used to create this file</a>-->
|
||||
</ul>
|
||||
|
||||
<h2><a name='lists'>Lists</a></h2>
|
||||
@ -40,6 +43,53 @@
|
||||
<a href='#toc'>Table of Contents</a>
|
||||
</p>
|
||||
|
||||
<h2><a name='tables'>Tables</a></h2>
|
||||
<p>
|
||||
Because I can!
|
||||
</p>
|
||||
<br/>
|
||||
|
||||
<table>
|
||||
<tr><td colspan=4><h3 style="text-align:center">A matrix</h3></td></tr>
|
||||
<tr><td></td><td style="text-align:center"><b>Column 1</b></td><td style="text-align:center"><b>Column 2</b></td><td style="text-align:center"><b>Column 3</b></td></tr>
|
||||
<tr><td><b>Row 1</b></td><td><p style="text-align:center">(1, 1)</p></td></tr>
|
||||
<tr><td><b>Row 2</b></td><td></td><td style="text-align:center"><p>(2, 2)</p></td><td></td></tr>
|
||||
<tr><td><b>Row 3</b></td><td></td><td></td><td><p style="text-align:center">(3, 3)</p></td></tr>
|
||||
</table>
|
||||
<br/>
|
||||
<p>
|
||||
html2lrf supports both rowspan and colspan, but no other HTML table attributes, as it uses its own algorithm to determine optimal placement of cells.
|
||||
</p>
|
||||
<br/>
|
||||
<p>
|
||||
The table conversion code is very new and likely to be swarming with bugs, so please report them at <br/><font face="monospace">https://libprs500.kovidgoyal.net/newticket</font>
|
||||
</p>
|
||||
<br/>
|
||||
<p style="page-break-after:always">
|
||||
On the next page you'll see a real life example taken from a Project Gutenberg text with no modifications. It shows off html2lrf's handling of rowspan and colspan.
|
||||
</p>
|
||||
<h3 align="center">Sample Complex Table of Contents</h3>
|
||||
<table summary="TOC">
|
||||
<tr><td colspan="3"> </td><td align="right">PAGE</td></tr>
|
||||
<tr><td class="tocch" colspan="3"><a href="#PREFACE">Preface</a></td><td class="tocpn">v</td></tr>
|
||||
<tr><td class="tocch" colspan="3"><a href="#REFERENCE_WORKS">List of Works of Reference</a></td><td class="tocpn">vii</td></tr>
|
||||
<tr><td class="tocch" colspan="3"><a href="#LIST_OF_ILLUSTRATIONS">List of Illustrations</a></td><td class="tocpn">xi</td></tr>
|
||||
<tr><td class="tocch">Chapter</td><td class="tocchr">I.</td><td class="tocch"><a href="#CHAPTER_I">History of the Foundation</a></td><td class="tocpn">3</td></tr>
|
||||
<tr><td class="tocchr" colspan="2">II.</td><td class="tocch"><a href="#CHAPTER_II">Exterior of the Church</a></td><td class="tocpn">25</td></tr>
|
||||
<tr><td class="tocchr" colspan="2">III.</td><td class="tocch"><a href="#CHAPTER_III">Interior of the Church</a></td><td class="tocpn">33</td></tr>
|
||||
<tr><td class="tocchr" colspan="2">IV.</td><td class="tocch"><a href="#CHAPTER_IV">St. Bartholomew-the-Less and the Hospital</a></td><td class="tocpn">63</td></tr>
|
||||
<tr><td class="tocch">Appendix</td><td class="tocchr">I.</td><td class="tocch"><a href="#APPENDIX_I">The Priory Seals</a></td><td class="tocpn">73</td></tr>
|
||||
<tr><td class="tocchr" colspan="2">II.</td><td class="tocch"><a href="#APPENDIX_II">The Priors and Rectors</a></td><td class="tocpn">77</td></tr>
|
||||
<tr><td class="tocchr" colspan="2">III.</td><td class="tocch"><a href="#APPENDIX_III">Inventory of Vestments, etc.</a></td><td class="tocpn">79</td></tr>
|
||||
<tr><td class="tocchr" colspan="2">IV.</td><td class="tocch"><a href="#APPENDIX_IV">The Organ</a></td><td class="tocpn">80</td></tr>
|
||||
<tr><td class="tocch" colspan="3"><a href="#INDEX">Index</a></td><td class="tocpn">83</td></tr>
|
||||
</table>
|
||||
|
||||
<p class='toc'>
|
||||
<hr />
|
||||
<a href='#toc'>Table of Contents</a>
|
||||
</p>
|
||||
|
||||
<h2><a name='text'>Text formatting</a></h2>
|
||||
<p>
|
||||
A simple <i>paragraph</i> of <b>formatted
|
||||
|
306
src/libprs500/ebooks/lrf/html/table.py
Normal file
306
src/libprs500/ebooks/lrf/html/table.py
Normal file
@ -0,0 +1,306 @@
|
||||
## Copyright (C) 2006 Kovid Goyal kovid@kovidgoyal.net
|
||||
## This program is free software; you can redistribute it and/or modify
|
||||
## it under the terms of the GNU General Public License as published by
|
||||
## the Free Software Foundation; either version 2 of the License, or
|
||||
## (at your option) any later version.
|
||||
##
|
||||
## This program is distributed in the hope that it will be useful,
|
||||
## but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
## GNU General Public License for more details.
|
||||
##
|
||||
## You should have received a copy of the GNU General Public License along
|
||||
## with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
import math, sys
|
||||
|
||||
from libprs500.ebooks.lrf.fonts import get_font
|
||||
from libprs500.ebooks.lrf.pylrs.pylrs import TextBlock, Text, CR, Span, \
|
||||
CharButton, Plot, Paragraph, \
|
||||
LrsTextTag
|
||||
|
||||
def ceil(num):
    '''
    Round C{num} up to the next whole number.

    @return: The result of C{math.ceil} converted to an C{int}
    '''
    rounded_up = math.ceil(num)
    return int(rounded_up)
|
||||
|
||||
def print_xml(elem):
    '''
    Write the XML representation of C{elem} to standard output, followed
    by a newline.

    @param elem: An object whose C{toElement('utf8')} result can be written
                 by C{ElementWriter}
    '''
    from libprs500.ebooks.lrf.pylrs.pylrs import ElementWriter
    xml_tree = elem.toElement('utf8')
    writer = ElementWriter(xml_tree, sourceEncoding='utf8')
    writer.write(sys.stdout)
    print
|
||||
|
||||
def cattrs(base, extra):
    '''
    Combine two attribute dictionaries without modifying either of them.

    @param base: The attributes to start from
    @param extra: Attributes that are added to, or replace, those of C{base}
    @return: A copy of C{base} updated with C{extra}
    '''
    merged = base.copy()
    merged.update(extra)
    return merged
|
||||
|
||||
def tokens(tb):
    '''
    Walk the contents of the text block C{tb} and generate
    C{(token, attrs)} pairs. A token is one of:
      1. The integer C{1}: a C{CR} that is a direct child of C{tb}
      2. The integer C{2}: a C{CR} found inside a paragraph
      3. A string: a piece of text that has the same style throughout.
         C{attrs} is a dictionary with the attributes in effect for it.
      4. A C{Plot} object
    For integer and C{Plot} tokens C{attrs} is C{None}. Direct children of
    C{tb} that are neither a C{CR} nor a C{Paragraph} are ignored.
    '''
    def expand(node, style):
        # Generate the tokens of a single paragraph element. The order of
        # the type checks is significant and must not be changed.
        if isinstance(node, CR):
            yield 2, None
            return
        if isinstance(node, Text):
            yield node.text, cattrs(style, {})
            return
        if isinstance(node, basestring):
            yield node, cattrs(style, {})
            return
        if isinstance(node, (CharButton, LrsTextTag)):
            # Only the first child contributes text
            if node.contents:
                yield node.contents[0].text, cattrs(style, {})
            return
        if isinstance(node, Plot):
            yield node, None
            return
        if isinstance(node, Span):
            # The attributes of a span apply to everything inside it
            nested_style = cattrs(style, node.attrs)
            for child in node.contents:
                for pair in expand(child, nested_style):
                    yield pair

    for item in tb.contents:
        if isinstance(item, CR):
            yield 1, None
        elif isinstance(item, Paragraph):
            for element in item.contents:
                style = element.attrs if hasattr(element, 'attrs') else {}
                for pair in expand(element, style):
                    yield pair
|
||||
|
||||
|
||||
class Cell(object):
    '''
    A single table cell (C{<td>}). The contents of the cell are converted
    into text blocks by the converter at construction time; the remaining
    methods estimate how much space those text blocks need.
    '''

    def __init__(self, conv, cell, css):
        '''
        @param conv: The converter, used to convert the contents of the cell
        @param cell: The C{<td>} tag
        @param css: CSS that applies to the cell
        '''
        self.conv = conv
        self.cell = cell
        self.css = css
        self.text_blocks = []
        self.rowspan = self.colspan = 1
        try:
            self.colspan = int(cell['colspan']) if cell.has_key('colspan') else 1
            self.rowspan = int(cell['rowspan']) if cell.has_key('rowspan') else 1
        except (ValueError, TypeError):
            # Unparseable span: keep whatever has been set so far
            if conv.verbose:
                print >>sys.stderr, "Error reading row/colspan for ", cell

        # The cell is converted onto a scratch page from which the resulting
        # text blocks are harvested. While this happens no new pages may be
        # created and <a name> targets are attached to the real page.
        pp = conv.current_page
        conv.book.allow_new_page = False
        conv.anchor_to_previous = pp
        try:
            conv.current_page = conv.book.create_page()
            conv.parse_tag(cell, css)
            conv.end_current_block()
            for item in conv.current_page.contents:
                if isinstance(item, TextBlock):
                    self.text_blocks.append(item)
        finally:
            # Always give the converter back in a usable state, even if
            # converting the contents of the cell failed.
            conv.current_page = pp
            conv.book.allow_new_page = True
            conv.anchor_to_previous = None
        if not self.text_blocks:
            # An empty cell still needs something that can be placed
            tb = conv.book.create_text_block()
            tb.Paragraph(' ')
            self.text_blocks.append(tb)
        for tb in self.text_blocks:
            tb.parent = None
            tb.objId = 0
            # Needed as we have to eventually change this BlockStyle's width and
            # height attributes. This blockstyle may be shared with other
            # elements, so doing that causes havoc.
            tb.blockStyle = conv.book.create_block_style()
            ts = conv.book.create_text_style(**tb.textStyle.attrs)
            ts.attrs['parindent'] = 0
            tb.textStyle = ts
            if ts.attrs['align'] == 'foot':
                # NOTE(review): presumably the trailing space keeps right
                # aligned text off the edge of the cell -- confirm.
                if tb.contents and isinstance(tb.contents[-1], Paragraph):
                    tb.contents[-1].append(' ')

    def pts_to_pixels(self, pts):
        '''
        Convert C{pts} to pixels using the dpi of the conversion profile.

        @param pts: Anything accepted by C{int()}. The value is divided by
                    10 before the conversion, so it is presumably expressed
                    in tenths of a point (TODO confirm).
        @return: Number of pixels, rounded up
        '''
        pts = int(pts)
        return ceil((float(self.conv.profile.dpi)/72)*(pts/10.))

    def text_block_size(self, tb, maxwidth=sys.maxint, debug=False):
        '''
        Estimate the size of the text block C{tb} by placing its words one
        after the other, starting a new line whenever a word does not fit
        into C{maxwidth}.

        @param tb: The text block to measure
        @param maxwidth: Width in pixels available to the text block
        @param debug: Unused
        @return: The tuple C{(width, height)} in pixels. The width includes
                 3 extra pixels.
        '''
        ts = tb.textStyle.attrs
        default_font = get_font(ts['fontfacename'], self.pts_to_pixels(ts['fontsize']))
        parindent = self.pts_to_pixels(ts['parindent'])
        ls, ws = self.pts_to_pixels(ts['linespace']), self.pts_to_pixels(ts['wordspace'])
        top, bottom, left, right = 0, 0, parindent, parindent

        def add_word(width, height, left, right, top, bottom):
            # Place one word (or plot) and return the updated extents
            if left + width > maxwidth:
                # Does not fit: the word starts a new line
                left = width + ws
                top += height + ls
                bottom = top+height if top+height > bottom else bottom
            else:
                left += (width + ws)
                right = left if left > right else right
                bottom = top+height if top+height > bottom else bottom
            return left, right, top, bottom

        for token, attrs in tokens(tb):
            font = default_font
            if isinstance(token, int): # Handle para and line breaks
                top = bottom
                # 1 is a break between paragraphs, which restarts the
                # paragraph indent; 2 is a line break inside a paragraph.
                left = parindent if token == 1 else 0
                continue
            if isinstance(token, Plot):
                width, height = self.pts_to_pixels(token.xsize), self.pts_to_pixels(token.ysize)
                left, right, top, bottom = add_word(width, height, left, right, top, bottom)
                continue
            ff = attrs.get('fontfacename', ts['fontfacename'])
            fs = attrs.get('fontsize', ts['fontsize'])
            if (ff, fs) != (ts['fontfacename'], ts['fontsize']):
                # Text styled differently from the block default
                font = get_font(ff, self.pts_to_pixels(fs))
            for word in token.split():
                width, height = font.getsize(word)
                left, right, top, bottom = add_word(width, height, left, right, top, bottom)
        return right+3, bottom

    def text_block_preferred_width(self, tb, debug=False):
        '''
        Width of the text block C{tb} when no line has to be wrapped.
        '''
        return self.text_block_size(tb, sys.maxint, debug=debug)[0]

    def preferred_width(self, debug=False):
        '''
        The largest preferred width of the text blocks in this cell.
        '''
        return ceil(max([self.text_block_preferred_width(i, debug=debug) for i in self.text_blocks]))

    def height(self, width):
        '''
        Sum of the heights of the text blocks in this cell when each of them
        is laid out with the width C{width}.
        '''
        return sum([self.text_block_size(i, width)[1] for i in self.text_blocks])
|
||||
|
||||
|
||||
|
||||
class Row(object):
    '''
    A table row (C{<tr>}): one L{Cell} for every C{<td>} found in the row.
    '''

    def __init__(self, conv, row, css, colpad):
        '''
        @param conv: The converter, passed on to the cells
        @param row: The C{<tr>} tag
        @param css: CSS that applies to the row
        @param colpad: Padding between columns
        '''
        self.cells = []
        self.colpad = colpad
        cells = row.findAll('td')
        for cell in cells:
            ccss = conv.tag_css(cell, css)
            self.cells.append(Cell(conv, cell, ccss))

    def number_of_cells(self):
        '''Number of cells in this row. Respects colspan'''
        return sum([cell.colspan for cell in self.cells])

    def height(self, widths):
        '''
        Height of the tallest cell in this row, 0 if the row has no cells.

        @param widths: The widths of the columns of the table. The width of
                       a cell is the sum of the widths of the columns it
                       spans.
        '''
        i, heights = 0, []
        for cell in self.cells:
            width = sum(widths[i:i+cell.colspan])
            heights.append(cell.height(width))
            i += cell.colspan
        return max(heights) if heights else 0

    def preferred_width(self, col):
        '''
        Preferred width of the cell that occupies the column C{col}.

        @return: 0 if the cell spans more than one column or if no cell of
                 this row occupies C{col}, otherwise the preferred width of
                 the cell.
        '''
        start = 0
        for cell in self.cells:
            if start <= col < start + cell.colspan:
                return 0 if cell.colspan > 1 else cell.preferred_width()
            start += cell.colspan
        # The row is shorter than the table, it must not influence the
        # width of columns it has no cell in.
        return 0

    def cell_iterator(self):
        '''Iterate over the cells of this row in document order'''
        for c in self.cells:
            yield c
|
||||
|
||||
|
||||
class Table(object):
    '''
    An HTML C{<table>}: a list of L{Row}s together with the logic that
    decides how wide the columns are and where the text blocks of the
    cells are placed.
    '''

    def __init__(self, conv, table, css, rowpad=10, colpad=10):
        '''
        @param conv: The converter, passed on to the rows
        @param table: The C{<table>} tag
        @param css: CSS that applies to the table
        @param rowpad: Padding between rows
        @param colpad: Padding between columns
        '''
        self.rows = []
        self.conv = conv
        self.rowpad = rowpad
        self.colpad = colpad
        for tr in table.findAll('tr'):
            self.rows.append(Row(conv, tr, conv.tag_css(tr, css), colpad))

    def number_of_columns(self):
        '''Number of cells in the longest row, 0 for a table without rows'''
        widest = 0
        for row in self.rows:
            count = row.number_of_cells()
            if count > widest:
                widest = count
        return widest

    def number_or_rows(self):
        '''Number of rows in this table'''
        return len(self.rows)

    def height(self, maxwidth):
        ''' Return row heights + self.rowpad'''
        widths = self.get_widths(maxwidth)
        total = 0
        for row in self.rows:
            total += row.height(widths) + self.rowpad
        # There is padding between rows only, not after the last one
        return total - self.rowpad

    def get_widths(self, maxwidth):
        '''
        Return widths of columns + self.colpad

        The width of a column is the largest preferred width any row
        reports for it. While the columns do not fit into C{maxwidth} they
        are all shrunk by 5%, at most 100 times.
        '''
        widths = []
        for c in range(self.number_of_columns()):
            column = []
            for row in self.rows:
                try:
                    column.append(row.preferred_width(c))
                except IndexError:
                    column.append(0)
            widths.append(max(column))
        budget = maxwidth - (len(widths) - 1) * self.colpad
        attempts = 0
        while sum(widths) > budget and attempts < 100:
            widths = [ceil((95./100.)*w) for w in widths]
            attempts += 1
        return [w + self.colpad for w in widths]

    def blocks(self, maxwidth):
        '''
        Generate the placement of the contents of the table as tuples
        C{(block, xpos, ypos, delta)}:
          - C{(None, 0, row height, 0)} marks the start of every row
          - C{(text block, x of its column, y offset inside its cell,
            delta)} for every text block of every cell of the row, where
            C{delta} is the horizontal space left over by the table
            (never negative)
        The C{blockStyle} of every text block is replaced by one that has
        the width and height computed for the block.
        '''
        nrows, ncols = self.number_or_rows(), self.number_of_columns()
        grid = [[None] * ncols for _ in range(nrows)]
        next_col = [0] * nrows
        for r, row in enumerate(self.rows):
            for cell in row.cell_iterator():
                grid[r][next_col[r]] = cell
                next_col[r] += cell.colspan
                # A cell spanning several rows pushes the cells of the
                # rows below it to the right
                for k in range(1, cell.rowspan):
                    try:
                        next_col[r+k] += 1
                    except IndexError:
                        break

        widths = self.get_widths(maxwidth)
        heights = [row.height(widths) for row in self.rows]

        xpos = [sum(widths[:i]) for i in range(ncols)]
        delta = max(maxwidth - sum(widths), 0)
        for r, grid_row in enumerate(grid):
            yield None, 0, heights[r], 0
            for c, cell in enumerate(grid_row):
                if not cell:
                    continue
                width = sum(widths[c:c+cell.colspan])
                sypos = 0
                for tb in cell.text_blocks:
                    tb.blockStyle = self.conv.book.create_block_style(
                                    blockwidth=width,
                                    blockheight=cell.text_block_size(tb, width)[1])
                    yield tb, xpos[c], sypos, delta
                    sypos += tb.blockStyle.attrs['blockheight']
|
||||
|
||||
|
||||
|
||||
|
@ -94,7 +94,12 @@ class xml_attr_field(object):
|
||||
|
||||
def __get__(self, obj, typ=None):
|
||||
""" Return the data in this field or '' if the field is empty """
|
||||
document = dom.parseString(obj.info)
|
||||
try:
|
||||
document = dom.parseString(obj.info)
|
||||
except Exception, err:
|
||||
print >>sys.stderr, "Could not parse XML:", err
|
||||
print obj.info
|
||||
raise
|
||||
elems = document.getElementsByTagName(self.tag_name)
|
||||
if len(elems):
|
||||
elem = None
|
||||
@ -108,7 +113,12 @@ class xml_attr_field(object):
|
||||
def __set__(self, obj, val):
|
||||
if val == None:
|
||||
val = ""
|
||||
document = dom.parseString(obj.info)
|
||||
try:
|
||||
document = dom.parseString(obj.info)
|
||||
except Exception, err:
|
||||
print >>sys.stderr, "Could not parse XML:", err
|
||||
print obj.info
|
||||
raise
|
||||
elems = document.getElementsByTagName(self.tag_name)
|
||||
if len(elems):
|
||||
elem = None
|
||||
@ -142,7 +152,13 @@ class xml_field(object):
|
||||
|
||||
def __get__(self, obj, typ=None):
|
||||
""" Return the data in this field or '' if the field is empty """
|
||||
document = dom.parseString(obj.info)
|
||||
try:
|
||||
document = dom.parseString(obj.info)
|
||||
except Exception, err:
|
||||
print >>sys.stderr, "Could not parse XML:", err
|
||||
print obj.info
|
||||
raise
|
||||
|
||||
elems = document.getElementsByTagName(self.tag_name)
|
||||
if len(elems):
|
||||
elem = None
|
||||
@ -158,7 +174,12 @@ class xml_field(object):
|
||||
def __set__(self, obj, val):
|
||||
if val == None:
|
||||
val = ""
|
||||
document = dom.parseString(obj.info)
|
||||
try:
|
||||
document = dom.parseString(obj.info)
|
||||
except Exception, err:
|
||||
print >>sys.stderr, "Could not parse XML:", err
|
||||
print obj.info
|
||||
raise
|
||||
def create_elem():
|
||||
elem = document.createElement(self.tag_name)
|
||||
elem.appendChild(dom.Text())
|
||||
|
@ -56,6 +56,8 @@ DEFAULT_GENREADING = "fs" # default is yes to both lrf and lrs
|
||||
class LrsError(Exception):
    '''
    Raised when a book is put together incorrectly, for example when a
    Table of Contents entry is given an unusable destination.
    '''
|
||||
|
||||
class ContentError(Exception):
    '''
    Raised when content needs a new page style while the book does not
    allow new pages (C{allow_new_page} is False).
    '''
|
||||
|
||||
def _checkExists(filename):
|
||||
if not os.path.exists(filename):
|
||||
@ -435,6 +437,8 @@ class Book(Delegator):
|
||||
self.applySetting("sourceencoding", DEFAULT_SOURCE_ENCODING)
|
||||
|
||||
self.applySettings(settings, testValid=True)
|
||||
|
||||
self.allow_new_page = True #: If False L{create_page} raises an exception
|
||||
|
||||
def create_text_style(self, **settings):
|
||||
ans = TextStyle(**self.defaultTextStyle.attrs.copy())
|
||||
@ -447,6 +451,8 @@ class Book(Delegator):
|
||||
return ans
|
||||
|
||||
def create_page_style(self, **settings):
|
||||
if not self.allow_new_page:
|
||||
raise ContentError
|
||||
ans = PageStyle(**self.defaultPageStyle.attrs.copy())
|
||||
ans.update(settings)
|
||||
return ans
|
||||
@ -641,12 +647,15 @@ class TableOfContents(object):
|
||||
raise LrsError, "TOC destination must be a TextBlock, ImageBlock or RuledLine"+\
|
||||
" not a " + str(type(textBlock))
|
||||
|
||||
if textBlock.parent is None or not isinstance(textBlock.parent, Page):
|
||||
if textBlock.parent is None:
|
||||
raise LrsError, "TOC text block must be already appended to a page"
|
||||
|
||||
if textBlock.parent.parent is None:
|
||||
raise LrsError, \
|
||||
"TOC destination page must be already appended to a book"
|
||||
|
||||
if not hasattr(textBlock.parent, 'objId'):
|
||||
raise LrsError, "TOC destination must be appended to a container with an objID"
|
||||
|
||||
self.tocEntries.append(TocLabel(tocLabel, textBlock))
|
||||
textBlock.tocLabel = tocLabel
|
||||
@ -1373,7 +1382,6 @@ class TextBlock(LrsObject, LrsContainer):
|
||||
|
||||
self.textSettings = {}
|
||||
self.blockSettings = {}
|
||||
|
||||
|
||||
for name, value in settings.items():
|
||||
if name in TextStyle.validSettings:
|
||||
@ -1428,7 +1436,6 @@ class TextBlock(LrsObject, LrsContainer):
|
||||
tb.append(content.toElement(sourceEncoding))
|
||||
|
||||
return tb
|
||||
|
||||
|
||||
def getReferencedObjIds(self):
|
||||
ids = [self.objId, self.extraId, self.blockStyle.objId,
|
||||
@ -2111,7 +2118,7 @@ class PutObj(LrsContainer):
|
||||
self.y1 = int(y)
|
||||
|
||||
|
||||
def appendReferencedObjects(self, parent):
|
||||
def appendReferencedObjects(self, parent):
|
||||
if self.content.parent is None:
|
||||
parent.append(self.content)
|
||||
|
||||
|
@ -17,6 +17,7 @@ Convert .txt files to .lrf
|
||||
"""
|
||||
import os, sys
|
||||
|
||||
from libprs500.ebooks import BeautifulSoup
|
||||
from libprs500.ebooks.lrf import ConversionError, option_parser
|
||||
from libprs500.ebooks.lrf import Book
|
||||
from libprs500.ebooks.lrf.pylrs.pylrs import Paragraph, Italic, Bold, BookSetting
|
||||
@ -63,7 +64,7 @@ def convert_txt(path, options):
|
||||
C{author}, C{title}, C{encoding} (the assumed encoding of
|
||||
the text in C{path}.)
|
||||
"""
|
||||
import fileinput
|
||||
import codecs
|
||||
header = None
|
||||
if options.header:
|
||||
header = Paragraph()
|
||||
@ -84,7 +85,19 @@ def convert_txt(path, options):
|
||||
block = book.create_text_block()
|
||||
pg.append(block)
|
||||
book.append(pg)
|
||||
for line in fileinput.input(path):
|
||||
lines = ""
|
||||
try:
|
||||
lines = codecs.open(path, 'rb', 'ascii').readlines()
|
||||
print 'huh'
|
||||
except UnicodeDecodeError:
|
||||
try:
|
||||
lines = codecs.open(path, 'rb', 'cp1252').readlines()
|
||||
except UnicodeDecodeError:
|
||||
try:
|
||||
lines = codecs.open(path, 'rb', 'iso-8859-1').readlines()
|
||||
except UnicodeDecodeError:
|
||||
lines = codecs.open(path, 'rb', 'utf8').readlines()
|
||||
for line in lines:
|
||||
line = line.strip()
|
||||
if line:
|
||||
buffer = buffer.rstrip() + ' ' + line
|
||||
|
Loading…
x
Reference in New Issue
Block a user