mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Added support for conversion for HTML tables.
Added support for common encodings to txt2lrf.
This commit is contained in:
parent
b26adb541e
commit
806aba6f80
@ -33,7 +33,7 @@ You may have to adjust the GROUP and the location of the rules file to
|
|||||||
suit your distribution.
|
suit your distribution.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
__version__ = "0.3.32"
|
__version__ = "0.3.33"
|
||||||
__docformat__ = "epytext"
|
__docformat__ = "epytext"
|
||||||
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
|
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
|
||||||
|
|
||||||
|
@ -352,7 +352,7 @@ class SetTime(Command):
|
|||||||
self.day = t[2]
|
self.day = t[2]
|
||||||
self.hour = t[3]
|
self.hour = t[3]
|
||||||
self.minute = t[4]
|
self.minute = t[4]
|
||||||
# Hack you should actually update the entire time tree is
|
# Hack you should actually update the entire time tree if
|
||||||
# second is > 59
|
# second is > 59
|
||||||
self.second = t[5] if t[5] < 60 else 59
|
self.second = t[5] if t[5] < 60 else 59
|
||||||
|
|
||||||
|
@ -13,7 +13,11 @@
|
|||||||
## with this program; if not, write to the Free Software Foundation, Inc.,
|
## with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
import pkg_resources
|
import pkg_resources
|
||||||
from PIL import ImageFont
|
try:
|
||||||
|
from PIL import ImageFont
|
||||||
|
except ImportError:
|
||||||
|
import ImageFont
|
||||||
|
|
||||||
'''
|
'''
|
||||||
Default fonts used in the PRS500
|
Default fonts used in the PRS500
|
||||||
'''
|
'''
|
||||||
@ -26,7 +30,8 @@ FONT_MAP = {
|
|||||||
def get_font(name, size, encoding='unic'):
|
def get_font(name, size, encoding='unic'):
|
||||||
'''
|
'''
|
||||||
Get an ImageFont object by name.
|
Get an ImageFont object by name.
|
||||||
@param size: Size in pts
|
@param size: Font height in pixels. To convert from pts:
|
||||||
|
sz in pixels = (dpi/72) * size in pts
|
||||||
@param encoding: Font encoding to use. E.g. 'unic', 'symbol', 'ADOB', 'ADBE', 'aprm'
|
@param encoding: Font encoding to use. E.g. 'unic', 'symbol', 'ADOB', 'ADBE', 'aprm'
|
||||||
'''
|
'''
|
||||||
if name in FONT_MAP.keys():
|
if name in FONT_MAP.keys():
|
||||||
|
@ -39,6 +39,7 @@ from libprs500.ebooks.lrf.pylrs.pylrs import Paragraph, CR, Italic, ImageStream,
|
|||||||
Plot, Image, BlockSpace, RuledLine, BookSetting, Canvas
|
Plot, Image, BlockSpace, RuledLine, BookSetting, Canvas
|
||||||
from libprs500.ebooks.lrf.pylrs.pylrs import Span as _Span
|
from libprs500.ebooks.lrf.pylrs.pylrs import Span as _Span
|
||||||
from libprs500.ebooks.lrf import ConversionError, option_parser, Book, PRS500_PROFILE
|
from libprs500.ebooks.lrf import ConversionError, option_parser, Book, PRS500_PROFILE
|
||||||
|
from libprs500.ebooks.lrf.html.table import Table
|
||||||
from libprs500 import extract, filename_to_utf8
|
from libprs500 import extract, filename_to_utf8
|
||||||
from libprs500.ptempfile import PersistentTemporaryFile
|
from libprs500.ptempfile import PersistentTemporaryFile
|
||||||
|
|
||||||
@ -303,6 +304,7 @@ class HTMLConverter(object):
|
|||||||
self.chapter_regex = chapter_regex #: Regex used to search for chapter titles
|
self.chapter_regex = chapter_regex #: Regex used to search for chapter titles
|
||||||
self.link_exclude = link_exclude #: Ignore matching hrefs
|
self.link_exclude = link_exclude #: Ignore matching hrefs
|
||||||
self.scaled_images = {} #: Temporary files with scaled version of images
|
self.scaled_images = {} #: Temporary files with scaled version of images
|
||||||
|
self.rotated_images = {} #: Temporary files with rotated version of images
|
||||||
self.max_link_levels = max_link_levels #: Number of link levels to process recursively
|
self.max_link_levels = max_link_levels #: Number of link levels to process recursively
|
||||||
self.link_level = link_level #: Current link level
|
self.link_level = link_level #: Current link level
|
||||||
self.blockquote_style = book.create_block_style(sidemargin=60,
|
self.blockquote_style = book.create_block_style(sidemargin=60,
|
||||||
@ -317,6 +319,9 @@ class HTMLConverter(object):
|
|||||||
self.files = {} #: links that point to other files
|
self.files = {} #: links that point to other files
|
||||||
self.links_processed = False #: Whether process_links has been called on this object
|
self.links_processed = False #: Whether process_links has been called on this object
|
||||||
self.font_delta = font_delta
|
self.font_delta = font_delta
|
||||||
|
# Set by table processing code so that any <a name> within the table
|
||||||
|
# point to the previous element
|
||||||
|
self.anchor_to_previous = None
|
||||||
self.cover = cover
|
self.cover = cover
|
||||||
self.memory = [] #: Used to ensure that duplicate CSS unhandled errors are not reported
|
self.memory = [] #: Used to ensure that duplicate CSS unhandled errors are not reported
|
||||||
self.in_ol = False #: Flag indicating we're in an <ol> element
|
self.in_ol = False #: Flag indicating we're in an <ol> element
|
||||||
@ -478,6 +483,15 @@ class HTMLConverter(object):
|
|||||||
return text
|
return text
|
||||||
|
|
||||||
def process_links(self):
|
def process_links(self):
|
||||||
|
def add_toc_entry(text, target):
|
||||||
|
# TextBlocks in Canvases have a None parent or an Objects Parent
|
||||||
|
if target.parent != None and \
|
||||||
|
hasattr(target.parent, 'objId'):
|
||||||
|
self.book.addTocEntry(ascii_text, tb)
|
||||||
|
elif self.verbose:
|
||||||
|
print "Cannot add link", ascii_text, "to TOC"
|
||||||
|
|
||||||
|
|
||||||
def get_target_block(fragment, targets):
|
def get_target_block(fragment, targets):
|
||||||
'''Return the correct block for the <a name> element'''
|
'''Return the correct block for the <a name> element'''
|
||||||
bs = targets[fragment]
|
bs = targets[fragment]
|
||||||
@ -535,7 +549,7 @@ class HTMLConverter(object):
|
|||||||
if fragment in self.targets.keys():
|
if fragment in self.targets.keys():
|
||||||
tb = get_target_block(fragment, self.targets)
|
tb = get_target_block(fragment, self.targets)
|
||||||
if self.is_root:
|
if self.is_root:
|
||||||
self.book.addTocEntry(ascii_text, tb)
|
add_toc_entry(ascii_text, tb)
|
||||||
sys.stdout.flush()
|
sys.stdout.flush()
|
||||||
jb = JumpButton(tb)
|
jb = JumpButton(tb)
|
||||||
self.book.append(jb)
|
self.book.append(jb)
|
||||||
@ -580,7 +594,7 @@ class HTMLConverter(object):
|
|||||||
else:
|
else:
|
||||||
tb = conv.top
|
tb = conv.top
|
||||||
if self.is_root:
|
if self.is_root:
|
||||||
self.book.addTocEntry(ascii_text, tb)
|
add_toc_entry(ascii_text, tb)
|
||||||
jb = JumpButton(tb)
|
jb = JumpButton(tb)
|
||||||
self.book.append(jb)
|
self.book.append(jb)
|
||||||
cb = CharButton(jb, text=text)
|
cb = CharButton(jb, text=text)
|
||||||
@ -727,22 +741,32 @@ class HTMLConverter(object):
|
|||||||
blockStyle=self.current_block.blockStyle)
|
blockStyle=self.current_block.blockStyle)
|
||||||
|
|
||||||
def process_image(self, path, tag_css, width=None, height=None):
|
def process_image(self, path, tag_css, width=None, height=None):
|
||||||
|
if self.rotated_images.has_key(path):
|
||||||
|
path = self.rotated_images[path].name
|
||||||
|
if self.scaled_images.has_key(path):
|
||||||
|
path = self.scaled_images[path].name
|
||||||
|
|
||||||
|
im = PILImage.open(path)
|
||||||
|
|
||||||
|
if width == None or height == None:
|
||||||
|
width, height = im.size
|
||||||
|
|
||||||
def scale_image(width, height):
|
def scale_image(width, height):
|
||||||
pt = PersistentTemporaryFile(suffix='.jpeg')
|
pt = PersistentTemporaryFile(suffix='.jpeg')
|
||||||
im.resize((int(width), int(height)), PILImage.ANTIALIAS).convert('RGB').save(pt, 'JPEG')
|
im.resize((int(width), int(height)), PILImage.ANTIALIAS).convert('RGB').save(pt, 'JPEG')
|
||||||
pt.close()
|
pt.close()
|
||||||
self.scaled_images[path] = pt
|
self.scaled_images[path] = pt
|
||||||
return pt.name
|
return pt.name
|
||||||
|
|
||||||
if self.scaled_images.has_key(path):
|
if width > self.profile.page_width and width > height:
|
||||||
path = self.scaled_images[path].name
|
pt = PersistentTemporaryFile(suffix='.jpeg')
|
||||||
|
|
||||||
im = PILImage.open(path)
|
|
||||||
if width == None or height == None:
|
|
||||||
width, height = im.size
|
|
||||||
if width > height:
|
|
||||||
im = im.rotate(-90)
|
im = im.rotate(-90)
|
||||||
|
im.convert('RGB').save(pt, 'JPEG')
|
||||||
|
path = pt.name
|
||||||
|
pt.close()
|
||||||
|
self.rotated_images[path] = pt
|
||||||
width, height = im.size
|
width, height = im.size
|
||||||
|
|
||||||
if height > self.profile.page_height:
|
if height > self.profile.page_height:
|
||||||
corrf = self.profile.page_height/(1.*height)
|
corrf = self.profile.page_height/(1.*height)
|
||||||
width, height = floor(corrf*width), self.profile.page_height-1
|
width, height = floor(corrf*width), self.profile.page_height-1
|
||||||
@ -788,7 +812,7 @@ class HTMLConverter(object):
|
|||||||
self.end_page()
|
self.end_page()
|
||||||
self.current_page.append(Canvas(width=self.profile.page_width,
|
self.current_page.append(Canvas(width=self.profile.page_width,
|
||||||
height=height))
|
height=height))
|
||||||
left = int(floor((self.profile.page_width - width)/2.))
|
left = int(floor((self.profile.page_width - width)/2.))
|
||||||
self.current_page.contents[0].put_object(ImageBlock(self.images[path]),
|
self.current_page.contents[0].put_object(ImageBlock(self.images[path]),
|
||||||
left, 0)
|
left, 0)
|
||||||
|
|
||||||
@ -824,6 +848,18 @@ class HTMLConverter(object):
|
|||||||
pass
|
pass
|
||||||
elif tagname == 'a' and self.max_link_levels >= 0:
|
elif tagname == 'a' and self.max_link_levels >= 0:
|
||||||
if tag.has_key('name'):
|
if tag.has_key('name'):
|
||||||
|
if self.anchor_to_previous:
|
||||||
|
self.process_children(tag, tag_css)
|
||||||
|
return
|
||||||
|
for c in self.anchor_to_previous.contents:
|
||||||
|
if isinstance(c, (TextBlock, ImageBlock)):
|
||||||
|
self.targets[tag['name']] = c
|
||||||
|
return
|
||||||
|
tb = self.book.create_text_block()
|
||||||
|
tb.Paragraph(" ")
|
||||||
|
self.anchor_to_previous.append(tb)
|
||||||
|
self.targets[tag['name']] = tb
|
||||||
|
return
|
||||||
previous = self.current_block
|
previous = self.current_block
|
||||||
self.process_children(tag, tag_css)
|
self.process_children(tag, tag_css)
|
||||||
target = None
|
target = None
|
||||||
@ -867,7 +903,7 @@ class HTMLConverter(object):
|
|||||||
['png', 'jpg', 'bmp', 'jpeg']:
|
['png', 'jpg', 'bmp', 'jpeg']:
|
||||||
self.process_image(path, tag_css)
|
self.process_image(path, tag_css)
|
||||||
else:
|
else:
|
||||||
self.add_text('Link: ' + tag['href'], tag_css)
|
self.add_text(self.get_text(tag), tag_css)
|
||||||
self.links.append(HTMLConverter.Link(self.current_para.contents[-1], tag))
|
self.links.append(HTMLConverter.Link(self.current_para.contents[-1], tag))
|
||||||
elif tagname == 'img':
|
elif tagname == 'img':
|
||||||
if tag.has_key('src') and os.access(unquote(tag['src']), os.R_OK):
|
if tag.has_key('src') and os.access(unquote(tag['src']), os.R_OK):
|
||||||
@ -1010,30 +1046,45 @@ class HTMLConverter(object):
|
|||||||
if tag.has_key('face'):
|
if tag.has_key('face'):
|
||||||
tag_css['font-family'] = tag['face']
|
tag_css['font-family'] = tag['face']
|
||||||
self.process_children(tag, tag_css)
|
self.process_children(tag, tag_css)
|
||||||
elif tagname in ['br', 'tr']:
|
elif tagname in ['br']:
|
||||||
self.current_para.append(CR())
|
self.current_para.append(CR())
|
||||||
self.process_children(tag, tag_css)
|
|
||||||
elif tagname in ['td']:
|
|
||||||
self.current_para.append(' ')
|
|
||||||
self.process_children(tag, tag_css)
|
|
||||||
elif tagname == 'hr':
|
elif tagname == 'hr':
|
||||||
self.end_current_para()
|
self.end_current_para()
|
||||||
self.current_block.append(CR())
|
self.current_block.append(CR())
|
||||||
self.end_current_block()
|
self.end_current_block()
|
||||||
self.current_page.RuledLine(linelength=self.profile.page_width)
|
self.current_page.RuledLine(linelength=self.profile.page_width)
|
||||||
|
elif tagname == 'table':
|
||||||
|
tag_css = self.tag_css(tag) # Table should not inherit CSS
|
||||||
|
self.process_table(tag, tag_css)
|
||||||
else:
|
else:
|
||||||
self.process_children(tag, tag_css)
|
self.process_children(tag, tag_css)
|
||||||
|
|
||||||
if end_page:
|
if end_page:
|
||||||
self.end_page()
|
self.end_page()
|
||||||
|
|
||||||
|
def process_table(self, tag, tag_css):
    '''
    Convert a C{<table>} tag into canvases appended to the current page.

    C{Table.blocks} yields a tuple whose block is empty to start each row
    (its y value is the row height) followed by one tuple per text block in
    that row. A new Canvas is created for every row marker and the row's
    text blocks are placed on it.

    @param tag: The C{<table>} tag
    @param tag_css: CSS to apply to the table
    '''
    self.end_current_block()
    rowpad = colpad = 10
    table = Table(self, tag, tag_css, rowpad=rowpad, colpad=colpad)
    canvases = []
    for block, xpos, ypos, delta in table.blocks(self.profile.page_width):
        if not block:
            # Row marker: ypos is the height of the row, pad it vertically
            canvases.append(Canvas(self.profile.page_width, ypos+rowpad,
                                   blockrule='block-fixed'))
        else:
            # delta is the horizontal space left over on the page, half of
            # it is used as a left offset. ypos is the offset of this block
            # below the blocks that precede it in the same cell; passing 0
            # here would draw all of a cell's blocks on top of each other.
            canvases[-1].put_object(block, xpos + int(delta/2.), ypos)

    for canvas in canvases:
        self.current_page.append(canvas)
    self.end_current_block()
|
||||||
|
|
||||||
|
|
||||||
def writeto(self, path, lrs=False):
    '''
    Render the book to C{path}.

    @param path: Destination passed to the book's render method
    @param lrs: If true C{renderLrs} is used, otherwise C{renderLrf}
    '''
    if lrs:
        self.book.renderLrs(path)
    else:
        self.book.renderLrf(path)
|
||||||
|
|
||||||
def cleanup(self):
    '''
    Call C{__del__} on every temporary file recorded in
    C{self.scaled_images} and then in C{self.rotated_images}.
    '''
    for store in (self.scaled_images, self.rotated_images):
        for temp_file in store.values():
            temp_file.__del__()
|
||||||
|
|
||||||
def process_file(path, options):
|
def process_file(path, options):
|
||||||
cwd = os.getcwd()
|
cwd = os.getcwd()
|
||||||
dirpath = None
|
dirpath = None
|
||||||
@ -1070,7 +1121,7 @@ def process_file(path, options):
|
|||||||
tim.save(tf.name)
|
tim.save(tf.name)
|
||||||
tpath = tf.name
|
tpath = tf.name
|
||||||
else:
|
else:
|
||||||
raise ConversionError, 'Cannot read from: %s', (options.cover,)
|
raise ConversionError, 'Cannot read from: %s'% (options.cover,)
|
||||||
|
|
||||||
|
|
||||||
if not options.title:
|
if not options.title:
|
||||||
|
@ -2,20 +2,23 @@
|
|||||||
<head>
|
<head>
|
||||||
<style type='text/css'>
|
<style type='text/css'>
|
||||||
.toc { page-break-after: always; text-indent: 0em; }
|
.toc { page-break-after: always; text-indent: 0em; }
|
||||||
|
.tocpn {text-align: right; }
|
||||||
|
.tocchr {text-align: right; font-variant: small-caps;}
|
||||||
</style>
|
</style>
|
||||||
</head>
|
</head>
|
||||||
<h1>Demo of <span style='font-family:monospace'>html2lrf</span></h1>
|
<h1>Demo of <span style='font-family:monospace'>html2lrf</span></h1>
|
||||||
<p>
|
<p>
|
||||||
This file contains a demonstration of the capabilities of <span style='font-family:monospace'>html2lrf,</span> the HTML to LRF converter from <em>libprs500.</em> To obtain libprs500 visit <span style='font:sans-serif'>https://libprs500.kovidgoyal.net</span>
|
This file contains a demonstration of the capabilities of <span style='font-family:monospace'>html2lrf,</span> the HTML to LRF converter from <em>libprs500.</em> To obtain libprs500 visit<br/><span style='font:sans-serif'>https://libprs500.kovidgoyal.net</span>
|
||||||
</p>
|
</p>
|
||||||
<br/>
|
<br/>
|
||||||
<h2><a name='toc'>Table of Contents</a></h2>
|
<h2><a name='toc'>Table of Contents</a></h2>
|
||||||
<ul style='page-break-after:always'>
|
<ul style='page-break-after:always'>
|
||||||
<li><a href='#lists'>Demonstration of Lists</a></li>
|
<li><a href='#lists'>Lists</a></li>
|
||||||
|
<li><a href='#tables'>Tables</a></li>
|
||||||
<li><a href='#text'>Text formatting and ruled lines</a></li>
|
<li><a href='#text'>Text formatting and ruled lines</a></li>
|
||||||
<li><a href='#images'>Inline images</a></li>
|
<li><a href='#images'>Inline images</a></li>
|
||||||
<li><a href='#recursive'>Recursive link following</a></li>
|
<li><a href='#recursive'>Recursive link following</a></li>
|
||||||
<li><a href='demo_ext.html'>The HTML used to create this file</a>
|
<!--<li><a href='demo_ext.html'>The HTML used to create this file</a>-->
|
||||||
</ul>
|
</ul>
|
||||||
|
|
||||||
<h2><a name='lists'>Lists</a></h2>
|
<h2><a name='lists'>Lists</a></h2>
|
||||||
@ -40,6 +43,53 @@
|
|||||||
<a href='#toc'>Table of Contents</a>
|
<a href='#toc'>Table of Contents</a>
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
|
<h2><a name='tables'>Tables</a></h2>
|
||||||
|
<p>
|
||||||
|
Because I can!
|
||||||
|
</p>
|
||||||
|
<br/>
|
||||||
|
|
||||||
|
<table>
|
||||||
|
<tr><td colspan=4><h3 style="text-align:center">A matrix</h3></td></tr>
|
||||||
|
<tr><td></td><td style="text-align:center"><b>Column 1</b></td><td style="text-align:center"><b>Column 2</b></td><td style="text-align:center"><b>Column 3</b></td></tr>
|
||||||
|
<tr><td><b>Row 1</b></td><td><p style="text-align:center">(1, 1)</p></td></tr>
|
||||||
|
<tr><td><b>Row 2</b></td><td></td><td style="text-align:center"><p>(2, 2)</p></td><td></td></tr>
|
||||||
|
<tr><td><b>Row 3</b></td><td></td><td></td><td><p style="text-align:center">(3, 3)</p></td></tr>
|
||||||
|
</table>
|
||||||
|
<br/>
|
||||||
|
<p>
|
||||||
|
html2lrf supports both rowspan and colspan, but no other HTML table attributes, as it uses its own algorithm to determine optimal placement of cells.
|
||||||
|
</p>
|
||||||
|
<br/>
|
||||||
|
<p>
|
||||||
|
The table conversion code is very new and likely to be swarming with bugs, so please report them at <br/><font face="monospace">https://libprs500.kovidgoyal.net/newticket</font>
|
||||||
|
</p>
|
||||||
|
<br/>
|
||||||
|
<p style="page-break-after:always">
|
||||||
|
On the next page you'll see a real life example taken from a Project Gutenberg text with no modifications. It shows off html2lrf's handling of rowspan and colspan.
|
||||||
|
</p>
|
||||||
|
<h3 align="center">Sample Complex Table of Contents</h3>
|
||||||
|
<table summary="TOC">
|
||||||
|
<tr><td colspan="3"> </td><td align="right">PAGE</td></tr>
|
||||||
|
<tr><td class="tocch" colspan="3"><a href="#PREFACE">Preface</a></td><td class="tocpn">v</td></tr>
|
||||||
|
<tr><td class="tocch" colspan="3"><a href="#REFERENCE_WORKS">List of Works of Reference</a></td><td class="tocpn">vii</td></tr>
|
||||||
|
<tr><td class="tocch" colspan="3"><a href="#LIST_OF_ILLUSTRATIONS">List of Illustrations</a></td><td class="tocpn">xi</td></tr>
|
||||||
|
<tr><td class="tocch">Chapter</td><td class="tocchr">I.</td><td class="tocch"><a href="#CHAPTER_I">History of the Foundation</a></td><td class="tocpn">3</td></tr>
|
||||||
|
<tr><td class="tocchr" colspan="2">II.</td><td class="tocch"><a href="#CHAPTER_II">Exterior of the Church</a></td><td class="tocpn">25</td></tr>
|
||||||
|
<tr><td class="tocchr" colspan="2">III.</td><td class="tocch"><a href="#CHAPTER_III">Interior of the Church</a></td><td class="tocpn">33</td></tr>
|
||||||
|
<tr><td class="tocchr" colspan="2">IV.</td><td class="tocch"><a href="#CHAPTER_IV">St. Bartholomew-the-Less and the Hospital</a></td><td class="tocpn">63</td></tr>
|
||||||
|
<tr><td class="tocch">Appendix</td><td class="tocchr">I.</td><td class="tocch"><a href="#APPENDIX_I">The Priory Seals</a></td><td class="tocpn">73</td></tr>
|
||||||
|
<tr><td class="tocchr" colspan="2">II.</td><td class="tocch"><a href="#APPENDIX_II">The Priors and Rectors</a></td><td class="tocpn">77</td></tr>
|
||||||
|
<tr><td class="tocchr" colspan="2">III.</td><td class="tocch"><a href="#APPENDIX_III">Inventory of Vestments, etc.</a></td><td class="tocpn">79</td></tr>
|
||||||
|
<tr><td class="tocchr" colspan="2">IV.</td><td class="tocch"><a href="#APPENDIX_IV">The Organ</a></td><td class="tocpn">80</td></tr>
|
||||||
|
<tr><td class="tocch" colspan="3"><a href="#INDEX">Index</a></td><td class="tocpn">83</td></tr>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
<p class='toc'>
|
||||||
|
<hr />
|
||||||
|
<a href='#toc'>Table of Contents</a>
|
||||||
|
</p>
|
||||||
|
|
||||||
<h2><a name='text'>Text formatting</a></h2>
|
<h2><a name='text'>Text formatting</a></h2>
|
||||||
<p>
|
<p>
|
||||||
A simple <i>paragraph</i> of <b>formatted
|
A simple <i>paragraph</i> of <b>formatted
|
||||||
|
306
src/libprs500/ebooks/lrf/html/table.py
Normal file
306
src/libprs500/ebooks/lrf/html/table.py
Normal file
@ -0,0 +1,306 @@
|
|||||||
|
## Copyright (C) 2006 Kovid Goyal kovid@kovidgoyal.net
|
||||||
|
## This program is free software; you can redistribute it and/or modify
|
||||||
|
## it under the terms of the GNU General Public License as published by
|
||||||
|
## the Free Software Foundation; either version 2 of the License, or
|
||||||
|
## (at your option) any later version.
|
||||||
|
##
|
||||||
|
## This program is distributed in the hope that it will be useful,
|
||||||
|
## but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
## GNU General Public License for more details.
|
||||||
|
##
|
||||||
|
## You should have received a copy of the GNU General Public License along
|
||||||
|
## with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
import math, sys
|
||||||
|
|
||||||
|
from libprs500.ebooks.lrf.fonts import get_font
|
||||||
|
from libprs500.ebooks.lrf.pylrs.pylrs import TextBlock, Text, CR, Span, \
|
||||||
|
CharButton, Plot, Paragraph, \
|
||||||
|
LrsTextTag
|
||||||
|
|
||||||
|
def ceil(num):
    '''Return the smallest integer not less than C{num}, as an C{int}.'''
    rounded_up = math.ceil(num)
    return int(rounded_up)
|
||||||
|
|
||||||
|
def print_xml(elem):
    '''
    Convert C{elem} with its C{toElement} method and write the result to
    standard output using an C{ElementWriter}, followed by a newline.
    '''
    from libprs500.ebooks.lrf.pylrs.pylrs import ElementWriter
    writer = ElementWriter(elem.toElement('utf8'), sourceEncoding='utf8')
    writer.write(sys.stdout)
    print
|
||||||
|
|
||||||
|
def cattrs(base, extra):
    '''
    Return a copy of C{base} updated with the entries of C{extra}.
    Neither argument is modified.
    '''
    combined = base.copy()
    combined.update(extra)
    return combined
|
||||||
|
|
||||||
|
def tokens(tb):
    '''
    Generate C{(token, attrs)} pairs for the contents of the TextBlock C{tb}.
    A token is one of:
        1. The integer 1: a CR placed directly in the TextBlock
        2. The integer 2: a CR found inside a paragraph
        3. A string: a run of text. C{attrs} is a copy of the attributes
           collected from the enclosing elements
        4. A Plot object
    For integer and Plot tokens C{attrs} is None. Items of the TextBlock
    that are neither CR nor Paragraph are ignored.
    '''
    def walk(node, inherited):
        # The order of the type checks is significant, keep it.
        if isinstance(node, CR):
            yield 2, None
            return
        if isinstance(node, Text):
            yield node.text, cattrs(inherited, {})
            return
        if isinstance(node, basestring):
            yield node, cattrs(inherited, {})
            return
        if isinstance(node, (CharButton, LrsTextTag)):
            # Only the first child is reported
            if node.contents:
                yield node.contents[0].text, cattrs(inherited, {})
            return
        if isinstance(node, Plot):
            yield node, None
            return
        if isinstance(node, Span):
            # Attributes of a Span apply to everything nested in it
            merged = cattrs(inherited, node.attrs)
            for child in node.contents:
                for pair in walk(child, merged):
                    yield pair

    for item in tb.contents:
        if isinstance(item, CR):
            yield 1, None
            continue
        if not isinstance(item, Paragraph):
            continue
        for piece in item.contents:
            for pair in walk(piece, getattr(piece, 'attrs', {})):
                yield pair
|
||||||
|
|
||||||
|
|
||||||
|
class Cell(object):
    '''
    A single table cell: the TextBlocks obtained by converting the contents
    of a C{<td>} tag, together with its row and column span.
    '''

    def __init__(self, conv, cell, css):
        '''
        @param conv: The converter. Its C{parse_tag} method is used to convert
                     the cell contents into a scratch page.
        @param cell: The C{<td>} tag
        @param css: CSS passed on to C{conv.parse_tag}
        '''
        self.conv = conv
        self.cell = cell
        self.css = css
        self.text_blocks = []
        self.rowspan = self.colspan = 1
        try:
            # Spans smaller than 1 would corrupt the column bookkeeping
            # done by Row and Table, so they are treated as 1.
            if cell.has_key('colspan'):
                self.colspan = max(1, int(cell['colspan']))
            if cell.has_key('rowspan'):
                self.rowspan = max(1, int(cell['rowspan']))
        except (TypeError, ValueError):
            if conv.verbose:
                print >>sys.stderr, "Error reading row/colspan for ", cell

        # Convert the cell into a temporary page so that its blocks can be
        # collected. The converter state is always restored, even when the
        # conversion of the cell fails.
        pp = conv.current_page
        try:
            conv.book.allow_new_page = False
            conv.anchor_to_previous = pp
            conv.current_page = conv.book.create_page()
            conv.parse_tag(cell, css)
            conv.end_current_block()
            for item in conv.current_page.contents:
                if isinstance(item, TextBlock):
                    self.text_blocks.append(item)
        finally:
            conv.current_page = pp
            conv.book.allow_new_page = True
            conv.anchor_to_previous = None

        if not self.text_blocks:
            # Guarantee at least one block per cell
            tb = conv.book.create_text_block()
            tb.Paragraph(' ')
            self.text_blocks.append(tb)
        for tb in self.text_blocks:
            tb.parent = None
            tb.objId = 0
            # Needed as we have to eventually change this BlockStyle's width and
            # height attributes. This blockstyle may be shared with other
            # elements, so doing that causes havoc.
            tb.blockStyle = conv.book.create_block_style()
            ts = conv.book.create_text_style(**tb.textStyle.attrs)
            ts.attrs['parindent'] = 0
            tb.textStyle = ts
            if ts.attrs['align'] == 'foot':
                if tb.contents and isinstance(tb.contents[-1], Paragraph):
                    tb.contents[-1].append(' ')

    def pts_to_pixels(self, pts):
        '''
        Convert C{pts} to pixels using the dpi of the converter's profile.
        The value is divided by 10 before scaling, i.e. it appears to be
        expressed in tenths of a point. The result is rounded up.
        '''
        pts = int(pts)
        return ceil((float(self.conv.profile.dpi)/72)*(pts/10.))

    def text_block_size(self, tb, maxwidth=sys.maxint, debug=False):
        '''
        Estimate the size in pixels of the TextBlock C{tb} when its words are
        wrapped at C{maxwidth}.

        @param tb: The TextBlock to measure
        @param maxwidth: Width in pixels at which a word is moved to a new line
        @param debug: Unused
        @return: The tuple C{(width + 3, height)}
        '''
        ts = tb.textStyle.attrs
        default_font = get_font(ts['fontfacename'], self.pts_to_pixels(ts['fontsize']))
        parindent = self.pts_to_pixels(ts['parindent'])
        ls, ws = self.pts_to_pixels(ts['linespace']), self.pts_to_pixels(ts['wordspace'])
        top, bottom, left, right = 0, 0, parindent, parindent

        def add_word(width, height, left, right, top, bottom):
            # Place an item of the given size after the current position,
            # starting a new line when it does not fit.
            if left + width > maxwidth:
                left = width + ws
                top += height + ls
                bottom = top+height if top+height > bottom else bottom
            else:
                left += (width + ws)
                right = left if left > right else right
                bottom = top+height if top+height > bottom else bottom
            return left, right, top, bottom

        for token, attrs in tokens(tb):
            font = default_font
            if isinstance(token, int): # Handle para and line breaks
                top = bottom
                # tokens() yields 1 for a CR directly in the TextBlock and 2
                # for a CR inside a paragraph; only the former is indented.
                # (This used to test the builtin "int" instead of the token.)
                left = parindent if token == 1 else 0
                continue
            if isinstance(token, Plot):
                width, height = self.pts_to_pixels(token.xsize), self.pts_to_pixels(token.ysize)
                left, right, top, bottom = add_word(width, height, left, right, top, bottom)
                continue
            ff = attrs.get('fontfacename', ts['fontfacename'])
            fs = attrs.get('fontsize', ts['fontsize'])
            if (ff, fs) != (ts['fontfacename'], ts['fontsize']):
                font = get_font(ff, self.pts_to_pixels(fs))
            for word in token.split():
                width, height = font.getsize(word)
                left, right, top, bottom = add_word(width, height, left, right, top, bottom)
        return right+3, bottom

    def text_block_preferred_width(self, tb, debug=False):
        '''Width of C{tb} when its lines are not wrapped.'''
        return self.text_block_size(tb, sys.maxint, debug=debug)[0]

    def preferred_width(self, debug=False):
        '''Largest unwrapped width of the text blocks in this cell.'''
        return ceil(max([self.text_block_preferred_width(i, debug=debug) for i in self.text_blocks]))

    def height(self, width):
        '''Sum of the heights of the text blocks when wrapped at C{width}.'''
        return sum([self.text_block_size(i, width)[1] for i in self.text_blocks])
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Row(object):
    '''
    A table row: the list of cells created from the C{<td>} tags found in a
    C{<tr>} tag.
    '''

    def __init__(self, conv, row, css, colpad):
        '''
        @param conv: The converter, used to compute the CSS of each cell
        @param row: The C{<tr>} tag
        @param css: CSS of the row, passed to C{conv.tag_css} for each cell
        @param colpad: Padding between columns
        '''
        self.cells = []
        self.colpad = colpad
        for cell in row.findAll('td'):
            ccss = conv.tag_css(cell, css)
            self.cells.append(Cell(conv, cell, ccss))

    def number_of_cells(self):
        '''Number of cells in this row. Respects colspan'''
        return sum(cell.colspan for cell in self.cells)

    def height(self, widths):
        '''
        Height of the tallest cell in this row, 0 if the row has no cells.

        @param widths: List of column widths. A cell is given the combined
                       width of the columns it spans.
        '''
        if not self.cells:
            return 0
        i, heights = 0, []
        for cell in self.cells:
            width = sum(widths[i:i+cell.colspan])
            heights.append(cell.height(width))
            i += cell.colspan
        return max(heights)

    def preferred_width(self, col):
        '''
        Preferred width of the cell that occupies column C{col}.

        @return: 0 if that cell spans more than one column, if this row has
                 no cell in column C{col} or if the row has no cells at all.
                 Otherwise the preferred width of the cell.
        '''
        start = 0
        for cell in self.cells:
            if start <= col < start + cell.colspan:
                return 0 if cell.colspan > 1 else cell.preferred_width()
            start += cell.colspan
        # Row is shorter than the table: it places no demand on this column
        return 0

    def cell_iterator(self):
        '''Iterate over the cells of this row in order.'''
        for c in self.cells:
            yield c
|
||||||
|
|
||||||
|
|
||||||
|
class Table(object):
    '''
    A table: the list of rows created from the C{<tr>} tags found in a
    C{<table>} tag, plus the code that computes column widths and the
    position of every text block.
    '''

    def __init__(self, conv, table, css, rowpad=10, colpad=10):
        '''
        @param conv: The converter
        @param table: The C{<table>} tag
        @param css: CSS of the table, passed to C{conv.tag_css} for each row
        @param rowpad: Padding between rows
        @param colpad: Padding between columns
        '''
        self.rows = []
        self.conv = conv
        self.rowpad = rowpad
        self.colpad = colpad
        for row in table.findAll('tr'):
            rcss = conv.tag_css(row, css)
            self.rows.append(Row(conv, row, rcss, colpad))

    def number_of_columns(self):
        '''Largest number of cells (respecting colspan) found in any row.'''
        ans = 0
        for row in self.rows:
            ncells = row.number_of_cells()
            if ncells > ans:
                ans = ncells
        return ans

    def number_of_rows(self):
        '''Number of rows in this table.'''
        return len(self.rows)

    # Original (misspelled) name, kept for existing callers
    number_or_rows = number_of_rows

    def height(self, maxwidth):
        ''' Return row heights + self.rowpad. An empty table has height 0. '''
        if not self.rows:
            return 0
        widths = self.get_widths(maxwidth)
        return sum([row.height(widths) + self.rowpad for row in self.rows]) - self.rowpad

    def get_widths(self, maxwidth):
        '''
        Return widths of columns + self.colpad

        Every column starts at the largest preferred width reported for it by
        any row. All columns are then repeatedly shrunk by 5% (at most 100
        times) while their sum plus the padding between them exceeds
        C{maxwidth}.
        '''
        rows, cols = self.number_or_rows(), self.number_of_columns()
        widths = [0] * cols
        for c in range(cols):
            cellwidths = [0] * rows
            for r in range(rows):
                try:
                    cellwidths[r] = self.rows[r].preferred_width(c)
                except IndexError:
                    # Row has no such column, leave its contribution at 0
                    continue
            widths[c] = max(cellwidths)
        itercount = 0
        while sum(widths) > maxwidth-((len(widths)-1)*self.colpad) and itercount < 100:
            widths = [ceil((95./100.)*w) for w in widths]
            itercount += 1
        return [i+self.colpad for i in widths]

    def blocks(self, maxwidth):
        '''
        Generate the layout of the table as tuples C{(block, x, y, delta)}.

        For every row a tuple C{(None, 0, row height, 0)} is generated first.
        It is followed by one tuple per text block in the row, where x is the
        left edge of the block's column, y is the combined height of the
        blocks that precede it in the same cell and delta is the part of
        C{maxwidth} not used by the columns (never negative). Each text block
        is given a new block style with its final width and height.
        '''
        rows, cols = self.number_or_rows(), self.number_of_columns()
        cellmatrix = [[None for c in range(cols)] for r in range(rows)]
        rowpos = [0 for i in range(rows)]
        for r in range(rows):
            for cell in self.rows[r].cell_iterator():
                # NOTE(review): offsets added for rowspans can push
                # rowpos[r] past the last column for malformed tables, in
                # which case this assignment raises IndexError.
                cellmatrix[r][rowpos[r]] = cell
                rowpos[r] += cell.colspan
                # A cell spanning several rows shifts the start of the
                # rows below it
                for k in range(1, cell.rowspan):
                    try:
                        rowpos[r+k] += 1
                    except IndexError:
                        break

        widths = self.get_widths(maxwidth)
        heights = [row.height(widths) for row in self.rows]

        xpos = [sum(widths[:i]) for i in range(cols)]
        delta = maxwidth - sum(widths)
        if delta < 0:
            delta = 0
        for r in range(len(cellmatrix)):
            yield None, 0, heights[r], 0
            for c in range(len(cellmatrix[r])):
                cell = cellmatrix[r][c]
                if not cell:
                    continue
                width = sum(widths[c:c+cell.colspan])
                sypos = 0
                for tb in cell.text_blocks:
                    tb.blockStyle = self.conv.book.create_block_style(
                                        blockwidth=width,
                                        blockheight=cell.text_block_size(tb, width)[1])

                    yield tb, xpos[c], sypos, delta
                    sypos += tb.blockStyle.attrs['blockheight']
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -94,7 +94,12 @@ class xml_attr_field(object):
|
|||||||
|
|
||||||
def __get__(self, obj, typ=None):
|
def __get__(self, obj, typ=None):
|
||||||
""" Return the data in this field or '' if the field is empty """
|
""" Return the data in this field or '' if the field is empty """
|
||||||
document = dom.parseString(obj.info)
|
try:
|
||||||
|
document = dom.parseString(obj.info)
|
||||||
|
except Exception, err:
|
||||||
|
print >>sys.stderr, "Could not parse XML:", err
|
||||||
|
print obj.info
|
||||||
|
raise
|
||||||
elems = document.getElementsByTagName(self.tag_name)
|
elems = document.getElementsByTagName(self.tag_name)
|
||||||
if len(elems):
|
if len(elems):
|
||||||
elem = None
|
elem = None
|
||||||
@ -108,7 +113,12 @@ class xml_attr_field(object):
|
|||||||
def __set__(self, obj, val):
|
def __set__(self, obj, val):
|
||||||
if val == None:
|
if val == None:
|
||||||
val = ""
|
val = ""
|
||||||
document = dom.parseString(obj.info)
|
try:
|
||||||
|
document = dom.parseString(obj.info)
|
||||||
|
except Exception, err:
|
||||||
|
print >>sys.stderr, "Could not parse XML:", err
|
||||||
|
print obj.info
|
||||||
|
raise
|
||||||
elems = document.getElementsByTagName(self.tag_name)
|
elems = document.getElementsByTagName(self.tag_name)
|
||||||
if len(elems):
|
if len(elems):
|
||||||
elem = None
|
elem = None
|
||||||
@ -142,7 +152,13 @@ class xml_field(object):
|
|||||||
|
|
||||||
def __get__(self, obj, typ=None):
|
def __get__(self, obj, typ=None):
|
||||||
""" Return the data in this field or '' if the field is empty """
|
""" Return the data in this field or '' if the field is empty """
|
||||||
document = dom.parseString(obj.info)
|
try:
|
||||||
|
document = dom.parseString(obj.info)
|
||||||
|
except Exception, err:
|
||||||
|
print >>sys.stderr, "Could not parse XML:", err
|
||||||
|
print obj.info
|
||||||
|
raise
|
||||||
|
|
||||||
elems = document.getElementsByTagName(self.tag_name)
|
elems = document.getElementsByTagName(self.tag_name)
|
||||||
if len(elems):
|
if len(elems):
|
||||||
elem = None
|
elem = None
|
||||||
@ -158,7 +174,12 @@ class xml_field(object):
|
|||||||
def __set__(self, obj, val):
|
def __set__(self, obj, val):
|
||||||
if val == None:
|
if val == None:
|
||||||
val = ""
|
val = ""
|
||||||
document = dom.parseString(obj.info)
|
try:
|
||||||
|
document = dom.parseString(obj.info)
|
||||||
|
except Exception, err:
|
||||||
|
print >>sys.stderr, "Could not parse XML:", err
|
||||||
|
print obj.info
|
||||||
|
raise
|
||||||
def create_elem():
|
def create_elem():
|
||||||
elem = document.createElement(self.tag_name)
|
elem = document.createElement(self.tag_name)
|
||||||
elem.appendChild(dom.Text())
|
elem.appendChild(dom.Text())
|
||||||
|
@ -56,6 +56,8 @@ DEFAULT_GENREADING = "fs" # default is yes to both lrf and lrs
|
|||||||
class LrsError(Exception):
    """
    Error raised for invalid operations on LRS objects, for example adding
    a table of contents entry whose destination has not yet been appended
    to a page and a book.
    """
    pass
|
||||||
|
|
||||||
|
class ContentError(Exception):
    """
    Raised by C{Book.create_page_style} when the book's C{allow_new_page}
    flag is false.
    """
    pass
|
||||||
|
|
||||||
def _checkExists(filename):
|
def _checkExists(filename):
|
||||||
if not os.path.exists(filename):
|
if not os.path.exists(filename):
|
||||||
@ -435,6 +437,8 @@ class Book(Delegator):
|
|||||||
self.applySetting("sourceencoding", DEFAULT_SOURCE_ENCODING)
|
self.applySetting("sourceencoding", DEFAULT_SOURCE_ENCODING)
|
||||||
|
|
||||||
self.applySettings(settings, testValid=True)
|
self.applySettings(settings, testValid=True)
|
||||||
|
|
||||||
|
self.allow_new_page = True #: If False L{create_page} raises an exception
|
||||||
|
|
||||||
def create_text_style(self, **settings):
|
def create_text_style(self, **settings):
|
||||||
ans = TextStyle(**self.defaultTextStyle.attrs.copy())
|
ans = TextStyle(**self.defaultTextStyle.attrs.copy())
|
||||||
@ -447,6 +451,8 @@ class Book(Delegator):
|
|||||||
return ans
|
return ans
|
||||||
|
|
||||||
def create_page_style(self, **settings):
    """
    Create a new page style derived from the book's default page style.

    @param settings: Attributes that override the values copied from
                     C{self.defaultPageStyle}.
    @return: The newly created C{PageStyle}.
    @raise ContentError: If C{self.allow_new_page} is false.
    """
    if not self.allow_new_page:
        raise ContentError("Creating new page styles is not allowed "
                           "while allow_new_page is False")
    # Start from a copy so the shared default style is never mutated.
    ans = PageStyle(**self.defaultPageStyle.attrs.copy())
    ans.update(settings)
    return ans
|
||||||
@ -641,12 +647,15 @@ class TableOfContents(object):
|
|||||||
raise LrsError, "TOC destination must be a TextBlock, ImageBlock or RuledLine"+\
|
raise LrsError, "TOC destination must be a TextBlock, ImageBlock or RuledLine"+\
|
||||||
" not a " + str(type(textBlock))
|
" not a " + str(type(textBlock))
|
||||||
|
|
||||||
if textBlock.parent is None or not isinstance(textBlock.parent, Page):
|
if textBlock.parent is None:
|
||||||
raise LrsError, "TOC text block must be already appended to a page"
|
raise LrsError, "TOC text block must be already appended to a page"
|
||||||
|
|
||||||
if textBlock.parent.parent is None:
|
if textBlock.parent.parent is None:
|
||||||
raise LrsError, \
|
raise LrsError, \
|
||||||
"TOC destination page must be already appended to a book"
|
"TOC destination page must be already appended to a book"
|
||||||
|
|
||||||
|
if not hasattr(textBlock.parent, 'objId'):
|
||||||
|
raise LrsError, "TOC destination must be appended to a container with an objID"
|
||||||
|
|
||||||
self.tocEntries.append(TocLabel(tocLabel, textBlock))
|
self.tocEntries.append(TocLabel(tocLabel, textBlock))
|
||||||
textBlock.tocLabel = tocLabel
|
textBlock.tocLabel = tocLabel
|
||||||
@ -1373,7 +1382,6 @@ class TextBlock(LrsObject, LrsContainer):
|
|||||||
|
|
||||||
self.textSettings = {}
|
self.textSettings = {}
|
||||||
self.blockSettings = {}
|
self.blockSettings = {}
|
||||||
|
|
||||||
|
|
||||||
for name, value in settings.items():
|
for name, value in settings.items():
|
||||||
if name in TextStyle.validSettings:
|
if name in TextStyle.validSettings:
|
||||||
@ -1428,7 +1436,6 @@ class TextBlock(LrsObject, LrsContainer):
|
|||||||
tb.append(content.toElement(sourceEncoding))
|
tb.append(content.toElement(sourceEncoding))
|
||||||
|
|
||||||
return tb
|
return tb
|
||||||
|
|
||||||
|
|
||||||
def getReferencedObjIds(self):
|
def getReferencedObjIds(self):
|
||||||
ids = [self.objId, self.extraId, self.blockStyle.objId,
|
ids = [self.objId, self.extraId, self.blockStyle.objId,
|
||||||
@ -2111,7 +2118,7 @@ class PutObj(LrsContainer):
|
|||||||
self.y1 = int(y)
|
self.y1 = int(y)
|
||||||
|
|
||||||
|
|
||||||
def appendReferencedObjects(self, parent):
|
def appendReferencedObjects(self, parent):
|
||||||
if self.content.parent is None:
|
if self.content.parent is None:
|
||||||
parent.append(self.content)
|
parent.append(self.content)
|
||||||
|
|
||||||
|
@ -17,6 +17,7 @@ Convert .txt files to .lrf
|
|||||||
"""
|
"""
|
||||||
import os, sys
|
import os, sys
|
||||||
|
|
||||||
|
from libprs500.ebooks import BeautifulSoup
|
||||||
from libprs500.ebooks.lrf import ConversionError, option_parser
|
from libprs500.ebooks.lrf import ConversionError, option_parser
|
||||||
from libprs500.ebooks.lrf import Book
|
from libprs500.ebooks.lrf import Book
|
||||||
from libprs500.ebooks.lrf.pylrs.pylrs import Paragraph, Italic, Bold, BookSetting
|
from libprs500.ebooks.lrf.pylrs.pylrs import Paragraph, Italic, Bold, BookSetting
|
||||||
@ -63,7 +64,7 @@ def convert_txt(path, options):
|
|||||||
C{author}, C{title}, C{encoding} (the assumed encoding of
|
C{author}, C{title}, C{encoding} (the assumed encoding of
|
||||||
the text in C{path}.)
|
the text in C{path}.)
|
||||||
"""
|
"""
|
||||||
import fileinput
|
import codecs
|
||||||
header = None
|
header = None
|
||||||
if options.header:
|
if options.header:
|
||||||
header = Paragraph()
|
header = Paragraph()
|
||||||
@ -84,7 +85,19 @@ def convert_txt(path, options):
|
|||||||
block = book.create_text_block()
|
block = book.create_text_block()
|
||||||
pg.append(block)
|
pg.append(block)
|
||||||
book.append(pg)
|
book.append(pg)
|
||||||
for line in fileinput.input(path):
|
lines = ""
|
||||||
|
try:
|
||||||
|
lines = codecs.open(path, 'rb', 'ascii').readlines()
|
||||||
|
print 'huh'
|
||||||
|
except UnicodeDecodeError:
|
||||||
|
try:
|
||||||
|
lines = codecs.open(path, 'rb', 'cp1252').readlines()
|
||||||
|
except UnicodeDecodeError:
|
||||||
|
try:
|
||||||
|
lines = codecs.open(path, 'rb', 'iso-8859-1').readlines()
|
||||||
|
except UnicodeDecodeError:
|
||||||
|
lines = codecs.open(path, 'rb', 'utf8').readlines()
|
||||||
|
for line in lines:
|
||||||
line = line.strip()
|
line = line.strip()
|
||||||
if line:
|
if line:
|
||||||
buffer = buffer.rstrip() + ' ' + line
|
buffer = buffer.rstrip() + ' ' + line
|
||||||
|
Loading…
x
Reference in New Issue
Block a user