diff --git a/src/libprs500/__init__.py b/src/libprs500/__init__.py index f2968b840e..f59403d9f0 100644 --- a/src/libprs500/__init__.py +++ b/src/libprs500/__init__.py @@ -33,7 +33,7 @@ You may have to adjust the GROUP and the location of the rules file to suit your distribution. """ -__version__ = "0.3.32" +__version__ = "0.3.33" __docformat__ = "epytext" __author__ = "Kovid Goyal " diff --git a/src/libprs500/devices/prs500/prstypes.py b/src/libprs500/devices/prs500/prstypes.py index 0924ecd0a6..2f966eb968 100755 --- a/src/libprs500/devices/prs500/prstypes.py +++ b/src/libprs500/devices/prs500/prstypes.py @@ -352,7 +352,7 @@ class SetTime(Command): self.day = t[2] self.hour = t[3] self.minute = t[4] - # Hack you should actually update the entire time tree is + # Hack you should actually update the entire time tree if # second is > 59 self.second = t[5] if t[5] < 60 else 59 diff --git a/src/libprs500/ebooks/lrf/fonts/__init__.py b/src/libprs500/ebooks/lrf/fonts/__init__.py index cb9282d2a5..ffa8eb8b22 100644 --- a/src/libprs500/ebooks/lrf/fonts/__init__.py +++ b/src/libprs500/ebooks/lrf/fonts/__init__.py @@ -13,7 +13,11 @@ ## with this program; if not, write to the Free Software Foundation, Inc., ## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. import pkg_resources -from PIL import ImageFont +try: + from PIL import ImageFont +except ImportError: + import ImageFont + ''' Default fonts used in the PRS500 ''' @@ -26,7 +30,8 @@ FONT_MAP = { def get_font(name, size, encoding='unic'): ''' Get an ImageFont object by name. - @param size: Size in pts + @param size: Font height in pixels. To convert from pts: + sz in pixels = (dpi/72) * size in pts @param encoding: Font encoding to use. E.g. 
'unic', 'symbol', 'ADOB', 'ADBE', 'aprm' ''' if name in FONT_MAP.keys(): diff --git a/src/libprs500/ebooks/lrf/html/convert_from.py b/src/libprs500/ebooks/lrf/html/convert_from.py index 6908cc1697..3514e627a9 100644 --- a/src/libprs500/ebooks/lrf/html/convert_from.py +++ b/src/libprs500/ebooks/lrf/html/convert_from.py @@ -39,6 +39,7 @@ from libprs500.ebooks.lrf.pylrs.pylrs import Paragraph, CR, Italic, ImageStream, Plot, Image, BlockSpace, RuledLine, BookSetting, Canvas from libprs500.ebooks.lrf.pylrs.pylrs import Span as _Span from libprs500.ebooks.lrf import ConversionError, option_parser, Book, PRS500_PROFILE +from libprs500.ebooks.lrf.html.table import Table from libprs500 import extract, filename_to_utf8 from libprs500.ptempfile import PersistentTemporaryFile @@ -303,6 +304,7 @@ class HTMLConverter(object): self.chapter_regex = chapter_regex #: Regex used to search for chapter titles self.link_exclude = link_exclude #: Ignore matching hrefs self.scaled_images = {} #: Temporary files with scaled version of images + self.rotated_images = {} #: Temporary files with rotated version of images self.max_link_levels = max_link_levels #: Number of link levels to process recursively self.link_level = link_level #: Current link level self.blockquote_style = book.create_block_style(sidemargin=60, @@ -317,6 +319,9 @@ class HTMLConverter(object): self.files = {} #: links that point to other files self.links_processed = False #: Whether links_processed has been called on this object self.font_delta = font_delta + # Set by table processing code so that any within the table + # point to the previous element + self.anchor_to_previous = None self.cover = cover self.memory = [] #: Used to ensure that duplicate CSS unhandled erros are not reported self.in_ol = False #: Flag indicating we're in an
    element @@ -478,6 +483,15 @@ class HTMLConverter(object): return text def process_links(self): + def add_toc_entry(text, target): + # TextBlocks in Canvases have a None parent or an Objects Parent + if target.parent != None and \ + hasattr(target.parent, 'objId'): + self.book.addTocEntry(ascii_text, tb) + elif self.verbose: + print "Cannot add link", ascii_text, "to TOC" + + def get_target_block(fragment, targets): '''Return the correct block for the element''' bs = targets[fragment] @@ -535,7 +549,7 @@ class HTMLConverter(object): if fragment in self.targets.keys(): tb = get_target_block(fragment, self.targets) if self.is_root: - self.book.addTocEntry(ascii_text, tb) + add_toc_entry(ascii_text, tb) sys.stdout.flush() jb = JumpButton(tb) self.book.append(jb) @@ -580,7 +594,7 @@ class HTMLConverter(object): else: tb = conv.top if self.is_root: - self.book.addTocEntry(ascii_text, tb) + add_toc_entry(ascii_text, tb) jb = JumpButton(tb) self.book.append(jb) cb = CharButton(jb, text=text) @@ -727,22 +741,32 @@ class HTMLConverter(object): blockStyle=self.current_block.blockStyle) def process_image(self, path, tag_css, width=None, height=None): + if self.rotated_images.has_key(path): + path = self.rotated_images[path].name + if self.scaled_images.has_key(path): + path = self.scaled_images[path].name + + im = PILImage.open(path) + + if width == None or height == None: + width, height = im.size + def scale_image(width, height): pt = PersistentTemporaryFile(suffix='.jpeg') im.resize((int(width), int(height)), PILImage.ANTIALIAS).convert('RGB').save(pt, 'JPEG') pt.close() self.scaled_images[path] = pt return pt.name - - if self.scaled_images.has_key(path): - path = self.scaled_images[path].name - - im = PILImage.open(path) - if width == None or height == None: - width, height = im.size - if width > height: + + if width > self.profile.page_width and width > height: + pt = PersistentTemporaryFile(suffix='.jpeg') im = im.rotate(-90) + im.convert('RGB').save(pt, 'JPEG') + 
path = pt.name + pt.close() + self.rotated_images[path] = pt width, height = im.size + if height > self.profile.page_height: corrf = self.profile.page_height/(1.*height) width, height = floor(corrf*width), self.profile.page_height-1 @@ -788,7 +812,7 @@ class HTMLConverter(object): self.end_page() self.current_page.append(Canvas(width=self.profile.page_width, height=height)) - left = int(floor((self.profile.page_width - width)/2.)) + left = int(floor((self.profile.page_width - width)/2.)) self.current_page.contents[0].put_object(ImageBlock(self.images[path]), left, 0) @@ -824,6 +848,18 @@ class HTMLConverter(object): pass elif tagname == 'a' and self.max_link_levels >= 0: if tag.has_key('name'): + if self.anchor_to_previous: + self.process_children(tag, tag_css) + return + for c in self.anchor_to_previous.contents: + if isinstance(c, (TextBlock, ImageBlock)): + self.targets[tag['name']] = c + return + tb = self.book.create_text_block() + tb.Paragraph(" ") + self.anchor_to_previous.append(tb) + self.targets[tag['name']] = tb + return previous = self.current_block self.process_children(tag, tag_css) target = None @@ -867,7 +903,7 @@ class HTMLConverter(object): ['png', 'jpg', 'bmp', 'jpeg']: self.process_image(path, tag_css) else: - self.add_text('Link: ' + tag['href'], tag_css) + self.add_text(self.get_text(tag), tag_css) self.links.append(HTMLConverter.Link(self.current_para.contents[-1], tag)) elif tagname == 'img': if tag.has_key('src') and os.access(unquote(tag['src']), os.R_OK): @@ -1010,30 +1046,45 @@ class HTMLConverter(object): if tag.has_key('face'): tag_css['font-family'] = tag['face'] self.process_children(tag, tag_css) - elif tagname in ['br', 'tr']: + elif tagname in ['br']: self.current_para.append(CR()) - self.process_children(tag, tag_css) - elif tagname in ['td']: - self.current_para.append(' ') - self.process_children(tag, tag_css) elif tagname == 'hr': self.end_current_para() self.current_block.append(CR()) self.end_current_block() 
self.current_page.RuledLine(linelength=self.profile.page_width) + elif tagname == 'table': + tag_css = self.tag_css(tag) # Table should not inherit CSS + self.process_table(tag, tag_css) else: - self.process_children(tag, tag_css) - + self.process_children(tag, tag_css) if end_page: self.end_page() + def process_table(self, tag, tag_css): + self.end_current_block() + colpad = 10 + table = Table(self, tag, tag_css, rowpad=10, colpad=10) + canvases = [] + for block, xpos, ypos, delta in table.blocks(self.profile.page_width): + if not block: + canvases.append(Canvas(self.profile.page_width, ypos+colpad, + blockrule='block-fixed')) + else: + canvases[-1].put_object(block, xpos + int(delta/2.), 0) + + for canvas in canvases: + self.current_page.append(canvas) + self.end_current_block() + + def writeto(self, path, lrs=False): self.book.renderLrs(path) if lrs else self.book.renderLrf(path) def cleanup(self): - for _file in self.scaled_images.values(): + for _file in self.scaled_images.values() + self.rotated_images.values(): _file.__del__() - + def process_file(path, options): cwd = os.getcwd() dirpath = None @@ -1070,7 +1121,7 @@ def process_file(path, options): tim.save(tf.name) tpath = tf.name else: - raise ConversionError, 'Cannot read from: %s', (options.cover,) + raise ConversionError, 'Cannot read from: %s'% (options.cover,) if not options.title: diff --git a/src/libprs500/ebooks/lrf/html/demo/demo.html b/src/libprs500/ebooks/lrf/html/demo/demo.html index 53050dc65e..027c73a962 100644 --- a/src/libprs500/ebooks/lrf/html/demo/demo.html +++ b/src/libprs500/ebooks/lrf/html/demo/demo.html @@ -2,20 +2,23 @@

    Demo of html2lrf

    - This file contains a demonstration of the capabilities of html2lrf, the HTML to LRF converter from libprs500. To obtain libprs500 visit https://libprs500.kovidgoyal.net + This file contains a demonstration of the capabilities of html2lrf, the HTML to LRF converter from libprs500. To obtain libprs500 visit
    https://libprs500.kovidgoyal.net


    Table of Contents

    Lists

    @@ -40,6 +43,53 @@ Table of Contents

    +

    Tables

    +

    + Because I can! +

    +
    + + + + + + + +

    A matrix

    Column 1 | Column 2 | Column 3
    Row 1

    (1, 1)

    Row 2

    (2, 2)

    Row 3

    (3, 3)

    +
    +

    + html2lrf supports both rowspan and colspan, but no other HTML table attributes, as it uses its own algorithm to determine optimal placement of cells. +

    +
    +

    + The table conversion code is very new and likely to be swarming with bugs, so please report them at
    + On the next page you'll see a real life example taken from a Project Gutenberg text with no modifications. It shows off html2lrf's handling of rowspan and colspan. +

    +

    Sample Complex Table of Contents

    + + + + + + + + + + + + + + +
     PAGE
    Preface | v
    List of Works of Reference | vii
    List of Illustrations | xi
    Chapter | I. | History of the Foundation | 3
    II. | Exterior of the Church | 25
    III. | Interior of the Church | 33
    IV. | St. Bartholomew-the-Less and the Hospital | 63
    Appendix | I. | The Priory Seals | 73
    II. | The Priors and Rectors | 77
    III. | Inventory of Vestments, etc. | 79
    IV. | The Organ | 80
    Index | 83
    + +

    +


    + Table of Contents +

    +

    Text formatting

    # [diff residue, kept verbatim] A simple paragraph of formatted diff --git a/src/libprs500/ebooks/lrf/html/table.py b/src/libprs500/ebooks/lrf/html/table.py new file mode 100644 index 0000000000..7cfeecb963 --- /dev/null +++ b/src/libprs500/ebooks/lrf/html/table.py @@ -0,0 +1,306 @@
##    Copyright (C) 2006 Kovid Goyal kovid@kovidgoyal.net
##    This program is free software; you can redistribute it and/or modify
##    it under the terms of the GNU General Public License as published by
##    the Free Software Foundation; either version 2 of the License, or
##    (at your option) any later version.
##
##    This program is distributed in the hope that it will be useful,
##    but WITHOUT ANY WARRANTY; without even the implied warranty of
##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##    GNU General Public License for more details.
##
##    You should have received a copy of the GNU General Public License along
##    with this program; if not, write to the Free Software Foundation, Inc.,
##    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
'''
Layout of HTML tables for the HTML to LRF converter.

A L{Table} is built from a parsed C{<table>} tag. It measures the text of
every cell with the fonts from L{get_font}, chooses column widths that fit a
maximum width and yields the cell TextBlocks together with their positions.
'''
import math, sys

from libprs500.ebooks.lrf.fonts import get_font
from libprs500.ebooks.lrf.pylrs.pylrs import TextBlock, Text, CR, Span, \
                                             CharButton, Plot, Paragraph, \
                                             LrsTextTag


def ceil(num):
    ''' Return the ceiling of C{num} as an C{int}. '''
    return int(math.ceil(num))


def print_xml(elem):
    ''' Debugging aid: write the XML form of the pylrs object C{elem} to stdout. '''
    from libprs500.ebooks.lrf.pylrs.pylrs import ElementWriter
    elem = elem.toElement('utf8')
    ew = ElementWriter(elem, sourceEncoding='utf8')
    ew.write(sys.stdout)
    print


def cattrs(base, extra):
    ''' Return a new dict holding C{base} updated with C{extra}; neither input is modified. '''
    new = base.copy()
    new.update(extra)
    return new


def tokens(tb):
    '''
    Generate C{(token, attrs)} pairs for the contents of the TextBlock C{tb}.
    A token is one of:
      1. The integer 1: a CR found directly inside the TextBlock. C{attrs} is None.
      2. The integer 2: a CR found inside a Paragraph. C{attrs} is None.
      3. A string: a run of text that shares one style. C{attrs} is a dict
         of the style attributes in effect for it.
      4. A Plot object. C{attrs} is None.
    Children of the TextBlock that are neither a CR nor a Paragraph are skipped.
    '''
    def process_element(x, attrs):
        if isinstance(x, CR):
            yield 2, None
        elif isinstance(x, Text):
            yield x.text, cattrs(attrs, {})
        elif isinstance(x, basestring):
            yield x, cattrs(attrs, {})
        elif isinstance(x, (CharButton, LrsTextTag)):
            # Only the first child is measured
            if x.contents:
                yield x.contents[0].text, cattrs(attrs, {})
        elif isinstance(x, Plot):
            yield x, None
        elif isinstance(x, Span):
            # Attributes of nested spans override those of their ancestors
            attrs = cattrs(attrs, x.attrs)
            for y in x.contents:
                for z in process_element(y, attrs):
                    yield z

    for i in tb.contents:
        if isinstance(i, CR):
            yield 1, None
        elif isinstance(i, Paragraph):
            for j in i.contents:
                attrs = {}
                if hasattr(j, 'attrs'):
                    attrs = j.attrs
                for k in process_element(j, attrs):
                    yield k


class Cell(object):
    ''' A single table cell: the TextBlocks rendered from one C{<td>} tag. '''

    def __init__(self, conv, cell, css):
        '''
        @param conv: The converter object. Its C{parse_tag} method is used to
                     render the contents of the cell.
        @param cell: The parsed C{<td>} tag
        @param css:  The CSS that applies to C{cell}
        '''
        self.conv = conv
        self.cell = cell
        self.css = css
        self.text_blocks = []
        self.rowspan = self.colspan = 1
        try:
            self.colspan = int(cell['colspan']) if cell.has_key('colspan') else 1
            self.rowspan = int(cell['rowspan']) if cell.has_key('rowspan') else 1
        except Exception:
            # Best effort: an unreadable span attribute keeps its default of 1
            if conv.verbose:
                print >>sys.stderr, "Error reading row/colspan for ", cell

        # Render the cell into a temporary page and collect the TextBlocks
        # created there. The converter state is always restored, even if
        # the cell cannot be parsed.
        pp = conv.current_page
        try:
            conv.book.allow_new_page = False
            conv.anchor_to_previous = pp
            conv.current_page = conv.book.create_page()
            conv.parse_tag(cell, css)
            conv.end_current_block()
            for item in conv.current_page.contents:
                if isinstance(item, TextBlock):
                    self.text_blocks.append(item)
        finally:
            conv.current_page = pp
            conv.book.allow_new_page = True
            conv.anchor_to_previous = None

        if not self.text_blocks:
            # Every cell holds at least one (blank) block
            tb = conv.book.create_text_block()
            tb.Paragraph(' ')
            self.text_blocks.append(tb)
        for tb in self.text_blocks:
            # Detach the block from the temporary page
            tb.parent = None
            tb.objId = 0
            # Needed as we have to eventually change this BlockStyle's width and
            # height attributes. This blockstyle may be shared with other
            # elements, so doing that causes havoc.
            tb.blockStyle = conv.book.create_block_style()
            ts = conv.book.create_text_style(**tb.textStyle.attrs)
            ts.attrs['parindent'] = 0
            tb.textStyle = ts
            if ts.attrs['align'] == 'foot':
                if tb.contents and isinstance(tb.contents[-1], Paragraph):
                    tb.contents[-1].append(' ')

    def pts_to_pixels(self, pts):
        '''
        Convert the style value C{pts} to pixels at the dpi of the
        conversion profile.
        NOTE(review): C{pts} is divided by 10, so it is presumably given in
        tenths of a point -- confirm.
        '''
        pts = int(pts)
        return ceil((float(self.conv.profile.dpi)/72)*(pts/10.))

    def text_block_size(self, tb, maxwidth=sys.maxint, debug=False):
        '''
        Estimate the size of the TextBlock C{tb} when its lines are wrapped
        at C{maxwidth} pixels.
        @param debug: Unused
        @return: The tuple C{(width, height)} in pixels
        '''
        ts = tb.textStyle.attrs
        default_font = get_font(ts['fontfacename'], self.pts_to_pixels(ts['fontsize']))
        parindent = self.pts_to_pixels(ts['parindent'])
        ls, ws = self.pts_to_pixels(ts['linespace']), self.pts_to_pixels(ts['wordspace'])
        top, bottom, left, right = 0, 0, parindent, parindent

        def add_word(width, height, left, right, top, bottom):
            if left + width > maxwidth:
                # Word does not fit: it starts a new line
                left = width + ws
                top += height + ls
                bottom = top+height if top+height > bottom else bottom
            else:
                left += (width + ws)
                right = left if left > right else right
                bottom = top+height if top+height > bottom else bottom
            return left, right, top, bottom

        for token, attrs in tokens(tb):
            font = default_font
            if isinstance(token, int): # Handle para and line breaks
                top = bottom
                # 1 is a paragraph break, which restarts at the paragraph
                # indent. Anything else is a line break, which restarts at 0.
                left = parindent if token == 1 else 0
                continue
            if isinstance(token, Plot):
                width, height = self.pts_to_pixels(token.xsize), self.pts_to_pixels(token.ysize)
                left, right, top, bottom = add_word(width, height, left, right, top, bottom)
                continue
            ff = attrs.get('fontfacename', ts['fontfacename'])
            fs = attrs.get('fontsize', ts['fontsize'])
            if (ff, fs) != (ts['fontfacename'], ts['fontsize']):
                font = get_font(ff, self.pts_to_pixels(fs))
            for word in token.split():
                width, height = font.getsize(word)
                left, right, top, bottom = add_word(width, height, left, right, top, bottom)
        return right+3, bottom

    def text_block_preferred_width(self, tb, debug=False):
        ''' Width in pixels of the TextBlock C{tb} when its lines are never wrapped. '''
        return self.text_block_size(tb, sys.maxint, debug=debug)[0]

    def preferred_width(self, debug=False):
        ''' Width in pixels of the widest TextBlock in this cell when nothing is wrapped. '''
        return ceil(max([self.text_block_preferred_width(i, debug=debug) for i in self.text_blocks]))

    def height(self, width):
        ''' Total height in pixels of this cell when it is C{width} pixels wide. '''
        return sum([self.text_block_size(i, width)[1] for i in self.text_blocks])


class Row(object):
    ''' A table row: the list of L{Cell}s built from the C{<td>} tags of one C{<tr>} tag. '''

    def __init__(self, conv, row, css, colpad):
        self.cells = []
        self.colpad = colpad
        cells = row.findAll('td')
        for cell in cells:
            ccss = conv.tag_css(cell, css)
            self.cells.append(Cell(conv, cell, ccss))

    def number_of_cells(self):
        '''Number of cells in this row. Respects colspan'''
        ans = 0
        for cell in self.cells:
            ans += cell.colspan
        return ans

    def height(self, widths):
        '''
        Height in pixels of the tallest cell in this row.
        @param widths: The list of column widths
        @return: 0 for a row without cells
        '''
        i, heights = 0, []
        for cell in self.cells:
            width = sum(widths[i:i+cell.colspan])
            heights.append(cell.height(width))
            i += cell.colspan
        return max(heights) if heights else 0

    def preferred_width(self, col):
        '''
        Preferred width in pixels of the cell that occupies column C{col}.
        @return: 0 if that cell spans several columns or if this row has no
                 cell in column C{col}
        '''
        start = 0
        for cell in self.cells:
            if start <= col < start + cell.colspan:
                return 0 if cell.colspan > 1 else cell.preferred_width()
            start += cell.colspan
        return 0

    def cell_iterator(self):
        ''' Iterate over the cells of this row in order. '''
        for c in self.cells:
            yield c


class Table(object):
    ''' A table: the list of L{Row}s built from the C{<tr>} tags of one C{<table>} tag. '''

    def __init__(self, conv, table, css, rowpad=10, colpad=10):
        '''
        @param rowpad: Vertical padding between rows, used by L{height}
        @param colpad: Horizontal padding added to each column width by L{get_widths}
        '''
        self.rows = []
        self.conv = conv
        self.rowpad = rowpad
        self.colpad = colpad
        rows = table.findAll('tr')
        for row in rows:
            rcss = conv.tag_css(row, css)
            self.rows.append(Row(conv, row, rcss, colpad))

    def number_of_columns(self):
        ''' Largest number of cells found in any row. 0 for a table without rows. '''
        return max([row.number_of_cells() for row in self.rows] or [0])

    def number_or_rows(self):
        ''' Number of rows in this table. '''
        return len(self.rows)

    def height(self, maxwidth):
        ''' Return row heights + self.rowpad'''
        widths = self.get_widths(maxwidth)
        return sum([row.height(widths) + self.rowpad for row in self.rows]) - self.rowpad

    def get_widths(self, maxwidth):
        '''
        Return widths of columns + self.colpad
        '''
        rows, cols = self.number_or_rows(), self.number_of_columns()
        widths = [0] * cols
        for c in range(cols):
            cellwidths = [0] * rows
            for r in range(rows):
                try:
                    cellwidths[r] = self.rows[r].preferred_width(c)
                except IndexError:
                    continue
            widths[c] = max(cellwidths)
        # Shrink all columns by 5% at a time until they fit, giving up
        # after 100 passes
        itercount = 0
        while sum(widths) > maxwidth-((len(widths)-1)*self.colpad) and itercount < 100:
            widths = [ceil((95./100.)*w) for w in widths]
            itercount += 1
        return [i+self.colpad for i in widths]

    def blocks(self, maxwidth):
        '''
        Generate the positioned contents of this table, row by row. Each row
        starts with the tuple C{(None, 0, row height, 0)}. It is followed by
        one tuple C{(text block, xpos, ypos, delta)} for every TextBlock in
        the row, where C{ypos} is the offset of the block from the top of
        its cell and C{delta} is the horizontal space left over in C{maxwidth}.
        '''
        rows, cols = self.number_or_rows(), self.number_of_columns()
        cellmatrix = [[None for c in range(cols)] for r in range(rows)]
        rowpos = [0 for i in range(rows)]
        for r in range(rows):
            for cell in self.rows[r].cell_iterator():
                cellmatrix[r][rowpos[r]] = cell
                rowpos[r] += cell.colspan
                # NOTE(review): a cell that spans rows shifts each row it
                # covers by a single column, whatever its colspan and
                # position -- confirm that this is intended.
                for k in range(1, cell.rowspan):
                    try:
                        rowpos[r+k] += 1
                    except IndexError:
                        break

        widths = self.get_widths(maxwidth)
        heights = [row.height(widths) for row in self.rows]

        xpos = [sum(widths[:i]) for i in range(cols)]
        delta = maxwidth - sum(widths)
        if delta < 0:
            delta = 0
        for r in range(len(cellmatrix)):
            yield None, 0, heights[r], 0
            for c in range(len(cellmatrix[r])):
                cell = cellmatrix[r][c]
                if not cell:
                    continue
                width = sum(widths[c:c+cell.colspan])
                sypos = 0
                for tb in cell.text_blocks:
                    tb.blockStyle = self.conv.book.create_block_style(
                                    blockwidth=width,
                                    blockheight=cell.text_block_size(tb, width)[1])

                    yield tb, xpos[c], sypos, delta
                    sypos += tb.blockStyle.attrs['blockheight']

# [diff residue, kept verbatim] \ No newline at end of file diff --git a/src/libprs500/ebooks/lrf/meta.py b/src/libprs500/ebooks/lrf/meta.py index 550ab6d592..d2d5c37445 100644 --- a/src/libprs500/ebooks/lrf/meta.py +++ b/src/libprs500/ebooks/lrf/meta.py @@ -94,7 +94,12 @@ class xml_attr_field(object): def __get__(self, obj, typ=None): """ Return the data in this field or '' if the field is empty """ - document = dom.parseString(obj.info) + try:
+ document = dom.parseString(obj.info) + except Exception, err: + print >>sys.stderr, "Could not parse XML:", err + print obj.info + raise elems = document.getElementsByTagName(self.tag_name) if len(elems): elem = None @@ -108,7 +113,12 @@ class xml_attr_field(object): def __set__(self, obj, val): if val == None: val = "" - document = dom.parseString(obj.info) + try: + document = dom.parseString(obj.info) + except Exception, err: + print >>sys.stderr, "Could not parse XML:", err + print obj.info + raise elems = document.getElementsByTagName(self.tag_name) if len(elems): elem = None @@ -142,7 +152,13 @@ class xml_field(object): def __get__(self, obj, typ=None): """ Return the data in this field or '' if the field is empty """ - document = dom.parseString(obj.info) + try: + document = dom.parseString(obj.info) + except Exception, err: + print >>sys.stderr, "Could not parse XML:", err + print obj.info + raise + elems = document.getElementsByTagName(self.tag_name) if len(elems): elem = None @@ -158,7 +174,12 @@ class xml_field(object): def __set__(self, obj, val): if val == None: val = "" - document = dom.parseString(obj.info) + try: + document = dom.parseString(obj.info) + except Exception, err: + print >>sys.stderr, "Could not parse XML:", err + print obj.info + raise def create_elem(): elem = document.createElement(self.tag_name) elem.appendChild(dom.Text()) diff --git a/src/libprs500/ebooks/lrf/pylrs/pylrs.py b/src/libprs500/ebooks/lrf/pylrs/pylrs.py index 63f252cefd..4a43e10e83 100644 --- a/src/libprs500/ebooks/lrf/pylrs/pylrs.py +++ b/src/libprs500/ebooks/lrf/pylrs/pylrs.py @@ -56,6 +56,8 @@ DEFAULT_GENREADING = "fs" # default is yes to both lrf and lrs class LrsError(Exception): pass +class ContentError(Exception): + pass def _checkExists(filename): if not os.path.exists(filename): @@ -435,6 +437,8 @@ class Book(Delegator): self.applySetting("sourceencoding", DEFAULT_SOURCE_ENCODING) self.applySettings(settings, testValid=True) + + self.allow_new_page = True #: 
If False L{create_page} raises an exception def create_text_style(self, **settings): ans = TextStyle(**self.defaultTextStyle.attrs.copy()) @@ -447,6 +451,8 @@ class Book(Delegator): return ans def create_page_style(self, **settings): + if not self.allow_new_page: + raise ContentError ans = PageStyle(**self.defaultPageStyle.attrs.copy()) ans.update(settings) return ans @@ -641,12 +647,15 @@ class TableOfContents(object): raise LrsError, "TOC destination must be a TextBlock, ImageBlock or RuledLine"+\ " not a " + str(type(textBlock)) - if textBlock.parent is None or not isinstance(textBlock.parent, Page): + if textBlock.parent is None: raise LrsError, "TOC text block must be already appended to a page" if textBlock.parent.parent is None: raise LrsError, \ "TOC destination page must be already appended to a book" + + if not hasattr(textBlock.parent, 'objId'): + raise LrsError, "TOC destination must be appended to a container with an objID" self.tocEntries.append(TocLabel(tocLabel, textBlock)) textBlock.tocLabel = tocLabel @@ -1373,7 +1382,6 @@ class TextBlock(LrsObject, LrsContainer): self.textSettings = {} self.blockSettings = {} - for name, value in settings.items(): if name in TextStyle.validSettings: @@ -1428,7 +1436,6 @@ class TextBlock(LrsObject, LrsContainer): tb.append(content.toElement(sourceEncoding)) return tb - def getReferencedObjIds(self): ids = [self.objId, self.extraId, self.blockStyle.objId, @@ -2111,7 +2118,7 @@ class PutObj(LrsContainer): self.y1 = int(y) - def appendReferencedObjects(self, parent): + def appendReferencedObjects(self, parent): if self.content.parent is None: parent.append(self.content) diff --git a/src/libprs500/ebooks/lrf/txt/convert_from.py b/src/libprs500/ebooks/lrf/txt/convert_from.py index e5972a0db9..94f61a0978 100644 --- a/src/libprs500/ebooks/lrf/txt/convert_from.py +++ b/src/libprs500/ebooks/lrf/txt/convert_from.py @@ -17,6 +17,7 @@ Convert .txt files to .lrf """ import os, sys +from libprs500.ebooks import BeautifulSoup 
from libprs500.ebooks.lrf import ConversionError, option_parser from libprs500.ebooks.lrf import Book from libprs500.ebooks.lrf.pylrs.pylrs import Paragraph, Italic, Bold, BookSetting @@ -63,7 +64,7 @@ def convert_txt(path, options): C{author}, C{title}, C{encoding} (the assumed encoding of the text in C{path}.) """ - import fileinput + import codecs header = None if options.header: header = Paragraph() @@ -84,7 +85,19 @@ def convert_txt(path, options): block = book.create_text_block() pg.append(block) book.append(pg) - for line in fileinput.input(path): + lines = "" + try: + lines = codecs.open(path, 'rb', 'ascii').readlines() + print 'huh' + except UnicodeDecodeError: + try: + lines = codecs.open(path, 'rb', 'cp1252').readlines() + except UnicodeDecodeError: + try: + lines = codecs.open(path, 'rb', 'iso-8859-1').readlines() + except UnicodeDecodeError: + lines = codecs.open(path, 'rb', 'utf8').readlines() + for line in lines: line = line.strip() if line: buffer = buffer.rstrip() + ' ' + line