diff --git a/src/libprs500/lrf/__init__.py b/src/libprs500/lrf/__init__.py index d283ea2723..d86cd2e55a 100644 --- a/src/libprs500/lrf/__init__.py +++ b/src/libprs500/lrf/__init__.py @@ -56,6 +56,7 @@ def Book(font_delta=0, header=None, **settings): ps['header'] = header ps['header'] = hdr ps['topmargin'] = 10 - return _Book(textstyledefault=dict(fontsize=100+font_delta*20), \ + return _Book(textstyledefault=dict(fontsize=100+font_delta*20, + parindent=80, linespace=12), \ pagestyledefault=ps, \ **settings) \ No newline at end of file diff --git a/src/libprs500/lrf/html/convert_from.py b/src/libprs500/lrf/html/convert_from.py index 3765018e87..f8b95761e5 100644 --- a/src/libprs500/lrf/html/convert_from.py +++ b/src/libprs500/lrf/html/convert_from.py @@ -30,17 +30,13 @@ from operator import itemgetter from libprs500.lrf.html.BeautifulSoup import BeautifulSoup, Comment, Tag, \ NavigableString, Declaration, ProcessingInstruction from libprs500.lrf.pylrs.pylrs import Paragraph, CR, Italic, ImageStream, TextBlock, \ - ImageBlock, JumpButton, CharButton, BlockStyle,\ - Page, Bold, Space, Plot, TextStyle, Image, BlockSpace,\ + ImageBlock, JumpButton, CharButton, \ + Bold, Space, Plot, Image, BlockSpace,\ RuledLine from libprs500.lrf.pylrs.pylrs import Span as _Span from libprs500.lrf import ConversionError, option_parser, Book from libprs500 import extract -def ImagePage(): - return Page(evensidemargin=0, oddsidemargin=0, topmargin=0, \ - textwidth=600, textheight=800) - class Span(_Span): replaced_entities = [ 'amp', 'lt', 'gt' , 'ldquo', 'rdquo', 'lsquo', 'rsquo', 'nbsp' ] patterns = [ re.compile('&'+i+';') for i in replaced_entities ] @@ -212,12 +208,9 @@ class Span(_Span): class HTMLConverter(object): - selector_pat = re.compile(r"([A-Za-z0-9\-\_\:\.]+[A-Za-z0-9\-\_\:\.\s\,]*)\s*\{([^\}]*)\}") + SELECTOR_PAT = re.compile(r"([A-Za-z0-9\-\_\:\.]+[A-Za-z0-9\-\_\:\.\s\,]*)\s*\{([^\}]*)\}") IGNORED_TAGS = (Comment, Declaration, ProcessingInstruction) - justification_styles = dict(head=TextStyle(align='head'), foot=TextStyle(align='foot'), - center=TextStyle(align='center')) - blockquote_style = BlockStyle(sidemargin=60, topskip=20, footskip=20) - unindented_style = TextStyle(parindent=0) + # Fix elements markup_massage = [(re.compile("(<\s*[aA]\s+.*\/)\s*>"), lambda match: match.group(1)+">")] @@ -278,6 +271,12 @@ class HTMLConverter(object): self.page_height = height #: The height of the page self.max_link_levels = max_link_levels #: Number of link levels to process recursively self.link_level = link_level #: Current link level + self.justification_styles = dict(head=book.create_text_style(align='head'), + foot=book.create_text_style(align='foot'), + center=book.create_text_style(align='center')) + self.blockquote_style = book.create_block_style(sidemargin=60, + topskip=20, footskip=20) + self.unindented_style = book.create_text_style(parindent=0) self.images = {} #: Images referenced in the HTML document self.targets = {} #: elements self.links = [] #: elements @@ -320,7 +319,7 @@ class HTMLConverter(object): """ sdict = dict() style = re.sub('/\*.*?\*/', '', style) # Remove /*...*/ comments - for sel in re.findall(HTMLConverter.selector_pat, style): + for sel in re.findall(HTMLConverter.SELECTOR_PAT, style): for key in sel[0].split(','): key = key.strip().lower() val = self.parse_style_properties(sel[1]) @@ -376,8 +375,8 @@ class HTMLConverter(object): def parse_file(self): previous = self.book.last_page() - self.current_page = Page() - self.current_block = TextBlock() + self.current_page = self.book.create_page() + self.current_block = self.book.create_text_block() self.current_para = Paragraph() if self.cover: self.add_image_page(self.cover) @@ -445,7 +444,7 @@ class HTMLConverter(object): break if not ans: - ntb = TextBlock() + ntb = self.book.create_text_block() ntb.Paragraph(' ') page.append(ntb) ans = ntb @@ -528,16 +527,18 @@ class HTMLConverter(object): self.current_para.append_to(self.current_block) self.current_para = Paragraph() self.current_block.append_to(self.current_page) - self.current_block = TextBlock() + self.current_block = self.book.create_text_block() if self.current_page.has_text(): self.book.append(self.current_page) - self.current_page = Page() + self.current_page = self.book.create_page() def add_image_page(self, path): if os.access(path, os.R_OK): self.end_page() - page = ImagePage() + page = self.book.create_page(evensidemargin=0, oddsidemargin=0, + topmargin=0, textwidth=self.page_width, + textheight=self.page_height) if not self.images.has_key(path): self.images[path] = ImageStream(path) page.append(ImageBlock(self.images[path])) @@ -578,7 +579,8 @@ class HTMLConverter(object): if align != self.current_block.textStyle.attrs['align']: self.current_para.append_to(self.current_block) self.current_block.append_to(self.current_page) - self.current_block = TextBlock(textStyle=HTMLConverter.justification_styles[align]) + self.current_block = self.book.create_text_block( + textStyle=self.justification_styles[align]) self.current_para = Paragraph() try: self.current_para.append(Span(src, self.sanctify_css(css), self.memory,\ @@ -613,6 +615,12 @@ class HTMLConverter(object): if self.current_block.contents and \ not isinstance(self.current_block.contents[-1], CR): self.current_block.append(CR()) + + def end_current_block(self): + self.current_para.append_to(self.current_block) + self.current_block.append_to(self.current_page) + self.current_para = Paragraph() + self.current_block = self.book.create_text_block() def parse_tag(self, tag, parent_css): try: @@ -663,7 +671,7 @@ class HTMLConverter(object): break if target and not isinstance(target, (TextBlock, ImageBlock)): if isinstance(target, RuledLine): - target = TextBlock() + target = self.book.create_text_block() target.Paragraph(' ') self.current_page.append(target) else: @@ -719,7 +727,7 @@ class HTMLConverter(object): self.current_block.append(self.current_para) self.current_page.append(self.current_block) self.current_para = Paragraph() - self.current_block = TextBlock() + self.current_block = self.book.create_text_block() im = ImageBlock(self.images[path], x1=width, y1=height, xsize=width, ysize=height) self.current_page.append(im) @@ -747,7 +755,8 @@ class HTMLConverter(object): elif tagname == 'pre': self.end_current_para() self.current_block.append_to(self.current_page) - self.current_block = TextBlock(textStyle=HTMLConverter.unindented_style) + self.current_block = self.book.create_text_block( + textStyle=self.unindented_style) src = ''.join([str(i) for i in tag.contents]) lines = src.split('\n') for line in lines: @@ -756,19 +765,15 @@ class HTMLConverter(object): self.current_para.CR() except ConversionError: pass - self.end_current_para() - self.current_block.append_to(self.current_page) - self.current_block = TextBlock() + self.end_current_block() elif tagname in ['ul', 'ol']: self.in_ol = 1 if tagname == 'ol' else 0 - self.end_current_para() - self.current_block.append_to(self.current_page) - self.current_block = TextBlock(textStyle=HTMLConverter.unindented_style) + self.end_current_block() + self.current_block = self.book.create_text_block( + textStyle=self.unindented_style) self.process_children(tag, tag_css) self.in_ol = 0 - self.end_current_para() - self.current_block.append_to(self.current_page) - self.current_block = TextBlock() + self.end_current_block() elif tagname == 'li': prepend = str(self.in_ol)+'. ' if self.in_ol else u'\u2022' + ' ' if self.current_para.has_text(): @@ -791,13 +796,11 @@ class HTMLConverter(object): self.current_para.append_to(self.current_block) self.current_block.append_to(self.current_page) self.current_para = Paragraph() - self.current_block = TextBlock(blockStyle=HTMLConverter.blockquote_style, - textStyle=HTMLConverter.unindented_style) + self.current_block = self.book.create_text_block( + blockStyle=self.blockquote_style, + textStyle=self.unindented_style) self.process_children(tag, tag_css) - self.current_para.append_to(self.current_block) - self.current_block.append_to(self.current_page) - self.current_para = Paragraph() - self.current_block = TextBlock() + self.end_current_block() elif tagname in ['p', 'div']: self.end_current_para() self.lstrip_toggle = True @@ -813,12 +816,9 @@ class HTMLConverter(object): self.current_para.append(CR()) self.process_children(tag, tag_css) elif tagname == 'hr': - if self.current_para.contents: - self.current_block.append(self.current_para) - self.current_para = Paragraph() + self.end_current_para() self.current_block.append(CR()) - self.current_page.append(self.current_block) - self.current_block = TextBlock() + self.end_current_block() self.current_page.RuledLine(linelength=self.page_width) else: self.process_children(tag, tag_css) diff --git a/src/libprs500/lrf/pylrs/__init__.py b/src/libprs500/lrf/pylrs/__init__.py index 8b13789179..f4d0439228 100644 --- a/src/libprs500/lrf/pylrs/__init__.py +++ b/src/libprs500/lrf/pylrs/__init__.py @@ -1 +1,5 @@ - +""" +This package contains code to generate ebooks in the SONY LRS/F format. It was +originally developed by Mike Higgins and has been extended and modified by Kovid +Goyal. +""" diff --git a/src/libprs500/lrf/pylrs/pylrs.py b/src/libprs500/lrf/pylrs/pylrs.py index 60af5c460b..ffb64a7b1f 100644 --- a/src/libprs500/lrf/pylrs/pylrs.py +++ b/src/libprs500/lrf/pylrs/pylrs.py @@ -1,33 +1,6 @@ -""" - pylrs.py -- a package to create LRS (and LRF) e-Books for the Sony PRS-500. -""" - -import os -import re -import codecs -from datetime import date -try: - from elementtree.ElementTree import (Element, SubElement) -except ImportError: - from xml.etree.ElementTree import (Element, SubElement) - -from elements import ElementWriter -from pylrf import (LrfWriter, LrfObject, LrfTag, LrfToc, - STREAM_COMPRESSED, LrfTagStream, LrfStreamBase, IMAGE_TYPE_ENCODING, - BINDING_DIRECTION_ENCODING, LINE_TYPE_ENCODING, LrfFileStream, - STREAM_FORCE_COMPRESSED) - -PYLRS_VERSION = "1.0" - -DEFAULT_SOURCE_ENCODING = "cp1252" # defualt is us-windows character set -DEFAULT_GENREADING = "f" # default is yes to lrf, no to lrs - -# -# Acknowledgement: -# This software would not have been possible without the pioneering -# efforts of the author of lrf2lrs.py, Igor Skochinsky. -# # Copyright (c) 2007 Mike Higgins (Falstaff) +# Modifications from the original: +# Copyright (C) 2007 Kovid Goyal # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the "Software"), # to deal in the Software without restriction, including without limitation @@ -45,22 +18,8 @@ DEFAULT_GENREADING = "f" # default is yes to lrf, no to lrs # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. - -# Check www.falstaffshouse.com for possible updates to this code. -# Email contact: falstaff (at) falstaffshouse.com - -# -# Change History: -# -# V1.0 06 Feb 2007 -# Initial Release. -# - # # Current limitations and bugs: -# Bug: using two instances of Book() at the same time can cause -# incorrect output if any default styles are used. Workaround: -# supply all styles explicitly, or use only one Book class at a time. # Bug: Does not check if most setting values are valid unless lrf is created. # # Unsupported objects: MiniPage, SimpleTextBlock, Canvas, Window, @@ -76,9 +35,23 @@ DEFAULT_GENREADING = "f" # default is yes to lrf, no to lrs # Other unsupported tags: PageDiv, SoundStop, Wait, pos, # Plot, Image (outside of ImageBlock), # EmpLine, EmpDots -# -# Tested on Python 2.4 and 2.5, Windows XP and PRS-500. -# + +import os, re, codecs +from datetime import date +try: + from elementtree.ElementTree import (Element, SubElement) +except ImportError: + from xml.etree.ElementTree import (Element, SubElement) + +from elements import ElementWriter +from pylrf import (LrfWriter, LrfObject, LrfTag, LrfToc, + STREAM_COMPRESSED, LrfTagStream, LrfStreamBase, IMAGE_TYPE_ENCODING, + BINDING_DIRECTION_ENCODING, LINE_TYPE_ENCODING, LrfFileStream, + STREAM_FORCE_COMPRESSED) + +DEFAULT_SOURCE_ENCODING = "cp1252" # defualt is us-windows character set +DEFAULT_GENREADING = "f" # default is yes to lrf, no to lrs + class LrsError(Exception): pass @@ -263,6 +236,7 @@ class LrsAttributes(object): if type(value) is int: value = str(value) self.attrs[name] = value + class LrsContainer(object): @@ -334,12 +308,12 @@ class LrsContainer(object): class LrsObject(object): """ A mixin class for elements that need an object id. """ - NextObjId = 0 + nextObjId = 0 @classmethod def getNextObjId(selfClass): - selfClass.NextObjId += 1 - return selfClass.NextObjId + selfClass.nextObjId += 1 + return selfClass.nextObjId def __init__(self, assignId=False): if assignId: @@ -412,6 +386,7 @@ class Book(Delegator): There are several other settings -- see the BookInfo class for more. """ + def __init__(self, textstyledefault=None, blockstyledefault=None, pagestyledefault=None, optimizeTags=False, @@ -427,23 +402,23 @@ class Book(Delegator): self.optimizeTags = optimizeTags self.optimizeCompression = optimizeCompression - TextStyle.resetDefaults() - BlockStyle.resetDefaults() - PageStyle.resetDefaults() + pageStyle = PageStyle(**PageStyle.baseDefaults.copy()) + blockStyle = BlockStyle(**BlockStyle.baseDefaults.copy()) + textStyle = TextStyle(**TextStyle.baseDefaults.copy()) if textstyledefault is not None: - TextStyle.setDefaults(textstyledefault) + textStyle.update(textstyledefault) if blockstyledefault is not None: - BlockStyle.setDefaults(blockstyledefault) + blockStyle.update(blockstyledefault) if pagestyledefault is not None: - PageStyle.setDefaults(pagestyledefault) + pageStyle.update(pagestyledefault) - Page.defaultPageStyle = PageStyle() - TextBlock.defaultTextStyle = TextStyle() - TextBlock.defaultBlockStyle = BlockStyle() - LrsObject.nextObjId = 1 + self.defaultPageStyle = pageStyle + self.defaultTextStyle = textStyle + self.defaultBlockStyle = blockStyle + LrsObject.nextObjId += 1 Delegator.__init__(self, [BookInformation(), Main(), Template(), Style(), Solos(), Objects()]) @@ -455,7 +430,46 @@ class Book(Delegator): self.applySetting("sourceencoding", DEFAULT_SOURCE_ENCODING) self.applySettings(settings, testValid=True) + + def create_text_style(self, **settings): + ans = TextStyle(**self.defaultTextStyle.attrs.copy()) + ans.update(settings) + return ans + + def create_block_style(self, **settings): + ans = BlockStyle(**self.defaultBlockStyle.attrs.copy()) + ans.update(settings) + return ans + + def create_page_style(self, **settings): + ans = PageStyle(**self.defaultPageStyle.attrs.copy()) + ans.update(settings) + return ans + def create_page(self, pageStyle=None, **settings): + ''' + Return a new L{Page}. The page has not been appended to this book. + @param pageStyle: If None the default pagestyle is used. + @type pageStyle: L{PageStyle} + ''' + if not pageStyle: + pageStyle = self.defaultPageStyle + return Page(pageStyle=pageStyle, **settings) + + def create_text_block(self, textStyle=None, blockStyle=None, **settings): + ''' + Return a new L{TextBlock}. The block has not been appended to this + book. + @param textStyle: If None the default text style is used + @type textStyle: L{TextStyle} + @param blockStyle: If None the default block style is used. + @type blockStyle: L{BlockStyle} + ''' + if not textStyle: + textStyle = self.defaultTextStyle + if not blockStyle: + blockStyle = self.defaultBlockStyle + return TextBlock(textStyle=textStyle, blockStyle=blockStyle, **settings) def pages(self): '''Return list of Page objects in this book ''' @@ -538,11 +552,6 @@ class Book(Delegator): spaceBeforeClose=False) writer.write(f) - - - - - class BookInformation(Delegator): @@ -914,8 +923,6 @@ class Style(LrsContainer, Delegator): "appendPageStyle", "appendTextStyle", "appendBlockStyle"] + \ self.delegatedMethods - - def getSettings(self): return [(self.bookStyle, x) for x in self.bookStyle.getSettings()] @@ -1066,19 +1073,13 @@ class LrsStyle(LrsObject, LrsAttributes, LrsContainer): #self.parent = None - @classmethod - def resetDefaults(selfClass): - selfClass.defaults = selfClass.baseDefaults.copy() - - - @classmethod - def setDefaults(selfClass, settings): + def update(self, settings): for name, value in settings.items(): - if name not in selfClass.validSettings: - raise LrsError, "default setting %s not recognized" % name - selfClass.defaults[name] = value - - + if name not in self.__class__.validSettings: + raise LrsError, "%s not a valid setting for %s" % \ + (name, self.__class__.__name__) + self.attrs[name] = value + def getLabel(self): return str(self.objId) @@ -1119,7 +1120,7 @@ class TextStyle(LrsStyle): fontorientation="0", fontweight="400", fontfacename="Dutch801 Rm BT Roman", textcolor="0x00000000", wordspace="25", letterspace="0", - baselineskip="120", linespace="12", parindent="80", parskip="0", + baselineskip="120", linespace="10", parindent="0", parskip="0", textbgcolor="0xFF000000") alsoAllow = ["empdotscode", "empdotsfontname", "refempdotsfont", @@ -1238,16 +1239,11 @@ class Page(LrsObject, LrsContainer): """ defaultPageStyle = PageStyle() - def __init__(self, *args, **settings): + def __init__(self, pageStyle=defaultPageStyle, **settings): LrsObject.__init__(self) LrsContainer.__init__(self, [TextBlock, BlockSpace, RuledLine, ImageBlock]) - if len(args) > 0: - pageStyle = args[0] - else: - pageStyle = Page.defaultPageStyle - self.pageStyle = pageStyle for settingName in settings.keys(): @@ -1381,7 +1377,7 @@ class TextBlock(LrsObject, LrsContainer): self.textStyle = textStyle self.blockStyle = blockStyle - # create a textStyle with our current text settings (for Span to find) + # create a textStyle with our current text settings (for Span to find) self.currentTextStyle = textStyle.copy() self.currentTextStyle.attrs.update(self.textSettings) diff --git a/src/libprs500/lrf/txt/convert_from.py b/src/libprs500/lrf/txt/convert_from.py index 181a4b5126..0b492311bb 100644 --- a/src/libprs500/lrf/txt/convert_from.py +++ b/src/libprs500/lrf/txt/convert_from.py @@ -68,7 +68,10 @@ def convert_txt(path, options): sourceencoding=options.encoding, freetext=options.freetext, \ category=options.category) buffer = '' - block = book.Page().TextBlock() + pg = book.create_page() + block = book.create_text_block() + pg.append(block) + book.append(pg) for line in fileinput.input(path): line = line.strip() if line: