Refactor pylrs to stop depending on static variables for default style information.

2025-07-31 14:33:54 -04:00 · 2007-05-03 23:32:14 +00:00 · 2007-05-03 23:32:14 +00:00 · 8781e84beb
commit 8781e84beb
parent 9d407875bd
5 changed files with 135 additions and 131 deletions
--- a/src/libprs500/lrf/init.py
+++ b/src/libprs500/lrf/init.py
@ -56,6 +56,7 @@ def Book(font_delta=0, header=None, **settings):
        ps['header'] = header
        ps['header'] = hdr
        ps['topmargin'] = 10
-    return _Book(textstyledefault=dict(fontsize=100+font_delta*20), \
+    return _Book(textstyledefault=dict(fontsize=100+font_delta*20, 
+                                       parindent=80, linespace=12), \
                 pagestyledefault=ps, \
                  **settings)
--- a/src/libprs500/lrf/html/convert_from.py
+++ b/src/libprs500/lrf/html/convert_from.py
@ -30,17 +30,13 @@ from operator import itemgetter
 from libprs500.lrf.html.BeautifulSoup import BeautifulSoup, Comment, Tag, \
                                             NavigableString, Declaration, ProcessingInstruction
 from libprs500.lrf.pylrs.pylrs import Paragraph, CR, Italic, ImageStream, TextBlock, \
-                                      ImageBlock, JumpButton, CharButton, BlockStyle,\
-                                      Page, Bold, Space, Plot, TextStyle, Image, BlockSpace,\
+                                      ImageBlock, JumpButton, CharButton, \
+                                      Bold, Space, Plot, Image, BlockSpace,\
                                      RuledLine
 from libprs500.lrf.pylrs.pylrs import Span as _Span
 from libprs500.lrf import ConversionError, option_parser, Book
 from libprs500 import extract

-def ImagePage():
-    return Page(evensidemargin=0, oddsidemargin=0, topmargin=0, \
-                       textwidth=600, textheight=800)
-    
 class Span(_Span):
    replaced_entities = [ 'amp', 'lt', 'gt' , 'ldquo', 'rdquo', 'lsquo', 'rsquo', 'nbsp' ]
    patterns = [ re.compile('&'+i+';') for i in replaced_entities ]
@ -212,12 +208,9 @@ class Span(_Span):
        
        
 class HTMLConverter(object):
-    selector_pat = re.compile(r"([A-Za-z0-9\-\_\:\.]+[A-Za-z0-9\-\_\:\.\s\,]*)\s*\{([^\}]*)\}")
+    SELECTOR_PAT = re.compile(r"([A-Za-z0-9\-\_\:\.]+[A-Za-z0-9\-\_\:\.\s\,]*)\s*\{([^\}]*)\}")
    IGNORED_TAGS = (Comment, Declaration, ProcessingInstruction)
-    justification_styles = dict(head=TextStyle(align='head'), foot=TextStyle(align='foot'), 
-                                center=TextStyle(align='center'))
-    blockquote_style = BlockStyle(sidemargin=60, topskip=20, footskip=20)
-    unindented_style = TextStyle(parindent=0)
+    
    # Fix <a /> elements 
    markup_massage   = [(re.compile("(<\s*[aA]\s+.*\/)\s*>"), 
                         lambda match: match.group(1)+"></a>")]
@ -278,6 +271,12 @@ class HTMLConverter(object):
        self.page_height = height #: The height of the page
        self.max_link_levels = max_link_levels #: Number of link levels to process recursively
        self.link_level  = link_level  #: Current link level
+        self.justification_styles = dict(head=book.create_text_style(align='head'), 
+                                         foot=book.create_text_style(align='foot'), 
+                                         center=book.create_text_style(align='center'))
+        self.blockquote_style = book.create_block_style(sidemargin=60, 
+                                                        topskip=20, footskip=20)
+        self.unindented_style = book.create_text_style(parindent=0)
        self.images  = {}         #: Images referenced in the HTML document
        self.targets = {}         #: <a name=...> elements
        self.links   = []         #: <a href=...> elements        
@ -320,7 +319,7 @@ class HTMLConverter(object):
        """
        sdict = dict()
        style = re.sub('/\*.*?\*/', '', style) # Remove /*...*/ comments
-        for sel in re.findall(HTMLConverter.selector_pat, style):
+        for sel in re.findall(HTMLConverter.SELECTOR_PAT, style):
            for key in sel[0].split(','):
                key = key.strip().lower()
                val = self.parse_style_properties(sel[1])
@ -376,8 +375,8 @@ class HTMLConverter(object):
        
    def parse_file(self):
        previous = self.book.last_page()
-        self.current_page = Page()
-        self.current_block = TextBlock()
+        self.current_page = self.book.create_page()
+        self.current_block = self.book.create_text_block()
        self.current_para = Paragraph()
        if self.cover:
            self.add_image_page(self.cover)
@ -445,7 +444,7 @@ class HTMLConverter(object):
                        break
            
            if not ans: 
-                ntb = TextBlock()
+                ntb = self.book.create_text_block()
                ntb.Paragraph(' ')
                page.append(ntb)
                ans = ntb
@ -528,16 +527,18 @@ class HTMLConverter(object):
        self.current_para.append_to(self.current_block)
        self.current_para = Paragraph()
        self.current_block.append_to(self.current_page)
-        self.current_block = TextBlock()
+        self.current_block = self.book.create_text_block()
        if self.current_page.has_text(): 
            self.book.append(self.current_page)
-            self.current_page = Page()
+            self.current_page = self.book.create_page()
        
        
    def add_image_page(self, path):
        if os.access(path, os.R_OK):
            self.end_page()
-            page = ImagePage()
+            page = self.book.create_page(evensidemargin=0, oddsidemargin=0, 
+                                         topmargin=0, textwidth=self.page_width,
+                                         textheight=self.page_height)
            if not self.images.has_key(path):
                self.images[path] = ImageStream(path)
            page.append(ImageBlock(self.images[path]))
@ -578,7 +579,8 @@ class HTMLConverter(object):
            if align != self.current_block.textStyle.attrs['align']:
                self.current_para.append_to(self.current_block)
                self.current_block.append_to(self.current_page)
-                self.current_block = TextBlock(textStyle=HTMLConverter.justification_styles[align])
+                self.current_block = self.book.create_text_block(
+                                    textStyle=self.justification_styles[align])
                self.current_para = Paragraph()
            try:
                self.current_para.append(Span(src, self.sanctify_css(css), self.memory,\
@ -613,6 +615,12 @@ class HTMLConverter(object):
        if self.current_block.contents and \
            not isinstance(self.current_block.contents[-1], CR):
            self.current_block.append(CR())
+            
+    def end_current_block(self):
+        self.current_para.append_to(self.current_block)
+        self.current_block.append_to(self.current_page)
+        self.current_para = Paragraph()
+        self.current_block = self.book.create_text_block()
    
    def parse_tag(self, tag, parent_css):
        try:
@ -663,7 +671,7 @@ class HTMLConverter(object):
                            break
                    if target and not isinstance(target, (TextBlock, ImageBlock)):
                        if isinstance(target, RuledLine):
-                            target = TextBlock()
+                            target = self.book.create_text_block()
                            target.Paragraph(' ')
                            self.current_page.append(target)
                        else:
@ -719,7 +727,7 @@ class HTMLConverter(object):
                    self.current_block.append(self.current_para)
                    self.current_page.append(self.current_block)
                    self.current_para = Paragraph()
-                    self.current_block = TextBlock()
+                    self.current_block = self.book.create_text_block()
                    im = ImageBlock(self.images[path], x1=width, y1=height, 
                                    xsize=width, ysize=height)
                    self.current_page.append(im)                        
@ -747,7 +755,8 @@ class HTMLConverter(object):
        elif tagname == 'pre':
            self.end_current_para()
            self.current_block.append_to(self.current_page)
-            self.current_block = TextBlock(textStyle=HTMLConverter.unindented_style)
+            self.current_block = self.book.create_text_block(
+                                    textStyle=self.unindented_style)
            src = ''.join([str(i) for i in tag.contents])
            lines = src.split('\n')
            for line in lines:
@ -756,19 +765,15 @@ class HTMLConverter(object):
                    self.current_para.CR()
                except ConversionError:
                    pass
-            self.end_current_para()
-            self.current_block.append_to(self.current_page)
-            self.current_block = TextBlock()
+            self.end_current_block()
        elif tagname in ['ul', 'ol']:
            self.in_ol = 1 if tagname == 'ol' else 0
-            self.end_current_para()
-            self.current_block.append_to(self.current_page)
-            self.current_block = TextBlock(textStyle=HTMLConverter.unindented_style)
+            self.end_current_block()
+            self.current_block = self.book.create_text_block(
+                                        textStyle=self.unindented_style)
            self.process_children(tag, tag_css)
            self.in_ol = 0
-            self.end_current_para()
-            self.current_block.append_to(self.current_page)
-            self.current_block = TextBlock()
+            self.end_current_block()
        elif tagname == 'li':
            prepend = str(self.in_ol)+'. ' if self.in_ol else u'\u2022' + ' '
            if self.current_para.has_text():
@ -791,13 +796,11 @@ class HTMLConverter(object):
            self.current_para.append_to(self.current_block)
            self.current_block.append_to(self.current_page)
            self.current_para = Paragraph()
-            self.current_block = TextBlock(blockStyle=HTMLConverter.blockquote_style,
-                                           textStyle=HTMLConverter.unindented_style)
+            self.current_block = self.book.create_text_block(
+                                    blockStyle=self.blockquote_style,
+                                    textStyle=self.unindented_style)
            self.process_children(tag, tag_css)
-            self.current_para.append_to(self.current_block)
-            self.current_block.append_to(self.current_page)
-            self.current_para = Paragraph()
-            self.current_block = TextBlock()
+            self.end_current_block()
        elif tagname in ['p', 'div']:
            self.end_current_para()
            self.lstrip_toggle = True
@ -813,12 +816,9 @@ class HTMLConverter(object):
            self.current_para.append(CR())
            self.process_children(tag, tag_css)
        elif tagname == 'hr':
-            if self.current_para.contents:
-                self.current_block.append(self.current_para)
-                self.current_para = Paragraph()
+            self.end_current_para()            
            self.current_block.append(CR())
-            self.current_page.append(self.current_block)
-            self.current_block = TextBlock()            
+            self.end_current_block()
            self.current_page.RuledLine(linelength=self.page_width)
        else:            
            self.process_children(tag, tag_css)
--- a/src/libprs500/lrf/pylrs/init.py
+++ b/src/libprs500/lrf/pylrs/init.py
@ -1 +1,5 @@
-
+"""
+This package contains code to generate ebooks in the SONY LRS/F format. It was
+originally developed by Mike Higgins and has been extended and modified by Kovid
+Goyal.
+"""
--- a/src/libprs500/lrf/pylrs/pylrs.py
+++ b/src/libprs500/lrf/pylrs/pylrs.py
@ -1,33 +1,6 @@
-"""
-    pylrs.py -- a package to create LRS (and LRF) e-Books for the Sony PRS-500.
-"""
-
-import os
-import re
-import codecs
-from datetime import date
-try:
-    from elementtree.ElementTree import (Element, SubElement)
-except ImportError:
-    from xml.etree.ElementTree import (Element, SubElement)
-
-from elements import ElementWriter
-from pylrf import (LrfWriter, LrfObject, LrfTag, LrfToc,
-        STREAM_COMPRESSED, LrfTagStream, LrfStreamBase, IMAGE_TYPE_ENCODING,
-        BINDING_DIRECTION_ENCODING, LINE_TYPE_ENCODING, LrfFileStream,
-        STREAM_FORCE_COMPRESSED)
-
-PYLRS_VERSION = "1.0"
-
-DEFAULT_SOURCE_ENCODING = "cp1252"      # defualt is us-windows character set
-DEFAULT_GENREADING      = "f"           # default is yes to lrf, no to lrs
-
-#
-# Acknowledgement:
-#   This software would not have been possible without the pioneering
-#   efforts of the author of lrf2lrs.py, Igor Skochinsky.
-#
 # Copyright (c) 2007 Mike Higgins (Falstaff)
+# Modifications from the original: 
+#    Copyright (C) 2007 Kovid Goyal <kovid@kovidgoyal.net>
 # Permission is hereby granted, free of charge, to any person obtaining a
 # copy of this software and associated documentation files (the "Software"),
 # to deal in the Software without restriction, including without limitation
@ -45,22 +18,8 @@ DEFAULT_GENREADING      = "f"           # default is yes to lrf, no to lrs
 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 # DEALINGS IN THE SOFTWARE.
-
-# Check www.falstaffshouse.com for possible updates to this code.
-# Email contact: falstaff (at) falstaffshouse.com
-
-#
-# Change History:
-#
-# V1.0 06 Feb 2007
-# Initial Release.
-#
-
 #
 # Current limitations and bugs:
-#   Bug: using two instances of Book() at the same time can cause
-#        incorrect output if any default styles are used.  Workaround:
-#        supply all styles explicitly, or use only one Book class at a time.
 #   Bug: Does not check if most setting values are valid unless lrf is created.
 #
 #   Unsupported objects: MiniPage, SimpleTextBlock, Canvas, Window,
@ -76,9 +35,23 @@ DEFAULT_GENREADING      = "f"           # default is yes to lrf, no to lrs
 #   Other unsupported tags: PageDiv, SoundStop, Wait, pos,
 #                           Plot, Image (outside of ImageBlock), 
 #                           EmpLine, EmpDots
-#
-#   Tested on Python 2.4 and 2.5, Windows XP and PRS-500.
-#
+
+import os, re, codecs
+from datetime import date
+try:
+    from elementtree.ElementTree import (Element, SubElement)
+except ImportError:
+    from xml.etree.ElementTree import (Element, SubElement)
+
+from elements import ElementWriter
+from pylrf import (LrfWriter, LrfObject, LrfTag, LrfToc,
+        STREAM_COMPRESSED, LrfTagStream, LrfStreamBase, IMAGE_TYPE_ENCODING,
+        BINDING_DIRECTION_ENCODING, LINE_TYPE_ENCODING, LrfFileStream,
+        STREAM_FORCE_COMPRESSED)
+
+DEFAULT_SOURCE_ENCODING = "cp1252"      # default is the US Windows character set
+DEFAULT_GENREADING      = "f"           # default is yes to lrf, no to lrs
+

 class LrsError(Exception):
    pass
@ -263,6 +236,7 @@ class LrsAttributes(object):
            if type(value) is int:
                value = str(value)
            self.attrs[name] = value
+        


 class LrsContainer(object):
@ -334,12 +308,12 @@ class LrsContainer(object):

 class LrsObject(object):
    """ A mixin class for elements that need an object id. """
-    NextObjId = 0
+    nextObjId = 0
 
    @classmethod
    def getNextObjId(selfClass):
-        selfClass.NextObjId += 1
-        return selfClass.NextObjId
+        selfClass.nextObjId += 1
+        return selfClass.nextObjId

    def __init__(self, assignId=False):
        if assignId:
@ -412,6 +386,7 @@ class Book(Delegator):
        
        There are several other settings -- see the BookInfo class for more.       
    """
+    
    def __init__(self, textstyledefault=None, blockstyledefault=None,
                       pagestyledefault=None,
                       optimizeTags=False,
@ -427,23 +402,23 @@ class Book(Delegator):
        self.optimizeTags = optimizeTags
        self.optimizeCompression = optimizeCompression

-        TextStyle.resetDefaults()
-        BlockStyle.resetDefaults()
-        PageStyle.resetDefaults()
+        pageStyle  = PageStyle(**PageStyle.baseDefaults.copy())
+        blockStyle = BlockStyle(**BlockStyle.baseDefaults.copy())
+        textStyle  = TextStyle(**TextStyle.baseDefaults.copy())

        if textstyledefault is not None:
-            TextStyle.setDefaults(textstyledefault)
+            textStyle.update(textstyledefault)

        if blockstyledefault is not None:
-            BlockStyle.setDefaults(blockstyledefault)
+            blockStyle.update(blockstyledefault)

        if pagestyledefault is not None:
-            PageStyle.setDefaults(pagestyledefault)
+            pageStyle.update(pagestyledefault)

-        Page.defaultPageStyle = PageStyle()
-        TextBlock.defaultTextStyle = TextStyle()
-        TextBlock.defaultBlockStyle = BlockStyle()
-        LrsObject.nextObjId = 1
+        self.defaultPageStyle = pageStyle
+        self.defaultTextStyle = textStyle
+        self.defaultBlockStyle = blockStyle
+        LrsObject.nextObjId += 1

        Delegator.__init__(self, [BookInformation(), Main(),
            Template(), Style(), Solos(), Objects()])
@ -455,7 +430,46 @@ class Book(Delegator):
        self.applySetting("sourceencoding", DEFAULT_SOURCE_ENCODING)
        
        self.applySettings(settings, testValid=True)
+
+    def create_text_style(self, **settings):
+        ans = TextStyle(**self.defaultTextStyle.attrs.copy())
+        ans.update(settings)
+        return ans
+        
+    def create_block_style(self, **settings):
+        ans = BlockStyle(**self.defaultBlockStyle.attrs.copy())
+        ans.update(settings)
+        return ans
+        
+    def create_page_style(self, **settings):
+        ans = PageStyle(**self.defaultPageStyle.attrs.copy())
+        ans.update(settings)
+        return ans
    
+    def create_page(self, pageStyle=None, **settings):
+        '''
+        Return a new L{Page}. The page has not been appended to this book.
+        @param pageStyle: If None, the default page style is used.
+        @type pageStyle: L{PageStyle}
+        '''
+        if not pageStyle:
+            pageStyle = self.defaultPageStyle
+        return Page(pageStyle=pageStyle, **settings)
+    
+    def create_text_block(self, textStyle=None, blockStyle=None, **settings):
+        '''
+        Return a new L{TextBlock}. The block has not been appended to this
+        book.
+        @param textStyle: If None, the default text style is used.
+        @type textStyle: L{TextStyle}
+        @param blockStyle: If None, the default block style is used.
+        @type blockStyle: L{BlockStyle}
+        '''
+        if not textStyle:
+            textStyle = self.defaultTextStyle
+        if not blockStyle:
+            blockStyle = self.defaultBlockStyle
+        return TextBlock(textStyle=textStyle, blockStyle=blockStyle, **settings)

    def pages(self):
        '''Return list of Page objects in this book '''
@ -538,11 +552,6 @@ class Book(Delegator):
                               spaceBeforeClose=False)
        writer.write(f)
        
-       
-        
-
-
-


 class BookInformation(Delegator):
@ -914,8 +923,6 @@ class Style(LrsContainer, Delegator):
                "appendPageStyle", "appendTextStyle", "appendBlockStyle"] + \
                        self.delegatedMethods

-    
-
    def getSettings(self):
        return [(self.bookStyle, x) for x in self.bookStyle.getSettings()]

@ -1066,19 +1073,13 @@ class LrsStyle(LrsObject, LrsAttributes, LrsContainer):
        #self.parent = None
        

-    @classmethod
-    def resetDefaults(selfClass):
-        selfClass.defaults = selfClass.baseDefaults.copy()
-
-
-    @classmethod
-    def setDefaults(selfClass, settings):
+    def update(self, settings):
        for name, value in settings.items():
-            if name not in selfClass.validSettings:
-                raise LrsError, "default setting %s not recognized" % name
-            selfClass.defaults[name] = value
-
-
+            if name not in self.__class__.validSettings:
+                raise LrsError, "%s not a valid setting for %s" % \
+                                                (name, self.__class__.__name__)
+            self.attrs[name] = value
+            
    def getLabel(self):
        return str(self.objId)
 
@ -1119,7 +1120,7 @@ class TextStyle(LrsStyle):
            fontorientation="0", fontweight="400",
            fontfacename="Dutch801 Rm BT Roman",
            textcolor="0x00000000", wordspace="25", letterspace="0",
-            baselineskip="120", linespace="12", parindent="80", parskip="0",
+            baselineskip="120", linespace="10", parindent="0", parskip="0",
            textbgcolor="0xFF000000")

    alsoAllow = ["empdotscode", "empdotsfontname", "refempdotsfont",
@ -1238,16 +1239,11 @@ class Page(LrsObject, LrsContainer):
    """
    defaultPageStyle = PageStyle()

-    def __init__(self, *args, **settings):
+    def __init__(self, pageStyle=defaultPageStyle, **settings):
        LrsObject.__init__(self)
        LrsContainer.__init__(self, [TextBlock, BlockSpace, RuledLine,
            ImageBlock])

-        if len(args) > 0:
-            pageStyle = args[0]
-        else:
-            pageStyle = Page.defaultPageStyle
-
        self.pageStyle = pageStyle

        for settingName in settings.keys():
@ -1381,7 +1377,7 @@ class TextBlock(LrsObject, LrsContainer):
        self.textStyle = textStyle
        self.blockStyle = blockStyle

-        # create a textStyle with our current text settings (for Span to find)
+        # create a textStyle with our current text settings (for Span to find)        
        self.currentTextStyle = textStyle.copy()
        self.currentTextStyle.attrs.update(self.textSettings)

--- a/src/libprs500/lrf/txt/convert_from.py
+++ b/src/libprs500/lrf/txt/convert_from.py
@ -68,7 +68,10 @@ def convert_txt(path, options):
                sourceencoding=options.encoding, freetext=options.freetext, \
                category=options.category)
    buffer = ''
-    block = book.Page().TextBlock()
+    pg = book.create_page()
+    block = book.create_text_block()
+    pg.append(block)
+    book.append(pg)
    for line in fileinput.input(path):
        line = line.strip()
        if line: