Refactor pylrs to stop depending on static variables for default style information.

This commit is contained in:
Kovid Goyal 2007-05-03 23:32:14 +00:00
parent 9d407875bd
commit 8781e84beb
5 changed files with 135 additions and 131 deletions

View File

@ -56,6 +56,7 @@ def Book(font_delta=0, header=None, **settings):
ps['header'] = header
ps['header'] = hdr
ps['topmargin'] = 10
return _Book(textstyledefault=dict(fontsize=100+font_delta*20), \
return _Book(textstyledefault=dict(fontsize=100+font_delta*20,
parindent=80, linespace=12), \
pagestyledefault=ps, \
**settings)

View File

@ -30,17 +30,13 @@ from operator import itemgetter
from libprs500.lrf.html.BeautifulSoup import BeautifulSoup, Comment, Tag, \
NavigableString, Declaration, ProcessingInstruction
from libprs500.lrf.pylrs.pylrs import Paragraph, CR, Italic, ImageStream, TextBlock, \
ImageBlock, JumpButton, CharButton, BlockStyle,\
Page, Bold, Space, Plot, TextStyle, Image, BlockSpace,\
ImageBlock, JumpButton, CharButton, \
Bold, Space, Plot, Image, BlockSpace,\
RuledLine
from libprs500.lrf.pylrs.pylrs import Span as _Span
from libprs500.lrf import ConversionError, option_parser, Book
from libprs500 import extract
def ImagePage():
    """
    Return a new Page with zero side and top margins and a fixed
    600x800 text area.
    """
    page_settings = dict(evensidemargin=0, oddsidemargin=0, topmargin=0,
                         textwidth=600, textheight=800)
    return Page(**page_settings)
class Span(_Span):
replaced_entities = [ 'amp', 'lt', 'gt' , 'ldquo', 'rdquo', 'lsquo', 'rsquo', 'nbsp' ]
patterns = [ re.compile('&'+i+';') for i in replaced_entities ]
@ -212,12 +208,9 @@ class Span(_Span):
class HTMLConverter(object):
selector_pat = re.compile(r"([A-Za-z0-9\-\_\:\.]+[A-Za-z0-9\-\_\:\.\s\,]*)\s*\{([^\}]*)\}")
SELECTOR_PAT = re.compile(r"([A-Za-z0-9\-\_\:\.]+[A-Za-z0-9\-\_\:\.\s\,]*)\s*\{([^\}]*)\}")
IGNORED_TAGS = (Comment, Declaration, ProcessingInstruction)
justification_styles = dict(head=TextStyle(align='head'), foot=TextStyle(align='foot'),
center=TextStyle(align='center'))
blockquote_style = BlockStyle(sidemargin=60, topskip=20, footskip=20)
unindented_style = TextStyle(parindent=0)
# Fix <a /> elements
markup_massage = [(re.compile("(<\s*[aA]\s+.*\/)\s*>"),
lambda match: match.group(1)+"></a>")]
@ -278,6 +271,12 @@ class HTMLConverter(object):
self.page_height = height #: The height of the page
self.max_link_levels = max_link_levels #: Number of link levels to process recursively
self.link_level = link_level #: Current link level
self.justification_styles = dict(head=book.create_text_style(align='head'),
foot=book.create_text_style(align='foot'),
center=book.create_text_style(align='center'))
self.blockquote_style = book.create_block_style(sidemargin=60,
topskip=20, footskip=20)
self.unindented_style = book.create_text_style(parindent=0)
self.images = {} #: Images referenced in the HTML document
self.targets = {} #: <a name=...> elements
self.links = [] #: <a href=...> elements
@ -320,7 +319,7 @@ class HTMLConverter(object):
"""
sdict = dict()
style = re.sub('/\*.*?\*/', '', style) # Remove /*...*/ comments
for sel in re.findall(HTMLConverter.selector_pat, style):
for sel in re.findall(HTMLConverter.SELECTOR_PAT, style):
for key in sel[0].split(','):
key = key.strip().lower()
val = self.parse_style_properties(sel[1])
@ -376,8 +375,8 @@ class HTMLConverter(object):
def parse_file(self):
previous = self.book.last_page()
self.current_page = Page()
self.current_block = TextBlock()
self.current_page = self.book.create_page()
self.current_block = self.book.create_text_block()
self.current_para = Paragraph()
if self.cover:
self.add_image_page(self.cover)
@ -445,7 +444,7 @@ class HTMLConverter(object):
break
if not ans:
ntb = TextBlock()
ntb = self.book.create_text_block()
ntb.Paragraph(' ')
page.append(ntb)
ans = ntb
@ -528,16 +527,18 @@ class HTMLConverter(object):
self.current_para.append_to(self.current_block)
self.current_para = Paragraph()
self.current_block.append_to(self.current_page)
self.current_block = TextBlock()
self.current_block = self.book.create_text_block()
if self.current_page.has_text():
self.book.append(self.current_page)
self.current_page = Page()
self.current_page = self.book.create_page()
def add_image_page(self, path):
if os.access(path, os.R_OK):
self.end_page()
page = ImagePage()
page = self.book.create_page(evensidemargin=0, oddsidemargin=0,
topmargin=0, textwidth=self.page_width,
textheight=self.page_height)
if not self.images.has_key(path):
self.images[path] = ImageStream(path)
page.append(ImageBlock(self.images[path]))
@ -578,7 +579,8 @@ class HTMLConverter(object):
if align != self.current_block.textStyle.attrs['align']:
self.current_para.append_to(self.current_block)
self.current_block.append_to(self.current_page)
self.current_block = TextBlock(textStyle=HTMLConverter.justification_styles[align])
self.current_block = self.book.create_text_block(
textStyle=self.justification_styles[align])
self.current_para = Paragraph()
try:
self.current_para.append(Span(src, self.sanctify_css(css), self.memory,\
@ -613,6 +615,12 @@ class HTMLConverter(object):
if self.current_block.contents and \
not isinstance(self.current_block.contents[-1], CR):
self.current_block.append(CR())
def end_current_block(self):
    """
    Close the block currently being built.

    The current paragraph is appended to the current block and the block
    is appended to the current page. A fresh Paragraph and a fresh text
    block (obtained from the book) then become the current ones.
    """
    finished_para, finished_block = self.current_para, self.current_block
    finished_para.append_to(finished_block)
    finished_block.append_to(self.current_page)
    # Start over with empty containers for whatever content comes next.
    self.current_para = Paragraph()
    self.current_block = self.book.create_text_block()
def parse_tag(self, tag, parent_css):
try:
@ -663,7 +671,7 @@ class HTMLConverter(object):
break
if target and not isinstance(target, (TextBlock, ImageBlock)):
if isinstance(target, RuledLine):
target = TextBlock()
target = self.book.create_text_block()
target.Paragraph(' ')
self.current_page.append(target)
else:
@ -719,7 +727,7 @@ class HTMLConverter(object):
self.current_block.append(self.current_para)
self.current_page.append(self.current_block)
self.current_para = Paragraph()
self.current_block = TextBlock()
self.current_block = self.book.create_text_block()
im = ImageBlock(self.images[path], x1=width, y1=height,
xsize=width, ysize=height)
self.current_page.append(im)
@ -747,7 +755,8 @@ class HTMLConverter(object):
elif tagname == 'pre':
self.end_current_para()
self.current_block.append_to(self.current_page)
self.current_block = TextBlock(textStyle=HTMLConverter.unindented_style)
self.current_block = self.book.create_text_block(
textStyle=self.unindented_style)
src = ''.join([str(i) for i in tag.contents])
lines = src.split('\n')
for line in lines:
@ -756,19 +765,15 @@ class HTMLConverter(object):
self.current_para.CR()
except ConversionError:
pass
self.end_current_para()
self.current_block.append_to(self.current_page)
self.current_block = TextBlock()
self.end_current_block()
elif tagname in ['ul', 'ol']:
self.in_ol = 1 if tagname == 'ol' else 0
self.end_current_para()
self.current_block.append_to(self.current_page)
self.current_block = TextBlock(textStyle=HTMLConverter.unindented_style)
self.end_current_block()
self.current_block = self.book.create_text_block(
textStyle=self.unindented_style)
self.process_children(tag, tag_css)
self.in_ol = 0
self.end_current_para()
self.current_block.append_to(self.current_page)
self.current_block = TextBlock()
self.end_current_block()
elif tagname == 'li':
prepend = str(self.in_ol)+'. ' if self.in_ol else u'\u2022' + ' '
if self.current_para.has_text():
@ -791,13 +796,11 @@ class HTMLConverter(object):
self.current_para.append_to(self.current_block)
self.current_block.append_to(self.current_page)
self.current_para = Paragraph()
self.current_block = TextBlock(blockStyle=HTMLConverter.blockquote_style,
textStyle=HTMLConverter.unindented_style)
self.current_block = self.book.create_text_block(
blockStyle=self.blockquote_style,
textStyle=self.unindented_style)
self.process_children(tag, tag_css)
self.current_para.append_to(self.current_block)
self.current_block.append_to(self.current_page)
self.current_para = Paragraph()
self.current_block = TextBlock()
self.end_current_block()
elif tagname in ['p', 'div']:
self.end_current_para()
self.lstrip_toggle = True
@ -813,12 +816,9 @@ class HTMLConverter(object):
self.current_para.append(CR())
self.process_children(tag, tag_css)
elif tagname == 'hr':
if self.current_para.contents:
self.current_block.append(self.current_para)
self.current_para = Paragraph()
self.end_current_para()
self.current_block.append(CR())
self.current_page.append(self.current_block)
self.current_block = TextBlock()
self.end_current_block()
self.current_page.RuledLine(linelength=self.page_width)
else:
self.process_children(tag, tag_css)

View File

@ -1 +1,5 @@
"""
This package contains code to generate ebooks in the SONY LRS/F format. It was
originally developed by Mike Higgins and has been extended and modified by Kovid
Goyal.
"""

View File

@ -1,33 +1,6 @@
"""
pylrs.py -- a package to create LRS (and LRF) e-Books for the Sony PRS-500.
"""
import os
import re
import codecs
from datetime import date
try:
from elementtree.ElementTree import (Element, SubElement)
except ImportError:
from xml.etree.ElementTree import (Element, SubElement)
from elements import ElementWriter
from pylrf import (LrfWriter, LrfObject, LrfTag, LrfToc,
STREAM_COMPRESSED, LrfTagStream, LrfStreamBase, IMAGE_TYPE_ENCODING,
BINDING_DIRECTION_ENCODING, LINE_TYPE_ENCODING, LrfFileStream,
STREAM_FORCE_COMPRESSED)
PYLRS_VERSION = "1.0"
DEFAULT_SOURCE_ENCODING = "cp1252" # default is us-windows character set
DEFAULT_GENREADING = "f" # default is yes to lrf, no to lrs
#
# Acknowledgement:
# This software would not have been possible without the pioneering
# efforts of the author of lrf2lrs.py, Igor Skochinsky.
#
# Copyright (c) 2007 Mike Higgins (Falstaff)
# Modifications from the original:
# Copyright (C) 2007 Kovid Goyal <kovid@kovidgoyal.net>
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
@ -45,22 +18,8 @@ DEFAULT_GENREADING = "f" # default is yes to lrf, no to lrs
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
# Check www.falstaffshouse.com for possible updates to this code.
# Email contact: falstaff (at) falstaffshouse.com
#
# Change History:
#
# V1.0 06 Feb 2007
# Initial Release.
#
#
# Current limitations and bugs:
# Bug: using two instances of Book() at the same time can cause
# incorrect output if any default styles are used. Workaround:
# supply all styles explicitly, or use only one Book class at a time.
# Bug: Does not check if most setting values are valid unless lrf is created.
#
# Unsupported objects: MiniPage, SimpleTextBlock, Canvas, Window,
@ -76,9 +35,23 @@ DEFAULT_GENREADING = "f" # default is yes to lrf, no to lrs
# Other unsupported tags: PageDiv, SoundStop, Wait, pos,
# Plot, Image (outside of ImageBlock),
# EmpLine, EmpDots
#
# Tested on Python 2.4 and 2.5, Windows XP and PRS-500.
#
import os, re, codecs
from datetime import date
try:
from elementtree.ElementTree import (Element, SubElement)
except ImportError:
from xml.etree.ElementTree import (Element, SubElement)
from elements import ElementWriter
from pylrf import (LrfWriter, LrfObject, LrfTag, LrfToc,
STREAM_COMPRESSED, LrfTagStream, LrfStreamBase, IMAGE_TYPE_ENCODING,
BINDING_DIRECTION_ENCODING, LINE_TYPE_ENCODING, LrfFileStream,
STREAM_FORCE_COMPRESSED)
DEFAULT_SOURCE_ENCODING = "cp1252" # default is us-windows character set
DEFAULT_GENREADING = "f" # default is yes to lrf, no to lrs
class LrsError(Exception):
pass
@ -263,6 +236,7 @@ class LrsAttributes(object):
if type(value) is int:
value = str(value)
self.attrs[name] = value
class LrsContainer(object):
@ -334,12 +308,12 @@ class LrsContainer(object):
class LrsObject(object):
""" A mixin class for elements that need an object id. """
NextObjId = 0
nextObjId = 0
@classmethod
def getNextObjId(selfClass):
selfClass.NextObjId += 1
return selfClass.NextObjId
selfClass.nextObjId += 1
return selfClass.nextObjId
def __init__(self, assignId=False):
if assignId:
@ -412,6 +386,7 @@ class Book(Delegator):
There are several other settings -- see the BookInfo class for more.
"""
def __init__(self, textstyledefault=None, blockstyledefault=None,
pagestyledefault=None,
optimizeTags=False,
@ -427,23 +402,23 @@ class Book(Delegator):
self.optimizeTags = optimizeTags
self.optimizeCompression = optimizeCompression
TextStyle.resetDefaults()
BlockStyle.resetDefaults()
PageStyle.resetDefaults()
pageStyle = PageStyle(**PageStyle.baseDefaults.copy())
blockStyle = BlockStyle(**BlockStyle.baseDefaults.copy())
textStyle = TextStyle(**TextStyle.baseDefaults.copy())
if textstyledefault is not None:
TextStyle.setDefaults(textstyledefault)
textStyle.update(textstyledefault)
if blockstyledefault is not None:
BlockStyle.setDefaults(blockstyledefault)
blockStyle.update(blockstyledefault)
if pagestyledefault is not None:
PageStyle.setDefaults(pagestyledefault)
pageStyle.update(pagestyledefault)
Page.defaultPageStyle = PageStyle()
TextBlock.defaultTextStyle = TextStyle()
TextBlock.defaultBlockStyle = BlockStyle()
LrsObject.nextObjId = 1
self.defaultPageStyle = pageStyle
self.defaultTextStyle = textStyle
self.defaultBlockStyle = blockStyle
LrsObject.nextObjId += 1
Delegator.__init__(self, [BookInformation(), Main(),
Template(), Style(), Solos(), Objects()])
@ -455,7 +430,46 @@ class Book(Delegator):
self.applySetting("sourceencoding", DEFAULT_SOURCE_ENCODING)
self.applySettings(settings, testValid=True)
def create_text_style(self, **settings):
    '''
    Return a new L{TextStyle} built from a copy of this book's default
    text style attributes, with C{settings} applied on top through the
    style's C{update} method.
    '''
    base_attrs = self.defaultTextStyle.attrs.copy()
    style = TextStyle(**base_attrs)
    style.update(settings)
    return style
def create_block_style(self, **settings):
    '''
    Return a new L{BlockStyle} built from a copy of this book's default
    block style attributes, with C{settings} applied on top through the
    style's C{update} method.
    '''
    base_attrs = self.defaultBlockStyle.attrs.copy()
    style = BlockStyle(**base_attrs)
    style.update(settings)
    return style
def create_page_style(self, **settings):
    '''
    Return a new L{PageStyle} built from a copy of this book's default
    page style attributes, with C{settings} applied on top through the
    style's C{update} method.
    '''
    base_attrs = self.defaultPageStyle.attrs.copy()
    style = PageStyle(**base_attrs)
    style.update(settings)
    return style
def create_page(self, pageStyle=None, **settings):
    '''
    Return a new L{Page}. The page has not been appended to this book.

    @param pageStyle: Style for the page. If None, this book's
                      C{defaultPageStyle} is used.
    @type pageStyle: L{PageStyle}
    @param settings: Extra keyword arguments forwarded unchanged to the
                     L{Page} constructor.
    @return: The newly created page.
    @rtype: L{Page}
    '''
    # Compare against None explicitly: a plain truthiness test would
    # silently replace a caller-supplied style object that happens to
    # evaluate false with the default.
    if pageStyle is None:
        pageStyle = self.defaultPageStyle
    return Page(pageStyle=pageStyle, **settings)
def create_text_block(self, textStyle=None, blockStyle=None, **settings):
    '''
    Return a new L{TextBlock}. The block has not been appended to this
    book.

    @param textStyle: Text style for the block. If None, this book's
                      C{defaultTextStyle} is used.
    @type textStyle: L{TextStyle}
    @param blockStyle: Block style for the block. If None, this book's
                       C{defaultBlockStyle} is used.
    @type blockStyle: L{BlockStyle}
    @param settings: Extra keyword arguments forwarded unchanged to the
                     L{TextBlock} constructor.
    @return: The newly created text block.
    @rtype: L{TextBlock}
    '''
    # Only an omitted (None) style falls back to the book default; an
    # explicitly supplied style object is always honoured, even if it
    # happens to evaluate false.
    if textStyle is None:
        textStyle = self.defaultTextStyle
    if blockStyle is None:
        blockStyle = self.defaultBlockStyle
    return TextBlock(textStyle=textStyle, blockStyle=blockStyle, **settings)
def pages(self):
'''Return list of Page objects in this book '''
@ -538,11 +552,6 @@ class Book(Delegator):
spaceBeforeClose=False)
writer.write(f)
class BookInformation(Delegator):
@ -914,8 +923,6 @@ class Style(LrsContainer, Delegator):
"appendPageStyle", "appendTextStyle", "appendBlockStyle"] + \
self.delegatedMethods
def getSettings(self):
return [(self.bookStyle, x) for x in self.bookStyle.getSettings()]
@ -1066,19 +1073,13 @@ class LrsStyle(LrsObject, LrsAttributes, LrsContainer):
#self.parent = None
@classmethod
def resetDefaults(selfClass):
selfClass.defaults = selfClass.baseDefaults.copy()
@classmethod
def setDefaults(selfClass, settings):
def update(self, settings):
for name, value in settings.items():
if name not in selfClass.validSettings:
raise LrsError, "default setting %s not recognized" % name
selfClass.defaults[name] = value
if name not in self.__class__.validSettings:
raise LrsError, "%s not a valid setting for %s" % \
(name, self.__class__.__name__)
self.attrs[name] = value
def getLabel(self):
return str(self.objId)
@ -1119,7 +1120,7 @@ class TextStyle(LrsStyle):
fontorientation="0", fontweight="400",
fontfacename="Dutch801 Rm BT Roman",
textcolor="0x00000000", wordspace="25", letterspace="0",
baselineskip="120", linespace="12", parindent="80", parskip="0",
baselineskip="120", linespace="10", parindent="0", parskip="0",
textbgcolor="0xFF000000")
alsoAllow = ["empdotscode", "empdotsfontname", "refempdotsfont",
@ -1238,16 +1239,11 @@ class Page(LrsObject, LrsContainer):
"""
defaultPageStyle = PageStyle()
def __init__(self, *args, **settings):
def __init__(self, pageStyle=defaultPageStyle, **settings):
LrsObject.__init__(self)
LrsContainer.__init__(self, [TextBlock, BlockSpace, RuledLine,
ImageBlock])
if len(args) > 0:
pageStyle = args[0]
else:
pageStyle = Page.defaultPageStyle
self.pageStyle = pageStyle
for settingName in settings.keys():
@ -1381,7 +1377,7 @@ class TextBlock(LrsObject, LrsContainer):
self.textStyle = textStyle
self.blockStyle = blockStyle
# create a textStyle with our current text settings (for Span to find)
# create a textStyle with our current text settings (for Span to find)
self.currentTextStyle = textStyle.copy()
self.currentTextStyle.attrs.update(self.textSettings)

View File

@ -68,7 +68,10 @@ def convert_txt(path, options):
sourceencoding=options.encoding, freetext=options.freetext, \
category=options.category)
buffer = ''
block = book.Page().TextBlock()
pg = book.create_page()
block = book.create_text_block()
pg.append(block)
book.append(pg)
for line in fileinput.input(path):
line = line.strip()
if line: