Fix #3368 (TypeError: not enough arguments for format string)

This commit is contained in:
Kovid Goyal 2009-08-31 14:09:08 -06:00
parent 330779fa13
commit ec16cc89f4

View File

@ -75,13 +75,13 @@ class PMLMLizer(object):
self.log = log self.log = log
self.image_hrefs = {} self.image_hrefs = {}
self.link_hrefs = {} self.link_hrefs = {}
def extract_content(self, oeb_book, opts): def extract_content(self, oeb_book, opts):
self.log.info('Converting XHTML to PML markup...') self.log.info('Converting XHTML to PML markup...')
self.oeb_book = oeb_book self.oeb_book = oeb_book
self.opts = opts self.opts = opts
return self.pmlmlize_spine() return self.pmlmlize_spine()
def pmlmlize_spine(self): def pmlmlize_spine(self):
self.image_hrefs = {} self.image_hrefs = {}
self.link_hrefs = {} self.link_hrefs = {}
@ -139,15 +139,15 @@ class PMLMLizer(object):
# Remove excess spaces at beginning and end of lines # Remove excess spaces at beginning and end of lines
text = re.sub('(?m)^[ ]+', '', text) text = re.sub('(?m)^[ ]+', '', text)
text = re.sub('(?m)[ ]+$', '', text) text = re.sub('(?m)[ ]+$', '', text)
# Remove excessive newlines # Remove excessive newlines
text = re.sub('%s{1,1}' % os.linesep, '%s%s' % (os.linesep, os.linesep), text) text = re.sub('%s{1,1}' % os.linesep, '%s%s' % (os.linesep, os.linesep), text)
text = re.sub('%s{3,}' % os.linesep, '%s%s' % (os.linesep, os.linesep), text) text = re.sub('%s{3,}' % os.linesep, '%s%s' % (os.linesep, os.linesep), text)
text = re.sub('[ ]{2,}', ' ', text) text = re.sub('[ ]{2,}', ' ', text)
# Remove excessive \p tags # Remove excessive \p tags
text = re.sub(r'\\p\s*\\p', '', text) text = re.sub(r'\\p\s*\\p', '', text)
# Remove anchors that do not have links # Remove anchors that do not have links
anchors = set(re.findall(r'(?<=\\Q=").+?(?=")', text)) anchors = set(re.findall(r'(?<=\\Q=").+?(?=")', text))
links = set(re.findall(r'(?<=\\q="#).+?(?=")', text)) links = set(re.findall(r'(?<=\\q="#).+?(?=")', text))
@ -157,7 +157,7 @@ class PMLMLizer(object):
for entity in set(re.findall('&.+?;', text)): for entity in set(re.findall('&.+?;', text)):
mo = re.search('(%s)' % entity[1:-1], text) mo = re.search('(%s)' % entity[1:-1], text)
text = text.replace(entity, entity_to_unicode(mo)) text = text.replace(entity, entity_to_unicode(mo))
return text return text
def dump_text(self, elem, stylizer, page, tag_stack=[]): def dump_text(self, elem, stylizer, page, tag_stack=[]):
@ -167,7 +167,7 @@ class PMLMLizer(object):
text = u'' text = u''
style = stylizer.style(elem) style = stylizer.style(elem)
if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \ if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
or style['visibility'] == 'hidden': or style['visibility'] == 'hidden':
return u'' return u''
@ -180,7 +180,7 @@ class PMLMLizer(object):
if 'block' not in tag_stack: if 'block' not in tag_stack:
tag_count += 1 tag_count += 1
tag_stack.append('block') tag_stack.append('block')
# Process tags that need special processing and that do not have inner # Process tags that need special processing and that do not have inner
# text. Usually these require an argument # text. Usually these require an argument
if tag in IMAGE_TAGS: if tag in IMAGE_TAGS:
@ -192,15 +192,15 @@ class PMLMLizer(object):
text += '\\w' text += '\\w'
width = elem.get('width') width = elem.get('width')
if width: if width:
text += '="%s%"' % width text += '="%s%%"' % width
else: else:
text += '="50%"' text += '="50%"'
# Process style information that holds a single tag # Process style information that holds a single tag
# Commented out because every page in an OEB book starts with this style # Commented out because every page in an OEB book starts with this style
#if style['page-break-before'] == 'always': #if style['page-break-before'] == 'always':
# text += '\\p' # text += '\\p'
pml_tag = TAG_MAP.get(tag, None) pml_tag = TAG_MAP.get(tag, None)
if pml_tag and pml_tag not in tag_stack: if pml_tag and pml_tag not in tag_stack:
tag_count += 1 tag_count += 1
@ -240,23 +240,23 @@ class PMLMLizer(object):
# Process tags that contain text. # Process tags that contain text.
if hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '': if hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '':
text += self.elem_text(elem, tag_stack) text += self.elem_text(elem, tag_stack)
for item in elem: for item in elem:
text += self.dump_text(item, stylizer, page, tag_stack) text += self.dump_text(item, stylizer, page, tag_stack)
close_tag_list = [] close_tag_list = []
for i in range(0, tag_count): for i in range(0, tag_count):
close_tag_list.insert(0, tag_stack.pop()) close_tag_list.insert(0, tag_stack.pop())
text += self.close_tags(close_tag_list) text += self.close_tags(close_tag_list)
if tag in SEPARATE_TAGS: if tag in SEPARATE_TAGS:
text += os.linesep + os.linesep text += os.linesep + os.linesep
if 'block' not in tag_stack: if 'block' not in tag_stack:
text += os.linesep + os.linesep text += os.linesep + os.linesep
#if style['page-break-after'] == 'always': #if style['page-break-after'] == 'always':
# text += '\\p' # text += '\\p'
if hasattr(elem, 'tail') and elem.tail != None and elem.tail.strip() != '': if hasattr(elem, 'tail') and elem.tail != None and elem.tail.strip() != '':
text += self.elem_tail(elem, tag_stack) text += self.elem_tail(elem, tag_stack)