mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Various FB2 improvements
This commit is contained in:
parent
51e20a8764
commit
0adef17457
@ -10,6 +10,7 @@ Transform OEB content into FB2 markup
|
|||||||
|
|
||||||
import cStringIO
|
import cStringIO
|
||||||
from base64 import b64encode
|
from base64 import b64encode
|
||||||
|
import re
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
@ -30,14 +31,15 @@ TAG_MAP = {
|
|||||||
'i' : 'emphasis',
|
'i' : 'emphasis',
|
||||||
'p' : 'p',
|
'p' : 'p',
|
||||||
'li' : 'p',
|
'li' : 'p',
|
||||||
'br' : 'p',
|
'div': 'p',
|
||||||
}
|
}
|
||||||
|
|
||||||
TAG_SPACE = [
|
TAG_FORCE_P = [
|
||||||
'div',
|
|
||||||
'br',
|
'br',
|
||||||
]
|
]
|
||||||
|
|
||||||
|
TAG_SPACE = []
|
||||||
|
|
||||||
TAG_IMAGES = [
|
TAG_IMAGES = [
|
||||||
'img',
|
'img',
|
||||||
]
|
]
|
||||||
@ -79,8 +81,14 @@ class FB2MLizer(object):
|
|||||||
output.append(self.fb2mlize_images())
|
output.append(self.fb2mlize_images())
|
||||||
output.append(self.fb2_footer())
|
output.append(self.fb2_footer())
|
||||||
output = ''.join(output).replace(u'ghji87yhjko0Caliblre-toc-placeholder-for-insertion-later8ujko0987yjk', self.get_toc())
|
output = ''.join(output).replace(u'ghji87yhjko0Caliblre-toc-placeholder-for-insertion-later8ujko0987yjk', self.get_toc())
|
||||||
|
output = self.clean_text(output)
|
||||||
return u'<?xml version="1.0" encoding="UTF-8"?>\n%s' % etree.tostring(etree.fromstring(output), encoding=unicode, pretty_print=True)
|
return u'<?xml version="1.0" encoding="UTF-8"?>\n%s' % etree.tostring(etree.fromstring(output), encoding=unicode, pretty_print=True)
|
||||||
|
|
||||||
|
def clean_text(self, text):
|
||||||
|
text = re.sub('<p>[ ]*</p>', '', text)
|
||||||
|
|
||||||
|
return text
|
||||||
|
|
||||||
def fb2_header(self):
|
def fb2_header(self):
|
||||||
author_first = u''
|
author_first = u''
|
||||||
author_middle = u''
|
author_middle = u''
|
||||||
@ -124,7 +132,7 @@ class FB2MLizer(object):
|
|||||||
return output
|
return output
|
||||||
|
|
||||||
def get_toc(self):
|
def get_toc(self):
|
||||||
toc = [u'']
|
toc = []
|
||||||
if self.opts.inline_toc:
|
if self.opts.inline_toc:
|
||||||
self.log.debug('Generating table of contents...')
|
self.log.debug('Generating table of contents...')
|
||||||
toc.append(u'<p>%s</p>' % _('Table of Contents:'))
|
toc.append(u'<p>%s</p>' % _('Table of Contents:'))
|
||||||
@ -136,7 +144,7 @@ class FB2MLizer(object):
|
|||||||
return ''.join(toc)
|
return ''.join(toc)
|
||||||
|
|
||||||
def get_text(self):
|
def get_text(self):
|
||||||
text = [u'']
|
text = []
|
||||||
for item in self.oeb_book.spine:
|
for item in self.oeb_book.spine:
|
||||||
self.log.debug('Converting %s to FictionBook2 XML' % item.href)
|
self.log.debug('Converting %s to FictionBook2 XML' % item.href)
|
||||||
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
|
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
|
||||||
@ -162,7 +170,7 @@ class FB2MLizer(object):
|
|||||||
return '<a id="%s" />' % aid
|
return '<a id="%s" />' % aid
|
||||||
|
|
||||||
def fb2mlize_images(self):
|
def fb2mlize_images(self):
|
||||||
images = [u'']
|
images = []
|
||||||
for item in self.oeb_book.manifest:
|
for item in self.oeb_book.manifest:
|
||||||
if item.media_type in OEB_RASTER_IMAGES:
|
if item.media_type in OEB_RASTER_IMAGES:
|
||||||
try:
|
try:
|
||||||
@ -190,14 +198,15 @@ class FB2MLizer(object):
|
|||||||
def dump_text(self, elem, stylizer, page, tag_stack=[]):
|
def dump_text(self, elem, stylizer, page, tag_stack=[]):
|
||||||
if not isinstance(elem.tag, basestring) \
|
if not isinstance(elem.tag, basestring) \
|
||||||
or namespace(elem.tag) != XHTML_NS:
|
or namespace(elem.tag) != XHTML_NS:
|
||||||
return [u'']
|
return []
|
||||||
|
|
||||||
fb2_text = [u'']
|
|
||||||
style = stylizer.style(elem)
|
style = stylizer.style(elem)
|
||||||
|
|
||||||
if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
|
if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
|
||||||
or style['visibility'] == 'hidden':
|
or style['visibility'] == 'hidden':
|
||||||
return [u'']
|
return []
|
||||||
|
|
||||||
|
fb2_text = []
|
||||||
|
tags = []
|
||||||
|
|
||||||
tag = barename(elem.tag)
|
tag = barename(elem.tag)
|
||||||
|
|
||||||
@ -221,14 +230,32 @@ class FB2MLizer(object):
|
|||||||
self.link_hrefs[href] = 'calibre_link-%s' % len(self.link_hrefs.keys())
|
self.link_hrefs[href] = 'calibre_link-%s' % len(self.link_hrefs.keys())
|
||||||
href = self.link_hrefs[href]
|
href = self.link_hrefs[href]
|
||||||
fb2_text.append('<a xlink:href="#%s">' % href)
|
fb2_text.append('<a xlink:href="#%s">' % href)
|
||||||
tag_stack.append('a')
|
tags.append('a')
|
||||||
|
|
||||||
# Anchor ids
|
# Anchor ids
|
||||||
id_name = elem.get('id')
|
id_name = elem.get('id')
|
||||||
if id_name:
|
if id_name:
|
||||||
fb2_text.append(self.get_anchor(page, id_name))
|
fb2_text.append(self.get_anchor(page, id_name))
|
||||||
|
|
||||||
if tag in TAG_TITLE:
|
if tag in TAG_FORCE_P:
|
||||||
|
if 'p' in tag_stack+tags:
|
||||||
|
# Close all up to p. Close p. Reopen all closed tags including p.
|
||||||
|
all_tags = tag_stack+tags
|
||||||
|
closed_tags = []
|
||||||
|
all_tags.reverse()
|
||||||
|
for t in all_tags:
|
||||||
|
fb2_text.append('</%s>' % t)
|
||||||
|
closed_tags.append(t)
|
||||||
|
if t == 'p':
|
||||||
|
break
|
||||||
|
closed_tags.reverse()
|
||||||
|
for t in closed_tags:
|
||||||
|
fb2_text.append('<%s>' % t)
|
||||||
|
else:
|
||||||
|
fb2_text.append('<p>')
|
||||||
|
tags.append('p')
|
||||||
|
|
||||||
|
'''if tag in TAG_TITLE:
|
||||||
if 'p' in tag_stack:
|
if 'p' in tag_stack:
|
||||||
ctag = []
|
ctag = []
|
||||||
ctag.append(tag_stack.pop())
|
ctag.append(tag_stack.pop())
|
||||||
@ -237,42 +264,35 @@ class FB2MLizer(object):
|
|||||||
fb2_text += self.close_tags(ctag)
|
fb2_text += self.close_tags(ctag)
|
||||||
fb2_text.append('</section><section><title><p>')
|
fb2_text.append('</section><section><title><p>')
|
||||||
tag_stack.append('title')
|
tag_stack.append('title')
|
||||||
tag_stack.append('p')
|
tag_stack.append('p')'''
|
||||||
|
|
||||||
fb2_tag = TAG_MAP.get(tag, None)
|
fb2_tag = TAG_MAP.get(tag, None)
|
||||||
if fb2_tag:
|
if fb2_tag and fb2_tag not in tag_stack+tags:
|
||||||
if fb2_tag in tag_stack:
|
|
||||||
tag_stack.reverse()
|
|
||||||
tag_stack.remove(fb2_tag)
|
|
||||||
tag_stack.reverse()
|
|
||||||
fb2_text.append('</%s>' % fb2_tag)
|
|
||||||
fb2_text.append('<%s>' % fb2_tag)
|
fb2_text.append('<%s>' % fb2_tag)
|
||||||
tag_stack.append(fb2_tag)
|
tags.append(fb2_tag)
|
||||||
|
|
||||||
# Processes style information
|
# Processes style information
|
||||||
for s in STYLES:
|
for s in STYLES:
|
||||||
style_tag = s[1].get(style[s[0]], None)
|
style_tag = s[1].get(style[s[0]], None)
|
||||||
if style_tag:
|
if style_tag and style_tag not in tag_stack+tags:
|
||||||
fb2_text.append('<%s>' % style_tag)
|
fb2_text.append('<%s>' % style_tag)
|
||||||
tag_stack.append(style_tag)
|
tags.append(style_tag)
|
||||||
|
|
||||||
if tag in TAG_SPACE:
|
if tag in TAG_SPACE:
|
||||||
if not fb2_text or fb2_text[-1] != ' ':
|
if not fb2_text or fb2_text[-1] != ' ' or not fb2_text[-1].endswith(' '):
|
||||||
fb2_text.append(' ')
|
fb2_text.append(' ')
|
||||||
|
|
||||||
if hasattr(elem, 'text') and elem.text:
|
if hasattr(elem, 'text') and elem.text:
|
||||||
if 'p' not in tag_stack:
|
if 'p' not in tag_stack+tags:
|
||||||
fb2_text.append('<p>%s</p>' % prepare_string_for_xml(elem.text))
|
fb2_text.append('<p>%s</p>' % prepare_string_for_xml(elem.text))
|
||||||
else:
|
else:
|
||||||
fb2_text.append(prepare_string_for_xml(elem.text))
|
fb2_text.append(prepare_string_for_xml(elem.text))
|
||||||
|
|
||||||
for item in elem:
|
for item in elem:
|
||||||
fb2_text += self.dump_text(item, stylizer, page, tag_stack)
|
fb2_text += self.dump_text(item, stylizer, page, tag_stack+tags)
|
||||||
|
|
||||||
close_tag_list = []
|
tags.reverse()
|
||||||
for i in range(0, len(tag_stack)):
|
fb2_text += self.close_tags(tags)
|
||||||
close_tag_list.insert(0, tag_stack.pop())
|
|
||||||
fb2_text += self.close_tags(close_tag_list)
|
|
||||||
|
|
||||||
if hasattr(elem, 'tail') and elem.tail:
|
if hasattr(elem, 'tail') and elem.tail:
|
||||||
if 'p' not in tag_stack:
|
if 'p' not in tag_stack:
|
||||||
@ -280,12 +300,12 @@ class FB2MLizer(object):
|
|||||||
else:
|
else:
|
||||||
fb2_text.append(prepare_string_for_xml(elem.tail))
|
fb2_text.append(prepare_string_for_xml(elem.tail))
|
||||||
|
|
||||||
|
#print fb2_text
|
||||||
return fb2_text
|
return fb2_text
|
||||||
|
|
||||||
def close_tags(self, tags):
|
def close_tags(self, tags):
|
||||||
fb2_text = [u'']
|
text = []
|
||||||
for i in range(0, len(tags)):
|
for tag in tags:
|
||||||
fb2_tag = tags.pop()
|
text.append('</%s>' % tag)
|
||||||
fb2_text.append('</%s>' % fb2_tag)
|
|
||||||
|
|
||||||
return fb2_text
|
return text
|
||||||
|
Loading…
x
Reference in New Issue
Block a user