Sync to trunk.

This commit is contained in:
John Schember 2009-09-02 19:05:49 -04:00
commit 10085c8f1b
2 changed files with 11 additions and 12 deletions

View File

@@ -8,7 +8,6 @@ __docformat__ = 'restructuredtext en'
Transform OEB content into FB2 markup Transform OEB content into FB2 markup
''' '''
import os
import cStringIO import cStringIO
from base64 import b64encode from base64 import b64encode
@@ -52,18 +51,18 @@ STYLES = [
] ]
class FB2MLizer(object): class FB2MLizer(object):
def __init__(self, log): def __init__(self, log):
self.log = log self.log = log
self.image_hrefs = {} self.image_hrefs = {}
self.link_hrefs = {} self.link_hrefs = {}
def extract_content(self, oeb_book, opts): def extract_content(self, oeb_book, opts):
self.log.info('Converting XHTML to FB2 markup...') self.log.info('Converting XHTML to FB2 markup...')
self.oeb_book = oeb_book self.oeb_book = oeb_book
self.opts = opts self.opts = opts
return self.fb2mlize_spine() return self.fb2mlize_spine()
def fb2mlize_spine(self): def fb2mlize_spine(self):
self.image_hrefs = {} self.image_hrefs = {}
self.link_hrefs = {} self.link_hrefs = {}
@@ -82,7 +81,7 @@ class FB2MLizer(object):
author_middle = u'' author_middle = u''
author_last = u'' author_last = u''
author_parts = self.oeb_book.metadata.creator[0].value.split(' ') author_parts = self.oeb_book.metadata.creator[0].value.split(' ')
if len(author_parts) == 1: if len(author_parts) == 1:
author_last = author_parts[0] author_last = author_parts[0]
elif len(author_parts) == 2: elif len(author_parts) == 2:
@@ -139,7 +138,7 @@ class FB2MLizer(object):
def fb2_body_footer(self): def fb2_body_footer(self):
return u'\n</section>\n</body>' return u'\n</section>\n</body>'
def fb2_footer(self): def fb2_footer(self):
return u'</FictionBook>' return u'</FictionBook>'
@@ -184,14 +183,14 @@ class FB2MLizer(object):
if not isinstance(elem.tag, basestring) \ if not isinstance(elem.tag, basestring) \
or namespace(elem.tag) != XHTML_NS: or namespace(elem.tag) != XHTML_NS:
return [u''] return [u'']
fb2_text = [u''] fb2_text = [u'']
style = stylizer.style(elem) style = stylizer.style(elem)
if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \ if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
or style['visibility'] == 'hidden': or style['visibility'] == 'hidden':
return [u''] return [u'']
tag = barename(elem.tag) tag = barename(elem.tag)
tag_count = 0 tag_count = 0
@@ -242,7 +241,7 @@ class FB2MLizer(object):
if hasattr(elem, 'text') and elem.text != None: if hasattr(elem, 'text') and elem.text != None:
fb2_text.append(prepare_string_for_xml(elem.text)) fb2_text.append(prepare_string_for_xml(elem.text))
for item in elem: for item in elem:
fb2_text += self.dump_text(item, stylizer, page, tag_stack) fb2_text += self.dump_text(item, stylizer, page, tag_stack)

View File

@@ -8,12 +8,12 @@ __docformat__ = 'restructuredtext en'
Transform OEB content into RB compatible markup. Transform OEB content into RB compatible markup.
''' '''
import os
import re import re
from calibre import prepare_string_for_xml from calibre import prepare_string_for_xml
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
from calibre.ebooks.oeb.stylizer import Stylizer from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.ebooks.rb import unique_name
TAGS = [ TAGS = [
'b', 'b',
@@ -123,7 +123,7 @@ class RBMLizer(object):
aid = self.link_hrefs[aid] aid = self.link_hrefs[aid]
return u'<A NAME="%s"></A>' % aid return u'<A NAME="%s"></A>' % aid
def clean_text(self, text): def clean_text(self, text):
# Remove anchors that do not have links # Remove anchors that do not have links
anchors = set(re.findall(r'(?<=<A NAME=").+?(?="></A>)', text)) anchors = set(re.findall(r'(?<=<A NAME=").+?(?="></A>)', text))
links = set(re.findall(r'(?<=<A HREF="#).+?(?=">)', text)) links = set(re.findall(r'(?<=<A HREF="#).+?(?=">)', text))
@@ -146,7 +146,7 @@ class RBMLizer(object):
tag = barename(elem.tag) tag = barename(elem.tag)
tag_count = 0 tag_count = 0
# Process tags that need special processing and that do not have inner # Process tags that need special processing and that do not have inner
# text. Usually these require an argument # text. Usually these require an argument
if tag in IMAGE_TAGS: if tag in IMAGE_TAGS: