Initial (incomplete) implementation of periodical indexing for MOBI files using the recipe system

This commit is contained in:
Kovid Goyal 2009-07-06 12:16:14 -06:00
parent 0e60596eda
commit ce9c61580b
15 changed files with 116 additions and 32 deletions

View File

@ -225,8 +225,6 @@ class EPUBOutput(OutputFormatPlugin):
if 'titlepage' in self.oeb.guide.refs:
self.oeb.guide.refs['titlepage'].href = item.href
def condense_ncx(self, ncx_path):
if not self.opts.pretty_print:
tree = etree.parse(ncx_path)
@ -238,8 +236,6 @@ class EPUBOutput(OutputFormatPlugin):
compressed = etree.tostring(tree.getroot(), encoding='utf-8')
open(ncx_path, 'wb').write(compressed)
def workaround_ade_quirks(self):
'''
Perform various markup transforms to get the output to render correctly

View File

@ -218,7 +218,7 @@ class MetaInformation(object):
'isbn', 'tags', 'cover_data', 'application_id', 'guide',
'manifest', 'spine', 'toc', 'cover', 'language',
'book_producer', 'timestamp', 'lccn', 'lcc', 'ddc',
'pubdate'):
'pubdate', 'rights', 'publication_type'):
if hasattr(mi, attr):
setattr(ans, attr, getattr(mi, attr))
@ -243,7 +243,8 @@ class MetaInformation(object):
for x in ('author_sort', 'title_sort', 'comments', 'category', 'publisher',
'series', 'series_index', 'rating', 'isbn', 'language',
'application_id', 'manifest', 'toc', 'spine', 'guide', 'cover',
'book_producer', 'timestamp', 'lccn', 'lcc', 'ddc', 'pubdate'
'book_producer', 'timestamp', 'lccn', 'lcc', 'ddc', 'pubdate',
'rights', 'publication_type',
):
setattr(self, x, getattr(mi, x, None))
@ -262,7 +263,8 @@ class MetaInformation(object):
'publisher', 'series', 'series_index', 'rating',
'isbn', 'application_id', 'manifest', 'spine', 'toc',
'cover', 'language', 'guide', 'book_producer',
'timestamp', 'lccn', 'lcc', 'ddc', 'pubdate'):
'timestamp', 'lccn', 'lcc', 'ddc', 'pubdate', 'rights',
'publication_type'):
if hasattr(mi, attr):
val = getattr(mi, attr)
if val is not None:
@ -332,6 +334,8 @@ class MetaInformation(object):
fmt('Timestamp', self.timestamp.isoformat(' '))
if self.pubdate is not None:
fmt('Published', self.pubdate.isoformat(' '))
if self.rights is not None:
fmt('Rights', unicode(self.rights))
if self.lccn:
fmt('LCCN', unicode(self.lccn))
if self.lcc:
@ -362,6 +366,8 @@ class MetaInformation(object):
ans += [(_('Timestamp'), unicode(self.timestamp.isoformat(' ')))]
if self.pubdate is not None:
ans += [(_('Published'), unicode(self.pubdate.isoformat(' ')))]
if self.rights is not None:
ans += [(_('Rights'), unicode(self.rights.isoformat(' ')))]
for i, x in enumerate(ans):
ans[i] = u'<tr><td><b>%s</b></td><td>%s</td></tr>'%x
return u'<table>%s</table>'%u'\n'.join(ans)

View File

@ -7,6 +7,7 @@ import re
xml:lang="en"
xmlns="http://www.daisy.org/z3986/2005/ncx/"
xmlns:py="http://genshi.edgewall.org/"
xmlns:calibre="http://calibre.kovidgoyal.net/2009/metadata"
>
<head>
<meta name="dtb:uid" content="${uid}"/>
@ -23,6 +24,8 @@ import re
${'%*s'%(4*level,'')}<text>${re.sub(r'\s+', ' ', np.text)}</text>
${'%*s'%(4*level,'')}</navLabel>
${'%*s'%(4*level,'')}<content src="${unicode(np.href)+(('#' + unicode(np.fragment)) if np.fragment else '')}" />
${'%*s'%(4*level,'')}<calibre:meta py:if="np.author" name="author">${np.author}</calibre:meta>
${'%*s'%(4*level,'')}<calibre:meta py:if="np.description" name="description">${np.description}</calibre:meta>
<py:for each="np2 in np">${navpoint(np2, level+1)}</py:for>
${'%*s'%(4*level,'')}</navPoint>
</py:def>

View File

@ -19,6 +19,7 @@
<meta py:if="mi.series_index is not None" name="calibre:series_index" content="${mi.format_series_index()}"/>
<meta py:if="mi.rating is not None" name="calibre:rating" content="${mi.rating}"/>
<meta py:if="mi.timestamp is not None" name="calibre:timestamp" content="${mi.timestamp.isoformat()}"/>
<meta py:if="mi.publication_type is not None" name="calibre:publication_type" content="${mi.publication_type}" />
<py:for each="tag in mi.tags">
<dc:subject py:if="mi.tags is not None">${tag}</dc:subject>
</py:for>

View File

@ -440,10 +440,12 @@ class OPF(object):
language = MetadataField('language')
comments = MetadataField('description')
category = MetadataField('category')
rights = MetadataField('rights')
series = MetadataField('series', is_dc=False)
series_index = MetadataField('series_index', is_dc=False, formatter=float, none_is=1)
rating = MetadataField('rating', is_dc=False, formatter=int)
pubdate = MetadataField('date', formatter=parser.parse)
publication_type = MetadataField('publication_type', is_dc=False)
timestamp = MetadataField('timestamp', is_dc=False, formatter=parser.parse)

View File

@ -21,7 +21,8 @@ class NCXSoup(BeautifulStoneSoup):
class TOC(list):
def __init__(self, href=None, fragment=None, text=None, parent=None, play_order=0,
base_path=os.getcwd(), type='unknown'):
base_path=os.getcwd(), type='unknown', author=None,
description=None):
self.href = href
self.fragment = fragment
if not self.fragment:
@ -31,6 +32,8 @@ class TOC(list):
self.base_path = base_path
self.play_order = play_order
self.type = type
self.author = author
self.description = description
def __str__(self):
lines = ['TOC: %s#%s'%(self.href, self.fragment)]
@ -59,11 +62,13 @@ class TOC(list):
list.remove(self, entry)
entry.parent = None
def add_item(self, href, fragment, text, play_order=None, type='unknown'):
def add_item(self, href, fragment, text, play_order=None, type='unknown',
author=None, description=None):
if play_order is None:
play_order = (self[-1].play_order if len(self) else self.play_order) + 1
self.append(TOC(href=href, fragment=fragment, text=text, parent=self,
base_path=self.base_path, play_order=play_order, type=type))
base_path=self.base_path, play_order=play_order,
type=type, author=author, description=description))
return self[-1]
def top_level_items(self):

View File

@ -6,6 +6,7 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from cStringIO import StringIO
from calibre.customize.conversion import OutputFormatPlugin
from calibre.customize.conversion import OptionRecommendation
@ -31,16 +32,76 @@ class MOBIOutput(OutputFormatPlugin):
OptionRecommendation(name='toc_title', recommended_value=None,
help=_('Title for any generated in-line table of contents.')
),
OptionRecommendation(name='mobi_periodical',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Generate a periodical rather than a book.')
),
OptionRecommendation(name='dont_compress',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Disable compression of the file contents.')
),
])
def check_for_periodical(self):
    '''
    Decide whether the book being converted is a periodical.

    The book counts as a periodical when its ``publication_type`` metadata
    is non-empty and the first entry starts with ``'periodical:'``. In that
    case the TOC is restructured and a masthead is ensured before the
    ``mobi_periodical`` option is switched on; otherwise the option is
    switched off.
    '''
    pub_type = self.oeb.metadata.publication_type
    is_periodical = bool(pub_type) and \
            pub_type[0].startswith('periodical:')
    if is_periodical:
        # Both helpers must run before the option is flipped on.
        self.periodicalize_toc()
        self.check_for_masthead()
    self.opts.mobi_periodical = is_periodical
def check_for_masthead(self):
    '''
    Make sure the book has a ``masthead`` entry in its guide.

    If the guide already yields a ``'masthead'`` entry nothing is done.
    Otherwise the bundled ``calibre.png`` resource is converted to GIF,
    added to the manifest and registered in the guide as the masthead.
    '''
    # Compare each guide entry (not the builtin ``type``) with 'masthead'.
    # The earlier comparison could never be true, so a default masthead was
    # generated even when the book already supplied one.
    # NOTE(review): assumes iterating the guide yields reference type
    # names, as the original loop variable suggests -- confirm.
    if any(typ == 'masthead' for typ in self.oeb.guide):
        return

    self.oeb.debug('No masthead found, generating default one...')
    from calibre.resources import server_resources
    try:
        from PIL import Image as PILImage
        PILImage
    except ImportError:
        # Fall back to the top-level ``Image`` module when the ``PIL``
        # package import fails.
        import Image as PILImage

    # Re-encode the bundled PNG as GIF entirely in memory.
    raw = StringIO(server_resources['calibre.png'])
    im = PILImage.open(raw)
    of = StringIO()
    im.save(of, 'GIF')
    raw = of.getvalue()

    # ``item_id`` rather than ``id`` so the builtin is not shadowed.
    item_id, href = self.oeb.manifest.generate('masthead', 'masthead')
    self.oeb.manifest.add(item_id, href, 'image/gif', data=raw)
    self.oeb.guide.add('masthead', 'Masthead Image', href)
def periodicalize_toc(self):
    '''
    Restructure the TOC into a single ``periodical`` root node.

    Nothing happens when the TOC is empty or its first entry already has
    ``klass == 'periodical'``. Otherwise the existing entries are treated
    as sections (or wrapped in one new ``section`` node when the TOC depth
    is below 3), and every section that has children is re-parented under
    a new root node titled with the book title. That root then replaces
    the previous top-level nodes.
    '''
    from calibre.ebooks.oeb.base import TOC
    toc = self.oeb.toc
    if not toc or toc[0].klass == 'periodical':
        return

    self.log('Converting TOC for MOBI periodical indexing...')
    if toc.depth < 3:
        # Shallow TOC: gather every entry under one synthetic section.
        wrapper = TOC(klass='section')
        for entry in toc:
            wrapper.append(entry)
        sections = [wrapper]
    else:
        sections = list(toc)

    # Detach the children of each section, keyed by the section's identity.
    articles = {}
    for section in sections:
        detached = articles[id(section)] = []
        for article in list(section):
            detached.append(article)
            section.nodes.remove(article)

    root = TOC(klass='periodical', title=self.oeb.metadata.title[0])
    for section in sections:
        detached = articles[id(section)]
        if detached:
            # Only sections that had children end up under the root.
            for article in detached:
                section.nodes.append(article)
            root.nodes.append(section)

    # Swap the old top-level nodes for the single periodical root.
    for node in list(toc.nodes):
        toc.nodes.remove(node)
    toc.nodes.append(root)
def convert(self, oeb, output_path, input_plugin, opts, log):
self.log, self.opts, self.oeb = log, opts, oeb
from calibre.ebooks.mobi.writer import PALM_MAX_IMAGE_SIZE, \
@ -60,6 +121,7 @@ class MOBIOutput(OutputFormatPlugin):
rasterizer(oeb, opts)
mobimlizer = MobiMLizer(ignore_tables=opts.linearize_tables)
mobimlizer(oeb, opts)
self.check_for_periodical()
write_page_breaks_after_item = not input_plugin is plugin_for_input_format('cbz')
writer = MobiWriter(opts, imagemax=imagemax,
compression=UNCOMPRESSED if opts.dont_compress else PALMDOC,

View File

@ -455,7 +455,8 @@ class Metadata(object):
'description', 'format', 'identifier', 'language',
'publisher', 'relation', 'rights', 'source',
'subject', 'title', 'type'])
CALIBRE_TERMS = set(['series', 'series_index', 'rating', 'timestamp'])
CALIBRE_TERMS = set(['series', 'series_index', 'rating', 'timestamp',
'publication_type'])
OPF_ATTRS = {'role': OPF('role'), 'file-as': OPF('file-as'),
'scheme': OPF('scheme'), 'event': OPF('event'),
'type': XSI('type'), 'lang': XML('lang'), 'id': 'id'}

View File

@ -335,14 +335,14 @@ class OEBReader(object):
po = int(child.get('playOrder', self.oeb.toc.next_play_order()))
authorElement = xpath(child,
'descendant::mbp:meta[@name = "author"]')
'descendant::calibre:meta[@name = "author"]')
if authorElement :
author = authorElement[0].text
else :
author = None
descriptionElement = xpath(child,
'descendant::mbp:meta[@name = "description"]')
'descendant::calibre:meta[@name = "description"]')
if descriptionElement :
description = descriptionElement[0].text
else :

View File

@ -63,11 +63,16 @@ def meta_info_to_oeb_metadata(mi, m, log):
if mi.timestamp is not None:
m.clear('timestamp')
m.add('timestamp', mi.timestamp.isoformat())
if mi.rights is not None:
m.clear('rights')
m.add('rights', mi.rights)
if mi.publication_type is not None:
m.clear('publication_type')
m.add('publication_type', mi.publication_type)
if not m.timestamp:
m.add('timestamp', datetime.utcnow().isoformat())
class MergeMetadata(object):
'Merge in user metadata, including cover'

View File

@ -13,6 +13,7 @@ from urlparse import urlparse
from calibre.ebooks.oeb.base import XPNSMAP, TOC, XHTML
from calibre.ebooks import ConversionError
def XPath(x):
try:
return etree.XPath(x, namespaces=XPNSMAP)

View File

@ -64,8 +64,5 @@ class ManifestTrimmer(object):
unchecked = new
for item in oeb.manifest.values():
if item not in used:
if getattr(self.opts, 'mobi_periodical', False) and \
item.href == 'images/mastheadImage.gif':
continue
oeb.logger.info('Trimming %r from manifest' % item.href)
oeb.manifest.remove(item)

View File

@ -19,7 +19,7 @@ class PluginWidget(Widget, Ui_Form):
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
Widget.__init__(self, parent, 'mobi_output',
['prefer_author_sort', 'rescale_images', 'toc_title',
'dont_compress', 'mobi_periodical']
'dont_compress',]
)
self.db, self.book_id = db, book_id
self.initialize_options(get_option, get_help, db, book_id)

View File

@ -41,7 +41,7 @@
</property>
</widget>
</item>
<item row="6" column="0">
<item row="5" column="0">
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
@ -61,13 +61,6 @@
</property>
</widget>
</item>
<item row="5" column="0">
<widget class="QCheckBox" name="opt_mobi_periodical">
<property name="text">
<string>Generate a periodical rather than a book</string>
</property>
</widget>
</item>
<item row="0" column="0">
<widget class="QCheckBox" name="opt_no_inline_toc">
<property name="text">

View File

@ -64,6 +64,10 @@ class BasicNewsRecipe(Recipe):
#: Delay between consecutive downloads in seconds
delay = 0
#: Publication type
#: Set to newspaper, magazine or blog
publication_type = 'unknown'
#: Number of simultaneous downloads. Set to 1 if the server is picky.
#: Automatically reduced to 1 if :attr:`BasicNewsRecipe.delay` > 0
simultaneous_downloads = 5
@ -848,6 +852,7 @@ class BasicNewsRecipe(Recipe):
mi = MetaInformation(self.title + strftime(self.timefmt), [__appname__])
mi.publisher = __appname__
mi.author_sort = __appname__
mi.publication_type = 'periodical:'+self.publication_type
opf_path = os.path.join(dir, 'index.opf')
ncx_path = os.path.join(dir, 'index.ncx')
opf = OPFCreator(dir, mi)
@ -878,13 +883,16 @@ class BasicNewsRecipe(Recipe):
for j, a in enumerate(f):
if getattr(a, 'downloaded', False):
adir = 'feed_%d/article_%d/'%(num, j)
desc = a.text_summary
if not desc:
desc = None
entries.append('%sindex.html'%adir)
po = self.play_order_map.get(entries[-1], None)
if po is None:
self.play_order_counter += 1
po = self.play_order_counter
parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
play_order=po)
play_order=po, description=desc)
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
for sp in a.sub_pages:
prefix = os.path.commonprefix([opf_path, sp])
@ -915,7 +923,11 @@ class BasicNewsRecipe(Recipe):
if po is None:
self.play_order_counter += 1
po = self.play_order_counter
feed_index(i, toc.add_item('feed_%d/index.html'%i, None, f.title, play_order=po))
desc = f.description
if not desc:
desc = None
feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
f.title, play_order=po, description=desc))
else:
entries.append('feed_%d/index.html'%0)
feed_index(0, toc)