Merge from trunk

This commit is contained in:
Charles Haley 2010-10-19 11:08:35 +01:00
commit 347a766ba7
13 changed files with 272 additions and 47 deletions

View File

@ -71,7 +71,9 @@ class TheAtlantic(BasicNewsRecipe):
for poem in soup.findAll('div', attrs={'class':'poem'}):
title = self.tag_to_string(poem.find('h4'))
desc = self.tag_to_string(poem.find(attrs={'class':'author'}))
url = 'http://www.theatlantic.com'+poem.find('a')['href']
url = poem.find('a')['href']
if url.startswith('/'):
url = 'http://www.theatlantic.com' + url
self.log('\tFound article:', title, 'at', url)
self.log('\t\t', desc)
poems.append({'title':title, 'url':url, 'description':desc,
@ -83,7 +85,9 @@ class TheAtlantic(BasicNewsRecipe):
if div is not None:
self.log('Found section: Advice')
title = self.tag_to_string(div.find('h4'))
url = 'http://www.theatlantic.com'+div.find('a')['href']
url = div.find('a')['href']
if url.startswith('/'):
url = 'http://www.theatlantic.com' + url
desc = self.tag_to_string(div.find('p'))
self.log('\tFound article:', title, 'at', url)
self.log('\t\t', desc)

View File

@ -294,3 +294,8 @@ class OutputFormatPlugin(Plugin):
'''
raise NotImplementedError
@property
def is_periodical(self):
    '''
    Whether the book being converted is marked as a periodical.

    Looks at ``self.oeb.metadata.publication_type``. If that value is empty
    or missing it is handed back unchanged (so the result is falsy, but not
    necessarily ``False``). Otherwise the result is ``True`` exactly when the
    text of its first entry starts with ``'periodical:'``.
    '''
    pub_type = self.oeb.metadata.publication_type
    if not pub_type:
        # Same result an ``and`` chain would give: the falsy operand itself.
        return pub_type
    first_entry = unicode(pub_type[0])
    return first_entry.startswith('periodical:')

View File

@ -15,22 +15,30 @@ def rules(stylesheets):
if r.type == r.STYLE_RULE:
yield r
def initialize_container(path_to_container, opf_name='metadata.opf',
        extra_entries=()):
    '''
    Create an empty EPUB document, with a default skeleton.

    :param path_to_container: Where the new EPUB (zip) file is written.
    :param opf_name: Path, inside the container, of the OPF package document.
        It is listed as the first ``<rootfile>`` of
        ``META-INF/container.xml``.
    :param extra_entries: Iterable of ``(path, mimetype, data)`` tuples. Every
        tuple is listed as an additional ``<rootfile>`` in
        ``META-INF/container.xml`` and its ``data`` is stored in the archive
        under ``path``.
    :return: The zip file object, left open so the caller can add more
        members.
    '''
    from xml.sax.saxutils import escape

    def attr(value):
        # The value ends up inside a double-quoted XML attribute, so markup
        # characters and the quote character itself must be escaped.
        return escape(value, {'"': '&quot;'})

    # Materialise once: the entries are walked twice below and the caller may
    # hand in a one-shot iterator.
    extra_entries = tuple(extra_entries)
    rootfiles = u''.join(
        u'<rootfile full-path="{0}" media-type="{1}"/>'.format(
            attr(path), attr(mimetype))
        for path, mimetype, _data in extra_entries)
    CONTAINER = u'''\
<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
<rootfiles>
<rootfile full-path="{0}" media-type="application/oebps-package+xml"/>
{extra_entries}
</rootfiles>
</container>
'''.format(attr(opf_name), extra_entries=rootfiles).encode('utf-8')
    zf = ZipFile(path_to_container, 'w')
    # The mimetype member is written first and uncompressed.
    zf.writestr('mimetype', 'application/epub+zip', compression=ZIP_STORED)
    # 0o700 is the same value as the old 0700 literal, spelled so that it
    # parses on Python 2.6+ and Python 3 alike.
    # NOTE(review): the third positional argument is presumably the permission
    # bits of the directory entry - confirm against the ZipFile class this
    # module imports.
    zf.writestr('META-INF/', '', 0o700)
    zf.writestr('META-INF/container.xml', CONTAINER)
    for path, _mimetype, data in extra_entries:
        zf.writestr(path, data)
    return zf

View File

@ -106,6 +106,7 @@ class EPUBOutput(OutputFormatPlugin):
recommendations = set([('pretty_print', True, OptionRecommendation.HIGH)])
def workaround_webkit_quirks(self): # {{{
from calibre.ebooks.oeb.base import XPath
for x in self.oeb.spine:
@ -183,6 +184,12 @@ class EPUBOutput(OutputFormatPlugin):
with TemporaryDirectory('_epub_output') as tdir:
from calibre.customize.ui import plugin_for_output_format
metadata_xml = None
extra_entries = []
if self.is_periodical:
from calibre.ebooks.epub.periodical import sony_metadata
metadata_xml, atom_xml = sony_metadata(oeb)
extra_entries = [('atom.xml', 'application/atom+xml', atom_xml)]
oeb_output = plugin_for_output_format('oeb')
oeb_output.convert(oeb, tdir, input_plugin, opts, log)
opf = [x for x in os.listdir(tdir) if x.endswith('.opf')][0]
@ -194,10 +201,14 @@ class EPUBOutput(OutputFormatPlugin):
encryption = self.encrypt_fonts(encrypted_fonts, tdir, uuid)
from calibre.ebooks.epub import initialize_container
epub = initialize_container(output_path, os.path.basename(opf))
epub = initialize_container(output_path, os.path.basename(opf),
extra_entries=extra_entries)
epub.add_dir(tdir)
if encryption is not None:
epub.writestr('META-INF/encryption.xml', encryption)
if metadata_xml is not None:
epub.writestr('META-INF/metadata.xml',
metadata_xml.encode('utf-8'))
if opts.extract_to is not None:
if os.path.exists(opts.extract_to):
shutil.rmtree(opts.extract_to)

View File

@ -0,0 +1,173 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from uuid import uuid4
from calibre.constants import __appname__, __version__
from calibre import strftime, prepare_string_for_xml as xml
# Template for the periodical metadata document (RDF/XML). sony_metadata()
# fills it with str.format(), supplying already XML-escaped values for
# {title}, {publisher}, {short_title}, {issue_date} and {language}.
SONY_METADATA = u'''\
<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:dcterms="http://purl.org/dc/terms/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:prs="http://xmlns.sony.net/e-book/prs/">
<rdf:Description rdf:about="">
<dc:title>{title}</dc:title>
<dc:publisher>{publisher}</dc:publisher>
<dcterms:alternative>{short_title}</dcterms:alternative>
<dcterms:issued>{issue_date}</dcterms:issued>
<dc:language>{language}</dc:language>
<dcterms:conformsTo rdf:resource="http://xmlns.sony.net/e-book/prs/periodicals/1.0/newspaper/1.0"/>
<dcterms:type rdf:resource="http://xmlns.sony.net/e-book/prs/datatype/newspaper"/>
<dcterms:type rdf:resource="http://xmlns.sony.net/e-book/prs/datatype/periodical"/>
</rdf:Description>
</rdf:RDF>
'''
# Template for the Atom feed that wraps all entries. {entries} receives the
# already rendered SONY_ATOM_SECTION / SONY_ATOM_ENTRY fragments joined
# together; {short_title}, {updated} and {id} describe the feed itself.
SONY_ATOM = u'''\
<?xml version="1.0" encoding="utf-8" ?>
<feed xmlns="http://www.w3.org/2005/Atom"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dcterms="http://purl.org/dc/terms/"
xmlns:prs="http://xmlns.sony.net/e-book/prs/"
xmlns:media="http://video.search.yahoo.com/mrss"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<title>{short_title}</title>
<updated>{updated}</updated>
<id>{id}</id>
{entries}
</feed>
'''
# Template for one section entry of the feed. {title} is used four times: as
# the rdf:ID of the entry, as its <title>, and in the term and label of its
# <category>. Because it appears inside attributes, the value must be escaped
# for attribute context. Article entries point back at a section through
# rdf:resource="#{section_title}" in SONY_ATOM_ENTRY.
SONY_ATOM_SECTION = u'''\
<entry rdf:ID="{title}">
<title>{title}</title>
<link href="{href}"/>
<id>{id}</id>
<updated>{updated}</updated>
<summary>{desc}</summary>
<category term="{short_title}/{title}"
scheme="http://xmlns.sony.net/e-book/terms/" label="{title}"/>
<dc:type xsi:type="prs:datatype">newspaper/section</dc:type>
<dcterms:isReferencedBy rdf:resource=""/>
</entry>
'''
# Template for one article entry of the feed. {section_title} must be the
# same (escaped) string that was used as {title} of the owning section, since
# it is what the rdf:resource reference and the category term are built from.
SONY_ATOM_ENTRY = u'''\
<entry>
<title>{title}</title>
<author><name>{author}</name></author>
<link href="{href}"/>
<id>{id}</id>
<updated>{updated}</updated>
<summary>{desc}</summary>
<category term="{short_title}/{section_title}"
scheme="http://xmlns.sony.net/e-book/terms/" label="{section_title}"/>
<dcterms:extent xsi:type="prs:word-count">{word_count}</dcterms:extent>
<dc:type xsi:type="prs:datatype">newspaper/article</dc:type>
<dcterms:isReferencedBy rdf:resource="#{section_title}"/>
</entry>
'''
def sony_metadata(oeb):
    '''
    Build the two XML documents that describe a periodical.

    :param oeb: The book being converted. ``oeb.metadata`` supplies the
        title, publication type, date, language and identifiers. ``oeb.toc``
        supplies the sections (its direct children) and the articles (the
        children of each section). Nodes without an ``href`` are skipped.
    :return: ``(metadata, atom)``. ``metadata`` is the filled in
        ``SONY_METADATA`` template as a unicode string; ``atom`` is the
        filled in ``SONY_ATOM`` feed encoded as UTF-8.
    '''
    import time

    m = oeb.metadata
    title = short_title = unicode(m.title[0])
    publisher = __appname__ + ' ' + __version__
    try:
        # Everything after the second ':' of the publication type is used as
        # the short title; fall back to the full title if it is unavailable.
        pt = unicode(m.publication_type[0])
        short_title = u':'.join(pt.split(':')[2:])
    except Exception:
        pass

    try:
        # Keep only the date part of an ISO style 'YYYY-MM-DDTHH:MM:SS' value
        date = unicode(m.date[0]).split('T')[0]
    except Exception:
        date = strftime('%Y-%m-%d')
    try:
        language = unicode(m.language[0]).replace('_', '-')
    except Exception:
        language = 'en'
    # short_title is also placed inside attributes, so escape it accordingly
    short_title = xml(short_title, True)

    metadata = SONY_METADATA.format(
        title=xml(title),
        short_title=short_title,
        publisher=xml(publisher),
        issue_date=xml(date),
        language=xml(language))

    # The trailing 'Z' designates UTC, so the timestamp has to be built from
    # UTC time rather than local time.
    # NOTE(review): relies on the imported strftime helper accepting a time
    # tuple as its second argument - confirm.
    updated = strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())

    def is_uuid_identifier(ident):
        # True for an identifier element carrying a *scheme="uuid" attribute
        return any(k.endswith('scheme') and v == 'uuid'
                   for k, v in ident.attrib.items())

    try:
        base_id = unicode(
            [x for x in m.identifier if is_uuid_identifier(x)][0])
    except Exception:
        # No usable uuid identifier in the metadata: make one up
        base_id = str(uuid4())
    feed_id = xml(base_id)

    entries = []
    seen_titles = set()
    for i, section in enumerate(oeb.toc):
        if not section.href:
            continue
        secid = 'section%d' % i
        sectitle = section.title
        if not sectitle:
            sectitle = _('Unknown')
        # Section titles double as rdf:ID values that articles refer to, so
        # make every one unique by appending a counter to repeats.
        d = 1
        bsectitle = sectitle
        while sectitle in seen_titles:
            sectitle = bsectitle + ' ' + str(d)
            d += 1
        seen_titles.add(sectitle)
        sectitle = xml(sectitle, True)
        secdesc = xml(section.description or '')
        entries.append(SONY_ATOM_SECTION.format(
            title=sectitle,
            href=xml(section.href, True),
            id=feed_id + '/' + secid,
            short_title=short_title,
            desc=secdesc,
            updated=updated))

        for j, article in enumerate(section):
            if not article.href:
                continue
            atitle = article.title
            btitle = atitle
            d = 1
            # NOTE(review): article titles are only compared against the
            # section titles seen so far; they are never added to
            # seen_titles themselves. Kept as is - confirm this is intended.
            while atitle in seen_titles:
                atitle = btitle + ' ' + str(d)
                d += 1

            auth = article.author if article.author else ''
            # Summarise the article with its own description (previously the
            # section description was repeated for every article) and escape
            # it, since it is inserted as XML character data.
            desc = xml(article.description or '')
            aid = 'article%d' % j

            entries.append(SONY_ATOM_ENTRY.format(
                title=xml(atitle),
                author=xml(auth),
                updated=updated,
                desc=desc,
                short_title=short_title,
                section_title=sectitle,
                href=xml(article.href, True),
                word_count=str(1),
                id=feed_id + '/' + secid + '/' + aid))

    atom = SONY_ATOM.format(
        short_title=short_title,
        entries='\n\n'.join(entries),
        updated=updated,
        id=feed_id).encode('utf-8')

    return metadata, atom

View File

@ -382,11 +382,13 @@ class Guide(ResourceCollection): # {{{
class MetadataField(object):
def __init__(self, name, is_dc=True, formatter=None, none_is=None):
def __init__(self, name, is_dc=True, formatter=None, none_is=None,
renderer=lambda x: unicode(x)):
self.name = name
self.is_dc = is_dc
self.formatter = formatter
self.none_is = none_is
self.renderer = renderer
def __real_get__(self, obj, type=None):
ans = obj.get_metadata_element(self.name)
@ -418,7 +420,7 @@ class MetadataField(object):
return
if elem is None:
elem = obj.create_metadata_element(self.name, is_dc=self.is_dc)
obj.set_text(elem, unicode(val))
obj.set_text(elem, self.renderer(val))
def serialize_user_metadata(metadata_elem, all_user_metadata, tail='\n'+(' '*8)):
@ -489,10 +491,11 @@ class OPF(object): # {{{
series = MetadataField('series', is_dc=False)
series_index = MetadataField('series_index', is_dc=False, formatter=float, none_is=1)
rating = MetadataField('rating', is_dc=False, formatter=int)
pubdate = MetadataField('date', formatter=parse_date)
pubdate = MetadataField('date', formatter=parse_date,
renderer=isoformat)
publication_type = MetadataField('publication_type', is_dc=False)
timestamp = MetadataField('timestamp', is_dc=False,
formatter=parse_date)
formatter=parse_date, renderer=isoformat)
def __init__(self, stream, basedir=os.getcwdu(), unquote_urls=True,
@ -826,8 +829,7 @@ class OPF(object): # {{{
def fset(self, val):
matches = self.isbn_path(self.metadata)
if val is None:
if matches:
if not val:
for x in matches:
x.getparent().remove(x)
return
@ -987,11 +989,14 @@ class OPF(object): # {{{
def smart_update(self, mi, replace_metadata=False):
for attr in ('title', 'authors', 'author_sort', 'title_sort',
'publisher', 'series', 'series_index', 'rating',
'isbn', 'language', 'tags', 'category', 'comments',
'isbn', 'tags', 'category', 'comments',
'pubdate'):
val = getattr(mi, attr, None)
if val is not None and val != [] and val != (None, None):
setattr(self, attr, val)
lang = getattr(mi, 'language', None)
if lang and lang != 'und':
self.language = lang
temp = self.to_book_metadata()
temp.smart_update(mi, replace_metadata=replace_metadata)
self._user_metadata_ = temp.get_all_user_metadata(True)

View File

@ -42,8 +42,7 @@ class MOBIOutput(OutputFormatPlugin):
])
def check_for_periodical(self):
if self.oeb.metadata.publication_type and \
unicode(self.oeb.metadata.publication_type[0]).startswith('periodical:'):
if self.is_periodical:
self.periodicalize_toc()
self.check_for_masthead()
self.opts.mobi_periodical = True

View File

@ -190,7 +190,8 @@ class FetchMetadata(QDialog, Ui_FetchMetadata):
if self.model.rowCount() < 1:
info_dialog(self, _('No metadata found'),
_('No metadata found, try adjusting the title and author '
'or the ISBN key.')).exec_()
'and/or removing the ISBN.')).exec_()
self.reject()
return
self.matches.setModel(self.model)

View File

@ -783,6 +783,10 @@ class BooksModel(QAbstractTableModel): # {{{
self.db.set_rating(id, val)
elif column == 'series':
val = val.strip()
if not val:
self.db.set_series(id, val)
self.db.set_series_index(id, 1.0)
else:
pat = re.compile(r'\[([.0-9]+)\]')
match = pat.search(val)
if match is not None:

View File

@ -148,6 +148,7 @@ class LibraryServer(ContentServer, MobileServer, XMLServer, OPDSServer, Cache,
cherrypy.engine.graceful()
def set_search_restriction(self, restriction):
self.search_restriction_name = restriction
if restriction:
self.search_restriction = 'search:"%s"'%restriction
else:

View File

@ -116,7 +116,10 @@ def render_rating(rating, container='span', prefix=None): # {{{
# }}}
def get_category_items(category, items, db, datatype): # {{{
def get_category_items(category, items, restriction, datatype): # {{{
if category == 'search':
items = [x for x in items if x.name != restriction]
def item(i):
templ = (u'<div title="{4}" class="category-item">'
@ -299,6 +302,7 @@ class BrowseServer(object):
category_meta = self.db.field_metadata
cats = [
(_('Newest'), 'newest', 'forward.png'),
(_('All books'), 'allbooks', 'book.png'),
]
def getter(x):
@ -370,7 +374,8 @@ class BrowseServer(object):
if len(items) <= self.opts.max_opds_ungrouped_items:
script = 'false'
items = get_category_items(category, items, self.db, datatype)
items = get_category_items(category, items,
self.search_restriction_name, datatype)
else:
getter = lambda x: unicode(getattr(x, 'sort', x.name))
starts = set([])
@ -440,7 +445,8 @@ class BrowseServer(object):
entries.append(x)
sort = self.browse_sort_categories(entries, sort)
entries = get_category_items(category, entries, self.db, datatype)
entries = get_category_items(category, entries,
self.search_restriction_name, datatype)
return json.dumps(entries, ensure_ascii=False)
@ -451,6 +457,8 @@ class BrowseServer(object):
ans = self.browse_toplevel()
elif category == 'newest':
raise cherrypy.InternalRedirect('/browse/matches/newest/dummy')
elif category == 'allbooks':
raise cherrypy.InternalRedirect('/browse/matches/allbooks/dummy')
else:
ans = self.browse_category(category, category_sort)
@ -478,16 +486,20 @@ class BrowseServer(object):
raise cherrypy.HTTPError(404, 'invalid category id: %r'%cid)
categories = self.categories_cache()
if category not in categories and category != 'newest':
if category not in categories and \
category not in ('newest', 'allbooks'):
raise cherrypy.HTTPError(404, 'category not found')
fm = self.db.field_metadata
try:
category_name = fm[category]['name']
dt = fm[category]['datatype']
except:
if category != 'newest':
if category not in ('newest', 'allbooks'):
raise
category_name = _('Newest')
category_name = {
'newest' : _('Newest'),
'allbooks' : _('All books'),
}[category]
dt = None
hide_sort = 'true' if dt == 'series' else 'false'
@ -498,8 +510,10 @@ class BrowseServer(object):
except:
raise cherrypy.HTTPError(404, 'Search: %r not understood'%which)
elif category == 'newest':
ids = list(self.db.data.iterallids())
ids = self.search_cache('')
hide_sort = 'true'
elif category == 'allbooks':
ids = self.search_cache('')
else:
q = category
if q == 'news':

View File

@ -1104,7 +1104,7 @@ class BasicNewsRecipe(Recipe):
mi = MetaInformation(title, [__appname__])
mi.publisher = __appname__
mi.author_sort = __appname__
mi.publication_type = 'periodical:'+self.publication_type
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
mi.timestamp = nowf()
mi.comments = self.description
if not isinstance(mi.comments, unicode):