mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge from trunk
This commit is contained in:
commit
347a766ba7
@ -71,7 +71,9 @@ class TheAtlantic(BasicNewsRecipe):
|
||||
for poem in soup.findAll('div', attrs={'class':'poem'}):
|
||||
title = self.tag_to_string(poem.find('h4'))
|
||||
desc = self.tag_to_string(poem.find(attrs={'class':'author'}))
|
||||
url = 'http://www.theatlantic.com'+poem.find('a')['href']
|
||||
url = poem.find('a')['href']
|
||||
if url.startswith('/'):
|
||||
url = 'http://www.theatlantic.com' + url
|
||||
self.log('\tFound article:', title, 'at', url)
|
||||
self.log('\t\t', desc)
|
||||
poems.append({'title':title, 'url':url, 'description':desc,
|
||||
@ -83,7 +85,9 @@ class TheAtlantic(BasicNewsRecipe):
|
||||
if div is not None:
|
||||
self.log('Found section: Advice')
|
||||
title = self.tag_to_string(div.find('h4'))
|
||||
url = 'http://www.theatlantic.com'+div.find('a')['href']
|
||||
url = div.find('a')['href']
|
||||
if url.startswith('/'):
|
||||
url = 'http://www.theatlantic.com' + url
|
||||
desc = self.tag_to_string(div.find('p'))
|
||||
self.log('\tFound article:', title, 'at', url)
|
||||
self.log('\t\t', desc)
|
||||
|
@ -294,3 +294,8 @@ class OutputFormatPlugin(Plugin):
|
||||
'''
|
||||
raise NotImplementedError
|
||||
|
||||
@property
def is_periodical(self):
    '''
    Truthy when the book being output declares a publication type whose
    first entry starts with ``periodical:``.

    When no publication type is set, the (falsy) publication type value
    itself is returned, so treat the result as a truth value only.
    '''
    pub_type = self.oeb.metadata.publication_type
    return pub_type and unicode(pub_type[0]).startswith('periodical:')
|
||||
|
||||
|
@ -15,22 +15,30 @@ def rules(stylesheets):
|
||||
if r.type == r.STYLE_RULE:
|
||||
yield r
|
||||
|
||||
def initialize_container(path_to_container, opf_name='metadata.opf'):
|
||||
def initialize_container(path_to_container, opf_name='metadata.opf',
|
||||
extra_entries=[]):
|
||||
'''
|
||||
Create an empty EPUB document, with a default skeleton.
|
||||
'''
|
||||
CONTAINER='''\
|
||||
rootfiles = ''
|
||||
for path, mimetype, _ in extra_entries:
|
||||
rootfiles += u'<rootfile full-path="{0}" media-type="{1}"/>'.format(
|
||||
path, mimetype)
|
||||
CONTAINER = u'''\
|
||||
<?xml version="1.0"?>
|
||||
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
|
||||
<rootfiles>
|
||||
<rootfile full-path="%s" media-type="application/oebps-package+xml"/>
|
||||
<rootfile full-path="{0}" media-type="application/oebps-package+xml"/>
|
||||
{extra_entries}
|
||||
</rootfiles>
|
||||
</container>
|
||||
'''%opf_name
|
||||
'''.format(opf_name, extra_entries=rootfiles).encode('utf-8')
|
||||
zf = ZipFile(path_to_container, 'w')
|
||||
zf.writestr('mimetype', 'application/epub+zip', compression=ZIP_STORED)
|
||||
zf.writestr('META-INF/', '', 0700)
|
||||
zf.writestr('META-INF/container.xml', CONTAINER)
|
||||
for path, _, data in extra_entries:
|
||||
zf.writestr(path, data)
|
||||
return zf
|
||||
|
||||
|
||||
|
@ -106,6 +106,7 @@ class EPUBOutput(OutputFormatPlugin):
|
||||
recommendations = set([('pretty_print', True, OptionRecommendation.HIGH)])
|
||||
|
||||
|
||||
|
||||
def workaround_webkit_quirks(self): # {{{
|
||||
from calibre.ebooks.oeb.base import XPath
|
||||
for x in self.oeb.spine:
|
||||
@ -183,6 +184,12 @@ class EPUBOutput(OutputFormatPlugin):
|
||||
|
||||
with TemporaryDirectory('_epub_output') as tdir:
|
||||
from calibre.customize.ui import plugin_for_output_format
|
||||
metadata_xml = None
|
||||
extra_entries = []
|
||||
if self.is_periodical:
|
||||
from calibre.ebooks.epub.periodical import sony_metadata
|
||||
metadata_xml, atom_xml = sony_metadata(oeb)
|
||||
extra_entries = [('atom.xml', 'application/atom+xml', atom_xml)]
|
||||
oeb_output = plugin_for_output_format('oeb')
|
||||
oeb_output.convert(oeb, tdir, input_plugin, opts, log)
|
||||
opf = [x for x in os.listdir(tdir) if x.endswith('.opf')][0]
|
||||
@ -194,10 +201,14 @@ class EPUBOutput(OutputFormatPlugin):
|
||||
encryption = self.encrypt_fonts(encrypted_fonts, tdir, uuid)
|
||||
|
||||
from calibre.ebooks.epub import initialize_container
|
||||
epub = initialize_container(output_path, os.path.basename(opf))
|
||||
epub = initialize_container(output_path, os.path.basename(opf),
|
||||
extra_entries=extra_entries)
|
||||
epub.add_dir(tdir)
|
||||
if encryption is not None:
|
||||
epub.writestr('META-INF/encryption.xml', encryption)
|
||||
if metadata_xml is not None:
|
||||
epub.writestr('META-INF/metadata.xml',
|
||||
metadata_xml.encode('utf-8'))
|
||||
if opts.extract_to is not None:
|
||||
if os.path.exists(opts.extract_to):
|
||||
shutil.rmtree(opts.extract_to)
|
||||
|
173
src/calibre/ebooks/epub/periodical.py
Normal file
173
src/calibre/ebooks/epub/periodical.py
Normal file
@ -0,0 +1,173 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from uuid import uuid4
|
||||
|
||||
from calibre.constants import __appname__, __version__
|
||||
from calibre import strftime, prepare_string_for_xml as xml
|
||||
|
||||
SONY_METADATA = u'''\
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns:dcterms="http://purl.org/dc/terms/"
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:prs="http://xmlns.sony.net/e-book/prs/">
|
||||
<rdf:Description rdf:about="">
|
||||
<dc:title>{title}</dc:title>
|
||||
<dc:publisher>{publisher}</dc:publisher>
|
||||
<dcterms:alternative>{short_title}</dcterms:alternative>
|
||||
<dcterms:issued>{issue_date}</dcterms:issued>
|
||||
<dc:language>{language}</dc:language>
|
||||
<dcterms:conformsTo rdf:resource="http://xmlns.sony.net/e-book/prs/periodicals/1.0/newspaper/1.0"/>
|
||||
<dcterms:type rdf:resource="http://xmlns.sony.net/e-book/prs/datatype/newspaper"/>
|
||||
<dcterms:type rdf:resource="http://xmlns.sony.net/e-book/prs/datatype/periodical"/>
|
||||
</rdf:Description>
|
||||
</rdf:RDF>
|
||||
'''
|
||||
|
||||
SONY_ATOM = u'''\
|
||||
<?xml version="1.0" encoding="utf-8" ?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom"
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:dcterms="http://purl.org/dc/terms/"
|
||||
xmlns:prs="http://xmlns.sony.net/e-book/prs/"
|
||||
xmlns:media="http://video.search.yahoo.com/mrss"
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
||||
|
||||
<title>{short_title}</title>
|
||||
<updated>{updated}</updated>
|
||||
<id>{id}</id>
|
||||
{entries}
|
||||
</feed>
|
||||
'''
|
||||
|
||||
SONY_ATOM_SECTION = u'''\
|
||||
<entry rdf:ID="{title}">
|
||||
<title>{title}</title>
|
||||
<link href="{href}"/>
|
||||
<id>{id}</id>
|
||||
<updated>{updated}</updated>
|
||||
<summary>{desc}</summary>
|
||||
<category term="{short_title}/{title}"
|
||||
scheme="http://xmlns.sony.net/e-book/terms/" label="{title}"/>
|
||||
<dc:type xsi:type="prs:datatype">newspaper/section</dc:type>
|
||||
<dcterms:isReferencedBy rdf:resource=""/>
|
||||
</entry>
|
||||
'''
|
||||
|
||||
SONY_ATOM_ENTRY = u'''\
|
||||
<entry>
|
||||
<title>{title}</title>
|
||||
<author><name>{author}</name></author>
|
||||
<link href="{href}"/>
|
||||
<id>{id}</id>
|
||||
<updated>{updated}</updated>
|
||||
<summary>{desc}</summary>
|
||||
<category term="{short_title}/{section_title}"
|
||||
scheme="http://xmlns.sony.net/e-book/terms/" label="{section_title}"/>
|
||||
<dcterms:extent xsi:type="prs:word-count">{word_count}</dcterms:extent>
|
||||
<dc:type xsi:type="prs:datatype">newspaper/article</dc:type>
|
||||
<dcterms:isReferencedBy rdf:resource="#{section_title}"/>
|
||||
</entry>
|
||||
'''
|
||||
|
||||
def sony_metadata(oeb):
    '''
    Generate the Sony periodical metadata for ``oeb``.

    Reads the title, publication type, date, language and identifiers from
    ``oeb.metadata`` and walks ``oeb.toc`` (top-level nodes are treated as
    sections, their children as articles) to fill in the ``SONY_METADATA``
    and ``SONY_ATOM`` templates.

    :param oeb: The book; only ``oeb.metadata`` and ``oeb.toc`` are used.
    :return: A ``(metadata, atom)`` tuple. ``metadata`` is the RDF document
             as a unicode string, ``atom`` is the Atom feed encoded as UTF-8.
    '''
    m = oeb.metadata
    title = short_title = unicode(m.title[0])
    publisher = __appname__ + ' ' + __version__
    try:
        # Everything after the second ':' of the publication type is used
        # as the short title.
        pt = unicode(m.publication_type[0])
        candidate = u':'.join(pt.split(':')[2:])
    except Exception:
        candidate = u''
    if candidate:
        # Keep the full title when the publication type carries no short
        # title, rather than emitting empty <title>/<category> values.
        short_title = candidate

    try:
        date = unicode(m.date[0]).split('T')[0]
    except Exception:
        date = strftime('%Y-%m-%d')
    try:
        language = unicode(m.language[0]).replace('_', '-')
    except Exception:
        language = 'en'
    # short_title is also used inside attribute values, so escape quotes too
    short_title = xml(short_title, True)

    metadata = SONY_METADATA.format(title=xml(title),
            short_title=short_title,
            publisher=xml(publisher), issue_date=xml(date),
            language=xml(language))

    updated = strftime('%Y-%m-%dT%H:%M:%SZ')

    def cal_id(x):
        # True for identifier elements whose *scheme attribute is 'uuid'
        for k, v in x.attrib.items():
            if k.endswith('scheme') and v == 'uuid':
                return True
        return False

    try:
        base_id = unicode(list(filter(cal_id, m.identifier))[0])
    except Exception:
        # No usable uuid identifier: fall back to a freshly generated one
        base_id = str(uuid4())
    base_id = xml(base_id)

    entries = []
    seen_titles = set([])
    for i, section in enumerate(oeb.toc):
        if not section.href:
            continue
        secid = 'section%d'%i
        sectitle = section.title
        if not sectitle:
            sectitle = _('Unknown')
        # The section title is used as an rdf:ID, so it has to be unique
        d = 1
        bsectitle = sectitle
        while sectitle in seen_titles:
            sectitle = bsectitle + ' ' + str(d)
            d += 1
        seen_titles.add(sectitle)
        sectitle = xml(sectitle, True)
        secdesc = section.description
        if not secdesc:
            secdesc = ''
        secdesc = xml(secdesc)
        entries.append(SONY_ATOM_SECTION.format(title=sectitle,
            href=xml(section.href, True), id=base_id+'/'+secid,
            short_title=short_title, desc=secdesc, updated=updated))

        for j, article in enumerate(section):
            if not article.href:
                continue
            atitle = article.title
            if not atitle:
                # Same fallback as for sections; xml() needs a string
                atitle = _('Unknown')
            btitle = atitle
            d = 1
            while atitle in seen_titles:
                atitle = btitle + ' ' + str(d)
                d += 1

            auth = article.author if article.author else ''
            # Use the article's own description (not the section's) and
            # escape it, since it is inserted as element text.
            desc = article.description
            if not desc:
                desc = ''
            aid = 'article%d'%j

            entries.append(SONY_ATOM_ENTRY.format(
                title=xml(atitle),
                author=xml(auth),
                updated=updated,
                desc=xml(desc),
                short_title=short_title,
                section_title=sectitle,
                href=xml(article.href, True),
                word_count=str(1),
                id=base_id+'/'+secid+'/'+aid
                ))

    atom = SONY_ATOM.format(short_title=short_title,
            entries='\n\n'.join(entries), updated=updated,
            id=base_id).encode('utf-8')

    return metadata, atom
|
||||
|
@ -382,11 +382,13 @@ class Guide(ResourceCollection): # {{{
|
||||
|
||||
class MetadataField(object):
|
||||
|
||||
def __init__(self, name, is_dc=True, formatter=None, none_is=None):
|
||||
def __init__(self, name, is_dc=True, formatter=None, none_is=None,
|
||||
renderer=lambda x: unicode(x)):
|
||||
self.name = name
|
||||
self.is_dc = is_dc
|
||||
self.formatter = formatter
|
||||
self.none_is = none_is
|
||||
self.renderer = renderer
|
||||
|
||||
def __real_get__(self, obj, type=None):
|
||||
ans = obj.get_metadata_element(self.name)
|
||||
@ -418,7 +420,7 @@ class MetadataField(object):
|
||||
return
|
||||
if elem is None:
|
||||
elem = obj.create_metadata_element(self.name, is_dc=self.is_dc)
|
||||
obj.set_text(elem, unicode(val))
|
||||
obj.set_text(elem, self.renderer(val))
|
||||
|
||||
|
||||
def serialize_user_metadata(metadata_elem, all_user_metadata, tail='\n'+(' '*8)):
|
||||
@ -489,10 +491,11 @@ class OPF(object): # {{{
|
||||
series = MetadataField('series', is_dc=False)
|
||||
series_index = MetadataField('series_index', is_dc=False, formatter=float, none_is=1)
|
||||
rating = MetadataField('rating', is_dc=False, formatter=int)
|
||||
pubdate = MetadataField('date', formatter=parse_date)
|
||||
pubdate = MetadataField('date', formatter=parse_date,
|
||||
renderer=isoformat)
|
||||
publication_type = MetadataField('publication_type', is_dc=False)
|
||||
timestamp = MetadataField('timestamp', is_dc=False,
|
||||
formatter=parse_date)
|
||||
formatter=parse_date, renderer=isoformat)
|
||||
|
||||
|
||||
def __init__(self, stream, basedir=os.getcwdu(), unquote_urls=True,
|
||||
@ -826,11 +829,10 @@ class OPF(object): # {{{
|
||||
|
||||
def fset(self, val):
|
||||
matches = self.isbn_path(self.metadata)
|
||||
if val is None:
|
||||
if matches:
|
||||
for x in matches:
|
||||
x.getparent().remove(x)
|
||||
return
|
||||
if not val:
|
||||
for x in matches:
|
||||
x.getparent().remove(x)
|
||||
return
|
||||
if not matches:
|
||||
attrib = {'{%s}scheme'%self.NAMESPACES['opf']: 'ISBN'}
|
||||
matches = [self.create_metadata_element('identifier',
|
||||
@ -987,11 +989,14 @@ class OPF(object): # {{{
|
||||
def smart_update(self, mi, replace_metadata=False):
|
||||
for attr in ('title', 'authors', 'author_sort', 'title_sort',
|
||||
'publisher', 'series', 'series_index', 'rating',
|
||||
'isbn', 'language', 'tags', 'category', 'comments',
|
||||
'isbn', 'tags', 'category', 'comments',
|
||||
'pubdate'):
|
||||
val = getattr(mi, attr, None)
|
||||
if val is not None and val != [] and val != (None, None):
|
||||
setattr(self, attr, val)
|
||||
lang = getattr(mi, 'language', None)
|
||||
if lang and lang != 'und':
|
||||
self.language = lang
|
||||
temp = self.to_book_metadata()
|
||||
temp.smart_update(mi, replace_metadata=replace_metadata)
|
||||
self._user_metadata_ = temp.get_all_user_metadata(True)
|
||||
|
@ -42,11 +42,10 @@ class MOBIOutput(OutputFormatPlugin):
|
||||
])
|
||||
|
||||
def check_for_periodical(self):
|
||||
if self.oeb.metadata.publication_type and \
|
||||
unicode(self.oeb.metadata.publication_type[0]).startswith('periodical:'):
|
||||
self.periodicalize_toc()
|
||||
self.check_for_masthead()
|
||||
self.opts.mobi_periodical = True
|
||||
if self.is_periodical:
|
||||
self.periodicalize_toc()
|
||||
self.check_for_masthead()
|
||||
self.opts.mobi_periodical = True
|
||||
else:
|
||||
self.opts.mobi_periodical = False
|
||||
|
||||
|
@ -190,7 +190,8 @@ class FetchMetadata(QDialog, Ui_FetchMetadata):
|
||||
if self.model.rowCount() < 1:
|
||||
info_dialog(self, _('No metadata found'),
|
||||
_('No metadata found, try adjusting the title and author '
|
||||
'or the ISBN key.')).exec_()
|
||||
'and/or removing the ISBN.')).exec_()
|
||||
self.reject()
|
||||
return
|
||||
|
||||
self.matches.setModel(self.model)
|
||||
|
@ -783,18 +783,22 @@ class BooksModel(QAbstractTableModel): # {{{
|
||||
self.db.set_rating(id, val)
|
||||
elif column == 'series':
|
||||
val = val.strip()
|
||||
pat = re.compile(r'\[([.0-9]+)\]')
|
||||
match = pat.search(val)
|
||||
if match is not None:
|
||||
self.db.set_series_index(id, float(match.group(1)))
|
||||
val = pat.sub('', val).strip()
|
||||
elif val:
|
||||
if tweaks['series_index_auto_increment'] == 'next':
|
||||
ni = self.db.get_next_series_num_for(val)
|
||||
if ni != 1:
|
||||
self.db.set_series_index(id, ni)
|
||||
if val:
|
||||
if not val:
|
||||
self.db.set_series(id, val)
|
||||
self.db.set_series_index(id, 1.0)
|
||||
else:
|
||||
pat = re.compile(r'\[([.0-9]+)\]')
|
||||
match = pat.search(val)
|
||||
if match is not None:
|
||||
self.db.set_series_index(id, float(match.group(1)))
|
||||
val = pat.sub('', val).strip()
|
||||
elif val:
|
||||
if tweaks['series_index_auto_increment'] == 'next':
|
||||
ni = self.db.get_next_series_num_for(val)
|
||||
if ni != 1:
|
||||
self.db.set_series_index(id, ni)
|
||||
if val:
|
||||
self.db.set_series(id, val)
|
||||
elif column == 'timestamp':
|
||||
if val.isNull() or not val.isValid():
|
||||
return False
|
||||
|
@ -148,6 +148,7 @@ class LibraryServer(ContentServer, MobileServer, XMLServer, OPDSServer, Cache,
|
||||
cherrypy.engine.graceful()
|
||||
|
||||
def set_search_restriction(self, restriction):
|
||||
self.search_restriction_name = restriction
|
||||
if restriction:
|
||||
self.search_restriction = 'search:"%s"'%restriction
|
||||
else:
|
||||
|
@ -116,7 +116,10 @@ def render_rating(rating, container='span', prefix=None): # {{{
|
||||
|
||||
# }}}
|
||||
|
||||
def get_category_items(category, items, db, datatype): # {{{
|
||||
def get_category_items(category, items, restriction, datatype): # {{{
|
||||
|
||||
if category == 'search':
|
||||
items = [x for x in items if x.name != restriction]
|
||||
|
||||
def item(i):
|
||||
templ = (u'<div title="{4}" class="category-item">'
|
||||
@ -299,6 +302,7 @@ class BrowseServer(object):
|
||||
category_meta = self.db.field_metadata
|
||||
cats = [
|
||||
(_('Newest'), 'newest', 'forward.png'),
|
||||
(_('All books'), 'allbooks', 'book.png'),
|
||||
]
|
||||
|
||||
def getter(x):
|
||||
@ -370,7 +374,8 @@ class BrowseServer(object):
|
||||
|
||||
if len(items) <= self.opts.max_opds_ungrouped_items:
|
||||
script = 'false'
|
||||
items = get_category_items(category, items, self.db, datatype)
|
||||
items = get_category_items(category, items,
|
||||
self.search_restriction_name, datatype)
|
||||
else:
|
||||
getter = lambda x: unicode(getattr(x, 'sort', x.name))
|
||||
starts = set([])
|
||||
@ -440,7 +445,8 @@ class BrowseServer(object):
|
||||
entries.append(x)
|
||||
|
||||
sort = self.browse_sort_categories(entries, sort)
|
||||
entries = get_category_items(category, entries, self.db, datatype)
|
||||
entries = get_category_items(category, entries,
|
||||
self.search_restriction_name, datatype)
|
||||
return json.dumps(entries, ensure_ascii=False)
|
||||
|
||||
|
||||
@ -451,6 +457,8 @@ class BrowseServer(object):
|
||||
ans = self.browse_toplevel()
|
||||
elif category == 'newest':
|
||||
raise cherrypy.InternalRedirect('/browse/matches/newest/dummy')
|
||||
elif category == 'allbooks':
|
||||
raise cherrypy.InternalRedirect('/browse/matches/allbooks/dummy')
|
||||
else:
|
||||
ans = self.browse_category(category, category_sort)
|
||||
|
||||
@ -478,16 +486,20 @@ class BrowseServer(object):
|
||||
raise cherrypy.HTTPError(404, 'invalid category id: %r'%cid)
|
||||
categories = self.categories_cache()
|
||||
|
||||
if category not in categories and category != 'newest':
|
||||
if category not in categories and \
|
||||
category not in ('newest', 'allbooks'):
|
||||
raise cherrypy.HTTPError(404, 'category not found')
|
||||
fm = self.db.field_metadata
|
||||
try:
|
||||
category_name = fm[category]['name']
|
||||
dt = fm[category]['datatype']
|
||||
except:
|
||||
if category != 'newest':
|
||||
if category not in ('newest', 'allbooks'):
|
||||
raise
|
||||
category_name = _('Newest')
|
||||
category_name = {
|
||||
'newest' : _('Newest'),
|
||||
'allbooks' : _('All books'),
|
||||
}[category]
|
||||
dt = None
|
||||
|
||||
hide_sort = 'true' if dt == 'series' else 'false'
|
||||
@ -498,8 +510,10 @@ class BrowseServer(object):
|
||||
except:
|
||||
raise cherrypy.HTTPError(404, 'Search: %r not understood'%which)
|
||||
elif category == 'newest':
|
||||
ids = list(self.db.data.iterallids())
|
||||
ids = self.search_cache('')
|
||||
hide_sort = 'true'
|
||||
elif category == 'allbooks':
|
||||
ids = self.search_cache('')
|
||||
else:
|
||||
q = category
|
||||
if q == 'news':
|
||||
|
@ -1104,7 +1104,7 @@ class BasicNewsRecipe(Recipe):
|
||||
mi = MetaInformation(title, [__appname__])
|
||||
mi.publisher = __appname__
|
||||
mi.author_sort = __appname__
|
||||
mi.publication_type = 'periodical:'+self.publication_type
|
||||
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
|
||||
mi.timestamp = nowf()
|
||||
mi.comments = self.description
|
||||
if not isinstance(mi.comments, unicode):
|
||||
|
Loading…
x
Reference in New Issue
Block a user