Sync to trunk.

commit 99f5769450 by John Schember, 2013-04-04 08:09:25 -04:00
71 changed files with 1822 additions and 509 deletions

@@ -66,4 +66,3 @@ class Adventure_zone(BasicNewsRecipe):
             if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
                 a['href']=self.index + a['href']
         return soup
-

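The hunk above makes every relative href absolute before conversion. A minimal standalone sketch of the same pattern (the index root below is a placeholder; in the recipe it comes from self.index):

from calibre.ebooks.BeautifulSoup import BeautifulSoup

def absolutize_links(html, index='http://example-site.pl/'):
    # prefix non-absolute hrefs with the site root, as the recipe does
    soup = BeautifulSoup(html)
    for a in soup.findAll('a', href=True):
        if 'http://' not in a['href'] and 'https://' not in a['href']:
            a['href'] = index + a['href']
    return soup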
recipes/am730.recipe (new file)

@@ -0,0 +1,290 @@
# vim:fileencoding=UTF-8
from __future__ import unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2013, Eddie Lau'
__Date__ = ''
__HiResImg__ = True

'''
Change Log:
2013/03/30 -- first version
'''

from calibre import (__appname__, force_unicode, strftime)
from calibre.utils.date import now as nowf
import os, datetime, re
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
from calibre.utils.localization import canonicalize_lang

class AppleDaily(BasicNewsRecipe):
    title = u'AM730'
    __author__ = 'Eddie Lau'
    publisher = 'AM730'
    oldest_article = 1
    max_articles_per_feed = 100
    auto_cleanup = False
    language = 'zh'
    encoding = 'utf-8'
    auto_cleanup = False
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    description = 'http://www.am730.com.hk'
    category = 'Chinese, News, Hong Kong'
    masthead_url = 'http://www.am730.com.hk/images/logo.jpg'
    extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} div[id=articleHeader] {font-size:200%; text-align:left; font-weight:bold;} photocaption {font-size:50%; margin-left:auto; margin-right:auto;}'
    keep_only_tags = [dict(name='div', attrs={'id':'articleHeader'}),
                      dict(name='div', attrs={'class':'thecontent wordsnap'}),
                      dict(name='a', attrs={'class':'lightboximg'})]
    remove_tags = [dict(name='img', attrs={'src':'/images/am730_article_logo.jpg'}),
                   dict(name='img', attrs={'src':'/images/am_endmark.gif'})]

    def get_dtlocal(self):
        dt_utc = datetime.datetime.utcnow()
        # convert UTC to local hk time - at HKT 6am, all news are available
        return dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(6.0/24)

    def get_fetchdate(self):
        if __Date__ <> '':
            return __Date__
        else:
            return self.get_dtlocal().strftime("%Y%m%d")

    def get_fetchformatteddate(self):
        if __Date__ <> '':
            return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8]
        else:
            return self.get_dtlocal().strftime("%Y-%m-%d")

    def get_fetchyear(self):
        if __Date__ <> '':
            return __Date__[0:4]
        else:
            return self.get_dtlocal().strftime("%Y")

    def get_fetchmonth(self):
        if __Date__ <> '':
            return __Date__[4:6]
        else:
            return self.get_dtlocal().strftime("%m")

    def get_fetchday(self):
        if __Date__ <> '':
            return __Date__[6:8]
        else:
            return self.get_dtlocal().strftime("%d")

    # Note: does not work with custom date given by __Date__
    def get_weekday(self):
        return self.get_dtlocal().weekday()

    def populate_article_metadata(self, article, soup, first):
        if first and hasattr(self, 'add_toc_thumbnail'):
            picdiv = soup.find('img')
            if picdiv is not None:
                self.add_toc_thumbnail(article, picdiv['src'])

    def parse_index(self):
        feeds = []
        soup = self.index_to_soup('http://www.am730.com.hk/')
        ul = soup.find(attrs={'class':'nav-section'})
        sectionList = []
        for li in ul.findAll('li'):
            a = 'http://www.am730.com.hk/' + li.find('a', href=True).get('href', False)
            title = li.find('a').get('title', False).strip()
            sectionList.append((title, a))
        for title, url in sectionList:
            articles = self.parse_section(url)
            if articles:
                feeds.append((title, articles))
        return feeds

    def parse_section(self, url):
        soup = self.index_to_soup(url)
        items = soup.findAll(attrs={'style':'padding-bottom: 15px;'})
        current_articles = []
        for item in items:
            a = item.find(attrs={'class':'t6 f14'}).find('a', href=True)
            articlelink = 'http://www.am730.com.hk/' + a.get('href', True)
            title = self.tag_to_string(a)
            description = self.tag_to_string(item.find(attrs={'class':'t3 f14'}))
            current_articles.append({'title': title, 'url': articlelink, 'description': description})
        return current_articles

    def preprocess_html(self, soup):
        multia = soup.findAll('a')
        for a in multia:
            if not (a == None):
                image = a.find('img')
                if not (image == None):
                    if __HiResImg__:
                        image['src'] = image.get('src').replace('/thumbs/', '/')
                    caption = image.get('alt')
                    tag = Tag(soup, "photo", [])
                    tag2 = Tag(soup, "photocaption", [])
                    tag.insert(0, image)
                    if not caption == None:
                        tag2.insert(0, caption)
                    tag.insert(1, tag2)
                    a.replaceWith(tag)
        return soup

    def create_opf(self, feeds, dir=None):
        if dir is None:
            dir = self.output_dir
        title = self.short_title()
        if self.output_profile.periodical_date_in_title:
            title += strftime(self.timefmt)
        mi = MetaInformation(title, [__appname__])
        mi.publisher = __appname__
        mi.author_sort = __appname__
        if self.publication_type:
            mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
        mi.timestamp = nowf()
        article_titles, aseen = [], set()
        for f in feeds:
            for a in f:
                if a.title and a.title not in aseen:
                    aseen.add(a.title)
                    article_titles.append(force_unicode(a.title, 'utf-8'))
        mi.comments = self.description
        if not isinstance(mi.comments, unicode):
            mi.comments = mi.comments.decode('utf-8', 'replace')
        mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' +
                '\n\n'.join(article_titles))
        language = canonicalize_lang(self.language)
        if language is not None:
            mi.language = language
        # This one affects the pub date shown in kindle title
        #mi.pubdate = nowf()
        # now appears to need the time field to be > 12.00noon as well
        mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
        opf_path = os.path.join(dir, 'index.opf')
        ncx_path = os.path.join(dir, 'index.ncx')

        opf = OPFCreator(dir, mi)
        # Add mastheadImage entry to <guide> section
        mp = getattr(self, 'masthead_path', None)
        if mp is not None and os.access(mp, os.R_OK):
            from calibre.ebooks.metadata.opf2 import Guide
            ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
            ref.type = 'masthead'
            ref.title = 'Masthead Image'
            opf.guide.append(ref)

        manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
        manifest.append(os.path.join(dir, 'index.html'))
        manifest.append(os.path.join(dir, 'index.ncx'))

        # Get cover
        cpath = getattr(self, 'cover_path', None)
        if cpath is None:
            pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
            if self.default_cover(pf):
                cpath = pf.name
        if cpath is not None and os.access(cpath, os.R_OK):
            opf.cover = cpath
            manifest.append(cpath)

        # Get masthead
        mpath = getattr(self, 'masthead_path', None)
        if mpath is not None and os.access(mpath, os.R_OK):
            manifest.append(mpath)

        opf.create_manifest_from_files_in(manifest)
        for mani in opf.manifest:
            if mani.path.endswith('.ncx'):
                mani.id = 'ncx'
            if mani.path.endswith('mastheadImage.jpg'):
                mani.id = 'masthead-image'

        entries = ['index.html']
        toc = TOC(base_path=dir)
        self.play_order_counter = 0
        self.play_order_map = {}

        def feed_index(num, parent):
            f = feeds[num]
            for j, a in enumerate(f):
                if getattr(a, 'downloaded', False):
                    adir = 'feed_%d/article_%d/'%(num, j)
                    auth = a.author
                    if not auth:
                        auth = None
                    desc = a.text_summary
                    if not desc:
                        desc = None
                    else:
                        desc = self.description_limiter(desc)
                    tt = a.toc_thumbnail if a.toc_thumbnail else None
                    entries.append('%sindex.html'%adir)
                    po = self.play_order_map.get(entries[-1], None)
                    if po is None:
                        self.play_order_counter += 1
                        po = self.play_order_counter
                    parent.add_item('%sindex.html'%adir, None,
                            a.title if a.title else _('Untitled Article'),
                            play_order=po, author=auth,
                            description=desc, toc_thumbnail=tt)
                    last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
                    for sp in a.sub_pages:
                        prefix = os.path.commonprefix([opf_path, sp])
                        relp = sp[len(prefix):]
                        entries.append(relp.replace(os.sep, '/'))
                        last = sp
                    if os.path.exists(last):
                        with open(last, 'rb') as fi:
                            src = fi.read().decode('utf-8')
                        soup = BeautifulSoup(src)
                        body = soup.find('body')
                        if body is not None:
                            prefix = '/'.join('..' for i in range(2*len(re.findall(r'link\d+', last))))
                            templ = self.navbar.generate(True, num, j, len(f),
                                            not self.has_single_feed,
                                            a.orig_url, __appname__, prefix=prefix,
                                            center=self.center_navbar)
                            elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
                            body.insert(len(body.contents), elem)
                            with open(last, 'wb') as fi:
                                fi.write(unicode(soup).encode('utf-8'))

        if len(feeds) == 0:
            raise Exception('All feeds are empty, aborting.')

        if len(feeds) > 1:
            for i, f in enumerate(feeds):
                entries.append('feed_%d/index.html'%i)
                po = self.play_order_map.get(entries[-1], None)
                if po is None:
                    self.play_order_counter += 1
                    po = self.play_order_counter
                auth = getattr(f, 'author', None)
                if not auth:
                    auth = None
                desc = getattr(f, 'description', None)
                if not desc:
                    desc = None
                feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
                    f.title, play_order=po, description=desc, author=auth))
        else:
            entries.append('feed_%d/index.html'%0)
            feed_index(0, toc)

        for i, p in enumerate(entries):
            entries[i] = os.path.join(dir, p.replace('/', os.sep))
        opf.create_spine(entries)
        opf.set_toc(toc)

        with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
            opf.render(opf_file, ncx_file)

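The date helpers in this recipe implement the "edition available from 6 am HKT" rule by shifting UTC forward eight hours and then back six, so a build before 6 am Hong Kong time still fetches the previous day's paper. The same arithmetic in isolation, standard library only:

import datetime

def effective_issue_date(utc_now=None):
    # today's paper counts as available only from 6 am Hong Kong time (UTC+8);
    # before that, this returns yesterday's date
    if utc_now is None:
        utc_now = datetime.datetime.utcnow()
    hkt = utc_now + datetime.timedelta(hours=8)
    return (hkt - datetime.timedelta(hours=6)).strftime('%Y%m%d')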
recipes/apple_daily.recipe

@@ -1,161 +1,275 @@
-# -*- coding: utf-8 -*-
-import re
-from calibre.web.feeds.recipes import BasicNewsRecipe
-
-class AppleDaily(BasicNewsRecipe):
-
-    title = u'蘋果日報'
-    __author__ = u'蘋果日報'
-    __publisher__ = u'蘋果日報'
-    description = u'蘋果日報'
-    masthead_url = 'http://hk.apple.nextmedia.com/template/common/header/2009/images/atnextheader_logo_appledaily.gif'
-    language = 'zh_TW'
-    encoding = 'UTF-8'
-    timefmt = ' [%a, %d %b, %Y]'
-    needs_subscription = False
-    remove_javascript = True
-    remove_tags_before = dict(name=['ul', 'h1'])
-    remove_tags_after = dict(name='form')
-    remove_tags = [dict(attrs={'class':['articleTools', 'post-tools', 'side_tool', 'nextArticleLink clearfix']}),
-                dict(id=['footer', 'toolsRight', 'articleInline', 'navigation', 'archive', 'side_search', 'blog_sidebar', 'side_tool', 'side_index']),
-                dict(name=['script', 'noscript', 'style', 'form'])]
-    no_stylesheets = True
-    extra_css = '''
-        @font-face {font-family: "uming", serif, sans-serif; src: url(res:///usr/share/fonts/truetype/arphic/uming.ttc); }\n
-        body {margin-right: 8pt; font-family: 'uming', serif;}
-        h1 {font-family: 'uming', serif, sans-serif}
-        '''
-    #extra_css = 'h1 {font: sans-serif large;}\n.byline {font:monospace;}'
-
-    preprocess_regexps = [
-        (re.compile(r'img.php?server=(?P<server>[^&]+)&path=(?P<path>[^&]+).*', re.DOTALL|re.IGNORECASE),
-        lambda match: 'http://' + match.group('server') + '/' + match.group('path')),
-    ]
-
-    def get_cover_url(self):
-        return 'http://hk.apple.nextmedia.com/template/common/header/2009/images/atnextheader_logo_appledaily.gif'
-
-    #def get_browser(self):
-        #br = BasicNewsRecipe.get_browser(self)
-        #if self.username is not None and self.password is not None:
-        #    br.open('http://www.nytimes.com/auth/login')
-        #    br.select_form(name='login')
-        #    br['USERID'] = self.username
-        #    br['PASSWORD'] = self.password
-        #    br.submit()
-        #return br
-
-    def preprocess_html(self, soup):
-        #process all the images
-        for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
-            iurl = tag['src']
-            #print 'checking image: ' + iurl
-            #img\.php?server\=(?P<server>[^&]+)&path=(?P<path>[^&]+)
-            p = re.compile(r'img\.php\?server=(?P<server>[^&]+)&path=(?P<path>[^&]+)', re.DOTALL|re.IGNORECASE)
-            m = p.search(iurl)
-            if m is not None:
-                iurl = 'http://' + m.group('server') + '/' + m.group('path')
-                #print 'working! new url: ' + iurl
-                tag['src'] = iurl
-            #else:
-                #print 'not good'
-        for tag in soup.findAll(lambda tag: tag.name.lower()=='a' and tag.has_key('href')):
-            iurl = tag['href']
-            #print 'checking image: ' + iurl
-            #img\.php?server\=(?P<server>[^&]+)&path=(?P<path>[^&]+)
-            p = re.compile(r'img\.php\?server=(?P<server>[^&]+)&path=(?P<path>[^&]+)', re.DOTALL|re.IGNORECASE)
-            m = p.search(iurl)
-            if m is not None:
-                iurl = 'http://' + m.group('server') + '/' + m.group('path')
-                #print 'working! new url: ' + iurl
-                tag['href'] = iurl
-            #else:
-                #print 'not good'
-        return soup
-
-    def parse_index(self):
-        base = 'http://news.hotpot.hk/fruit'
-        soup = self.index_to_soup('http://news.hotpot.hk/fruit/index.php')
-
-        #def feed_title(div):
-        #    return ''.join(div.findAll(text=True, recursive=False)).strip()
-
-        articles = {}
-        key = None
-        ans = []
-        for div in soup.findAll('li'):
-            key = div.find(text=True, recursive=True);
-            #if key == u'豪情':
-            #    continue;
-
-            print 'section=' + key
-
-            articles[key] = []
-
-            ans.append(key)
-
-            a = div.find('a', href=True)
-
-            if not a:
-                continue
-
-            url = base + '/' + a['href']
-            print 'url=' + url
-
-            if not articles.has_key(key):
-                articles[key] = []
-            else:
-                # sub page
-                subSoup = self.index_to_soup(url)
-
-                for subDiv in subSoup.findAll('li'):
-                    subA = subDiv.find('a', href=True)
-                    subTitle = subDiv.find(text=True, recursive=True)
-                    subUrl = base + '/' + subA['href']
-
-                    print 'subUrl' + subUrl
-
-                    articles[key].append(
-                        dict(title=subTitle,
-                            url=subUrl,
-                            date='',
-                            description='',
-                            content=''))
-
-        # elif div['class'] in ['story', 'story headline']:
-        #     a = div.find('a', href=True)
-        #     if not a:
-        #         continue
-        #     url = re.sub(r'\?.*', '', a['href'])
-        #     url += '?pagewanted=all'
-        #     title = self.tag_to_string(a, use_alt=True).strip()
-        #     description = ''
-        #     pubdate = strftime('%a, %d %b')
-        #     summary = div.find(True, attrs={'class':'summary'})
-        #     if summary:
-        #         description = self.tag_to_string(summary, use_alt=False)
-        #
-        #     feed = key if key is not None else 'Uncategorized'
-        #     if not articles.has_key(feed):
-        #         articles[feed] = []
-        #     if not 'podcasts' in url:
-        #         articles[feed].append(
-        #             dict(title=title, url=url, date=pubdate,
-        #                 description=description,
-        #                 content=''))
-        # ans = self.sort_index_by(ans, {'The Front Page':-1, 'Dining In, Dining Out':1, 'Obituaries':2})
-        ans = [(unicode(key), articles[key]) for key in ans if articles.has_key(key)]
-        return ans
+# vim:fileencoding=UTF-8
+from __future__ import unicode_literals
+__license__ = 'GPL v3'
+__copyright__ = '2013, Eddie Lau'
+__Date__ = ''
+
+from calibre import (__appname__, force_unicode, strftime)
+from calibre.utils.date import now as nowf
+import os, datetime, re
+from calibre.web.feeds.recipes import BasicNewsRecipe
+from contextlib import nested
+from calibre.ebooks.BeautifulSoup import BeautifulSoup
+from calibre.ebooks.metadata.opf2 import OPFCreator
+from calibre.ebooks.metadata.toc import TOC
+from calibre.ebooks.metadata import MetaInformation
+from calibre.utils.localization import canonicalize_lang
+
+class AppleDaily(BasicNewsRecipe):
+    title = u'蘋果日報 (香港)'
+    __author__ = 'Eddie Lau'
+    publisher = '蘋果日報'
+    oldest_article = 1
+    max_articles_per_feed = 100
+    auto_cleanup = False
+    language = 'zh'
+    encoding = 'utf-8'
+    auto_cleanup = False
+    remove_javascript = True
+    use_embedded_content = False
+    no_stylesheets = True
+    description = 'http://hkm.appledaily.com/'
+    category = 'Chinese, News, Hong Kong'
+    masthead_url = 'http://upload.wikimedia.org/wikipedia/zh/c/cf/AppleDailyLogo1.png'
+    extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} h1 {font-size:200%; text-align:left; font-weight:bold;} p[class=video-caption] {font-size:50%; margin-left:auto; margin-right:auto;}'
+    keep_only_tags = [dict(name='div', attrs={'id':'content-article'})]
+    remove_tags = [dict(name='div', attrs={'class':'prev-next-btn'}),
+                   dict(name='p', attrs={'class':'next'})]
+
+    def get_dtlocal(self):
+        dt_utc = datetime.datetime.utcnow()
+        # convert UTC to local hk time - at HKT 6am, all news are available
+        return dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(6.0/24)
+
+    def get_fetchdate(self):
+        if __Date__ <> '':
+            return __Date__
+        else:
+            return self.get_dtlocal().strftime("%Y%m%d")
+
+    def get_fetchformatteddate(self):
+        if __Date__ <> '':
+            return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8]
+        else:
+            return self.get_dtlocal().strftime("%Y-%m-%d")
+
+    def get_fetchyear(self):
+        if __Date__ <> '':
+            return __Date__[0:4]
+        else:
+            return self.get_dtlocal().strftime("%Y")
+
+    def get_fetchmonth(self):
+        if __Date__ <> '':
+            return __Date__[4:6]
+        else:
+            return self.get_dtlocal().strftime("%m")
+
+    def get_fetchday(self):
+        if __Date__ <> '':
+            return __Date__[6:8]
+        else:
+            return self.get_dtlocal().strftime("%d")
+
+    # Note: does not work with custom date given by __Date__
+    def get_weekday(self):
+        return self.get_dtlocal().weekday()
+
+    def get_cover_url(self):
+        soup = self.index_to_soup('http://hkm.appledaily.com/')
+        cover = soup.find(attrs={'class':'top-news'}).get('src', False)
+        br = BasicNewsRecipe.get_browser(self)
+        try:
+            br.open(cover)
+        except:
+            cover = None
+        return cover
+
+    def populate_article_metadata(self, article, soup, first):
+        if first and hasattr(self, 'add_toc_thumbnail'):
+            picdiv = soup.find('img')
+            if picdiv is not None:
+                self.add_toc_thumbnail(article, picdiv['src'])
+
+    def parse_index(self):
+        feeds = []
+        soup = self.index_to_soup('http://hkm.appledaily.com/')
+        ul = soup.find(attrs={'class':'menu'})
+        sectionList = []
+        for li in ul.findAll('li'):
+            a = 'http://hkm.appledaily.com/' + li.find('a', href=True).get('href', False)
+            title = li.find('a', text=True).strip()
+            if not title == u'動新聞':
+                sectionList.append((title, a))
+        for title, url in sectionList:
+            articles = self.parse_section(url)
+            if articles:
+                feeds.append((title, articles))
+        return feeds
+
+    def parse_section(self, url):
+        soup = self.index_to_soup(url)
+        ul = soup.find(attrs={'class':'list'})
+        current_articles = []
+        for li in ul.findAll('li'):
+            a = li.find('a', href=True)
+            title = li.find('p', text=True).strip()
+            if a is not None:
+                current_articles.append({'title': title, 'url': 'http://hkm.appledaily.com/' + a.get('href', False)})
+            pass
+        return current_articles
+
+    def create_opf(self, feeds, dir=None):
+        if dir is None:
+            dir = self.output_dir
+        title = self.short_title()
+        if self.output_profile.periodical_date_in_title:
+            title += strftime(self.timefmt)
+        mi = MetaInformation(title, [__appname__])
+        mi.publisher = __appname__
+        mi.author_sort = __appname__
+        if self.publication_type:
+            mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
+        mi.timestamp = nowf()
+        article_titles, aseen = [], set()
+        for f in feeds:
+            for a in f:
+                if a.title and a.title not in aseen:
+                    aseen.add(a.title)
+                    article_titles.append(force_unicode(a.title, 'utf-8'))
+        mi.comments = self.description
+        if not isinstance(mi.comments, unicode):
+            mi.comments = mi.comments.decode('utf-8', 'replace')
+        mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' +
+                '\n\n'.join(article_titles))
+        language = canonicalize_lang(self.language)
+        if language is not None:
+            mi.language = language
+        # This one affects the pub date shown in kindle title
+        #mi.pubdate = nowf()
+        # now appears to need the time field to be > 12.00noon as well
+        mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
+        opf_path = os.path.join(dir, 'index.opf')
+        ncx_path = os.path.join(dir, 'index.ncx')
+
+        opf = OPFCreator(dir, mi)
+        # Add mastheadImage entry to <guide> section
+        mp = getattr(self, 'masthead_path', None)
+        if mp is not None and os.access(mp, os.R_OK):
+            from calibre.ebooks.metadata.opf2 import Guide
+            ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
+            ref.type = 'masthead'
+            ref.title = 'Masthead Image'
+            opf.guide.append(ref)
+
+        manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
+        manifest.append(os.path.join(dir, 'index.html'))
+        manifest.append(os.path.join(dir, 'index.ncx'))
+
+        # Get cover
+        cpath = getattr(self, 'cover_path', None)
+        if cpath is None:
+            pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
+            if self.default_cover(pf):
+                cpath = pf.name
+        if cpath is not None and os.access(cpath, os.R_OK):
+            opf.cover = cpath
+            manifest.append(cpath)
+
+        # Get masthead
+        mpath = getattr(self, 'masthead_path', None)
+        if mpath is not None and os.access(mpath, os.R_OK):
+            manifest.append(mpath)
+
+        opf.create_manifest_from_files_in(manifest)
+        for mani in opf.manifest:
+            if mani.path.endswith('.ncx'):
+                mani.id = 'ncx'
+            if mani.path.endswith('mastheadImage.jpg'):
+                mani.id = 'masthead-image'
+
+        entries = ['index.html']
+        toc = TOC(base_path=dir)
+        self.play_order_counter = 0
+        self.play_order_map = {}
+
+        def feed_index(num, parent):
+            f = feeds[num]
+            for j, a in enumerate(f):
+                if getattr(a, 'downloaded', False):
+                    adir = 'feed_%d/article_%d/'%(num, j)
+                    auth = a.author
+                    if not auth:
+                        auth = None
+                    desc = a.text_summary
+                    if not desc:
+                        desc = None
+                    else:
+                        desc = self.description_limiter(desc)
+                    tt = a.toc_thumbnail if a.toc_thumbnail else None
+                    entries.append('%sindex.html'%adir)
+                    po = self.play_order_map.get(entries[-1], None)
+                    if po is None:
+                        self.play_order_counter += 1
+                        po = self.play_order_counter
+                    parent.add_item('%sindex.html'%adir, None,
+                            a.title if a.title else _('Untitled Article'),
+                            play_order=po, author=auth,
+                            description=desc, toc_thumbnail=tt)
+                    last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
+                    for sp in a.sub_pages:
+                        prefix = os.path.commonprefix([opf_path, sp])
+                        relp = sp[len(prefix):]
+                        entries.append(relp.replace(os.sep, '/'))
+                        last = sp
+                    if os.path.exists(last):
+                        with open(last, 'rb') as fi:
+                            src = fi.read().decode('utf-8')
+                        soup = BeautifulSoup(src)
+                        body = soup.find('body')
+                        if body is not None:
+                            prefix = '/'.join('..' for i in range(2*len(re.findall(r'link\d+', last))))
+                            templ = self.navbar.generate(True, num, j, len(f),
+                                            not self.has_single_feed,
+                                            a.orig_url, __appname__, prefix=prefix,
+                                            center=self.center_navbar)
+                            elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
+                            body.insert(len(body.contents), elem)
+                            with open(last, 'wb') as fi:
+                                fi.write(unicode(soup).encode('utf-8'))
+
+        if len(feeds) == 0:
+            raise Exception('All feeds are empty, aborting.')
+
+        if len(feeds) > 1:
+            for i, f in enumerate(feeds):
+                entries.append('feed_%d/index.html'%i)
+                po = self.play_order_map.get(entries[-1], None)
+                if po is None:
+                    self.play_order_counter += 1
+                    po = self.play_order_counter
+                auth = getattr(f, 'author', None)
+                if not auth:
+                    auth = None
+                desc = getattr(f, 'description', None)
+                if not desc:
+                    desc = None
+                feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
+                    f.title, play_order=po, description=desc, author=auth))
+        else:
+            entries.append('feed_%d/index.html'%0)
+            feed_index(0, toc)
+
+        for i, p in enumerate(entries):
+            entries[i] = os.path.join(dir, p.replace('/', os.sep))
+        opf.create_spine(entries)
+        opf.set_toc(toc)
+
+        with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
+            opf.render(opf_file, ncx_file)

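Both new Eddie Lau recipes validate the scraped cover URL by opening it with the recipe browser and falling back to None on failure, which lets calibre generate a default cover instead of aborting the download. The pattern in isolation (mechanize here stands in for BasicNewsRecipe.get_browser, which wraps it):

import mechanize

def probe_cover(url):
    # return the URL only if it is actually fetchable; None tells calibre
    # to fall back to its default generated cover
    br = mechanize.Browser()
    try:
        br.open(url)
    except Exception:
        return None
    return url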
@@ -37,68 +37,15 @@ class BusinessWeek(BasicNewsRecipe):
                         , 'language'        : language
         }

-    #remove_tags = [
-    #    dict(attrs={'class':'inStory'})
-    #    ,dict(name=['meta','link','iframe','base','embed','object','table','th','tr','td'])
-    #    ,dict(attrs={'id':['inset','videoDisplay']})
-    #]
-    #keep_only_tags = [dict(name='div', attrs={'id':['story-body','storyBody']})]
-    remove_attributes = ['lang']
-    match_regexps = [r'http://www.businessweek.com/.*_page_[1-9].*']
-
     feeds = [
-        (u'Top Stories', u'http://www.businessweek.com/topStories/rss/topStories.rss'),
-        (u'Top News', u'http://www.businessweek.com/rss/bwdaily.rss'),
-        (u'Asia', u'http://www.businessweek.com/rss/asia.rss'),
-        (u'Autos', u'http://www.businessweek.com/rss/autos/index.rss'),
-        (u'Classic Cars', u'http://rss.businessweek.com/bw_rss/classiccars'),
-        (u'Hybrids', u'http://rss.businessweek.com/bw_rss/hybrids'),
-        (u'Europe', u'http://www.businessweek.com/rss/europe.rss'),
-        (u'Auto Reviews', u'http://rss.businessweek.com/bw_rss/autoreviews'),
-        (u'Innovation & Design', u'http://www.businessweek.com/rss/innovate.rss'),
-        (u'Architecture', u'http://www.businessweek.com/rss/architecture.rss'),
-        (u'Brand Equity', u'http://www.businessweek.com/rss/brandequity.rss'),
-        (u'Auto Design', u'http://www.businessweek.com/rss/carbuff.rss'),
-        (u'Game Room', u'http://rss.businessweek.com/bw_rss/gameroom'),
-        (u'Technology', u'http://www.businessweek.com/rss/technology.rss'),
-        (u'Investing', u'http://rss.businessweek.com/bw_rss/investor'),
-        (u'Small Business', u'http://www.businessweek.com/rss/smallbiz.rss'),
-        (u'Careers', u'http://rss.businessweek.com/bw_rss/careers'),
-        (u'B-Schools', u'http://www.businessweek.com/rss/bschools.rss'),
-        (u'Magazine Selections', u'http://www.businessweek.com/rss/magazine.rss'),
-        (u'CEO Guide to Tech', u'http://www.businessweek.com/rss/ceo_guide_tech.rss'),
+        (u'Top Stories', u'http://www.businessweek.com/feeds/most-popular.rss'),
             ]

-    def get_article_url(self, article):
-        url = article.get('guid', None)
-        if 'podcasts' in url:
-            return None
-        if 'surveys' in url:
-            return None
-        if 'images' in url:
-            return None
-        if 'feedroom' in url:
-            return None
-        if '/magazine/toc/' in url:
-            return None
-        rurl, sep, rest = url.rpartition('?')
-        if rurl:
-            return rurl
-        return rest
-
     def print_version(self, url):
-        if '/news/' in url or '/blog/ in url':
-            return url
-        rurl = url.replace('http://www.businessweek.com/','http://www.businessweek.com/print/')
-        return rurl.replace('/investing/','/investor/')
-
-    def preprocess_html(self, soup):
-        for item in soup.findAll(style=True):
-            del item['style']
-        for alink in soup.findAll('a'):
-            if alink.string is not None:
-                tstr = alink.string
-                alink.replaceWith(tstr)
-        return soup
+        soup = self.index_to_soup(url)
+        prntver = soup.find('li', attrs={'class':'print tracked'})
+        rurl = prntver.find('a', href=True)['href']
+        return rurl

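The new print_version above no longer guesses the print URL by string substitution; it fetches the article page and follows the anchor inside the li with class 'print tracked'. Roughly the same logic outside a recipe (index_to_soup normally does the fetch and parse; the fallback to the original URL is an addition here for safety):

import urllib2
from calibre.ebooks.BeautifulSoup import BeautifulSoup

def find_print_url(article_url):
    soup = BeautifulSoup(urllib2.urlopen(article_url).read())
    prntver = soup.find('li', attrs={'class': 'print tracked'})
    if prntver is None:
        return article_url  # no print link on the page
    return prntver.find('a', href=True)['href']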
@@ -0,0 +1,27 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__author__ = 'teepel <teepel44@gmail.com>'

'''
dzialzagraniczny.pl
'''

from calibre.web.feeds.news import BasicNewsRecipe

class dzial_zagraniczny(BasicNewsRecipe):
    title = u'Dział Zagraniczny'
    __author__ = 'teepel <teepel44@gmail.com>'
    language = 'pl'
    description = u'Polskiego czytelnika to nie interesuje'
    INDEX = 'http://dzialzagraniczny.pl'
    extra_css = 'img {display: block;}'
    oldest_article = 7
    cover_url = 'https://fbcdn-profile-a.akamaihd.net/hprofile-ak-prn1/c145.5.160.160/559442_415653975115959_2126205128_n.jpg'
    max_articles_per_feed = 100
    remove_empty_feeds = True
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = True

    feeds = [(u'Dział zagraniczny', u'http://feeds.feedburner.com/dyndns/UOfz')]

@@ -26,7 +26,7 @@ class ElDiplo_Recipe(BasicNewsRecipe):
     title = u'El Diplo'
     __author__ = 'Tomas Di Domenico'
     description = 'Publicacion mensual de Le Monde Diplomatique, edicion Argentina'
-    langauge = 'es_AR'
+    language = 'es_AR'
     needs_subscription = True
     auto_cleanup = True

recipes/equipped.recipe (new file)

@@ -0,0 +1,29 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__author__ = 'teepel <teepel44@gmail.com>, Artur Stachecki <artur.stachecki@gmail.com>'

'''
equipped.pl
'''

from calibre.web.feeds.news import BasicNewsRecipe

class equipped(BasicNewsRecipe):
    title = u'Equipped'
    __author__ = 'teepel <teepel44@gmail.com>'
    language = 'pl'
    description = u'Wiadomości z equipped.pl'
    INDEX = 'http://equipped.pl'
    extra_css = '.alignleft {float:left; margin-right:5px;}'
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True
    simultaneous_downloads = 5
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False

    #keep_only_tags = [dict(name='article')]
    #remove_tags = [dict(id='disqus_thread')]
    #remove_tags_after = [dict(id='disqus_thread')]

    feeds = [(u'Equipped', u'http://feeds.feedburner.com/Equippedpl?format=xml')]

@@ -1,12 +1,12 @@
 #!/usr/bin/env python
 __license__ = 'GPL v3'

 import re
 from calibre.web.feeds.news import BasicNewsRecipe

 class FocusRecipe(BasicNewsRecipe):
-    __license__ = 'GPL v3'
-    __author__ = u'intromatyk <intromatyk@gmail.com>'
+    __author__ = u'Artur Stachecki <artur.stachecki@gmail.com>'
     language = 'pl'
     version = 1

@@ -14,13 +14,14 @@ class gazetaprawna(BasicNewsRecipe):
     title = u'Gazeta Prawna'
     __author__ = u'Vroo'
     publisher = u'Infor Biznes'
-    oldest_article = 7
+    oldest_article = 1
     max_articles_per_feed = 20
     no_stylesheets = True
     remove_javascript = True
     description = 'Polski dziennik gospodarczy'
     language = 'pl'
     encoding = 'utf-8'
+    ignore_duplicate_articles = {'title', 'url'}

     remove_tags_after = [
         dict(name='div', attrs={'class':['data-art']})

@@ -30,7 +31,7 @@ class gazetaprawna(BasicNewsRecipe):
     ]

     feeds = [
-        (u'Wiadomo\u015bci - najwa\u017cniejsze', u'http://www.gazetaprawna.pl/wiadomosci/najwazniejsze/rss.xml'),
+        (u'Z ostatniej chwili', u'http://rss.gazetaprawna.pl/GazetaPrawna'),
         (u'Biznes i prawo gospodarcze', u'http://biznes.gazetaprawna.pl/rss.xml'),
         (u'Prawo i wymiar sprawiedliwo\u015bci', u'http://prawo.gazetaprawna.pl/rss.xml'),
         (u'Praca i ubezpieczenia', u'http://praca.gazetaprawna.pl/rss.xml'),

@@ -51,3 +52,8 @@ class gazetaprawna(BasicNewsRecipe):
         url = url.replace('prawo.gazetaprawna', 'www.gazetaprawna')
         url = url.replace('praca.gazetaprawna', 'www.gazetaprawna')
         return url
+
+    def get_cover_url(self):
+        soup = self.index_to_soup('http://www.egazety.pl/infor/e-wydanie-dziennik-gazeta-prawna.html')
+        self.cover_url = soup.find('p', attrs={'class':'covr'}).a['href']
+        return getattr(self, 'cover_url', self.cover_url)

New binary recipe icons added in this commit:

recipes/icons/equipped.png (929 B)
recipes/icons/media2.png (660 B)
recipes/icons/mobilna.png (885 B)
recipes/icons/osw.png (489 B)
recipes/icons/ppe_pl.png (3.1 KiB)

Ten further icon files are not named in this view (491 B, 612 B, 731 B, 982 B, 307 B, 616 B, 1.2 KiB, 207 B, 733 B, 497 B).
recipes/ittechblog.recipe (new file)

@@ -0,0 +1,26 @@
__license__ = 'GPL v3'
__copyright__ = 'MrStefan'

'''
www.ittechblog.pl
'''

from calibre.web.feeds.news import BasicNewsRecipe

class ittechblog(BasicNewsRecipe):
    title = u'IT techblog'
    __author__ = 'MrStefan <mrstefaan@gmail.com>'
    language = 'pl'
    description = u'Na naszym blogu technologicznym znajdziesz między innymi: testy sprzętu, najnowsze startupy, technologiczne nowinki, felietony tematyczne.'
    extra_css = '.cover > img {display:block;}'
    remove_empty_feeds = True
    oldest_article = 7
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False

    keep_only_tags = [dict(attrs={'class':'box'})]
    remove_tags = [dict(name='aside'), dict(attrs={'class':['tags', 'counter', 'twitter-share-button']})]

    feeds = [(u'Artykuły', u'http://feeds.feedburner.com/ITTechBlog?format=xml')]

@@ -2,8 +2,7 @@
 from calibre.web.feeds.news import BasicNewsRecipe

 class KrytykaPolitycznaRecipe(BasicNewsRecipe):
-    __license__ = 'GPL v3'
-    __author__ = u'intromatyk <intromatyk@gmail.com>'
+    __author__ = u'Artur Stachecki <artur.stachecki@gmail.com>'
     language = 'pl'
     version = 1

@@ -1,33 +1,23 @@
-# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+# vim:fileencoding=UTF-8
+from __future__ import unicode_literals
 from calibre.web.feeds.news import BasicNewsRecipe

 class AListApart (BasicNewsRecipe):
-    __author__ = u'Marc Busqué <marc@lamarciana.com>'
+    __author__ = 'Marc Busqué <marc@lamarciana.com>'
     __url__ = 'http://www.lamarciana.com'
-    __version__ = '1.0'
+    __version__ = '2.0'
     __license__ = 'GPL v3'
-    __copyright__ = u'2012, Marc Busqué <marc@lamarciana.com>'
+    __copyright__ = '2012, Marc Busqué <marc@lamarciana.com>'
     title = u'A List Apart'
-    description = u'A List Apart Magazine (ISSN: 1534-0295) explores the design, development, and meaning of web content, with a special focus on web standards and best practices.'
+    description = u'A List Apart Magazine (ISSN: 1534-0295) explores the design, development, and meaning of web content, with a special focus on web standards and best practices. This recipe retrieve articles and columns.'
     language = 'en'
     tags = 'web development, software'
     oldest_article = 120
     remove_empty_feeds = True
-    no_stylesheets = True
     encoding = 'utf8'
     cover_url = u'http://alistapart.com/pix/alalogo.gif'
-    keep_only_tags = [
-        dict(name='div', attrs={'id': 'content'})
-    ]
-    remove_tags = [
-        dict(name='ul', attrs={'id': 'metastuff'}),
-        dict(name='div', attrs={'class': 'discuss'}),
-        dict(name='div', attrs={'class': 'discuss'}),
-        dict(name='div', attrs={'id': 'learnmore'}),
-    ]
-    remove_attributes = ['border', 'cellspacing', 'align', 'cellpadding', 'colspan', 'valign', 'vspace', 'hspace', 'alt', 'width', 'height']
-    extra_css = u'img {max-width: 100%; display: block; margin: auto;} #authorbio img {float: left; margin-right: 2%;}'
+    extra_css = u'img {max-width: 100%; display: block; margin: auto;}'

     feeds = [
-        (u'A List Apart', u'http://www.alistapart.com/site/rss'),
+        (u'A List Apart', u'http://feeds.feedburner.com/alistapart/abridged'),
     ]

@@ -0,0 +1,88 @@
#!/usr/bin/env python

__license__ = 'GPL v3'

'''
magazynconsido.pl/
'''

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.utils.magick import Image

class magazynconsido(BasicNewsRecipe):
    title = u'Magazyn Consido'
    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>, teepel <teepel44@gmail.com>'
    language = 'pl'
    description = u'Portal dla architektów i projektantów'
    masthead_url = 'http://qualitypixels.pl/wp-content/themes/airlock/advance/inc/timthumb.php?src=http://qualitypixels.pl/wp-content/uploads/2012/01/logotyp-magazynconsido-11.png&w=455&zc=1'
    oldest_article = 7
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False

    keep_only_tags = []
    keep_only_tags.append(dict(name='h1'))
    keep_only_tags.append(dict(name='p'))
    keep_only_tags.append(dict(attrs={'class': 'navigation'}))
    remove_tags = [dict(attrs={'style': 'font-size: x-small;'})]
    remove_tags_after = [dict(attrs={'class': 'navigation'})]

    extra_css = '''img {max-width:30%; max-height:30%; display: block; margin-left: auto; margin-right: auto;}
                   h1 {text-align: center;}'''

    def parse_index(self):  # (kk)
        soup = self.index_to_soup('http://feeds.feedburner.com/magazynconsido?format=xml')
        feeds = []
        articles = {}
        sections = []
        section = ''

        for item in soup.findAll('item'):
            section = self.tag_to_string(item.category)
            if not articles.has_key(section):
                sections.append(section)
                articles[section] = []
            article_url = self.tag_to_string(item.guid)
            article_title = self.tag_to_string(item.title)
            article_date = self.tag_to_string(item.pubDate)
            article_description = self.tag_to_string(item.description)
            articles[section].append({'title': article_title, 'url': article_url, 'date': article_date, 'description': article_description})

        for section in sections:
            if section == 'Video':
                feeds.append((section, articles[section]))
                feeds.pop()
            else:
                feeds.append((section, articles[section]))
        return feeds

    def append_page(self, soup, appendtag):
        apage = soup.find('div', attrs={'class':'wp-pagenavi'})
        if apage is not None:
            nexturl = soup.find('a', attrs={'class':'nextpostslink'})
            soup2 = self.index_to_soup(nexturl['href'])
            pagetext = soup2.findAll('p')
            for tag in pagetext:
                pos = len(appendtag.contents)
                appendtag.insert(pos, tag)
        while appendtag.find('div', attrs={'class': ['height: 35px;', 'post-meta', 'addthis_toolbox addthis_default_style addthis_', 'post-meta-bottom', 'block_recently_post', 'fbcomments', 'pin-it-button', 'pages', 'navigation']}) is not None:
            appendtag.find('div', attrs={'class': ['height: 35px;', 'post-meta', 'addthis_toolbox addthis_default_style addthis_', 'post-meta-bottom', 'block_recently_post', 'fbcomments', 'pin-it-button', 'pages', 'navigation']}).replaceWith('')

    def preprocess_html(self, soup):  # (kk)
        self.append_page(soup, soup.body)
        return self.adeify_images(soup)

    def postprocess_html(self, soup, first):
        #process all the images
        for tag in soup.findAll(lambda tag: tag.name.lower() == 'img' and tag.has_key('src')):
            iurl = tag['src']
            img = Image()
            img.open(iurl)
            if img < 0:
                raise RuntimeError('Out of memory')
            img.type = "GrayscaleType"
            img.save(iurl)
        return soup

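append_page above stitches multi-page WordPress articles together: when the page carries a wp-pagenavi block, it fetches the 'nextpostslink' target and grafts that page's <p> tags onto the article body. The core of the pattern in isolation (single hop, as in the recipe; soup objects are BeautifulSoup 3, as returned by index_to_soup):

def append_next_page(recipe, soup):
    # follow the wp-pagenavi "next" link once and append its paragraphs
    if soup.find('div', attrs={'class': 'wp-pagenavi'}) is not None:
        nexturl = soup.find('a', attrs={'class': 'nextpostslink'})
        if nexturl is not None:
            soup2 = recipe.index_to_soup(nexturl['href'])
            for tag in soup2.findAll('p'):
                soup.body.insert(len(soup.body.contents), tag)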
recipes/media2.recipe (new file)

@@ -0,0 +1,35 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = 'teepel'

'''
media2.pl
'''

from calibre.web.feeds.news import BasicNewsRecipe

class media2_pl(BasicNewsRecipe):
    title = u'Media2'
    __author__ = 'teepel <teepel44@gmail.com>'
    language = 'pl'
    description = u'Media2.pl to jeden z najczęściej odwiedzanych serwisów dla profesjonalistów z branży medialnej, telekomunikacyjnej, public relations oraz nowych technologii.'
    masthead_url = 'http://media2.pl/res/logo/www.png'
    remove_empty_feeds = True
    oldest_article = 1
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    simultaneous_downloads = 5

    extra_css = '''.news-lead {font-weight: bold;}'''

    keep_only_tags = []
    keep_only_tags.append(dict(name='div', attrs={'class': 'news-item tpl-big'}))

    remove_tags = []
    remove_tags.append(dict(name='span', attrs={'class': 'news-comments'}))
    remove_tags.append(dict(name='div', attrs={'class': 'item-sidebar'}))
    remove_tags.append(dict(name='div', attrs={'class': 'news-tags'}))

    feeds = [(u'Media2', u'http://feeds.feedburner.com/media2')]

recipes/mobilna.recipe (new file)

@@ -0,0 +1,26 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = 'MrStefan'

'''
www.mobilna.pl
'''

from calibre.web.feeds.news import BasicNewsRecipe

class mobilna(BasicNewsRecipe):
    title = u'Mobilna.pl'
    __author__ = 'MrStefan <mrstefaan@gmail.com>'
    language = 'pl'
    description = u'twoja mobilna strona'
    #masthead_url = ''
    remove_empty_feeds = True
    oldest_article = 7
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = True
    #keep_only_tags = [dict(attrs={'class':'Post'})]

    feeds = [(u'Artykuły', u'http://mobilna.pl/feed/')]

@@ -0,0 +1,50 @@
#!usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = 'MrStefan, teepel'

'''
www.mojegotowanie.pl
'''

from calibre.web.feeds.news import BasicNewsRecipe

class mojegotowanie(BasicNewsRecipe):
    title = u'Moje Gotowanie'
    __author__ = 'MrStefan <mrstefaan@gmail.com>, teepel <teepel44@gmail.com>'
    language = 'pl'
    description = u'Gotowanie to Twoja pasja? Uwielbiasz sałatki? Lubisz grillować? Przepisy kulinarne doskonałe na wszystkie okazje znajdziesz na www.mojegotowanie.pl.'
    masthead_url = 'http://www.mojegotowanie.pl/extension/selfstart/design/self/images/top_c2.gif'
    cover_url = 'http://www.mojegotowanie.pl/extension/selfstart/design/self/images/mgpl/mojegotowanie.gif'
    remove_empty_feeds = True
    oldest_article = 7
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True

    keep_only_tags = []
    keep_only_tags.append(dict(name='div', attrs={'class': 'content'}))

    feeds = [(u'Artykuły', u'http://mojegotowanie.pl/rss/feed/artykuly'),
             (u'Przepisy', u'http://mojegotowanie.pl/rss/feed/przepisy')]

    def parse_feeds(self):
        feeds = BasicNewsRecipe.parse_feeds(self)
        for feed in feeds:
            for article in feed.articles[:]:
                if 'film' in article.title:
                    feed.articles.remove(article)
        return feeds

    def get_article_url(self, article):
        link = article.get('link')
        if 'Clayout0Cset0Cprint0' in link:
            return link

    def print_version(self, url):
        segment = url.split('/')
        URLPart = segment[-2]
        URLPart = URLPart.replace('0L0Smojegotowanie0Bpl0Clayout0Cset0Cprint0C', '/')
        URLPart = URLPart.replace('0I', '_')
        URLPart = URLPart.replace('0C', '/')
        return 'http://www.mojegotowanie.pl/layout/set/print' + URLPart

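get_article_url and print_version above cooperate to turn FeedBurner-mangled links into the site's print layout: only links that contain the encoded print path are accepted, and the '0C'/'0I' escape pairs are decoded back into '/' and '_'. The decoding step in isolation (the input link is a hypothetical FeedBurner-encoded URL):

def decode_print_url(link):
    # link is a FeedBurner-encoded URL (hypothetical example), e.g. one whose
    # second-to-last path segment carries the escaped print-layout path
    segment = link.split('/')[-2]
    segment = segment.replace('0L0Smojegotowanie0Bpl0Clayout0Cset0Cprint0C', '/')
    segment = segment.replace('0I', '_')
    segment = segment.replace('0C', '/')
    return 'http://www.mojegotowanie.pl/layout/set/print' + segment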
@@ -0,0 +1,27 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__author__ = 'teepel <teepel44@gmail.com>'

'''
nczas.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class nczas(BasicNewsRecipe):
    title = u'Najwy\u017cszy Czas'
    __author__ = 'teepel <teepel44@gmail.com>'
    language = 'pl'
    description = 'Wiadomości z nczas.com'
    INDEX = 'http://nczas.com'
    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = True
    remove_empty_feeds = True
    simultaneous_downloads = 5
    remove_javascript = True
    remove_attributes = ['style']
    no_stylesheets = True

    feeds = [(u'Najwyższy Czas', u'http://nczas.com/feed/')]

@@ -0,0 +1,31 @@
#!/usr/bin/env python

__license__ = 'GPL v3'

from calibre.web.feeds.news import BasicNewsRecipe

class NowinyRybnik(BasicNewsRecipe):
    title = u'Nowiny - Rybnik'
    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
    language = 'pl'
    description = u'Tygodnik Regionalny NOWINY. Ogłoszenia drobne, wiadomości i wydarzenia z regionu Rybnika i okolic'
    oldest_article = 7
    masthead_url = 'http://www.nowiny.rybnik.pl/logo/logo.jpg'
    max_articles_per_feed = 100
    simultaneous_downloads = 5
    remove_javascript = True
    no_stylesheets = True

    keep_only_tags = [(dict(name='div', attrs={'id': 'drukuj'}))]

    remove_tags = []
    remove_tags.append(dict(name='div', attrs={'id': 'footer'}))

    feeds = [(u'Wszystkie artykuły', u'http://www.nowiny.rybnik.pl/rss,artykuly,dzial,0,miasto,0,ile,25.xml')]

    def preprocess_html(self, soup):
        for alink in soup.findAll('a'):
            if alink.string is not None:
                tstr = alink.string
                alink.replaceWith(tstr)
        return soup

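The preprocess_html above (the same helper also appears in the Res Publica Nowa recipe later in this commit) flattens hyperlinks: any <a> whose content is a plain string is replaced by that string, so e-ink output is not littered with underlined links. In isolation:

def strip_links(soup):
    # replace <a>text</a> with bare text (BeautifulSoup 3 API, as in calibre)
    for alink in soup.findAll('a'):
        if alink.string is not None:
            alink.replaceWith(alink.string)
    return soup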
recipes/osw.recipe (new file)

@@ -0,0 +1,41 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__author__ = 'teepel <teepel44@gmail.com>'

'''
http://www.osw.waw.pl - Osrodek studiow wschodnich
'''

from calibre.web.feeds.news import BasicNewsRecipe

class OSW_Recipe(BasicNewsRecipe):
    language = 'pl'
    title = u'Ośrodek Studiów Wschodnich'
    __author__ = 'teepel <teepel44@gmail.com>'
    INDEX = 'http://www.osw.waw.pl'
    description = u'Ośrodek Studiów Wschodnich im. Marka Karpia. Centre for Eastern Studies.'
    category = u'News'
    oldest_article = 7
    max_articles_per_feed = 100
    cover_url = ''
    remove_empty_feeds = True
    no_stylesheets = True
    remove_javascript = True
    simultaneous_downloads = 5

    keep_only_tags = []
    #this line should show title of the article, but it doesnt work
    keep_only_tags.append(dict(name='h1', attrs={'class': 'print-title'}))
    keep_only_tags.append(dict(name='div', attrs={'class': 'print-submitted'}))
    keep_only_tags.append(dict(name='div', attrs={'class': 'print-content'}))

    remove_tags = []
    remove_tags.append(dict(name='table', attrs={'id': 'attachments'}))
    remove_tags.append(dict(name='div', attrs={'class': 'print-submitted'}))

    feeds = [(u'OSW', u'http://www.osw.waw.pl/pl/rss.xml')]

    def print_version(self, url):
        return url.replace('http://www.osw.waw.pl/pl/', 'http://www.osw.waw.pl/pl/print/')

recipes/ppe_pl.recipe (new file)

@@ -0,0 +1,41 @@
#!/usr/bin/env python

__license__ = 'GPL v3'

from calibre.web.feeds.news import BasicNewsRecipe

class ppeRecipe(BasicNewsRecipe):
    __author__ = u'Artur Stachecki <artur.stachecki@gmail.com>'
    language = 'pl'

    title = u'ppe.pl'
    category = u'News'
    description = u'Portal o konsolach i grach wideo.'
    cover_url = ''
    remove_empty_feeds = True
    no_stylesheets = True
    oldest_article = 1
    max_articles_per_feed = 100000
    recursions = 0
    no_stylesheets = True
    remove_javascript = True
    simultaneous_downloads = 2

    keep_only_tags = []
    keep_only_tags.append(dict(name='div', attrs={'class': 'news-heading'}))
    keep_only_tags.append(dict(name='div', attrs={'class': 'tresc-poziom'}))

    remove_tags = []
    remove_tags.append(dict(name='div', attrs={'class': 'bateria1'}))
    remove_tags.append(dict(name='div', attrs={'class': 'bateria2'}))
    remove_tags.append(dict(name='div', attrs={'class': 'bateria3'}))
    remove_tags.append(dict(name='div', attrs={'class': 'news-photo'}))
    remove_tags.append(dict(name='div', attrs={'class': 'fbl'}))
    remove_tags.append(dict(name='div', attrs={'class': 'info'}))
    remove_tags.append(dict(name='div', attrs={'class': 'links'}))
    remove_tags.append(dict(name='div', attrs={'style': 'padding: 4px'}))

    feeds = [
        ('Newsy', 'feed://ppe.pl/rss/rss.xml'),
    ]

recipes/presseurop.recipe (new file)

@@ -0,0 +1,33 @@
#!/usr/bin/env python

'''
www.presseurop.eu/pl
'''

__license__ = 'GPL v3'
__author__ = 'teepel <teepel44@gmail.com>'

from calibre.web.feeds.news import BasicNewsRecipe
import re

class presseurop(BasicNewsRecipe):
    title = u'Presseurop'
    description = u'Najlepsze artykuły z prasy europejskiej'
    language = 'pl'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True

    feeds = [
        (u'Polityka', u'http://www.presseurop.eu/pl/taxonomy/term/1/%2A/feed'),
        (u'Społeczeństwo', u'http://www.presseurop.eu/pl/taxonomy/term/2/%2A/feed'),
        (u'Gospodarka', u'http://www.presseurop.eu/pl/taxonomy/term/3/%2A/feed'),
        (u'Kultura i debaty', u'http://www.presseurop.eu/pl/taxonomy/term/4/%2A/feed'),
        (u'UE i Świat', u'http://www.presseurop.eu/pl/taxonomy/term/5/%2A/feed')
    ]

    preprocess_regexps = [
        (re.compile(r'\|.*</title>', re.DOTALL | re.IGNORECASE),
         lambda match: '</title>'),
    ]

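The preprocess_regexps entry above truncates page titles at the first '|', dropping the site-name suffix before the closing </title> tag. The same substitution standalone (the sample title is a hypothetical example):

import re

pattern = re.compile(r'\|.*</title>', re.DOTALL | re.IGNORECASE)
html = '<title>Some headline | Presseurop</title>'
print pattern.sub('</title>', html)  # -> <title>Some headline </title>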
@@ -0,0 +1,35 @@
#!/usr/bin/env python

__license__ = 'GPL v3'

from calibre.web.feeds.news import BasicNewsRecipe

class ResPublicaNowaRecipe(BasicNewsRecipe):
    __license__ = 'GPL v3'
    __author__ = u'Artur Stachecki <artur.stachecki@gmail.com>'
    language = 'pl'
    version = 1

    title = u'Res Publica Nowa'
    category = u'News'
    description = u'Portal kulturalno-społecznego kwartalnika o profilu liberalnym, wydawany przez Fundację Res Publica'
    cover_url = ''
    remove_empty_feeds = True
    no_stylesheets = True
    oldest_article = 7
    max_articles_per_feed = 100000
    recursions = 0
    no_stylesheets = True
    remove_javascript = True
    simultaneous_downloads = 5

    feeds = [
        ('Artykuly', 'feed://publica.pl/feed'),
    ]

    def preprocess_html(self, soup):
        for alink in soup.findAll('a'):
            if alink.string is not None:
                tstr = alink.string
                alink.replaceWith(tstr)
        return soup

@@ -1,30 +1,30 @@
+# vim:fileencoding=UTF-8
+from __future__ import unicode_literals
 __license__ = 'GPL v3'
-__copyright__ = '2011, Eddie Lau'
+__copyright__ = '2011-2013, Eddie Lau'
 # data source: normal, mobile
 __Source__ = 'mobile'
 # please replace the following "True" with "False". (Default: True)
 __MakePeriodical__ = True
 # Turn below to True if your device supports display of CJK titles (Default: False)
-__UseChineseTitle__ = False
+__UseChineseTitle__ = True
 # Set it to False if you want to skip images (Default: True)
 __KeepImages__ = True
 # Set it to True if you want to include a summary in Kindle's article view (Default: False)
-__IncludeSummary__ = False
+__IncludeSummary__ = True
 # Set it to True if you want thumbnail images in Kindle's article view (Default: True)
 __IncludeThumbnails__ = True

 '''
 Change Log:
+2013/03/31 -- fix cover retrieval code and heading size, and remove &nbsp; in summary
 2011/12/29 -- first version done
-TODO:
-* use alternative source at http://m.singtao.com/index.php
 '''

 from calibre.utils.date import now as nowf
 import os, datetime, re
-from datetime import date
 from calibre.web.feeds.recipes import BasicNewsRecipe
 from contextlib import nested
 from calibre.ebooks.BeautifulSoup import BeautifulSoup

@@ -41,7 +41,7 @@ class STHKRecipe(BasicNewsRecipe):
     title = 'Sing Tao Daily - Hong Kong'
     description = 'Hong Kong Chinese Newspaper (http://singtao.com)'
     category = 'Chinese, News, Hong Kong'
-    extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} td[class=caption] {font-size:50%;} td[class=bodyhead]{font-weight:bold; font-size:150%;} td[class=stmobheadline]{font-weight:bold; font-size:150%;}'
+    extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} td[class=caption] {font-size:50%;} td[class=bodyhead]{font-weight:bold; font-size:150%;} td[class=stmobheadline]{font-weight:bold; font-size:200%;}'
     masthead_url = 'http://upload.wikimedia.org/wikipedia/en/d/dd/Singtao-usa.png'
     if __Source__ == 'normal':
         keep_only_tags = [dict(name='td', attrs={'class':['bodyhead','bodytext']})]

@@ -96,17 +96,13 @@ class STHKRecipe(BasicNewsRecipe):
             return self.get_dtlocal().strftime("%d")

     def get_cover_url(self):
-        #cover = 'http://singtao.com/media/a/a(2660).jpg' # for 2011/12/29
-        base = 2660
-        todaydate = date(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()))
-        diff = todaydate - date(2011, 12, 29)
-        base = base + int(diff.total_seconds()/(3600*24))
-        cover = 'http://singtao.com/media/a/a(' + str(base) +').jpg'
+        soup = self.index_to_soup('http://m.singtao.com/')
+        cover = soup.find(attrs={'class':'special'}).get('src', False)
         br = BasicNewsRecipe.get_browser(self)
         try:
             br.open(cover)
         except:
-            cover = 'http://singtao.com/images/stlogo.gif'
+            cover = None
         return cover

     def parse_index(self):

@@ -289,11 +285,11 @@ class STHKRecipe(BasicNewsRecipe):
         # the text may or may not be enclosed in <p></p> tag
         paras = articlebody.findAll('p')
         if not paras:
             paras = articlebody
         textFound = False
         for p in paras:
             if not textFound:
-                summary_candidate = self.tag_to_string(p).strip()
+                summary_candidate = self.tag_to_string(p).strip().replace('&nbsp;', '')
                 if len(summary_candidate) > 0:
                     summary_candidate = summary_candidate.replace(u'(\u661f\u5cf6\u65e5\u5831\u5831\u9053)', '', 1)
                     article.summary = article.text_summary = summary_candidate

@@ -489,3 +485,4 @@ class STHKRecipe(BasicNewsRecipe):
+

@@ -20,7 +20,7 @@ class sport_pl(BasicNewsRecipe):
     remove_javascript=True
     no_stylesheets=True
     remove_empty_feeds = True
-
+    ignore_duplicate_articles = {'title', 'url'}

     keep_only_tags =[]
     keep_only_tags.append(dict(name = 'div', attrs = {'id' : 'article'}))

@@ -1,7 +1,7 @@
 from calibre.web.feeds.news import BasicNewsRecipe

 class WirtualneMedia(BasicNewsRecipe):
-    title = u'wirtualnemedia.pl'
+    title = u'Wirtualnemedia.pl'
     oldest_article = 7
     max_articles_per_feed = 100
     no_stylesheets = True

@@ -0,0 +1,26 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__author__ = 'teepel <teepel44@gmail.com>'

'''
wolnemedia.net
'''

from calibre.web.feeds.news import BasicNewsRecipe

class wolne_media(BasicNewsRecipe):
    title = u'Wolne Media'
    __author__ = 'teepel <teepel44@gmail.com>'
    language = 'pl'
    description = 'Wiadomości z wolnemedia.net'
    INDEX = 'http://wolnemedia.net'
    oldest_article = 1
    max_articles_per_feed = 100
    remove_empty_feeds = True
    simultaneous_downloads = 5
    remove_javascript = True
    no_stylesheets = True
    auto_cleanup = True

    feeds = [(u'Wiadomości z kraju', u'http://wolnemedia.net/category/wiadomosci-z-kraju/feed/'), (u'Wiadomości ze świata', u'http://wolnemedia.net/category/wiadomosci-ze-swiata/feed/'), (u'Edukacja', u'http://wolnemedia.net/category/edukacja/feed/'), (u'Ekologia', u'http://wolnemedia.net/category/ekologia/feed/'), (u'Gospodarka', u'http://wolnemedia.net/category/gospodarka/feed/'), (u'Historia', u'http://wolnemedia.net/category/historia/feed/'), (u'Kultura', u'http://wolnemedia.net/category/kultura/feed/'), (u'Kulturoznawstwo', u'http://wolnemedia.net/category/kulturoznawstwo/feed/'), (u'Media', u'http://wolnemedia.net/category/media/feed/'), (u'Nauka', u'http://wolnemedia.net/category/nauka/feed/'), (u'Opowiadania', u'http://wolnemedia.net/category/opowiadania/feed/'), (u'Paranauka i ezoteryka', u'http://wolnemedia.net/category/ezoteryka/feed/'), (u'Polityka', u'http://wolnemedia.net/category/polityka/feed/'), (u'Prawo', u'http://wolnemedia.net/category/prawo/feed/'), (u'Publicystyka', u'http://wolnemedia.net/category/publicystyka/feed/'), (u'Reportaż', u'http://wolnemedia.net/category/reportaz/feed/'), (u'Seks', u'http://wolnemedia.net/category/seks/feed/'), (u'Społeczeństwo', u'http://wolnemedia.net/category/spoleczenstwo/feed/'), (u'Świat komputerów', u'http://wolnemedia.net/category/swiat-komputerow/feed/'), (u'Wierzenia', u'http://wolnemedia.net/category/wierzenia/feed/'), (u'Zdrowie', u'http://wolnemedia.net/category/zdrowie/feed/')]

@@ -1,10 +1,9 @@
 #!/usr/bin/env python
 __license__ = 'GPL v3'
-__copyright__ = '2010, matek09, matek09@gmail.com'
-__copyright__ = 'Modified 2011, Mariusz Wolek <mariusz_dot_wolek @ gmail dot com>'
-__copyright__ = 'Modified 2012, Artur Stachecki <artur.stachecki@gmail.com>'
+__copyright__ = '''2010, matek09, matek09@gmail.com
+Modified 2011, Mariusz Wolek <mariusz_dot_wolek @ gmail dot com>
+Modified 2012, Artur Stachecki <artur.stachecki@gmail.com>'''

 from calibre.web.feeds.news import BasicNewsRecipe
 import re

@@ -16,12 +15,12 @@ class Wprost(BasicNewsRecipe):
     ICO_BLOCKED = 'http://www.wprost.pl/G/layout2/ico_blocked.png'
     title = u'Wprost'
     __author__ = 'matek09'
-    description = 'Weekly magazine'
+    description = u'Popularny tygodnik ogólnopolski - Wprost. Najlepszy wśród polskich tygodników - opiniotwórczy - społeczno-informacyjny - społeczno-kulturalny.'
     encoding = 'ISO-8859-2'
     no_stylesheets = True
     language = 'pl'
     remove_javascript = True
     recursions = 0
     remove_tags_before = dict(dict(name = 'div', attrs = {'id' : 'print-layer'}))
     remove_tags_after = dict(dict(name = 'div', attrs = {'id' : 'print-layer'}))
     '''

@@ -94,5 +93,3 @@ class Wprost(BasicNewsRecipe):
             'description' : ''
             })
         return articles
-
-


@ -1,10 +1,9 @@
 #!/usr/bin/env python
-__license__ = 'GPL v3'
-__copyright__ = '2010, matek09, matek09@gmail.com'
-__copyright__ = 'Modified 2011, Mariusz Wolek <mariusz_dot_wolek @ gmail dot com>'
-__copyright__ = 'Modified 2012, Artur Stachecki <artur.stachecki@gmail.com>'
+__license__ = 'GPL v3'
+__copyright__ = '''2010, matek09, matek09@gmail.com
+Modified 2011, Mariusz Wolek <mariusz_dot_wolek @ gmail dot com>
+Modified 2012, Artur Stachecki <artur.stachecki@gmail.com>'''

 from calibre.web.feeds.news import BasicNewsRecipe
 import re

@ -12,13 +11,14 @@ import re
 class Wprost(BasicNewsRecipe):
     title = u'Wprost (RSS)'
     __author__ = 'matek09'
-    description = 'Weekly magazine'
+    description = u'Portal informacyjny. Najświeższe wiadomości, najciekawsze komentarze i opinie. Blogi najlepszych publicystów.'
     encoding = 'ISO-8859-2'
     no_stylesheets = True
     language = 'pl'
     remove_javascript = True
     recursions = 0
     use_embedded_content = False
+    ignore_duplicate_articles = {'title', 'url'}
     remove_empty_feeds = True
     remove_tags_before = dict(dict(name = 'div', attrs = {'id' : 'print-layer'}))
     remove_tags_after = dict(dict(name = 'div', attrs = {'id' : 'print-layer'}))

@ -48,20 +48,20 @@ class Wprost(BasicNewsRecipe):
     #h2 {font-size: x-large; font-weight: bold}
     feeds = [(u'Tylko u nas', u'http://www.wprost.pl/rss/rss_wprostextra.php'),
             (u'Wydarzenia', u'http://www.wprost.pl/rss/rss.php'),
             (u'Komentarze', u'http://www.wprost.pl/rss/rss_komentarze.php'),
             (u'Wydarzenia: Kraj', u'http://www.wprost.pl/rss/rss_kraj.php'),
             (u'Komentarze: Kraj', u'http://www.wprost.pl/rss/rss_komentarze_kraj.php'),
             (u'Wydarzenia: Świat', u'http://www.wprost.pl/rss/rss_swiat.php'),
             (u'Komentarze: Świat', u'http://www.wprost.pl/rss/rss_komentarze_swiat.php'),
             (u'Wydarzenia: Gospodarka', u'http://www.wprost.pl/rss/rss_gospodarka.php'),
             (u'Komentarze: Gospodarka', u'http://www.wprost.pl/rss/rss_komentarze_gospodarka.php'),
             (u'Wydarzenia: Życie', u'http://www.wprost.pl/rss/rss_zycie.php'),
             (u'Komentarze: Życie', u'http://www.wprost.pl/rss/rss_komentarze_zycie.php'),
             (u'Wydarzenia: Sport', u'http://www.wprost.pl/rss/rss_sport.php'),
             (u'Komentarze: Sport', u'http://www.wprost.pl/rss/rss_komentarze_sport.php'),
             (u'Przegląd prasy', u'http://www.wprost.pl/rss/rss_prasa.php')
             ]

     def get_cover_url(self):
         soup = self.index_to_soup('http://www.wprost.pl/tygodnik')


@ -79,7 +79,7 @@ author_name_copywords = ('Corporation', 'Company', 'Co.', 'Agency', 'Council',
 # By default, calibre splits a string containing multiple author names on
 # ampersands and the words "and" and "with". You can customize the splitting
 # by changing the regular expression below. Strings are split on whatever the
-# specified regular expression matches.
+# specified regular expression matches, in addition to ampersands.
 # Default: r'(?i),?\s+(and|with)\s+'
 authors_split_regex = r'(?i),?\s+(and|with)\s+'
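
For instance, here is how that default pattern behaves; a quick standalone sketch (the sample names are made up), with ampersand splitting handled separately as the comment above notes:

    import re

    # The default splitter: the comma is optional, 'and'/'with' are case-insensitive
    authors_split_regex = r'(?i),?\s+(and|with)\s+'
    parts = re.split(authors_split_regex, 'John Doe and Jane Roe with Jim Poe')
    # re.split keeps the captured (and|with) separators at the odd indices
    print(parts[::2])  # ['John Doe', 'Jane Roe', 'Jim Poe']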


@ -47,6 +47,10 @@ binary_includes = [
     '/usr/lib/libgthread-2.0.so.0',
     '/usr/lib/libpng14.so.14',
     '/usr/lib/libexslt.so.0',
+    # Ensure that libimobiledevice is compiled against openssl, not gnutls
+    '/usr/lib/libimobiledevice.so.3',
+    '/usr/lib/libusbmuxd.so.2',
+    '/usr/lib/libplist.so.1',
     MAGICK_PREFIX+'/lib/libMagickWand.so.5',
     MAGICK_PREFIX+'/lib/libMagickCore.so.5',
     '/usr/lib/libgcrypt.so.11',


@ -399,7 +399,8 @@ class Py2App(object):
     @flush
     def add_fontconfig(self):
         info('\nAdding fontconfig')
-        for x in ('fontconfig.1', 'freetype.6', 'expat.1'):
+        for x in ('fontconfig.1', 'freetype.6', 'expat.1',
+                  'plist.1', 'usbmuxd.2', 'imobiledevice.3'):
             src = os.path.join(SW, 'lib', 'lib'+x+'.dylib')
             self.install_dylib(src)
         dst = os.path.join(self.resources_dir, 'fonts')


@ -757,6 +757,7 @@ from calibre.ebooks.metadata.sources.isbndb import ISBNDB
 from calibre.ebooks.metadata.sources.overdrive import OverDrive
 from calibre.ebooks.metadata.sources.douban import Douban
 from calibre.ebooks.metadata.sources.ozon import Ozon
+# from calibre.ebooks.metadata.sources.google_images import GoogleImages

 plugins += [GoogleBooks, Amazon, Edelweiss, OpenLibrary, ISBNDB, OverDrive, Douban, Ozon]


@ -91,7 +91,7 @@ def restore_plugin_state_to_default(plugin_or_name):
     config['enabled_plugins'] = ep

 default_disabled_plugins = set([
-    'Overdrive', 'Douban Books', 'OZON.ru', 'Edelweiss',
+    'Overdrive', 'Douban Books', 'OZON.ru', 'Edelweiss', 'Google Images',
 ])

 def is_disabled(plugin):


@ -7,7 +7,9 @@ __license__ = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

+import weakref
 from functools import partial
+from itertools import izip, imap

 def sanitize_sort_field_name(field_metadata, field):
     field = field_metadata.search_term_to_field_key(field.lower().strip())
@ -15,11 +17,39 @@ def sanitize_sort_field_name(field_metadata, field):
     field = {'title': 'sort', 'authors':'author_sort'}.get(field, field)
     return field

+class MarkedVirtualField(object):
+
+    def __init__(self, marked_ids):
+        self.marked_ids = marked_ids
+
+    def iter_searchable_values(self, get_metadata, candidates, default_value=None):
+        for book_id in candidates:
+            yield self.marked_ids.get(book_id, default_value), {book_id}
+
+class TableRow(list):
+
+    def __init__(self, book_id, view):
+        self.book_id = book_id
+        self.view = weakref.ref(view)
+
+    def __getitem__(self, obj):
+        view = self.view()
+        if isinstance(obj, slice):
+            return [view._field_getters[c](self.book_id)
+                    for c in xrange(*obj.indices(len(view._field_getters)))]
+        else:
+            return view._field_getters[obj](self.book_id)
+
 class View(object):

+    ''' A table view of the database, with rows and columns. Also supports
+    filtering and sorting.  '''
+
     def __init__(self, cache):
         self.cache = cache
         self.marked_ids = {}
+        self.search_restriction_book_count = 0
+        self.search_restriction = ''
         self._field_getters = {}
         for col, idx in cache.backend.FIELD_MAP.iteritems():
             if isinstance(col, int):
@ -38,16 +68,33 @@ class View(object):
             except KeyError:
                 self._field_getters[idx] = partial(self.get, col)

-        self._map = list(self.cache.all_book_ids())
-        self._map_filtered = list(self._map)
+        self._map = tuple(self.cache.all_book_ids())
+        self._map_filtered = tuple(self._map)

     @property
     def field_metadata(self):
         return self.cache.field_metadata

     def _get_id(self, idx, index_is_id=True):
-        ans = idx if index_is_id else self.index_to_id(idx)
-        return ans
+        return idx if index_is_id else self.index_to_id(idx)
+
+    def __getitem__(self, row):
+        return TableRow(self._map_filtered[row], self.cache)
+
+    def __len__(self):
+        return len(self._map_filtered)
+
+    def __iter__(self):
+        for book_id in self._map_filtered:
+            yield self._data[book_id]
+
+    def iterall(self):
+        for book_id in self._map:
+            yield self[book_id]
+
+    def iterallids(self):
+        for book_id in self._map:
+            yield book_id

     def get_field_map_field(self, row, col, index_is_id=True):
         '''
@ -66,7 +113,7 @@ class View(object):
     def get_ondevice(self, idx, index_is_id=True, default_value=''):
         id_ = idx if index_is_id else self.index_to_id(idx)
-        self.cache.field_for('ondevice', id_, default_value=default_value)
+        return self.cache.field_for('ondevice', id_, default_value=default_value)

     def get_marked(self, idx, index_is_id=True, default_value=None):
         id_ = idx if index_is_id else self.index_to_id(idx)
@ -93,7 +140,7 @@ class View(object):
             ans.append(self.cache._author_data(id_))
         return tuple(ans)

-    def multisort(self, fields=[], subsort=False):
+    def multisort(self, fields=[], subsort=False, only_ids=None):
         fields = [(sanitize_sort_field_name(self.field_metadata, x), bool(y)) for x, y in fields]
         keys = self.field_metadata.sortable_field_keys()
         fields = [x for x in fields if x[0] in keys]
@ -102,8 +149,70 @@ class View(object):
         if not fields:
             fields = [('timestamp', False)]
-        sorted_book_ids = self.cache.multisort(fields)
-        sorted_book_ids
-        # TODO: change maps
+        sorted_book_ids = self.cache.multisort(fields, ids_to_sort=only_ids)
+        if only_ids is None:
+            self._map = tuple(sorted_book_ids)
+            if len(self._map_filtered) == len(self._map):
+                self._map_filtered = tuple(self._map)
+            else:
+                fids = frozenset(self._map_filtered)
+                self._map_filtered = tuple(i for i in self._map if i in fids)
+        else:
+            smap = {book_id:i for i, book_id in enumerate(sorted_book_ids)}
+            only_ids.sort(key=smap.get)
+
+    def search(self, query, return_matches=False):
+        ans = self.search_getting_ids(query, self.search_restriction,
+                                      set_restriction_count=True)
+        if return_matches:
+            return ans
+        self._map_filtered = tuple(ans)
+
+    def search_getting_ids(self, query, search_restriction,
+                           set_restriction_count=False):
+        q = ''
+        if not query or not query.strip():
+            q = search_restriction
+        else:
+            q = query
+            if search_restriction:
+                q = u'(%s) and (%s)' % (search_restriction, query)
+        if not q:
+            if set_restriction_count:
+                self.search_restriction_book_count = len(self._map)
+            return list(self._map)
+        matches = self.cache.search(
+            query, search_restriction, virtual_fields={'marked':MarkedVirtualField(self.marked_ids)})
+        rv = [x for x in self._map if x in matches]
+        if set_restriction_count and q == search_restriction:
+            self.search_restriction_book_count = len(rv)
+        return rv
+
+    def set_search_restriction(self, s):
+        self.search_restriction = s
+
+    def search_restriction_applied(self):
+        return bool(self.search_restriction)
+
+    def get_search_restriction_book_count(self):
+        return self.search_restriction_book_count
+
+    def set_marked_ids(self, id_dict):
+        '''
+        ids in id_dict are "marked". They can be searched for by
+        using the search term ``marked:true``. Pass in an empty dictionary or
+        set to clear marked ids.
+
+        :param id_dict: Either a dictionary mapping ids to values or a set
+        of ids. In the latter case, the value is set to 'true' for all ids. If
+        a mapping is provided, then the search can be used to search for
+        particular values: ``marked:value``
+        '''
+        if not hasattr(id_dict, 'items'):
+            # Simple list. Make it a dict of string 'true'
+            self.marked_ids = dict.fromkeys(id_dict, u'true')
+        else:
+            # Ensure that all the items in the dict are text
+            self.marked_ids = dict(izip(id_dict.iterkeys(), imap(unicode,
+                id_dict.itervalues())))
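
To make the new marked-book search concrete, a minimal usage sketch (the `view` object and the book ids are hypothetical, not part of this diff):

    # view is a View instance over a calibre database cache
    view.set_marked_ids({1, 2, 3})       # each id becomes findable as marked:true
    view.search('marked:true')           # filters the view down to books 1, 2, 3

    view.set_marked_ids({1: 'old', 2: 'new'})
    view.search('marked:new')            # only book 2 matches now
    view.set_marked_ids({})              # clears all marks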


@ -239,7 +239,7 @@ class ANDROID(USBMS):
             'ADVANCED', 'SGH-I727', 'USB_FLASH_DRIVER', 'ANDROID',
             'S5830I_CARD', 'MID7042', 'LINK-CREATE', '7035', 'VIEWPAD_7E',
             'NOVO7', 'MB526', '_USB#WYK7MSF8KE', 'TABLET_PC', 'F', 'MT65XX_MS',
-            'ICS', 'E400', '__FILE-STOR_GADG', 'ST80208-1']
+            'ICS', 'E400', '__FILE-STOR_GADG', 'ST80208-1', 'GT-S5660M_CARD']
     WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
             'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
             'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',


@ -858,7 +858,7 @@ class Amazon(Source):
     # }}}

     def download_cover(self, log, result_queue, abort, # {{{
-            title=None, authors=None, identifiers={}, timeout=30):
+            title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):
         cached_url = self.get_cached_cover_url(identifiers)
         if cached_url is None:
             log.info('No cached cover found, running identify')


@ -31,7 +31,7 @@ msprefs.defaults['find_first_edition_date'] = False
 # Google covers are often poor quality (scans/errors) but they have high
 # resolution, so they trump covers from better sources. So make sure they
 # are only used if no other covers are found.
-msprefs.defaults['cover_priorities'] = {'Google':2}
+msprefs.defaults['cover_priorities'] = {'Google':2, 'Google Images':2}

 def create_log(ostream=None):
     from calibre.utils.logging import ThreadSafeLog, FileStream
@ -222,6 +222,9 @@ class Source(Plugin):
     #: plugin
     config_help_message = None

+    #: If True this source can return multiple covers for a given query
+    can_get_multiple_covers = False
+
     def __init__(self, *args, **kwargs):
         Plugin.__init__(self, *args, **kwargs)
@ -522,7 +525,7 @@ class Source(Plugin):
         return None

     def download_cover(self, log, result_queue, abort,
-            title=None, authors=None, identifiers={}, timeout=30):
+            title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):
         '''
         Download a cover and put it into result_queue. The parameters all have
         the same meaning as for :meth:`identify`. Put (self, cover_data) into
@ -531,6 +534,9 @@ class Source(Plugin):
         This method should use cached cover URLs for efficiency whenever
         possible. When cached data is not present, most plugins simply call
         identify and use its results.
+
+        If the parameter get_best_cover is True and this plugin can get
+        multiple covers, it should only get the "best" one.
         '''
         pass
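
A hypothetical cover source honouring the new flag could look like the sketch below; the class, its name and the assumed get_cover_urls() helper are illustrative only, but the queue protocol and the get_best_cover contract follow the docstring above:

    from calibre.ebooks.metadata.sources.base import Source

    class ExampleCovers(Source):  # illustrative plugin, not shipped in this commit
        name = 'Example Covers'
        capabilities = frozenset(['cover'])
        can_get_multiple_covers = True

        def download_cover(self, log, result_queue, abort,
                title=None, authors=None, identifiers={}, timeout=30,
                get_best_cover=False):
            urls = self.get_cover_urls(title, authors)  # assumed helper
            if get_best_cover:
                # a multi-cover plugin should fetch only its best candidate
                urls = urls[:1]
            for url in urls:
                if abort.is_set():
                    break
                data = self.browser.open_novisit(url, timeout=timeout).read()
                result_queue.put((self, data))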


@ -35,9 +35,14 @@ class Worker(Thread):
         start_time = time.time()
         if not self.abort.is_set():
             try:
-                self.plugin.download_cover(self.log, self.rq, self.abort,
-                        title=self.title, authors=self.authors,
-                        identifiers=self.identifiers, timeout=self.timeout)
+                if self.plugin.can_get_multiple_covers:
+                    self.plugin.download_cover(self.log, self.rq, self.abort,
+                            title=self.title, authors=self.authors, get_best_cover=True,
+                            identifiers=self.identifiers, timeout=self.timeout)
+                else:
+                    self.plugin.download_cover(self.log, self.rq, self.abort,
+                            title=self.title, authors=self.authors,
+                            identifiers=self.identifiers, timeout=self.timeout)
             except:
                 self.log.exception('Failed to download cover from',
                         self.plugin.name)


@ -221,7 +221,7 @@ class Douban(Source):
     # }}}

     def download_cover(self, log, result_queue, abort, # {{{
-            title=None, authors=None, identifiers={}, timeout=30):
+            title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):
         cached_url = self.get_cached_cover_url(identifiers)
         if cached_url is None:
             log.info('No cached cover found, running identify')


@ -320,7 +320,7 @@ class Edelweiss(Source):
     # }}}

     def download_cover(self, log, result_queue, abort, # {{{
-            title=None, authors=None, identifiers={}, timeout=30):
+            title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):
         cached_url = self.get_cached_cover_url(identifiers)
         if cached_url is None:
             log.info('No cached cover found, running identify')


@ -209,7 +209,7 @@ class GoogleBooks(Source):
     # }}}

     def download_cover(self, log, result_queue, abort, # {{{
-            title=None, authors=None, identifiers={}, timeout=30):
+            title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):
         cached_url = self.get_cached_cover_url(identifiers)
         if cached_url is None:
             log.info('No cached cover found, running identify')


@ -0,0 +1,148 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

from collections import OrderedDict

from calibre import as_unicode
from calibre.ebooks.metadata.sources.base import Source, Option

class GoogleImages(Source):

    name = 'Google Images'
    description = _('Downloads covers from a Google Image search. Useful to find larger/alternate covers.')
    capabilities = frozenset(['cover'])
    config_help_message = _('Configure the Google Image Search plugin')
    can_get_multiple_covers = True
    options = (Option('max_covers', 'number', 5, _('Maximum number of covers to get'),
                      _('The maximum number of covers to process from the google search result')),
               Option('size', 'choices', 'svga', _('Cover size'),
                      _('Search for covers larger than the specified size'),
                      choices=OrderedDict((
                          ('any', _('Any size'),),
                          ('l', _('Large'),),
                          ('qsvga', _('Larger than %s')%'400x300',),
                          ('vga', _('Larger than %s')%'640x480',),
                          ('svga', _('Larger than %s')%'600x800',),
                          ('xga', _('Larger than %s')%'1024x768',),
                          ('2mp', _('Larger than %s')%'2 MP',),
                          ('4mp', _('Larger than %s')%'4 MP',),
                      ))),
               )

    def download_cover(self, log, result_queue, abort,
            title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):
        if not title:
            return
        from threading import Thread
        import time
        timeout = max(60, timeout)  # Needs at least a minute
        title = ' '.join(self.get_title_tokens(title))
        author = ' '.join(self.get_author_tokens(authors))
        urls = self.get_image_urls(title, author, log, abort, timeout)
        if not urls:
            log('No images found in Google for, title: %r and authors: %r'%(title, author))
            return
        urls = urls[:self.prefs['max_covers']]
        if get_best_cover:
            urls = urls[:1]
        workers = [Thread(target=self.download_image, args=(url, timeout, log, result_queue)) for url in urls]
        for w in workers:
            w.daemon = True
            w.start()
        alive = True
        start_time = time.time()
        while alive and not abort.is_set() and time.time() - start_time < timeout:
            alive = False
            for w in workers:
                if w.is_alive():
                    alive = True
                    break
            abort.wait(0.1)

    def download_image(self, url, timeout, log, result_queue):
        try:
            ans = self.browser.open_novisit(url, timeout=timeout).read()
            result_queue.put((self, ans))
            log('Downloaded cover from: %s'%url)
        except Exception:
            self.log.exception('Failed to download cover from: %r'%url)

    def get_image_urls(self, title, author, log, abort, timeout):
        from calibre.utils.ipc.simple_worker import fork_job, WorkerError
        try:
            return fork_job('calibre.ebooks.metadata.sources.google_images',
                    'search', args=(title, author, self.prefs['size'], timeout), no_output=True, abort=abort, timeout=timeout)['result']
        except WorkerError as e:
            if e.orig_tb:
                log.error(e.orig_tb)
            log.exception('Searching google failed:' + as_unicode(e))
        except Exception as e:
            log.exception('Searching google failed:' + as_unicode(e))
        return []

USER_AGENT = 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.13) Gecko/20101210 Firefox/3.6.13'

def find_image_urls(br, ans):
    import urlparse
    for w in br.page.mainFrame().documentElement().findAll('.images_table a[href]'):
        try:
            imgurl = urlparse.parse_qs(urlparse.urlparse(unicode(w.attribute('href'))).query)['imgurl'][0]
        except:
            continue
        if imgurl not in ans:
            ans.append(imgurl)

def search(title, author, size, timeout, debug=False):
    import time
    from calibre.web.jsbrowser.browser import Browser, LoadWatcher, Timeout
    ans = []
    start_time = time.time()
    br = Browser(user_agent=USER_AGENT, enable_developer_tools=debug)
    br.visit('https://www.google.com/advanced_image_search')
    f = br.select_form('form[action="/search"]')
    f['as_q'] = '%s %s'%(title, author)
    if size != 'any':
        f['imgsz'] = size
    f['imgar'] = 't|xt'
    f['as_filetype'] = 'jpg'
    br.submit(wait_for_load=False)

    # Loop until the page finishes loading or at least five image urls are
    # found
    lw = LoadWatcher(br.page, br)
    while lw.is_loading and len(ans) < 5:
        br.run_for_a_time(0.2)
        find_image_urls(br, ans)
        if time.time() - start_time > timeout:
            raise Timeout('Timed out trying to load google image search page')
    find_image_urls(br, ans)
    if debug:
        br.show_browser()
    br.close()
    del br  # Needed to prevent PyQt from segfaulting
    return ans

def test_google():
    import pprint
    pprint.pprint(search('heroes', 'abercrombie', 'svga', 60, debug=True))

def test():
    from Queue import Queue
    from threading import Event
    from calibre.utils.logging import default_log
    p = GoogleImages(None)
    rq = Queue()
    p.download_cover(default_log, rq, Event(), title='The Heroes',
            authors=('Joe Abercrombie',))
    print ('Downloaded', rq.qsize(), 'covers')

if __name__ == '__main__':
    test()


@ -19,7 +19,7 @@ class OpenLibrary(Source):
     OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false'

     def download_cover(self, log, result_queue, abort,
-            title=None, authors=None, identifiers={}, timeout=30):
+            title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):
         if 'isbn' not in identifiers:
             return
         isbn = identifiers['isbn']


@ -75,7 +75,7 @@ class OverDrive(Source):
     # }}}

     def download_cover(self, log, result_queue, abort, # {{{
-            title=None, authors=None, identifiers={}, timeout=30):
+            title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):
         import mechanize
         cached_url = self.get_cached_cover_url(identifiers)
         if cached_url is None:


@ -55,7 +55,7 @@ class Ozon(Source):
         # for ozon.ru search we have to format ISBN with '-'
         isbn = _format_isbn(log, identifiers.get('isbn', None))
         ozonid = identifiers.get('ozon', None)

         unk = unicode(_('Unknown')).upper()
         if (title and title != unk) or (authors and authors != [unk]) or isbn or not ozonid:
             qItems = set([isbn, title])
@ -64,19 +64,19 @@ class Ozon(Source):
             qItems.discard(None)
             qItems.discard('')
             qItems = map(_quoteString, qItems)

             q = u' '.join(qItems).strip()
             log.info(u'search string: ' + q)

             if isinstance(q, unicode):
                 q = q.encode('utf-8')
             if not q:
                 return None

             search_url += quote_plus(q)
         else:
             search_url = self.ozon_url + '/webservices/OzonWebSvc.asmx/ItemDetail?ID=%s' % ozonid

         log.debug(u'search url: %r'%search_url)
         return search_url
     # }}}
@ -250,7 +250,7 @@ class Ozon(Source):
         return url
     # }}}

-    def download_cover(self, log, result_queue, abort, title=None, authors=None, identifiers={}, timeout=30): # {{{
+    def download_cover(self, log, result_queue, abort, title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False): # {{{
         cached_url = self.get_cached_cover_url(identifiers)
         if cached_url is None:
             log.debug('No cached cover found, running identify')


@ -11,6 +11,7 @@ import os
 from threading import Event, Thread
 from Queue import Queue, Empty
 from io import BytesIO
+from collections import Counter

 from calibre.utils.date import as_utc
 from calibre.ebooks.metadata.sources.identify import identify, msprefs
@ -113,13 +114,18 @@ def single_covers(title, authors, identifiers, caches, tdir):
             kwargs=dict(title=title, authors=authors, identifiers=identifiers))
     worker.daemon = True
     worker.start()
+    c = Counter()
     while worker.is_alive():
         try:
             plugin, width, height, fmt, data = results.get(True, 1)
         except Empty:
             continue
         else:
-            name = '%s,,%s,,%s,,%s.cover'%(plugin.name, width, height, fmt)
+            name = plugin.name
+            if plugin.can_get_multiple_covers:
+                name += '{%d}'%c[plugin.name]
+                c[plugin.name] += 1
+            name = '%s,,%s,,%s,,%s.cover'%(name, width, height, fmt)
             with open(name, 'wb') as f:
                 f.write(data)
             os.mkdir(name+'.done')
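
The effect of the Counter is easiest to see in the file names it produces; a small standalone sketch with made-up sizes:

    from collections import Counter

    c = Counter()
    plugin_name = 'Google Images'  # a multi-cover plugin
    for width, height, fmt in ((800, 1200, 'jpg'), (600, 900, 'jpg')):
        name = plugin_name + '{%d}' % c[plugin_name]
        c[plugin_name] += 1
        print('%s,,%s,,%s,,%s.cover' % (name, width, height, fmt))
    # Google Images{0},,800,,1200,,jpg.cover
    # Google Images{1},,600,,900,,jpg.cover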


@ -262,6 +262,35 @@ def from_links(container):
             toc.remove(child)
     return toc

+def find_text(node):
+    LIMIT = 200
+    pat = re.compile(r'\s+')
+    for child in node:
+        if isinstance(child, etree._Element):
+            text = xml2text(child).strip()
+            text = pat.sub(' ', text)
+            if len(text) < 1:
+                continue
+            if len(text) > LIMIT:
+                # Look for less text in a child of this node, recursively
+                ntext = find_text(child)
+                return ntext or (text[:LIMIT] + '...')
+            else:
+                return text
+
+def from_files(container):
+    toc = TOC()
+    for spinepath in container.spine_items:
+        name = container.abspath_to_name(spinepath)
+        root = container.parsed(name)
+        body = XPath('//h:body')(root)
+        if not body:
+            continue
+        text = find_text(body[0])
+        if text:
+            toc.add(text, name)
+    return toc
+
 def add_id(container, name, loc):
     root = container.parsed(name)
     body = root.xpath('//*[local-name()="body"]')[0]
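
Driving the new generator end to end might look like this sketch; the epub path is made up, and get_container/commit_toc are the existing polish helpers imported elsewhere in calibre:

    from calibre.ebooks.oeb.polish.container import get_container
    from calibre.ebooks.oeb.polish.toc import from_files, commit_toc

    container = get_container('/tmp/book.epub')  # hypothetical book
    toc = from_files(container)  # one entry per spine file, titled by its first text
    commit_toc(container, toc)
    container.commit()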


@ -333,8 +333,8 @@ class OEBReader(object):
         guide = self.oeb.guide
         manifest = self.oeb.manifest
         for elem in xpath(opf, '/o2:package/o2:guide/o2:reference'):
-            href = elem.get('href')
-            path = urlnormalize(urldefrag(href)[0])
+            ref_href = elem.get('href')
+            path = urlnormalize(urldefrag(ref_href)[0])
             if path not in manifest.hrefs:
                 corrected_href = None
                 for href in manifest.hrefs:
@ -342,12 +342,12 @@ class OEBReader(object):
                         corrected_href = href
                         break
                 if corrected_href is None:
-                    self.logger.warn(u'Guide reference %r not found' % href)
+                    self.logger.warn(u'Guide reference %r not found' % ref_href)
                     continue
-                href = corrected_href
+                ref_href = corrected_href
             typ = elem.get('type')
             if typ not in guide:
-                guide.add(typ, elem.get('title'), href)
+                guide.add(typ, elem.get('title'), ref_href)

     def _find_ncx(self, opf):
         result = xpath(opf, '/o2:package/o2:spine/@toc')


@ -16,13 +16,12 @@ from operator import attrgetter
 from Queue import Queue, Empty
 from io import BytesIO

-from PyQt4.Qt import (QStyledItemDelegate, QTextDocument, QRectF, QIcon, Qt,
-        QApplication, QDialog, QVBoxLayout, QLabel,
-        QDialogButtonBox, QStyle, QStackedWidget, QWidget,
-        QTableView, QGridLayout, QFontInfo, QPalette, QTimer,
-        pyqtSignal, QAbstractTableModel, QVariant, QSize,
-        QListView, QPixmap, QAbstractListModel, QColor, QRect,
-        QTextBrowser, QStringListModel)
+from PyQt4.Qt import (
+    QStyledItemDelegate, QTextDocument, QRectF, QIcon, Qt, QApplication,
+    QDialog, QVBoxLayout, QLabel, QDialogButtonBox, QStyle, QStackedWidget,
+    QWidget, QTableView, QGridLayout, QFontInfo, QPalette, QTimer, pyqtSignal,
+    QAbstractTableModel, QVariant, QSize, QListView, QPixmap, QModelIndex,
+    QAbstractListModel, QColor, QRect, QTextBrowser, QStringListModel)
 from PyQt4.QtWebKit import QWebView

 from calibre.customize.ui import metadata_plugins
@ -654,7 +653,7 @@ class CoversModel(QAbstractListModel): # {{{
         for i, plugin in enumerate(metadata_plugins(['cover'])):
             self.covers.append((plugin.name+'\n'+_('Searching...'),
                 QVariant(self.blank), None, True))
-            self.plugin_map[plugin] = i+1
+            self.plugin_map[plugin] = [i+1]
         if do_reset:
             self.reset()
@ -685,48 +684,82 @@ class CoversModel(QAbstractListModel): # {{{
     def plugin_for_index(self, index):
         row = index.row() if hasattr(index, 'row') else index
         for k, v in self.plugin_map.iteritems():
-            if v == row:
+            if row in v:
                 return k

-    def cover_keygen(self, x):
-        pmap = x[2]
-        if pmap is None:
-            return 1
-        return pmap.width()*pmap.height()
-
     def clear_failed(self):
+        # Remove entries that are still waiting
         good = []
         pmap = {}
-        dcovers = sorted(self.covers[1:], key=self.cover_keygen, reverse=True)
-        cmap = {x:self.covers.index(x) for x in self.covers}
+        def keygen(x):
+            pmap = x[2]
+            if pmap is None:
+                return 1
+            return pmap.width()*pmap.height()
+        dcovers = sorted(self.covers[1:], key=keygen, reverse=True)
+        cmap = {i:self.plugin_for_index(i) for i in xrange(len(self.covers))}
         for i, x in enumerate(self.covers[0:1] + dcovers):
             if not x[-1]:
                 good.append(x)
-                if i > 0:
-                    plugin = self.plugin_for_index(cmap[x])
-                    pmap[plugin] = len(good) - 1
+                plugin = cmap[i]
+                if plugin is not None:
+                    try:
+                        pmap[plugin].append(len(good) - 1)
+                    except KeyError:
+                        pmap[plugin] = [len(good)-1]
         self.covers = good
         self.plugin_map = pmap
         self.reset()

-    def index_for_plugin(self, plugin):
-        idx = self.plugin_map.get(plugin, 0)
-        return self.index(idx)
+    def pointer_from_index(self, index):
+        row = index.row() if hasattr(index, 'row') else index
+        try:
+            return self.covers[row][2]
+        except IndexError:
+            pass
+
+    def index_from_pointer(self, pointer):
+        for r, (text, scaled, pmap, waiting) in enumerate(self.covers):
+            if pointer == pmap:
+                return self.index(r)
+        return self.index(0)

     def update_result(self, plugin_name, width, height, data):
-        idx = None
-        for plugin, i in self.plugin_map.iteritems():
-            if plugin.name == plugin_name:
-                idx = i
-                break
-        if idx is None:
-            return
-        pmap = QPixmap()
-        pmap.loadFromData(data)
-        if pmap.isNull():
-            return
-        self.covers[idx] = self.get_item(plugin_name, pmap, waiting=False)
-        self.dataChanged.emit(self.index(idx), self.index(idx))
+        if plugin_name.endswith('}'):
+            # multi cover plugin
+            plugin_name = plugin_name.partition('{')[0]
+            plugin = [plugin for plugin in self.plugin_map if plugin.name == plugin_name]
+            if not plugin:
+                return
+            plugin = plugin[0]
+            last_row = max(self.plugin_map[plugin])
+            pmap = QPixmap()
+            pmap.loadFromData(data)
+            if pmap.isNull():
+                return
+            self.beginInsertRows(QModelIndex(), last_row, last_row)
+            for rows in self.plugin_map.itervalues():
+                for i in xrange(len(rows)):
+                    if rows[i] >= last_row:
+                        rows[i] += 1
+            self.plugin_map[plugin].insert(-1, last_row)
+            self.covers.insert(last_row, self.get_item(plugin_name, pmap, waiting=False))
+            self.endInsertRows()
+        else:
+            # single cover plugin
+            idx = None
+            for plugin, rows in self.plugin_map.iteritems():
+                if plugin.name == plugin_name:
+                    idx = rows[0]
+                    break
+            if idx is None:
+                return
+            pmap = QPixmap()
+            pmap.loadFromData(data)
+            if pmap.isNull():
+                return
+            self.covers[idx] = self.get_item(plugin_name, pmap, waiting=False)
+            self.dataChanged.emit(self.index(idx), self.index(idx))

     def cover_pixmap(self, index):
         row = index.row()
@ -774,9 +807,12 @@ class CoversView(QListView): # {{{
         self.m.reset_covers()

     def clear_failed(self):
-        plugin = self.m.plugin_for_index(self.currentIndex())
+        pointer = self.m.pointer_from_index(self.currentIndex())
         self.m.clear_failed()
-        self.select(self.m.index_for_plugin(plugin).row())
+        if pointer is None:
+            self.select(0)
+        else:
+            self.select(self.m.index_from_pointer(pointer).row())

 # }}}
@ -852,10 +888,11 @@ class CoversWidget(QWidget): # {{{
         if num < 2:
             txt = _('Could not find any covers for <b>%s</b>')%self.book.title
         else:
-            txt = _('Found <b>%(num)d</b> covers of %(title)s. '
-                    'Pick the one you like best.')%dict(num=num-1,
-                    title=self.title)
+            txt = _('Found <b>%(num)d</b> possible covers for %(title)s. '
+                    'When the download completes, the covers will be sorted by size.')%dict(num=num-1,
+                    title=self.title)

         self.msg.setText(txt)
+        self.msg.setWordWrap(True)

         self.finished.emit()


@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-

 from __future__ import (unicode_literals, division, absolute_import, print_function)
-store_version = 2 # Needed for dynamic plugin loading
+store_version = 3 # Needed for dynamic plugin loading

 __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
@ -18,13 +18,26 @@ from calibre import browser
 from calibre.gui2 import open_url
 from calibre.gui2.store.search_result import SearchResult

+class AmazonDEKindleStore(StorePlugin):
+    '''
+    For comments on the implementation, please see amazon_plugin.py
+    '''
+
+    aff_id = {'tag': 'charhale0a-21'}
+    store_link = ('http://www.amazon.de/gp/redirect.html?ie=UTF8&site-redirect=de'
+                  '&tag=%(tag)s&linkCode=ur2&camp=1638&creative=19454'
+                  '&location=http://www.amazon.de/ebooks-kindle/b?node=530886031')
+    store_link_details = ('http://www.amazon.de/gp/redirect.html?ie=UTF8'
+                          '&location=http://www.amazon.de/dp/%(asin)s&site-redirect=de'
+                          '&tag=%(tag)s&linkCode=ur2&camp=1638&creative=6742')
+    search_url = 'http://www.amazon.de/s/?url=search-alias%3Ddigital-text&field-keywords='
+    author_article = 'von '
+    and_word = ' und '
+
-# This class is copy/pasted from amason_uk_plugin. Do not modify it in any
-# other amazon EU plugin. Be sure to paste it into all other amazon EU plugins
-# when modified.
 # ---- Copy from here to end
-class AmazonEUBase(StorePlugin):
     '''
     For comments on the implementation, please see amazon_plugin.py
     '''
@ -108,20 +121,3 @@ class AmazonEUBase(StorePlugin):
     def get_details(self, search_result, timeout):
         pass
-
-class AmazonDEKindleStore(AmazonEUBase):
-    '''
-    For comments on the implementation, please see amazon_plugin.py
-    '''
-    aff_id = {'tag': 'charhale0a-21'}
-    store_link = ('http://www.amazon.de/gp/redirect.html?ie=UTF8&site-redirect=de'
-                  '&tag=%(tag)s&linkCode=ur2&camp=1638&creative=19454'
-                  '&location=http://www.amazon.de/ebooks-kindle/b?node=530886031')
-    store_link_details = ('http://www.amazon.de/gp/redirect.html?ie=UTF8'
-                          '&location=http://www.amazon.de/dp/%(asin)s&site-redirect=de'
-                          '&tag=%(tag)s&linkCode=ur2&camp=1638&creative=6742')
-    search_url = 'http://www.amazon.de/s/?url=search-alias%3Ddigital-text&field-keywords='
-    author_article = 'von '
-    and_word = ' und '


@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-

 from __future__ import (unicode_literals, division, absolute_import, print_function)
-store_version = 2 # Needed for dynamic plugin loading
+store_version = 3 # Needed for dynamic plugin loading

 __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
@ -18,12 +18,25 @@ from calibre import browser
 from calibre.gui2 import open_url
 from calibre.gui2.store.search_result import SearchResult

+class AmazonESKindleStore(StorePlugin):
+    '''
+    For comments on the implementation, please see amazon_plugin.py
+    '''
+
+    aff_id = {'tag': 'charhale09-21'}
+    store_link = ('http://www.amazon.es/ebooks-kindle/b?_encoding=UTF8&'
+                  'node=827231031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3626&creative=24790')
+    store_link_details = ('http://www.amazon.es/gp/redirect.html?ie=UTF8&'
+                          'location=http://www.amazon.es/dp/%(asin)s&tag=%(tag)s'
+                          '&linkCode=ur2&camp=3626&creative=24790')
+    search_url = 'http://www.amazon.es/s/?url=search-alias%3Ddigital-text&field-keywords='
+    author_article = 'de '
+    and_word = ' y '
+
-# This class is copy/pasted from amason_uk_plugin. Do not modify it in any
-# other amazon EU plugin. Be sure to paste it into all other amazon EU plugins
-# when modified.
 # ---- Copy from here to end
-class AmazonEUBase(StorePlugin):
     '''
     For comments on the implementation, please see amazon_plugin.py
     '''
@ -107,19 +120,3 @@ class AmazonEUBase(StorePlugin):
     def get_details(self, search_result, timeout):
         pass
-
-class AmazonESKindleStore(AmazonEUBase):
-    '''
-    For comments on the implementation, please see amazon_plugin.py
-    '''
-    aff_id = {'tag': 'charhale09-21'}
-    store_link = ('http://www.amazon.es/ebooks-kindle/b?_encoding=UTF8&'
-                  'node=827231031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3626&creative=24790')
-    store_link_details = ('http://www.amazon.es/gp/redirect.html?ie=UTF8&'
-                          'location=http://www.amazon.es/dp/%(asin)s&tag=%(tag)s'
-                          '&linkCode=ur2&camp=3626&creative=24790')
-    search_url = 'http://www.amazon.es/s/?url=search-alias%3Ddigital-text&field-keywords='
-    author_article = 'de '
-    and_word = ' y '


@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-

 from __future__ import (unicode_literals, division, absolute_import, print_function)
-store_version = 2 # Needed for dynamic plugin loading
+store_version = 3 # Needed for dynamic plugin loading

 __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
@ -18,13 +18,22 @@ from calibre import browser
 from calibre.gui2 import open_url
 from calibre.gui2.store.search_result import SearchResult

+class AmazonFRKindleStore(StorePlugin):
+    '''
+    For comments on the implementation, please see amazon_plugin.py
+    '''
+
+    aff_id = {'tag': 'charhale-21'}
+    store_link = 'http://www.amazon.fr/livres-kindle/b?ie=UTF8&node=695398031&ref_=sa_menu_kbo1&_encoding=UTF8&tag=%(tag)s&linkCode=ur2&camp=1642&creative=19458' % aff_id
+    store_link_details = 'http://www.amazon.fr/gp/redirect.html?ie=UTF8&location=http://www.amazon.fr/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=1634&creative=6738'
+    search_url = 'http://www.amazon.fr/s/?url=search-alias%3Ddigital-text&field-keywords='
+    author_article = 'de '
+    and_word = ' et '
+
-# This class is copy/pasted from amason_uk_plugin. Do not modify it in any
-# other amazon EU plugin. Be sure to paste it into all other amazon EU plugins
-# when modified.
 # ---- Copy from here to end
-class AmazonEUBase(StorePlugin):
     '''
     For comments on the implementation, please see amazon_plugin.py
     '''
@ -108,16 +117,3 @@ class AmazonEUBase(StorePlugin):
     def get_details(self, search_result, timeout):
         pass
-
-class AmazonFRKindleStore(AmazonEUBase):
-    '''
-    For comments on the implementation, please see amazon_plugin.py
-    '''
-    aff_id = {'tag': 'charhale-21'}
-    store_link = 'http://www.amazon.fr/livres-kindle/b?ie=UTF8&node=695398031&ref_=sa_menu_kbo1&_encoding=UTF8&tag=%(tag)s&linkCode=ur2&camp=1642&creative=19458' % aff_id
-    store_link_details = 'http://www.amazon.fr/gp/redirect.html?ie=UTF8&location=http://www.amazon.fr/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=1634&creative=6738'
-    search_url = 'http://www.amazon.fr/s/?url=search-alias%3Ddigital-text&field-keywords='
-    author_article = 'de '
-    and_word = ' et '


@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-

 from __future__ import (unicode_literals, division, absolute_import, print_function)
-store_version = 2 # Needed for dynamic plugin loading
+store_version = 3 # Needed for dynamic plugin loading

 __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
@ -18,12 +18,25 @@ from calibre import browser
 from calibre.gui2 import open_url
 from calibre.gui2.store.search_result import SearchResult

+class AmazonITKindleStore(StorePlugin):
+    '''
+    For comments on the implementation, please see amazon_plugin.py
+    '''
+
+    aff_id = {'tag': 'httpcharles07-21'}
+    store_link = ('http://www.amazon.it/ebooks-kindle/b?_encoding=UTF8&'
+                  'node=827182031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3370&creative=23322')
+    store_link_details = ('http://www.amazon.it/gp/redirect.html?ie=UTF8&'
+                          'location=http://www.amazon.it/dp/%(asin)s&tag=%(tag)s&'
+                          'linkCode=ur2&camp=3370&creative=23322')
+    search_url = 'http://www.amazon.it/s/?url=search-alias%3Ddigital-text&field-keywords='
+    author_article = 'di '
+    and_word = ' e '
+
-# This class is copy/pasted from amason_uk_plugin. Do not modify it in any
-# other amazon EU plugin. Be sure to paste it into all other amazon EU plugins
-# when modified.
 # ---- Copy from here to end
-class AmazonEUBase(StorePlugin):
     '''
     For comments on the implementation, please see amazon_plugin.py
     '''
@ -106,20 +119,3 @@ class AmazonEUBase(StorePlugin):
     def get_details(self, search_result, timeout):
         pass
-
-class AmazonITKindleStore(AmazonEUBase):
-    '''
-    For comments on the implementation, please see amazon_plugin.py
-    '''
-    aff_id = {'tag': 'httpcharles07-21'}
-    store_link = ('http://www.amazon.it/ebooks-kindle/b?_encoding=UTF8&'
-                  'node=827182031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3370&creative=23322')
-    store_link_details = ('http://www.amazon.it/gp/redirect.html?ie=UTF8&'
-                          'location=http://www.amazon.it/dp/%(asin)s&tag=%(tag)s&'
-                          'linkCode=ur2&camp=3370&creative=23322')
-    search_url = 'http://www.amazon.it/s/?url=search-alias%3Ddigital-text&field-keywords='
-    author_article = 'di '
-    and_word = ' e '


@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-

 from __future__ import (unicode_literals, division, absolute_import, print_function)
-store_version = 2 # Needed for dynamic plugin loading
+store_version = 3 # Needed for dynamic plugin loading

 __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
@ -19,11 +19,28 @@ from calibre.gui2.store import StorePlugin
 from calibre.gui2.store.search_result import SearchResult

-# This class is copy/pasted from amason_uk_plugin. Do not modify it in any
-# other amazon EU plugin. Be sure to paste it into all other amazon EU plugins
-# when modified.
-class AmazonEUBase(StorePlugin):
+class AmazonUKKindleStore(StorePlugin):
+    aff_id = {'tag': 'calcharles-21'}
+    store_link = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&'
+                  'location=http://www.amazon.co.uk/Kindle-eBooks/b?'
+                  'ie=UTF8&node=341689031&ref_=sa_menu_kbo2&tag=%(tag)s&'
+                  'linkCode=ur2&camp=1634&creative=19450')
+    store_link_details = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&'
+                          'location=http://www.amazon.co.uk/dp/%(asin)s&tag=%(tag)s&'
+                          'linkCode=ur2&camp=1634&creative=6738')
+    search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords='
+    author_article = 'by '
+    and_word = ' and '
+
+    # This code is copy/pasted from from here to the other amazon EU. Do not
+    # modify it in any other amazon EU plugin. Be sure to paste it into all
+    # other amazon EU plugins when modified.
+
+    # ---- Copy from here to end
     '''
     For comments on the implementation, please see amazon_plugin.py
     '''
@ -107,18 +124,3 @@ class AmazonEUBase(StorePlugin):
     def get_details(self, search_result, timeout):
         pass
-
-class AmazonUKKindleStore(AmazonEUBase):
-    aff_id = {'tag': 'calcharles-21'}
-    store_link = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&'
-                  'location=http://www.amazon.co.uk/Kindle-eBooks/b?'
-                  'ie=UTF8&node=341689031&ref_=sa_menu_kbo2&tag=%(tag)s&'
-                  'linkCode=ur2&camp=1634&creative=19450')
-    store_link_details = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&'
-                          'location=http://www.amazon.co.uk/dp/%(asin)s&tag=%(tag)s&'
-                          'linkCode=ur2&camp=1634&creative=6738')
-    search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords='
-    author_article = 'by '
-    and_word = ' and '


@ -14,11 +14,11 @@ from functools import partial
from PyQt4.Qt import (QPushButton, QFrame, QVariant, QMenu, QInputDialog, from PyQt4.Qt import (QPushButton, QFrame, QVariant, QMenu, QInputDialog,
QDialog, QVBoxLayout, QDialogButtonBox, QSize, QStackedWidget, QWidget, QDialog, QVBoxLayout, QDialogButtonBox, QSize, QStackedWidget, QWidget,
QLabel, Qt, pyqtSignal, QIcon, QTreeWidget, QGridLayout, QTreeWidgetItem, QLabel, Qt, pyqtSignal, QIcon, QTreeWidget, QGridLayout, QTreeWidgetItem,
QToolButton, QItemSelectionModel) QToolButton, QItemSelectionModel, QCursor)
from calibre.ebooks.oeb.polish.container import get_container, AZW3Container from calibre.ebooks.oeb.polish.container import get_container, AZW3Container
from calibre.ebooks.oeb.polish.toc import ( from calibre.ebooks.oeb.polish.toc import (
get_toc, add_id, TOC, commit_toc, from_xpaths, from_links) get_toc, add_id, TOC, commit_toc, from_xpaths, from_links, from_files)
from calibre.gui2 import Application, error_dialog, gprefs from calibre.gui2 import Application, error_dialog, gprefs
from calibre.gui2.progress_indicator import ProgressIndicator from calibre.gui2.progress_indicator import ProgressIndicator
from calibre.gui2.toc.location import ItemEdit from calibre.gui2.toc.location import ItemEdit
@ -126,6 +126,7 @@ class ItemView(QFrame): # {{{
go_to_root = pyqtSignal() go_to_root = pyqtSignal()
create_from_xpath = pyqtSignal(object) create_from_xpath = pyqtSignal(object)
create_from_links = pyqtSignal() create_from_links = pyqtSignal()
create_from_files = pyqtSignal()
flatten_toc = pyqtSignal() flatten_toc = pyqtSignal()
def __init__(self, parent): def __init__(self, parent):
@ -183,6 +184,15 @@ class ItemView(QFrame): # {{{
))) )))
l.addWidget(b) l.addWidget(b)
self.cfb = b = QPushButton(_('Generate ToC from &files'))
b.clicked.connect(self.create_from_files)
b.setToolTip(textwrap.fill(_(
'Generate a Table of Contents from individual files in the book.'
' Each entry in the ToC will point to the start of the file, the'
' text of the entry will be the "first line" of text from the file.'
)))
l.addWidget(b)
self.xpb = b = QPushButton(_('Generate ToC from &XPath')) self.xpb = b = QPushButton(_('Generate ToC from &XPath'))
b.clicked.connect(self.create_from_user_xpath) b.clicked.connect(self.create_from_user_xpath)
b.setToolTip(textwrap.fill(_( b.setToolTip(textwrap.fill(_(
@ -190,7 +200,7 @@ class ItemView(QFrame): # {{{
))) )))
l.addWidget(b) l.addWidget(b)
self.fal = b = QPushButton(_('Flatten the ToC')) self.fal = b = QPushButton(_('&Flatten the ToC'))
b.clicked.connect(self.flatten_toc) b.clicked.connect(self.flatten_toc)
b.setToolTip(textwrap.fill(_( b.setToolTip(textwrap.fill(_(
'Flatten the Table of Contents, putting all entries at the top level' 'Flatten the Table of Contents, putting all entries at the top level'
@ -339,7 +349,7 @@ class ItemView(QFrame): # {{{
# }}} # }}}
class TreeWidget(QTreeWidget): class TreeWidget(QTreeWidget): # {{{
def __init__(self, parent): def __init__(self, parent):
QTreeWidget.__init__(self, parent) QTreeWidget.__init__(self, parent)
@ -357,6 +367,9 @@ class TreeWidget(QTreeWidget):
self.setAnimated(True) self.setAnimated(True)
self.setMouseTracking(True) self.setMouseTracking(True)
self.in_drop_event = False self.in_drop_event = False
self.root = self.invisibleRootItem()
self.setContextMenuPolicy(Qt.CustomContextMenu)
self.customContextMenuRequested.connect(self.show_context_menu)
def iteritems(self, parent=None): def iteritems(self, parent=None):
if parent is None: if parent is None:
@ -384,6 +397,137 @@ class TreeWidget(QTreeWidget):
ans = sorted(ans, key=lambda x:sort_map.get(x, -1), reverse=True) ans = sorted(ans, key=lambda x:sort_map.get(x, -1), reverse=True)
return ans return ans
def highlight_item(self, item):
self.setCurrentItem(item, 0, QItemSelectionModel.ClearAndSelect)
self.scrollToItem(item)
def move_left(self):
item = self.currentItem()
if item is not None:
parent = item.parent()
if parent is not None:
is_expanded = item.isExpanded() or item.childCount() == 0
gp = parent.parent() or self.invisibleRootItem()
idx = gp.indexOfChild(parent)
for gc in [parent.child(i) for i in xrange(parent.indexOfChild(item)+1, parent.childCount())]:
parent.removeChild(gc)
item.addChild(gc)
parent.removeChild(item)
gp.insertChild(idx+1, item)
if is_expanded:
self.expandItem(item)
self.highlight_item(item)
def move_right(self):
item = self.currentItem()
if item is not None:
parent = item.parent() or self.invisibleRootItem()
idx = parent.indexOfChild(item)
if idx > 0:
is_expanded = item.isExpanded()
np = parent.child(idx-1)
parent.removeChild(item)
np.addChild(item)
if is_expanded:
self.expandItem(item)
self.highlight_item(item)
def move_down(self):
item = self.currentItem()
if item is None:
if self.root.childCount() == 0:
return
item = self.root.child(0)
self.highlight_item(item)
return
parent = item.parent() or self.root
idx = parent.indexOfChild(item)
if idx == parent.childCount() - 1:
# At end of parent, need to become sibling of parent
if parent is self.root:
return
gp = parent.parent() or self.root
parent.removeChild(item)
gp.insertChild(gp.indexOfChild(parent)+1, item)
else:
sibling = parent.child(idx+1)
parent.removeChild(item)
sibling.insertChild(0, item)
self.highlight_item(item)
def move_up(self):
item = self.currentItem()
if item is None:
if self.root.childCount() == 0:
return
item = self.root.child(self.root.childCount()-1)
self.highlight_item(item)
return
parent = item.parent() or self.root
idx = parent.indexOfChild(item)
if idx == 0:
# At end of parent, need to become sibling of parent
if parent is self.root:
return
gp = parent.parent() or self.root
parent.removeChild(item)
gp.insertChild(gp.indexOfChild(parent), item)
else:
sibling = parent.child(idx-1)
parent.removeChild(item)
sibling.addChild(item)
self.highlight_item(item)
def del_items(self):
for item in self.selectedItems():
p = item.parent() or self.root
p.removeChild(item)
def title_case(self):
from calibre.utils.titlecase import titlecase
for item in self.selectedItems():
t = unicode(item.data(0, Qt.DisplayRole).toString())
item.setData(0, Qt.DisplayRole, titlecase(t))
def keyPressEvent(self, ev):
if ev.key() == Qt.Key_Left and ev.modifiers() & Qt.CTRL:
self.move_left()
ev.accept()
elif ev.key() == Qt.Key_Right and ev.modifiers() & Qt.CTRL:
self.move_right()
ev.accept()
elif ev.key() == Qt.Key_Up and ev.modifiers() & Qt.CTRL:
self.move_up()
ev.accept()
elif ev.key() == Qt.Key_Down and ev.modifiers() & Qt.CTRL:
self.move_down()
ev.accept()
elif ev.key() in (Qt.Key_Delete, Qt.Key_Backspace):
self.del_items()
ev.accept()
else:
return super(TreeWidget, self).keyPressEvent(ev)
+    def show_context_menu(self, point):
+        item = self.currentItem()
+        if item is not None:
+            m = QMenu()
+            ci = unicode(item.data(0, Qt.DisplayRole).toString())
+            p = item.parent() or self.invisibleRootItem()
+            idx = p.indexOfChild(item)
+            if idx > 0:
+                m.addAction(QIcon(I('arrow-up.png')), _('Move "%s" up')%ci, self.move_up)
+            if idx + 1 < p.childCount():
+                m.addAction(QIcon(I('arrow-down.png')), _('Move "%s" down')%ci, self.move_down)
+            m.addAction(QIcon(I('trash.png')), _('Remove all selected items'), self.del_items)
+            if item.parent() is not None:
+                m.addAction(QIcon(I('back.png')), _('Unindent "%s"')%ci, self.move_left)
+            if idx > 0:
+                m.addAction(QIcon(I('forward.png')), _('Indent "%s"')%ci, self.move_right)
+            m.addAction(_('Change all selected items to title case'), self.title_case)
+            m.exec_(QCursor.pos())
+
 # }}}
 
 class TOCView(QWidget): # {{{
 
     add_new_item = pyqtSignal(object, object)
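The move_left re-parenting above is the subtle part of the new keyboard handling: when an entry is unindented, the siblings that followed it are first transferred to it as children, so the visible document order is preserved, and only then is the entry re-inserted one level up, directly after its old parent. A pure-Python sketch of that transfer for the top-level case, with a plain Node class standing in for QTreeWidgetItem (an assumption for illustration, not calibre code):

    class Node(object):
        def __init__(self, name, children=None):
            self.name, self.children = name, children or []

    def move_left(grandparent_children, parent, item):
        i = parent.children.index(item)
        # Trailing siblings keep their document position by becoming
        # children of the moved item, as in TreeWidget.move_left above.
        item.children.extend(parent.children[i+1:])
        del parent.children[i:]
        # Re-insert the item one level up, just after its old parent.
        grandparent_children.insert(grandparent_children.index(parent) + 1, item)

    a, b, c = Node('a'), Node('b'), Node('c')
    top = Node('top', [a, b, c])
    root = [top]
    move_left(root, top, b)
    print([n.name for n in root])        # ['top', 'b']
    print([n.name for n in b.children])  # ['c']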
@@ -393,27 +537,43 @@ class TOCView(QWidget): # {{{
         l = self.l = QGridLayout()
         self.setLayout(l)
         self.tocw = t = TreeWidget(self)
-        l.addWidget(t, 0, 0, 5, 3)
+        l.addWidget(t, 0, 0, 7, 3)
         self.up_button = b = QToolButton(self)
         b.setIcon(QIcon(I('arrow-up.png')))
         b.setIconSize(QSize(ICON_SIZE, ICON_SIZE))
         l.addWidget(b, 0, 3)
-        b.setToolTip(_('Move current entry up'))
+        b.setToolTip(_('Move current entry up [Ctrl+Up]'))
         b.clicked.connect(self.move_up)
+        self.left_button = b = QToolButton(self)
+        b.setIcon(QIcon(I('back.png')))
+        b.setIconSize(QSize(ICON_SIZE, ICON_SIZE))
+        l.addWidget(b, 2, 3)
+        b.setToolTip(_('Unindent the current entry [Ctrl+Left]'))
+        b.clicked.connect(self.tocw.move_left)
         self.del_button = b = QToolButton(self)
         b.setIcon(QIcon(I('trash.png')))
         b.setIconSize(QSize(ICON_SIZE, ICON_SIZE))
-        l.addWidget(b, 2, 3)
+        l.addWidget(b, 3, 3)
         b.setToolTip(_('Remove all selected entries'))
         b.clicked.connect(self.del_items)
+        self.right_button = b = QToolButton(self)
+        b.setIcon(QIcon(I('forward.png')))
+        b.setIconSize(QSize(ICON_SIZE, ICON_SIZE))
+        l.addWidget(b, 4, 3)
+        b.setToolTip(_('Indent the current entry [Ctrl+Right]'))
+        b.clicked.connect(self.tocw.move_right)
         self.down_button = b = QToolButton(self)
         b.setIcon(QIcon(I('arrow-down.png')))
         b.setIconSize(QSize(ICON_SIZE, ICON_SIZE))
-        l.addWidget(b, 4, 3)
-        b.setToolTip(_('Move current entry down'))
+        l.addWidget(b, 6, 3)
+        b.setToolTip(_('Move current entry down [Ctrl+Down]'))
         b.clicked.connect(self.move_down)
         self.expand_all_button = b = QPushButton(_('&Expand all'))
-        col = 5
+        col = 7
         l.addWidget(b, col, 0)
         b.clicked.connect(self.tocw.expandAll)
         self.collapse_all_button = b = QPushButton(_('&Collapse all'))
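For reference, the grid arithmetic in the hunk above follows QGridLayout.addWidget(widget, row, column, rowSpan, columnSpan): the tree now spans rows 0-6 of columns 0-2, so the five tool buttons (up, unindent, delete, indent, down) fit in column 3 at rows 0, 2, 3, 4 and 6, and the Expand/Collapse row moves down to row 7 (named col in the source). A minimal PyQt4-style sketch of that geometry, with placeholder widgets rather than calibre's:

    from PyQt4.Qt import QApplication, QGridLayout, QLabel, QToolButton, QWidget

    app = QApplication([])
    w = QWidget()
    l = QGridLayout(w)
    # addWidget(widget, row, column, rowSpan, columnSpan): the tree occupies
    # rows 0-6 of columns 0-2, leaving column 3 free for the tool buttons.
    l.addWidget(QLabel('tree'), 0, 0, 7, 3)
    for row in (0, 2, 3, 4, 6):  # up, unindent, delete, indent, down
        l.addWidget(QToolButton(w), row, 3)
    l.addWidget(QLabel('Expand all'), 7, 0)  # 'col = 7' in the real code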
@@ -427,6 +587,7 @@ class TOCView(QWidget): # {{{
         i.add_new_item.connect(self.add_new_item)
         i.create_from_xpath.connect(self.create_from_xpath)
         i.create_from_links.connect(self.create_from_links)
+        i.create_from_files.connect(self.create_from_files)
         i.flatten_item.connect(self.flatten_item)
         i.flatten_toc.connect(self.flatten_toc)
         i.go_to_root.connect(self.go_to_root)
@ -444,9 +605,7 @@ class TOCView(QWidget): # {{{
return unicode(item.data(0, Qt.DisplayRole).toString()) return unicode(item.data(0, Qt.DisplayRole).toString())
def del_items(self): def del_items(self):
for item in self.tocw.selectedItems(): self.tocw.del_items()
p = item.parent() or self.root
p.removeChild(item)
def delete_current_item(self): def delete_current_item(self):
item = self.tocw.currentItem() item = self.tocw.currentItem()
@@ -484,54 +643,13 @@ class TOCView(QWidget): # {{{
         self.tocw.setCurrentItem(None)
 
     def highlight_item(self, item):
-        self.tocw.setCurrentItem(item, 0, QItemSelectionModel.ClearAndSelect)
-        self.tocw.scrollToItem(item)
-
-    def move_down(self):
-        item = self.tocw.currentItem()
-        if item is None:
-            if self.root.childCount() == 0:
-                return
-            item = self.root.child(0)
-            self.highlight_item(item)
-            return
-        parent = item.parent() or self.root
-        idx = parent.indexOfChild(item)
-        if idx == parent.childCount() - 1:
-            # At end of parent, need to become sibling of parent
-            if parent is self.root:
-                return
-            gp = parent.parent() or self.root
-            parent.removeChild(item)
-            gp.insertChild(gp.indexOfChild(parent)+1, item)
-        else:
-            sibling = parent.child(idx+1)
-            parent.removeChild(item)
-            sibling.insertChild(0, item)
-        self.highlight_item(item)
+        self.tocw.highlight_item(item)
 
     def move_up(self):
-        item = self.tocw.currentItem()
-        if item is None:
-            if self.root.childCount() == 0:
-                return
-            item = self.root.child(self.root.childCount()-1)
-            self.highlight_item(item)
-            return
-        parent = item.parent() or self.root
-        idx = parent.indexOfChild(item)
-        if idx == 0:
-            # At end of parent, need to become sibling of parent
-            if parent is self.root:
-                return
-            gp = parent.parent() or self.root
-            parent.removeChild(item)
-            gp.insertChild(gp.indexOfChild(parent), item)
-        else:
-            sibling = parent.child(idx-1)
-            parent.removeChild(item)
-            sibling.addChild(item)
-        self.highlight_item(item)
+        self.tocw.move_up()
+
+    def move_down(self):
+        self.tocw.move_down()
 
     def update_status_tip(self, item):
         c = item.data(0, Qt.UserRole).toPyObject()
@@ -671,6 +789,14 @@ class TOCView(QWidget): # {{{
             _('No links were found that could be added to the Table of Contents.'), show=True)
         self.insert_toc_fragment(toc)
 
+    def create_from_files(self):
+        toc = from_files(self.ebook)
+        if len(toc) == 0:
+            return error_dialog(self, _('No items found'),
+                _('No files were found that could be added to the Table of Contents.'), show=True)
+        self.insert_toc_fragment(toc)
+
 # }}}
 
 class TOCEditor(QDialog): # {{{
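The new create_from_files action delegates to a from_files() helper (imported elsewhere in this file) that builds one top-level TOC entry per content file of the book, which the len() check above then tests for emptiness. A minimal self-contained sketch of that idea, where TOCNode and toc_from_files() are assumptions standing in for calibre's TOC container and helper:

    import os

    class TOCNode(object):
        # Stand-in for calibre's TOC container (assumption for illustration).
        def __init__(self, title=None, dest=None):
            self.title, self.dest, self.children = title, dest, []

        def add(self, title, dest):
            node = TOCNode(title, dest)
            self.children.append(node)
            return node

        def __len__(self):
            return len(self.children)

    def toc_from_files(spine_paths):
        # One top-level entry per content file, titled by file name.
        root = TOCNode()
        for path in spine_paths:
            root.add(os.path.splitext(os.path.basename(path))[0], path)
        return root

    toc = toc_from_files(['cover.xhtml', 'chapter01.xhtml', 'chapter02.xhtml'])
    print(len(toc))                         # 3
    print([c.title for c in toc.children])  # ['cover', 'chapter01', 'chapter02']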
View File
@@ -22507,7 +22507,7 @@ msgstr "Autoren beginnend mit '%s'"
 #: /home/kovid/work/calibre/src/calibre/library/catalogs/epub_mobi_builder.py:3477
 #, python-format
 msgid "Authors beginning with '%s'"
-msgstr "Autoren beginnen mit mit %s"
+msgstr "Autoren beginnen mit %s"
 
 #: /home/kovid/work/calibre/src/calibre/library/catalogs/epub_mobi_builder.py:3518
 msgid "NCX for Recently Added"