Merge from trunk

This commit is contained in:
Charles Haley 2013-04-06 14:51:29 +02:00
commit 2f927765a5
141 changed files with 25241 additions and 17757 deletions


@@ -40,6 +40,7 @@ recipes/.gitignore
recipes/README.md
recipes/icon_checker.py
recipes/readme_updater.py
recipes/garfield.recipe
recipes/katalog_egazeciarz.recipe
recipes/tv_axnscifi.recipe
recipes/tv_comedycentral.recipe
@@ -63,6 +64,7 @@ recipes/tv_tvppolonia.recipe
recipes/tv_tvpuls.recipe
recipes/tv_viasathistory.recipe
recipes/icons/katalog_egazeciarz.png
recipes/icons/garfield.png
recipes/icons/tv_axnscifi.png
recipes/icons/tv_comedycentral.png
recipes/icons/tv_discoveryscience.png


@@ -20,6 +20,58 @@
# new recipes:
#   - title:

- version: 0.9.26
  date: 2013-04-05

  new features:
    - title: "PDF Output: Allow using templates to create arbitrary headers and footers. Look under PDF Output in the conversion dialog for this feature."
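    # an illustrative footer template is sketched just after this changelog excerpt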
    - title: "ToC Editor: Allow generating the ToC directly from individual files inside the ebook. Useful for EPUBs that have individual chapters in single files."
      tickets: [1163520]

    - title: "ToC Editor: Add buttons to indent/unindent the current entry"

    - title: "ToC Editor: Right-click menu to perform various useful actions on entries in the ToC"

    - title: "Column icons: Allow use of wide images as column icons"

    - title: "Add USB ids for the Palm Pre2 and Samsung Galaxy phone to the device drivers"
      tickets: [1162293,1163115]

  bug fixes:
    - title: "PDF Output: Fix generating page numbers causing links to stop working."
      tickets: [1162573]

    - title: "Fix wrong filename output in the error message when 'Guide reference not found'"
      tickets: [1163659]

    - title: "Get Books: Update Amazon, Barnes & Noble, Waterstones and Gutenberg store plugins for website changes"

    - title: "PDF Output: Fix 1 pixel wide left and top margins on the cover page for some PDF conversions, caused by incorrect rounding."
      tickets: [1162054]

    - title: "ToC Editor: Fix drag and drop of multiple items sometimes resulting in the dropped items being in random order."
      tickets: [1161999]

  improved recipes:
    - Financial Times UK
    - Sing Tao Daily
    - Apple Daily
    - A List Apart
    - Business Week
    - Harpers printed edition
    - Harvard Business Review

  new recipes:
    - title: AM730
      author: Eddie Lau

    - title: Arret sur images
      author: Francois D

    - title: Diario de Noticias
      author: Jose Pinto

- version: 0.9.25
  date: 2013-03-29
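To make the header/footer template feature above concrete, here is a minimal sketch of a footer template. It assumes the _TITLE_, _SECTION_ and _PAGENUM_ placeholder names that calibre's PDF Output documentation describes; the fragment is illustrative, not taken from this commit:

    <footer style="text-align: center; font-size: smaller">
        _TITLE_: _SECTION_, page _PAGENUM_   <!-- placeholders replaced on each page -->
    </footer>

Pasted into the footer template box under PDF Output in the conversion dialog, a fragment like this should print the book title, the current section and the page number at the foot of every page.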

recipes/am730.recipe (new file, 290 lines)

@@ -0,0 +1,290 @@
# vim:fileencoding=UTF-8
from __future__ import unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2013, Eddie Lau'
__Date__ = ''
__HiResImg__ = True

'''
Change Log:
2013/03/30 -- first version
'''

from calibre import (__appname__, force_unicode, strftime)
from calibre.utils.date import now as nowf
import os, datetime, re
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
from calibre.utils.localization import canonicalize_lang


class AppleDaily(BasicNewsRecipe):
    title = u'AM730'
    __author__ = 'Eddie Lau'
    publisher = 'AM730'
    oldest_article = 1
    max_articles_per_feed = 100
    auto_cleanup = False
    language = 'zh'
    encoding = 'utf-8'
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    description = 'http://www.am730.com.hk'
    category = 'Chinese, News, Hong Kong'
    masthead_url = 'http://www.am730.com.hk/images/logo.jpg'
    extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} div[id=articleHeader] {font-size:200%; text-align:left; font-weight:bold;} photocaption {font-size:50%; margin-left:auto; margin-right:auto;}'
    keep_only_tags = [dict(name='div', attrs={'id':'articleHeader'}),
                      dict(name='div', attrs={'class':'thecontent wordsnap'}),
                      dict(name='a', attrs={'class':'lightboximg'})]
    remove_tags = [dict(name='img', attrs={'src':'/images/am730_article_logo.jpg'}),
                   dict(name='img', attrs={'src':'/images/am_endmark.gif'})]

    def get_dtlocal(self):
        dt_utc = datetime.datetime.utcnow()
        # convert UTC to local hk time - at HKT 6am, all news are available
        return dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(6.0/24)

    def get_fetchdate(self):
        if __Date__ != '':
            return __Date__
        else:
            return self.get_dtlocal().strftime("%Y%m%d")

    def get_fetchformatteddate(self):
        if __Date__ != '':
            return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8]
        else:
            return self.get_dtlocal().strftime("%Y-%m-%d")

    def get_fetchyear(self):
        if __Date__ != '':
            return __Date__[0:4]
        else:
            return self.get_dtlocal().strftime("%Y")

    def get_fetchmonth(self):
        if __Date__ != '':
            return __Date__[4:6]
        else:
            return self.get_dtlocal().strftime("%m")

    def get_fetchday(self):
        if __Date__ != '':
            return __Date__[6:8]
        else:
            return self.get_dtlocal().strftime("%d")

    # Note: does not work with custom date given by __Date__
    def get_weekday(self):
        return self.get_dtlocal().weekday()

    def populate_article_metadata(self, article, soup, first):
        if first and hasattr(self, 'add_toc_thumbnail'):
            picdiv = soup.find('img')
            if picdiv is not None:
                self.add_toc_thumbnail(article, picdiv['src'])

    def parse_index(self):
        feeds = []
        soup = self.index_to_soup('http://www.am730.com.hk/')
        ul = soup.find(attrs={'class':'nav-section'})
        sectionList = []
        for li in ul.findAll('li'):
            a = 'http://www.am730.com.hk/' + li.find('a', href=True).get('href', False)
            title = li.find('a').get('title', False).strip()
            sectionList.append((title, a))
        for title, url in sectionList:
            articles = self.parse_section(url)
            if articles:
                feeds.append((title, articles))
        return feeds

    def parse_section(self, url):
        soup = self.index_to_soup(url)
        items = soup.findAll(attrs={'style':'padding-bottom: 15px;'})
        current_articles = []
        for item in items:
            a = item.find(attrs={'class':'t6 f14'}).find('a', href=True)
            articlelink = 'http://www.am730.com.hk/' + a.get('href', True)
            title = self.tag_to_string(a)
            description = self.tag_to_string(item.find(attrs={'class':'t3 f14'}))
            current_articles.append({'title': title, 'url': articlelink, 'description': description})
        return current_articles

    def preprocess_html(self, soup):
        multia = soup.findAll('a')
        for a in multia:
            if a is not None:
                image = a.find('img')
                if image is not None:
                    if __HiResImg__:
                        # replace thumbnail URLs with full-resolution ones
                        image['src'] = image.get('src').replace('/thumbs/', '/')
                    caption = image.get('alt')
                    tag = Tag(soup, "photo", [])
                    tag2 = Tag(soup, "photocaption", [])
                    tag.insert(0, image)
                    if caption is not None:
                        tag2.insert(0, caption)
                    tag.insert(1, tag2)
                    a.replaceWith(tag)
        return soup

    def create_opf(self, feeds, dir=None):
        if dir is None:
            dir = self.output_dir
        title = self.short_title()
        if self.output_profile.periodical_date_in_title:
            title += strftime(self.timefmt)
        mi = MetaInformation(title, [__appname__])
        mi.publisher = __appname__
        mi.author_sort = __appname__
        if self.publication_type:
            mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
        mi.timestamp = nowf()
        article_titles, aseen = [], set()
        for f in feeds:
            for a in f:
                if a.title and a.title not in aseen:
                    aseen.add(a.title)
                    article_titles.append(force_unicode(a.title, 'utf-8'))
        mi.comments = self.description
        if not isinstance(mi.comments, unicode):
            mi.comments = mi.comments.decode('utf-8', 'replace')
        mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' +
                        '\n\n'.join(article_titles))
        language = canonicalize_lang(self.language)
        if language is not None:
            mi.language = language
        # This one affects the pub date shown in kindle title
        #mi.pubdate = nowf()
        # now appears to need the time field to be > 12.00noon as well
        mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
        opf_path = os.path.join(dir, 'index.opf')
        ncx_path = os.path.join(dir, 'index.ncx')
        opf = OPFCreator(dir, mi)
        # Add mastheadImage entry to <guide> section
        mp = getattr(self, 'masthead_path', None)
        if mp is not None and os.access(mp, os.R_OK):
            from calibre.ebooks.metadata.opf2 import Guide
            ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
            ref.type = 'masthead'
            ref.title = 'Masthead Image'
            opf.guide.append(ref)
        manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
        manifest.append(os.path.join(dir, 'index.html'))
        manifest.append(os.path.join(dir, 'index.ncx'))
        # Get cover
        cpath = getattr(self, 'cover_path', None)
        if cpath is None:
            pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
            if self.default_cover(pf):
                cpath = pf.name
        if cpath is not None and os.access(cpath, os.R_OK):
            opf.cover = cpath
            manifest.append(cpath)
        # Get masthead
        mpath = getattr(self, 'masthead_path', None)
        if mpath is not None and os.access(mpath, os.R_OK):
            manifest.append(mpath)
        opf.create_manifest_from_files_in(manifest)
        for mani in opf.manifest:
            if mani.path.endswith('.ncx'):
                mani.id = 'ncx'
            if mani.path.endswith('mastheadImage.jpg'):
                mani.id = 'masthead-image'
        entries = ['index.html']
        toc = TOC(base_path=dir)
        self.play_order_counter = 0
        self.play_order_map = {}

        def feed_index(num, parent):
            f = feeds[num]
            for j, a in enumerate(f):
                if getattr(a, 'downloaded', False):
                    adir = 'feed_%d/article_%d/'%(num, j)
                    auth = a.author
                    if not auth:
                        auth = None
                    desc = a.text_summary
                    if not desc:
                        desc = None
                    else:
                        desc = self.description_limiter(desc)
                    tt = a.toc_thumbnail if a.toc_thumbnail else None
                    entries.append('%sindex.html'%adir)
                    po = self.play_order_map.get(entries[-1], None)
                    if po is None:
                        self.play_order_counter += 1
                        po = self.play_order_counter
                    parent.add_item('%sindex.html'%adir, None,
                            a.title if a.title else _('Untitled Article'),
                            play_order=po, author=auth,
                            description=desc, toc_thumbnail=tt)
                    last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
                    for sp in a.sub_pages:
                        prefix = os.path.commonprefix([opf_path, sp])
                        relp = sp[len(prefix):]
                        entries.append(relp.replace(os.sep, '/'))
                        last = sp
                    if os.path.exists(last):
                        with open(last, 'rb') as fi:
                            src = fi.read().decode('utf-8')
                        soup = BeautifulSoup(src)
                        body = soup.find('body')
                        if body is not None:
                            prefix = '/'.join('..' for i in range(2*len(re.findall(r'link\d+', last))))
                            templ = self.navbar.generate(True, num, j, len(f),
                                            not self.has_single_feed,
                                            a.orig_url, __appname__, prefix=prefix,
                                            center=self.center_navbar)
                            elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
                            body.insert(len(body.contents), elem)
                            with open(last, 'wb') as fi:
                                fi.write(unicode(soup).encode('utf-8'))

        if len(feeds) == 0:
            raise Exception('All feeds are empty, aborting.')
        if len(feeds) > 1:
            for i, f in enumerate(feeds):
                entries.append('feed_%d/index.html'%i)
                po = self.play_order_map.get(entries[-1], None)
                if po is None:
                    self.play_order_counter += 1
                    po = self.play_order_counter
                auth = getattr(f, 'author', None)
                if not auth:
                    auth = None
                desc = getattr(f, 'description', None)
                if not desc:
                    desc = None
                feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
                        f.title, play_order=po, description=desc, author=auth))
        else:
            entries.append('feed_%d/index.html'%0)
            feed_index(0, toc)
        for i, p in enumerate(entries):
            entries[i] = os.path.join(dir, p.replace('/', os.sep))
        opf.create_spine(entries)
        opf.set_toc(toc)
        with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
            opf.render(opf_file, ncx_file)
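The fetch-date arithmetic in get_dtlocal() above deserves a note: adding 8/24 of a day converts UTC to Hong Kong time, and subtracting a further 6/24 makes the computed publication day roll over at 6am HKT, the point at which the day's news is fully available. A minimal check of the arithmetic in plain Python (no calibre required):

    import datetime

    # 23:30 UTC on 2013-03-29 is 07:30 HKT on 2013-03-30, past the 6am cutoff,
    # so the recipe should report 2013-03-30 as the fetch date
    dt_utc = datetime.datetime(2013, 3, 29, 23, 30)
    dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(6.0/24)
    print(dt_local.strftime('%Y%m%d'))  # 20130330

Setting __Date__ at the top of the recipe to a string such as '20130330' bypasses this computation and fetches that day's edition instead; as the comment above get_weekday() notes, that helper does not honour the override.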


@@ -1,161 +1,275 @@
# -*- coding: utf-8 -*-
import re
# vim:fileencoding=UTF-8
from __future__ import unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2013, Eddie Lau'
__Date__ = ''
from calibre import (__appname__, force_unicode, strftime)
from calibre.utils.date import now as nowf
import os, datetime, re
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
from calibre.utils.localization import canonicalize_lang
class AppleDaily(BasicNewsRecipe):
title = u'蘋果日報'
__author__ = u'蘋果日報'
__publisher__ = u'蘋果日報'
description = u'蘋果日報'
masthead_url = 'http://hk.apple.nextmedia.com/template/common/header/2009/images/atnextheader_logo_appledaily.gif'
language = 'zh_TW'
encoding = 'UTF-8'
timefmt = ' [%a, %d %b, %Y]'
needs_subscription = False
title = u'蘋果日報 (香港)'
__author__ = 'Eddie Lau'
publisher = '蘋果日報'
oldest_article = 1
max_articles_per_feed = 100
auto_cleanup = False
language = 'zh'
encoding = 'utf-8'
auto_cleanup = False
remove_javascript = True
remove_tags_before = dict(name=['ul', 'h1'])
remove_tags_after = dict(name='form')
remove_tags = [dict(attrs={'class':['articleTools', 'post-tools', 'side_tool', 'nextArticleLink clearfix']}),
dict(id=['footer', 'toolsRight', 'articleInline', 'navigation', 'archive', 'side_search', 'blog_sidebar', 'side_tool', 'side_index']),
dict(name=['script', 'noscript', 'style', 'form'])]
use_embedded_content = False
no_stylesheets = True
extra_css = '''
@font-face {font-family: "uming", serif, sans-serif; src: url(res:///usr/share/fonts/truetype/arphic/uming.ttc); }\n
body {margin-right: 8pt; font-family: 'uming', serif;}
h1 {font-family: 'uming', serif, sans-serif}
'''
#extra_css = 'h1 {font: sans-serif large;}\n.byline {font:monospace;}'
description = 'http://hkm.appledaily.com/'
category = 'Chinese, News, Hong Kong'
masthead_url = 'http://upload.wikimedia.org/wikipedia/zh/c/cf/AppleDailyLogo1.png'
preprocess_regexps = [
(re.compile(r'img.php?server=(?P<server>[^&]+)&path=(?P<path>[^&]+).*', re.DOTALL|re.IGNORECASE),
lambda match: 'http://' + match.group('server') + '/' + match.group('path')),
]
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} h1 {font-size:200%; text-align:left; font-weight:bold;} p[class=video-caption] {font-size:50%; margin-left:auto; margin-right:auto;}'
keep_only_tags = [dict(name='div', attrs={'id':'content-article'})]
remove_tags = [dict(name='div', attrs={'class':'prev-next-btn'}),
dict(name='p', attrs={'class':'next'})]
def get_dtlocal(self):
dt_utc = datetime.datetime.utcnow()
# convert UTC to local hk time - at HKT 6am, all news are available
return dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(6.0/24)
def get_fetchdate(self):
if __Date__ != '':
return __Date__
else:
return self.get_dtlocal().strftime("%Y%m%d")
def get_fetchformatteddate(self):
if __Date__ != '':
return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8]
else:
return self.get_dtlocal().strftime("%Y-%m-%d")
def get_fetchyear(self):
if __Date__ != '':
return __Date__[0:4]
else:
return self.get_dtlocal().strftime("%Y")
def get_fetchmonth(self):
if __Date__ != '':
return __Date__[4:6]
else:
return self.get_dtlocal().strftime("%m")
def get_fetchday(self):
if __Date__ != '':
return __Date__[6:8]
else:
return self.get_dtlocal().strftime("%d")
# Note: does not work with custom date given by __Date__
def get_weekday(self):
return self.get_dtlocal().weekday()
def get_cover_url(self):
return 'http://hk.apple.nextmedia.com/template/common/header/2009/images/atnextheader_logo_appledaily.gif'
#def get_browser(self):
#br = BasicNewsRecipe.get_browser(self)
#if self.username is not None and self.password is not None:
# br.open('http://www.nytimes.com/auth/login')
# br.select_form(name='login')
# br['USERID'] = self.username
# br['PASSWORD'] = self.password
# br.submit()
#return br
def preprocess_html(self, soup):
#process all the images
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
iurl = tag['src']
#print 'checking image: ' + iurl
#img\.php?server\=(?P<server>[^&]+)&path=(?P<path>[^&]+)
p = re.compile(r'img\.php\?server=(?P<server>[^&]+)&path=(?P<path>[^&]+)', re.DOTALL|re.IGNORECASE)
m = p.search(iurl)
if m is not None:
iurl = 'http://' + m.group('server') + '/' + m.group('path')
#print 'working! new url: ' + iurl
tag['src'] = iurl
#else:
#print 'not good'
for tag in soup.findAll(lambda tag: tag.name.lower()=='a' and tag.has_key('href')):
iurl = tag['href']
#print 'checking image: ' + iurl
#img\.php?server\=(?P<server>[^&]+)&path=(?P<path>[^&]+)
p = re.compile(r'img\.php\?server=(?P<server>[^&]+)&path=(?P<path>[^&]+)', re.DOTALL|re.IGNORECASE)
m = p.search(iurl)
if m is not None:
iurl = 'http://' + m.group('server') + '/' + m.group('path')
#print 'working! new url: ' + iurl
tag['href'] = iurl
#else:
#print 'not good'
return soup
soup = self.index_to_soup('http://hkm.appledaily.com/')
cover = soup.find(attrs={'class':'top-news'}).get('src', False)
br = BasicNewsRecipe.get_browser(self)
try:
br.open(cover)
except:
cover = None
return cover
def populate_article_metadata(self, article, soup, first):
if first and hasattr(self, 'add_toc_thumbnail'):
picdiv = soup.find('img')
if picdiv is not None:
self.add_toc_thumbnail(article,picdiv['src'])
def parse_index(self):
base = 'http://news.hotpot.hk/fruit'
soup = self.index_to_soup('http://news.hotpot.hk/fruit/index.php')
feeds = []
soup = self.index_to_soup('http://hkm.appledaily.com/')
ul = soup.find(attrs={'class':'menu'})
sectionList = []
for li in ul.findAll('li'):
a = 'http://hkm.appledaily.com/' + li.find('a', href=True).get('href', False)
title = li.find('a', text=True).strip()
if title != u'動新聞':
sectionList.append((title, a))
for title, url in sectionList:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
return feeds
#def feed_title(div):
# return ''.join(div.findAll(text=True, recursive=False)).strip()
def parse_section(self, url):
soup = self.index_to_soup(url)
ul = soup.find(attrs={'class':'list'})
current_articles = []
for li in ul.findAll('li'):
a = li.find('a', href=True)
title = li.find('p', text=True).strip()
if a is not None:
current_articles.append({'title': title, 'url':'http://hkm.appledaily.com/' + a.get('href', False)})
pass
return current_articles
articles = {}
key = None
ans = []
for div in soup.findAll('li'):
key = div.find(text=True, recursive=True);
#if key == u'豪情':
# continue;
def create_opf(self, feeds, dir=None):
if dir is None:
dir = self.output_dir
title = self.short_title()
if self.output_profile.periodical_date_in_title:
title += strftime(self.timefmt)
mi = MetaInformation(title, [__appname__])
mi.publisher = __appname__
mi.author_sort = __appname__
if self.publication_type:
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
mi.timestamp = nowf()
article_titles, aseen = [], set()
for f in feeds:
for a in f:
if a.title and a.title not in aseen:
aseen.add(a.title)
article_titles.append(force_unicode(a.title, 'utf-8'))
print 'section=' + key
mi.comments = self.description
if not isinstance(mi.comments, unicode):
mi.comments = mi.comments.decode('utf-8', 'replace')
mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' +
'\n\n'.join(article_titles))
articles[key] = []
language = canonicalize_lang(self.language)
if language is not None:
mi.language = language
# This one affects the pub date shown in kindle title
#mi.pubdate = nowf()
# now appears to need the time field to be > 12.00noon as well
mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
opf_path = os.path.join(dir, 'index.opf')
ncx_path = os.path.join(dir, 'index.ncx')
ans.append(key)
opf = OPFCreator(dir, mi)
# Add mastheadImage entry to <guide> section
mp = getattr(self, 'masthead_path', None)
if mp is not None and os.access(mp, os.R_OK):
from calibre.ebooks.metadata.opf2 import Guide
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
ref.type = 'masthead'
ref.title = 'Masthead Image'
opf.guide.append(ref)
a = div.find('a', href=True)
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
manifest.append(os.path.join(dir, 'index.html'))
manifest.append(os.path.join(dir, 'index.ncx'))
if not a:
continue
# Get cover
cpath = getattr(self, 'cover_path', None)
if cpath is None:
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
if self.default_cover(pf):
cpath = pf.name
if cpath is not None and os.access(cpath, os.R_OK):
opf.cover = cpath
manifest.append(cpath)
url = base + '/' + a['href']
print 'url=' + url
# Get masthead
mpath = getattr(self, 'masthead_path', None)
if mpath is not None and os.access(mpath, os.R_OK):
manifest.append(mpath)
if not articles.has_key(key):
articles[key] = []
opf.create_manifest_from_files_in(manifest)
for mani in opf.manifest:
if mani.path.endswith('.ncx'):
mani.id = 'ncx'
if mani.path.endswith('mastheadImage.jpg'):
mani.id = 'masthead-image'
entries = ['index.html']
toc = TOC(base_path=dir)
self.play_order_counter = 0
self.play_order_map = {}
def feed_index(num, parent):
f = feeds[num]
for j, a in enumerate(f):
if getattr(a, 'downloaded', False):
adir = 'feed_%d/article_%d/'%(num, j)
auth = a.author
if not auth:
auth = None
desc = a.text_summary
if not desc:
desc = None
else:
# sub page
subSoup = self.index_to_soup(url)
desc = self.description_limiter(desc)
tt = a.toc_thumbnail if a.toc_thumbnail else None
entries.append('%sindex.html'%adir)
po = self.play_order_map.get(entries[-1], None)
if po is None:
self.play_order_counter += 1
po = self.play_order_counter
parent.add_item('%sindex.html'%adir, None,
a.title if a.title else _('Untitled Article'),
play_order=po, author=auth,
description=desc, toc_thumbnail=tt)
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
for sp in a.sub_pages:
prefix = os.path.commonprefix([opf_path, sp])
relp = sp[len(prefix):]
entries.append(relp.replace(os.sep, '/'))
last = sp
for subDiv in subSoup.findAll('li'):
subA = subDiv.find('a', href=True)
subTitle = subDiv.find(text=True, recursive=True)
subUrl = base + '/' + subA['href']
if os.path.exists(last):
with open(last, 'rb') as fi:
src = fi.read().decode('utf-8')
soup = BeautifulSoup(src)
body = soup.find('body')
if body is not None:
prefix = '/'.join('..' for i in range(2*len(re.findall(r'link\d+', last))))
templ = self.navbar.generate(True, num, j, len(f),
not self.has_single_feed,
a.orig_url, __appname__, prefix=prefix,
center=self.center_navbar)
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
body.insert(len(body.contents), elem)
with open(last, 'wb') as fi:
fi.write(unicode(soup).encode('utf-8'))
if len(feeds) == 0:
raise Exception('All feeds are empty, aborting.')
print 'subUrl' + subUrl
if len(feeds) > 1:
for i, f in enumerate(feeds):
entries.append('feed_%d/index.html'%i)
po = self.play_order_map.get(entries[-1], None)
if po is None:
self.play_order_counter += 1
po = self.play_order_counter
auth = getattr(f, 'author', None)
if not auth:
auth = None
desc = getattr(f, 'description', None)
if not desc:
desc = None
feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
f.title, play_order=po, description=desc, author=auth))
articles[key].append(
dict(title=subTitle,
url=subUrl,
date='',
description='',
content=''))
else:
entries.append('feed_%d/index.html'%0)
feed_index(0, toc)
for i, p in enumerate(entries):
entries[i] = os.path.join(dir, p.replace('/', os.sep))
opf.create_spine(entries)
opf.set_toc(toc)
# elif div['class'] in ['story', 'story headline']:
# a = div.find('a', href=True)
# if not a:
# continue
# url = re.sub(r'\?.*', '', a['href'])
# url += '?pagewanted=all'
# title = self.tag_to_string(a, use_alt=True).strip()
# description = ''
# pubdate = strftime('%a, %d %b')
# summary = div.find(True, attrs={'class':'summary'})
# if summary:
# description = self.tag_to_string(summary, use_alt=False)
#
# feed = key if key is not None else 'Uncategorized'
# if not articles.has_key(feed):
# articles[feed] = []
# if not 'podcasts' in url:
# articles[feed].append(
# dict(title=title, url=url, date=pubdate,
# description=description,
# content=''))
# ans = self.sort_index_by(ans, {'The Front Page':-1, 'Dining In, Dining Out':1, 'Obituaries':2})
ans = [(unicode(key), articles[key]) for key in ans if articles.has_key(key)]
return ans
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
opf.render(opf_file, ncx_file)


@@ -9,14 +9,14 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
__author__ = 'Dave Asbury'
cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/161987_9010212100_2035706408_n.jpg'
oldest_article = 2
max_articles_per_feed = 12
max_articles_per_feed = 20
linearize_tables = True
remove_empty_feeds = True
remove_javascript = True
no_stylesheets = True
auto_cleanup = True
language = 'en_GB'
compress_news_images = True
cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/161987_9010212100_2035706408_n.jpg'
masthead_url = 'http://www.trinitymirror.com/images/birminghampost-logo.gif'


@@ -37,68 +37,15 @@ class BusinessWeek(BasicNewsRecipe):
, 'language' : language
}
#remove_tags = [
#dict(attrs={'class':'inStory'})
#,dict(name=['meta','link','iframe','base','embed','object','table','th','tr','td'])
#,dict(attrs={'id':['inset','videoDisplay']})
#]
#keep_only_tags = [dict(name='div', attrs={'id':['story-body','storyBody']})]
remove_attributes = ['lang']
match_regexps = [r'http://www.businessweek.com/.*_page_[1-9].*']
feeds = [
(u'Top Stories', u'http://www.businessweek.com/topStories/rss/topStories.rss'),
(u'Top News' , u'http://www.businessweek.com/rss/bwdaily.rss' ),
(u'Asia', u'http://www.businessweek.com/rss/asia.rss'),
(u'Autos', u'http://www.businessweek.com/rss/autos/index.rss'),
(u'Classic Cars', u'http://rss.businessweek.com/bw_rss/classiccars'),
(u'Hybrids', u'http://rss.businessweek.com/bw_rss/hybrids'),
(u'Europe', u'http://www.businessweek.com/rss/europe.rss'),
(u'Auto Reviews', u'http://rss.businessweek.com/bw_rss/autoreviews'),
(u'Innovation & Design', u'http://www.businessweek.com/rss/innovate.rss'),
(u'Architecture', u'http://www.businessweek.com/rss/architecture.rss'),
(u'Brand Equity', u'http://www.businessweek.com/rss/brandequity.rss'),
(u'Auto Design', u'http://www.businessweek.com/rss/carbuff.rss'),
(u'Game Room', u'http://rss.businessweek.com/bw_rss/gameroom'),
(u'Technology', u'http://www.businessweek.com/rss/technology.rss'),
(u'Investing', u'http://rss.businessweek.com/bw_rss/investor'),
(u'Small Business', u'http://www.businessweek.com/rss/smallbiz.rss'),
(u'Careers', u'http://rss.businessweek.com/bw_rss/careers'),
(u'B-Schools', u'http://www.businessweek.com/rss/bschools.rss'),
(u'Magazine Selections', u'http://www.businessweek.com/rss/magazine.rss'),
(u'CEO Guide to Tech', u'http://www.businessweek.com/rss/ceo_guide_tech.rss'),
(u'Top Stories', u'http://www.businessweek.com/feeds/most-popular.rss'),
]
def get_article_url(self, article):
url = article.get('guid', None)
if 'podcasts' in url:
return None
if 'surveys' in url:
return None
if 'images' in url:
return None
if 'feedroom' in url:
return None
if '/magazine/toc/' in url:
return None
rurl, sep, rest = url.rpartition('?')
if rurl:
return rurl
return rest
def print_version(self, url):
if '/news/' in url or '/blog/' in url:
return url
rurl = url.replace('http://www.businessweek.com/','http://www.businessweek.com/print/')
return rurl.replace('/investing/','/investor/')
soup = self.index_to_soup(url)
prntver = soup.find('li', attrs={'class':'print tracked'})
rurl = prntver.find('a', href=True)['href']
return rurl
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
for alink in soup.findAll('a'):
if alink.string is not None:
tstr = alink.string
alink.replaceWith(tstr)
return soup


@@ -1,3 +1,4 @@
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from collections import OrderedDict
@@ -39,7 +40,7 @@ class BusinessWeekMagazine(BasicNewsRecipe):
title=self.tag_to_string(div.a).strip()
url=div.a['href']
soup0 = self.index_to_soup(url)
urlprint=soup0.find('li', attrs={'class':'print tracked'}).a['href']
urlprint=soup0.find('a', attrs={'href':re.compile('.*printer.*')})['href']
articles.append({'title':title, 'url':urlprint, 'description':'', 'date':''})
@@ -56,7 +57,7 @@ class BusinessWeekMagazine(BasicNewsRecipe):
title=self.tag_to_string(div.a).strip()
url=div.a['href']
soup0 = self.index_to_soup(url)
urlprint=soup0.find('li', attrs={'class':'print tracked'}).a['href']
urlprint=soup0.find('a', attrs={'href':re.compile('.*printer.*')})['href']
articles.append({'title':title, 'url':urlprint, 'description':desc, 'date':''})
if articles:


@@ -7,13 +7,14 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
#cover_url = 'http://www.countryfile.com/sites/default/files/imagecache/160px_wide/cover/2_1.jpg'
__author__ = 'Dave Asbury'
description = 'The official website of Countryfile Magazine'
# last updated 8/12/12
# last updated 19/10/12
language = 'en_GB'
oldest_article = 30
max_articles_per_feed = 25
remove_empty_feeds = True
no_stylesheets = True
auto_cleanup = True
compress_news_images = True
ignore_duplicate_articles = {'title', 'url'}
#articles_are_obfuscated = True
#article_already_exists = False


@@ -13,9 +13,9 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
masthead_url = 'http://www.nmauk.co.uk/nma/images/daily_mirror.gif'
compress_news_images = True
oldest_article = 1
max_articles_per_feed = 1
max_articles_per_feed = 12
remove_empty_feeds = True
remove_javascript = True
no_stylesheets = True


@@ -0,0 +1,23 @@
# vim:fileencoding=UTF-8
from __future__ import unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1365070687(BasicNewsRecipe):
    title = 'Diário de Notícias'
    oldest_article = 7
    language = 'pt'
    __author__ = 'Jose Pinto'
    max_articles_per_feed = 100
    keep_only_tags = [dict(name='div', attrs={'id':'cln-esqmid'})]
    remove_tags = [dict(name='table', attrs={'class':'TabFerramentasInf'})]

    feeds = [(u'Portugal', u'http://feeds.dn.pt/DN-Portugal'),
             (u'Globo', u'http://feeds.dn.pt/DN-Globo'),
             (u'Economia', u'http://feeds.dn.pt/DN-Economia'),
             (u'Ci\xeancia', u'http://feeds.dn.pt/DN-Ciencia'),
             (u'Artes', u'http://feeds.dn.pt/DN-Artes'),
             (u'TV & Media', u'http://feeds.dn.pt/DN-Media'),
             (u'Opini\xe3o', u'http://feeds.dn.pt/DN-Opiniao'),
             (u'Pessoas', u'http://feeds.dn.pt/DN-Pessoas')]
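A small feeds-only recipe like this can be smoke-tested from the command line before it is added to the calibre GUI. A sketch, assuming calibre's command-line tools are installed and the recipe is saved as dn.recipe (a hypothetical file name):

    ebook-convert dn.recipe dn.epub --test

With --test, calibre downloads only a couple of articles from the first couple of feeds, which is usually enough to verify the keep_only_tags and remove_tags selections above.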


@@ -12,12 +12,6 @@ class EsensjaRSS(BasicNewsRecipe):
language = 'pl'
encoding = 'utf-8'
INDEX = 'http://www.esensja.pl'
extra_css = '''.t-title {font-size: x-large; font-weight: bold; text-align: left}
.t-author {font-size: x-small; text-align: left}
.t-title2 {font-size: x-small; font-style: italic; text-align: left}
.text {font-size: small; text-align: left}
.annot-ref {font-style: italic; text-align: left}
'''
cover_url = ''
masthead_url = 'http://esensja.pl/img/wrss.gif'
use_embedded_content = False


@@ -8,6 +8,7 @@ import datetime
from calibre.ptempfile import PersistentTemporaryFile
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
from collections import OrderedDict
class FinancialTimes(BasicNewsRecipe):
title = 'Financial Times (UK)'
@@ -105,29 +106,30 @@ class FinancialTimes(BasicNewsRecipe):
return articles
def parse_index(self):
feeds = []
feeds = OrderedDict()
soup = self.index_to_soup(self.INDEX)
dates= self.tag_to_string(soup.find('div', attrs={'class':'btm-links'}).find('div'))
self.timefmt = ' [%s]'%dates
wide = soup.find('div',attrs={'class':'wide'})
if not wide:
return feeds
allsections = wide.findAll(attrs={'class':lambda x: x and 'footwell' in x.split()})
if not allsections:
return feeds
count = 0
for item in allsections:
count = count + 1
if self.test and count > 2:
return feeds
fitem = item.h3
if not fitem:
fitem = item.h4
ftitle = self.tag_to_string(fitem)
self.report_progress(0, _('Fetching feed')+' %s...'%(ftitle))
feedarts = self.get_artlinks(item.ul)
feeds.append((ftitle,feedarts))
return feeds
#dates= self.tag_to_string(soup.find('div', attrs={'class':'btm-links'}).find('div'))
#self.timefmt = ' [%s]'%dates
section_title = 'Untitled'
for column in soup.findAll('div', attrs = {'class':'feedBoxes clearfix'}):
for section in column.findAll('div', attrs = {'class':'feedBox'}):
sectiontitle=self.tag_to_string(section.find('h4'))
if '...' not in sectiontitle: section_title=sectiontitle
for article in section.ul.findAll('li'):
articles = []
title=self.tag_to_string(article.a)
url=article.a['href']
articles.append({'title':title, 'url':url, 'description':'', 'date':''})
if articles:
if section_title not in feeds:
feeds[section_title] = []
feeds[section_title] += articles
ans = [(key, val) for key, val in feeds.iteritems()]
return ans
def preprocess_html(self, soup):
items = ['promo-box','promo-title',
@@ -177,6 +179,3 @@ class FinancialTimes(BasicNewsRecipe):
tfile.close()
self.temp_files.append(tfile)
return tfile.name
def cleanup(self):
self.browser.open('https://registration.ft.com/registration/login/logout?location=')
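The switch from a plain list to an OrderedDict in parse_index above lets articles from repeated section boxes accumulate under a single heading while keeping sections in page order. A minimal illustration of the pattern, independent of the recipe:

    from collections import OrderedDict

    feeds = OrderedDict()
    for section, article in [('News', 'a1'), ('Markets', 'b1'), ('News', 'a2')]:
        feeds.setdefault(section, []).append(article)
    print(feeds.items())  # [('News', ['a1', 'a2']), ('Markets', ['b1'])]

The recipe builds the same structure with an explicit membership test and finally flattens it into the list of (title, articles) tuples that calibre expects.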

recipes/forbes_pl.recipe (new file, 53 lines)

@@ -0,0 +1,53 @@
#!/usr/bin/env python
__license__ = 'GPL v3'

from calibre.web.feeds.news import BasicNewsRecipe
import datetime
import re

class forbes_pl(BasicNewsRecipe):
    title = u'Forbes.pl'
    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
    language = 'pl'
    description = u'Biznes, finanse, gospodarka, strategie, wiadomości gospodarcze, analizy finansowe i strategiczne.'
    oldest_article = 1
    index = 'http://www.forbes.pl'
    cover_url = 'http://www.forbes.pl/resources/front/images/logo.png'
    max_articles_per_feed = 100
    extra_css = '.Block-Photo {float:left; max-width: 300px; margin-right: 5px;}'
    # strip "Czytaj też:" / "Zobacz także:" ("Read also" / "See also") cross-links
    preprocess_regexps = [(re.compile(ur'<p>(<strong>)?(Czytaj|Zobacz) (też|także):.*?</p>', re.DOTALL), lambda match: ''),
                          (re.compile(ur'<strong>Zobacz:.*?</strong>', re.DOTALL), lambda match: '')]
    remove_javascript = True
    no_stylesheets = True
    now = datetime.datetime.now()
    yesterday = now - datetime.timedelta(hours=24)
    yesterday = yesterday.strftime("%d.%m.%Y %H:%M:%S")
    pages_count = 4
    keep_only_tags = [dict(attrs={'class':['Block-Node Content-Article ', 'Block-Node Content-Article piano-closed']})]
    remove_tags = [dict(attrs={'class':['Keywords Styled', 'twitter-share-button', 'Block-List-Related Block-List']})]

    feeds = [(u'Wszystkie', 'http://www.forbes.pl/rss')]

    # Multi-page stitching, kept disabled in this version; re-enabling it would
    # also require importing Comment from calibre.ebooks.BeautifulSoup and
    # iterating over the findAll() result set instead of calling findAll() on it.
    '''def preprocess_html(self, soup):
        self.append_page(soup, soup.body)
        return soup

    def append_page(self, soup, appendtag):
        cleanup = False
        nexturl = appendtag.find('a', attrs={'class':'next'})
        if nexturl:
            cleanup = True
        while nexturl:
            soup2 = self.index_to_soup(self.index + nexturl['href'])
            nexturl = soup2.find('a', attrs={'class':'next'})
            pagetext = soup2.findAll(id='article-body-wrapper')
            if not pagetext:
                pagetext = soup2.findAll(attrs={'class':'Article-Entry Styled'})
            for comment in pagetext.findAll(text=lambda text:isinstance(text, Comment)):
                comment.extract()
            pos = len(appendtag.contents)
            appendtag.insert(pos, pagetext)
        if cleanup:
            for r in appendtag.findAll(attrs={'class':'paginator'}):
                r.extract()'''

recipes/galaxys_edge.recipe (new file, 108 lines)

@@ -0,0 +1,108 @@
from __future__ import with_statement

__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'

from calibre.web.feeds.news import BasicNewsRecipe

class GalaxyEdge(BasicNewsRecipe):
    title = u'The Galaxy\'s Edge'
    language = 'en'

    oldest_article = 7
    __author__ = 'Krittika Goyal'
    no_stylesheets = True
    auto_cleanup = True

    #keep_only_tags = [dict(id='content')]
    #remove_tags = [dict(attrs={'class':['article-links', 'breadcr']}),
        #dict(id=['email-section', 'right-column', 'printfooter', 'topover',
            #'slidebox', 'th_footer'])]

    extra_css = '.photo-caption { font-size: smaller }'

    def parse_index(self):
        soup = self.index_to_soup('http://www.galaxysedge.com/')
        main = soup.find('table', attrs={'width':'911'})
        toc = main.find('td', attrs={'width':'225'})

        current_section = None
        current_articles = []
        feeds = []
        c = 0
        for x in toc.findAll(['p']):
            c = c+1
            if c == 5:
                # the fifth <p> opens the first section: its link supplies
                # both the section name and the first article
                if current_articles and current_section:
                    feeds.append((current_section, current_articles))
                edwo = x.find('a')
                current_section = self.tag_to_string(edwo)
                current_articles = []
                self.log('\tFound section:', current_section)
                title = self.tag_to_string(edwo)
                url = edwo.get('href', True)
                url = 'http://www.galaxysedge.com/'+url
                print(title)
                print(c)
                if not url or not title:
                    continue
                self.log('\t\tFound article:', title)
                self.log('\t\t\t', url)
                current_articles.append({'title': title, 'url':url,
                                         'description':'', 'date':''})
            elif c>5:
                current_section = self.tag_to_string(x.find('b'))
                current_articles = []
                self.log('\tFound section:', current_section)
                for y in x.findAll('a'):
                    title = self.tag_to_string(y)
                    url = y.get('href', True)
                    url = 'http://www.galaxysedge.com/'+url
                    print(title)
                    if not url or not title:
                        continue
                    self.log('\t\tFound article:', title)
                    self.log('\t\t\t', url)
                    current_articles.append({'title': title, 'url':url,
                                             'description':'', 'date':''})
        if current_articles and current_section:
            feeds.append((current_section, current_articles))

        return feeds

    #def preprocess_raw_html(self, raw, url):
        #return raw.replace('<body><p>', '<p>').replace('</p></body>', '</p>')

    #def postprocess_html(self, soup, first_fetch):
        #for t in soup.findAll(['table', 'tr', 'td','center']):
            #t.name = 'div'
        #return soup

    #def parse_index(self):
        #today = time.strftime('%Y-%m-%d')
        #soup = self.index_to_soup(
            #'http://www.thehindu.com/todays-paper/tp-index/?date=' + today)
        #div = soup.find(id='left-column')
        #feeds = []
        #current_section = None
        #current_articles = []
        #for x in div.findAll(['h3', 'div']):
            #if current_section and x.get('class', '') == 'tpaper':
                #a = x.find('a', href=True)
                #if a is not None:
                    #current_articles.append({'url':a['href']+'?css=print',
                        #'title':self.tag_to_string(a), 'date': '',
                        #'description':''})
            #if x.name == 'h3':
                #if current_section and current_articles:
                    #feeds.append((current_section, current_articles))
                #current_section = self.tag_to_string(x)
                #current_articles = []
        #return feeds


@@ -10,7 +10,7 @@ krakow.gazeta.pl
from calibre.web.feeds.news import BasicNewsRecipe
class gw_krakow(BasicNewsRecipe):
title = u'Gazeta.pl Kraków'
title = u'Gazeta Wyborcza Kraków'
__author__ = 'teepel <teepel44@gmail.com> based on GW from fenuks'
language = 'pl'
description =u'Wiadomości z Krakowa na portalu Gazeta.pl.'


@@ -5,7 +5,7 @@ import string
from calibre.web.feeds.news import BasicNewsRecipe
class GazetaPlSzczecin(BasicNewsRecipe):
title = u'Gazeta.pl Szczecin'
title = u'Gazeta Wyborcza Szczecin'
description = u'Wiadomości ze Szczecina na portalu Gazeta.pl.'
__author__ = u'Michał Szkutnik'
__license__ = u'GPL v3'


@@ -10,7 +10,7 @@ warszawa.gazeta.pl
from calibre.web.feeds.news import BasicNewsRecipe
class gw_wawa(BasicNewsRecipe):
title = u'Gazeta.pl Warszawa'
title = u'Gazeta Wyborcza Warszawa'
__author__ = 'teepel <teepel44@gmail.com> based on GW from fenuks'
language = 'pl'
description ='Wiadomości z Warszawy na portalu Gazeta.pl.'


@@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Comment
class Gazeta_Wyborcza(BasicNewsRecipe):
title = u'Gazeta.pl'
title = u'Gazeta Wyborcza'
__author__ = 'fenuks, Artur Stachecki'
language = 'pl'
description = 'Wiadomości z Polski i ze świata. Serwisy tematyczne i lokalne w 20 miastach.'


@@ -1,6 +1,4 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
from datetime import date, timedelta
class HBR(BasicNewsRecipe):
@@ -11,23 +9,18 @@ class HBR(BasicNewsRecipe):
timefmt = ' [%B %Y]'
language = 'en'
no_stylesheets = True
# recipe_disabled = ('hbr.org has started requiring the use of javascript'
# ' to log into their website. This is unsupported in calibre, so'
# ' this recipe has been disabled. If you would like to see '
# ' HBR supported in calibre, contact hbr.org and ask them'
# ' to provide a javascript free login method.')
LOGIN_URL = 'https://hbr.org/login?request_url=/'
LOGOUT_URL = 'https://hbr.org/logout?request_url=/'
INDEX = 'http://hbr.org/archive-toc/BR'
INDEX = 'http://hbr.org'
keep_only_tags = [dict(name='div', id='pageContainer')]
remove_tags = [dict(id=['mastheadContainer', 'magazineHeadline',
'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn',
'todayOnHBRListWidget', 'mostWidget', 'keepUpWithHBR',
'mailingListTout', 'partnerCenter', 'pageFooter',
'superNavHeadContainer', 'hbrDisqus',
'superNavHeadContainer', 'hbrDisqus', 'article-toolbox',
'articleToolbarTop', 'articleToolbarBottom', 'articleToolbarRD']),
dict(name='iframe')]
extra_css = '''
@@ -57,22 +50,6 @@ class HBR(BasicNewsRecipe):
if url.endswith('/ar/1'):
return url[:-1]+'pr'
def hbr_get_toc(self):
# return self.index_to_soup(open('/t/toc.html').read())
today = date.today()
future = today + timedelta(days=30)
past = today - timedelta(days=30)
for x in [x.strftime('%y%m') for x in (future, today, past)]:
url = self.INDEX + x
soup = self.index_to_soup(url)
if (not soup.find(text='Issue Not Found') and not soup.find(
text="We're Sorry. There was an error processing your request")
and 'Exception: java.io.FileNotFoundException' not in
unicode(soup)):
return soup
raise Exception('Could not find current issue')
def hbr_parse_toc(self, soup):
feeds = []
current_section = None
@@ -105,23 +82,19 @@ class HBR(BasicNewsRecipe):
articles.append({'title':title, 'url':url, 'description':desc,
'date':''})
if current_section is not None and articles:
feeds.append((current_section, articles))
return feeds
def parse_index(self):
soup = self.hbr_get_toc()
# open('/t/hbr.html', 'wb').write(unicode(soup).encode('utf-8'))
soup0 = self.index_to_soup('http://hbr.org/magazine')
datencover = soup0.find('ul', attrs={'id':'magazineArchiveCarousel'}).findAll('li')[-1]
#find date & cover
self.cover_url=datencover.img['src']
dates=self.tag_to_string(datencover.img['alt'])
self.timefmt = u' [%s]'%dates
soup = self.index_to_soup(self.INDEX + soup0.find('div', attrs = {'class':'magazine_page'}).a['href'])
feeds = self.hbr_parse_toc(soup)
return feeds
def get_cover_url(self):
cover_url = None
index = 'http://hbr.org/current'
soup = self.index_to_soup(index)
link_item = soup.find('img', alt=re.compile("Current Issue"), src=True)
if link_item:
cover_url = 'http://hbr.org' + link_item['src']
return cover_url

Binary icon files changed (images not shown in this view):

recipes/icons/forbes_pl.png: new file, 1.2 KiB
four existing icons replaced: 802 B before, 294 B after (each)
recipes/icons/slashdot.png: new file, 250 B
two further new icons: 511 B and 205 B


@@ -1,33 +1,23 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
# vim:fileencoding=UTF-8
from __future__ import unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe
class AListApart (BasicNewsRecipe):
__author__ = u'Marc Busqué <marc@lamarciana.com>'
__author__ = 'Marc Busqué <marc@lamarciana.com>'
__url__ = 'http://www.lamarciana.com'
__version__ = '1.0'
__version__ = '2.0'
__license__ = 'GPL v3'
__copyright__ = u'2012, Marc Busqué <marc@lamarciana.com>'
__copyright__ = '2012, Marc Busqué <marc@lamarciana.com>'
title = u'A List Apart'
description = u'A List Apart Magazine (ISSN: 1534-0295) explores the design, development, and meaning of web content, with a special focus on web standards and best practices.'
description = u'A List Apart Magazine (ISSN: 1534-0295) explores the design, development, and meaning of web content, with a special focus on web standards and best practices. This recipe retrieve articles and columns.'
language = 'en'
tags = 'web development, software'
oldest_article = 120
remove_empty_feeds = True
no_stylesheets = True
encoding = 'utf8'
cover_url = u'http://alistapart.com/pix/alalogo.gif'
keep_only_tags = [
dict(name='div', attrs={'id': 'content'})
]
remove_tags = [
dict(name='ul', attrs={'id': 'metastuff'}),
dict(name='div', attrs={'class': 'discuss'}),
dict(name='div', attrs={'class': 'discuss'}),
dict(name='div', attrs={'id': 'learnmore'}),
]
remove_attributes = ['border', 'cellspacing', 'align', 'cellpadding', 'colspan', 'valign', 'vspace', 'hspace', 'alt', 'width', 'height']
extra_css = u'img {max-width: 100%; display: block; margin: auto;} #authorbio img {float: left; margin-right: 2%;}'
extra_css = u'img {max-width: 100%; display: block; margin: auto;}'
feeds = [
(u'A List Apart', u'http://www.alistapart.com/site/rss'),
(u'A List Apart', u'http://feeds.feedburner.com/alistapart/abridged'),
]


@@ -6,10 +6,10 @@ import time
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
title = u'Metro UK'
description = 'News as provided by The Metro -UK'
description = 'News from The Metro, UK'
#timefmt = ''
__author__ = 'fleclerc & Dave Asbury'
#last update 20/1/13
__author__ = 'Dave Asbury'
#last update 4/4/13
#cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/276636_117118184990145_2132092232_n.jpg'
cover_url = 'https://twimg0-a.akamaihd.net/profile_images/1638332595/METRO_LETTERS-01.jpg'
@@ -22,7 +22,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
language = 'en_GB'
masthead_url = 'http://e-edition.metro.co.uk/images/metro_logo.gif'
compress_news_images = True
def parse_index(self):
articles = {}
key = None


@@ -12,6 +12,7 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
max_articles_per_feed = 20
#auto_cleanup = True
language = 'en_GB'
compress_news_images = True
def get_cover_url(self):
soup = self.index_to_soup('http://www.nme.com/component/subscribe')


@@ -1,30 +1,30 @@
# vim:fileencoding=UTF-8
from __future__ import unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2011, Eddie Lau'
__copyright__ = '2011-2013, Eddie Lau'
# data source: normal, mobile
__Source__ = 'mobile'
# please replace the following "True" with "False". (Default: True)
__MakePeriodical__ = True
# Turn below to True if your device supports display of CJK titles (Default: False)
__UseChineseTitle__ = False
__UseChineseTitle__ = True
# Set it to False if you want to skip images (Default: True)
__KeepImages__ = True
# Set it to True if you want to include a summary in Kindle's article view (Default: False)
__IncludeSummary__ = False
__IncludeSummary__ = True
# Set it to True if you want thumbnail images in Kindle's article view (Default: True)
__IncludeThumbnails__ = True
'''
Change Log:
2013/03/31 -- fix cover retrieval code and heading size, and remove &nbsp; in summary
2011/12/29 -- first version done
TODO:
* use alternative source at http://m.singtao.com/index.php
'''
from calibre.utils.date import now as nowf
import os, datetime, re
from datetime import date
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
from calibre.ebooks.BeautifulSoup import BeautifulSoup
@@ -41,7 +41,7 @@ class STHKRecipe(BasicNewsRecipe):
title = 'Sing Tao Daily - Hong Kong'
description = 'Hong Kong Chinese Newspaper (http://singtao.com)'
category = 'Chinese, News, Hong Kong'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} td[class=caption] {font-size:50%;} td[class=bodyhead]{font-weight:bold; font-size:150%;} td[class=stmobheadline]{font-weight:bold; font-size:150%;}'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} td[class=caption] {font-size:50%;} td[class=bodyhead]{font-weight:bold; font-size:150%;} td[class=stmobheadline]{font-weight:bold; font-size:200%;}'
masthead_url = 'http://upload.wikimedia.org/wikipedia/en/d/dd/Singtao-usa.png'
if __Source__ == 'normal':
keep_only_tags = [dict(name='td', attrs={'class':['bodyhead','bodytext']})]
@@ -96,17 +96,13 @@ class STHKRecipe(BasicNewsRecipe):
return self.get_dtlocal().strftime("%d")
def get_cover_url(self):
#cover = 'http://singtao.com/media/a/a(2660).jpg' # for 2011/12/29
base = 2660
todaydate = date(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()))
diff = todaydate - date(2011, 12, 29)
base = base + int(diff.total_seconds()/(3600*24))
cover = 'http://singtao.com/media/a/a(' + str(base) +').jpg'
soup = self.index_to_soup('http://m.singtao.com/')
cover = soup.find(attrs={'class':'special'}).get('src', False)
br = BasicNewsRecipe.get_browser(self)
try:
br.open(cover)
except:
cover = 'http://singtao.com/images/stlogo.gif'
cover = None
return cover
def parse_index(self):
@@ -293,7 +289,7 @@ class STHKRecipe(BasicNewsRecipe):
textFound = False
for p in paras:
if not textFound:
summary_candidate = self.tag_to_string(p).strip()
summary_candidate = self.tag_to_string(p).strip().replace('&nbsp;', '')
if len(summary_candidate) > 0:
summary_candidate = summary_candidate.replace(u'(\u661f\u5cf6\u65e5\u5831\u5831\u9053)', '', 1)
article.summary = article.text_summary = summary_candidate
@@ -489,3 +485,4 @@ class STHKRecipe(BasicNewsRecipe):


@@ -0,0 +1,70 @@
#!/usr/bin/env python
__license__ = 'GPL v3'

import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.utils.magick import Image

class sportowefakty(BasicNewsRecipe):
    title = u'SportoweFakty'
    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>, Tomasz Długosz <tomek3d@gmail.com>'
    language = 'pl'
    description = u'Najważniejsze informacje sportowe z kraju i ze świata, relacje, komentarze, wywiady, zdjęcia!'
    oldest_article = 1
    masthead_url = 'http://www.sportowefakty.pl/images/logo.png'
    max_articles_per_feed = 100
    simultaneous_downloads = 5
    use_embedded_content = False
    remove_javascript = True
    no_stylesheets = True
    ignore_duplicate_articles = {'title', 'url'}

    keep_only_tags = [dict(attrs={'class':'box-article'})]
    remove_tags = []
    remove_tags.append(dict(attrs={'class':re.compile(r'^newsStream')}))
    remove_tags.append(dict(attrs={'target':'_blank'}))

    feeds = [
        (u'Piłka Nożna', u'http://www.sportowefakty.pl/pilka-nozna/index.rss'),
        (u'Koszykówka', u'http://www.sportowefakty.pl/koszykowka/index.rss'),
        (u'Żużel', u'http://www.sportowefakty.pl/zuzel/index.rss'),
        (u'Siatkówka', u'http://www.sportowefakty.pl/siatkowka/index.rss'),
        (u'Zimowe', u'http://www.sportowefakty.pl/zimowe/index.rss'),
        (u'Hokej', u'http://www.sportowefakty.pl/hokej/index.rss'),
        (u'Moto', u'http://www.sportowefakty.pl/moto/index.rss'),
        (u'Tenis', u'http://www.sportowefakty.pl/tenis/index.rss')
    ]

    def get_article_url(self, article):
        # drop tracking parameters from feed links
        link = article.get('link', None)
        if 'utm_source' in link:
            return link.split('?utm')[0]
        else:
            return link

    def print_version(self, url):
        print_url = url + '/drukuj'
        return print_url

    def preprocess_html(self, soup):
        head = soup.find('h1')
        if 'Fotorelacja' in self.tag_to_string(head):
            # skip photo-gallery articles
            return None
        else:
            # flatten links to plain text
            for alink in soup.findAll('a'):
                if alink.string is not None:
                    tstr = alink.string
                    alink.replaceWith(tstr)
        return soup

    def postprocess_html(self, soup, first):
        # convert downloaded images to grayscale in place
        for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
            iurl = tag['src']
            img = Image()
            img.open(iurl)
            if img < 0:
                raise RuntimeError('Out of memory')
            img.type = "GrayscaleType"
            img.save(iurl)
        return soup
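The postprocess_html() pass above rewrites every downloaded image in place. The same conversion can be tried on a single file with calibre's bundled ImageMagick wrapper, as a quick way to preview the effect; a sketch assuming a local sample.jpg (a hypothetical file name):

    from calibre.utils.magick import Image

    img = Image()
    img.open('sample.jpg')
    img.type = "GrayscaleType"  # same ImageMagick type name the recipe sets
    img.save('sample.jpg')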


@@ -20,7 +20,7 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
no_stylesheets = True
ignore_duplicate_articles = {'title','url'}
compress_news_images = True
extra_css = '''
body{ text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}


@@ -6,17 +6,62 @@ __license__ = 'GPL v3'
www.canada.com
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag, BeautifulStoneSoup
class TimesColonist(BasicNewsRecipe):
# Customization -- remove sections you don't want.
# If your e-reader is an e-ink Kindle and your output profile is
# set properly this recipe will not include images because the
# resulting file is too large. If you have one of these and want
# images you can set kindle_omit_images = False
# and remove sections (typically the e-ink Kindles will
# work with about a dozen of these, but your mileage may vary).
kindle_omit_images = True
section_list = [
('','Web Front Page'),
('news/','News Headlines'),
('news/b-c/','BC News'),
('news/national/','National News'),
('news/world/','World News'),
('opinion/','Opinion'),
('opinion/letters/','Letters'),
('business/','Business'),
('business/money/','Money'),
('business/technology/','Technology'),
('business/working/','Working'),
('sports/','Sports'),
('sports/hockey/','Hockey'),
('sports/football/','Football'),
('sports/basketball/','Basketball'),
('sports/golf/','Golf'),
('entertainment/','entertainment'),
('entertainment/go/','Go!'),
('entertainment/music/','Music'),
('entertainment/books/','Books'),
('entertainment/Movies/','Movies'),
('entertainment/television/','Television'),
('life/','Life'),
('life/health/','Health'),
('life/travel/','Travel'),
('life/driving/','Driving'),
('life/homes/','Homes'),
('life/food-drink/','Food & Drink')
]
title = u'Victoria Times Colonist'
url_prefix = 'http://www.timescolonist.com'
description = u'News from Victoria, BC'
fp_tag = 'CAN_TC'
masthead_url = 'http://www.timescolonist.com/gmg/img/global/logoTimesColonist.png'
url_list = []
language = 'en_CA'
__author__ = 'Nick Redding'
@@ -29,15 +74,21 @@ class TimesColonist(BasicNewsRecipe):
.caption { font-size: xx-small; font-style: italic; font-weight: normal; }
'''
keep_only_tags = [dict(name='div', attrs={'class':re.compile('main.content')})]
remove_tags = [{'class':'comments'},
def __init__(self, options, log, progress_reporter):
self.remove_tags = [{'class':'comments'},
{'id':'photocredit'},
dict(name='div', attrs={'class':re.compile('top.controls')}),
dict(name='div', attrs={'class':re.compile('^comments')}),
dict(name='div', attrs={'class':re.compile('social')}),
dict(name='div', attrs={'class':re.compile('tools')}),
dict(name='div', attrs={'class':re.compile('bottom.tools')}),
dict(name='div', attrs={'class':re.compile('window')}),
dict(name='div', attrs={'class':re.compile('related.news.element')})]
print("PROFILE NAME = "+options.output_profile.short_name)
if self.kindle_omit_images and options.output_profile.short_name in ['kindle', 'kindle_dx', 'kindle_pw']:
self.remove_tags.append(dict(name='div', attrs={'class':re.compile('image-container')}))
BasicNewsRecipe.__init__(self, options, log, progress_reporter)
def get_cover_url(self):
from datetime import timedelta, date
@@ -122,7 +173,6 @@ class TimesColonist(BasicNewsRecipe):
def preprocess_html(self,soup):
byline = soup.find('p',attrs={'class':re.compile('ancillary')})
if byline is not None:
byline.find('a')
authstr = self.tag_to_string(byline,False)
authstr = re.sub('/ *Times Colonist','/',authstr, flags=re.IGNORECASE)
authstr = re.sub('BY */','',authstr, flags=re.IGNORECASE)
@@ -149,9 +199,10 @@ class TimesColonist(BasicNewsRecipe):
atag = htag.a
if atag is not None:
url = atag['href']
#print("Checking "+url)
if atag['href'].startswith('/'):
url = self.url_prefix+atag['href']
url = url.strip()
# print("Checking >>"+url+'<<\n\r')
if url.startswith('/'):
url = self.url_prefix+url
if url in self.url_list:
return
self.url_list.append(url)
@@ -171,10 +222,10 @@ class TimesColonist(BasicNewsRecipe):
if dtag is not None:
description = self.tag_to_string(dtag,False)
article_list.append(dict(title=title,url=url,date='',description=description,author='',content=''))
#print(sectitle+title+": description = "+description+" URL="+url)
print(sectitle+title+": description = "+description+" URL="+url+'\n\r')
def add_section_index(self,ans,securl,sectitle):
print("Add section url="+self.url_prefix+'/'+securl)
print("Add section url="+self.url_prefix+'/'+securl+'\n\r')
try:
soup = self.index_to_soup(self.url_prefix+'/'+securl)
except:
@@ -193,33 +244,7 @@ class TimesColonist(BasicNewsRecipe):
def parse_index(self):
ans = []
ans = self.add_section_index(ans,'','Web Front Page')
ans = self.add_section_index(ans,'news/','News Headlines')
ans = self.add_section_index(ans,'news/b-c/','BC News')
ans = self.add_section_index(ans,'news/national/','National News')
ans = self.add_section_index(ans,'news/world/','World News')
ans = self.add_section_index(ans,'opinion/','Opinion')
ans = self.add_section_index(ans,'opinion/letters/','Letters')
ans = self.add_section_index(ans,'business/','Business')
ans = self.add_section_index(ans,'business/money/','Money')
ans = self.add_section_index(ans,'business/technology/','Technology')
ans = self.add_section_index(ans,'business/working/','Working')
ans = self.add_section_index(ans,'sports/','Sports')
ans = self.add_section_index(ans,'sports/hockey/','Hockey')
ans = self.add_section_index(ans,'sports/football/','Football')
ans = self.add_section_index(ans,'sports/basketball/','Basketball')
ans = self.add_section_index(ans,'sports/golf/','Golf')
ans = self.add_section_index(ans,'entertainment/','entertainment')
ans = self.add_section_index(ans,'entertainment/go/','Go!')
ans = self.add_section_index(ans,'entertainment/music/','Music')
ans = self.add_section_index(ans,'entertainment/books/','Books')
ans = self.add_section_index(ans,'entertainment/Movies/','movies')
ans = self.add_section_index(ans,'entertainment/television/','Television')
ans = self.add_section_index(ans,'life/','Life')
ans = self.add_section_index(ans,'life/health/','Health')
ans = self.add_section_index(ans,'life/travel/','Travel')
ans = self.add_section_index(ans,'life/driving/','Driving')
ans = self.add_section_index(ans,'life/homes/','Homes')
ans = self.add_section_index(ans,'life/food-drink/','Food & Drink')
for (url,title) in self.section_list:
ans = self.add_section_index(ans,url,title)
return ans
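
The rewritten parse_index() above iterates over self.section_list, which is defined elsewhere in the recipe and not shown in this hunk. A minimal sketch of the attribute it assumes, reconstructed from a few of the old hard-coded calls (the exact committed list may differ):

# Hypothetical reconstruction of the section_list attribute that the new
# parse_index() iterates over; each entry is a (url, title) pair.
section_list = [
    ('', 'Web Front Page'),
    ('news/', 'News Headlines'),
    ('sports/', 'Sports'),
    ('life/travel/', 'Travel'),
]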

View File

@@ -1,144 +0,0 @@
#!/usr/bin/env python
from calibre.web.feeds.recipes import BasicNewsRecipe
class GazetaWyborczaDuzyForma(BasicNewsRecipe):
cover_url = 'http://bi.gazeta.pl/im/8/5415/m5415058.gif'
title = u"Gazeta Wyborcza Duzy Format"
__author__ = 'ravcio - rlelusz[at]gmail.com'
description = u"Articles from Gazeta's website"
language = 'pl'
max_articles_per_feed = 50 #you can increase it even up to maybe 600, should still work
recursions = 0
encoding = 'iso-8859-2'
no_stylesheets = True
remove_javascript = True
use_embedded_content = False
keep_only_tags = [
dict(name='div', attrs={'id':['k1']})
]
remove_tags = [
dict(name='div', attrs={'class':['zdjM', 'rel_video', 'zdjP', 'rel_box', 'index mod_zi_dolStrony']})
,dict(name='div', attrs={'id':['source', 'banP4', 'article_toolbar', 'rel', 'inContext_disabled']})
,dict(name='ul', attrs={'id':['articleToolbar']})
,dict(name='img', attrs={'class':['brand']})
,dict(name='h5', attrs={'class':['author']})
,dict(name='h6', attrs={'class':['date']})
,dict(name='p', attrs={'class':['txt_upl']})
]
remove_tags_after = [
dict(name='div', attrs={'id':['Str']}) # line-number navigator
]
def load_article_links(self, url, count):
print '--- load_article_links', url, count
#page with links to articles
soup = self.index_to_soup(url)
#table with articles
list = soup.find('div', attrs={'class':'GWdalt'})
#single articles (link, title, ...)
links = list.findAll('div', attrs={'class':['GWdaltE']})
if len(links) < count:
#load links to more articles...
#remove new link
pages_nav = list.find('div', attrs={'class':'pages'})
next = pages_nav.find('a', attrs={'class':'next'})
if next:
print 'next=', next['href']
url = 'http://wyborcza.pl' + next['href']
#e.g. url = 'http://wyborcza.pl/0,75480.html?str=2'
older_links = self.load_article_links(url, count - len(links))
links.extend(older_links)
return links
#produce list of articles to download
def parse_index(self):
print '--- parse_index'
max_articles = 8000
links = self.load_article_links('http://wyborcza.pl/0,75480.html', max_articles)
ans = []
key = None
articles = {}
key = 'Uncategorized'
articles[key] = []
for div_art in links:
div_date = div_art.find('div', attrs={'class':'kL'})
div = div_art.find('div', attrs={'class':'kR'})
a = div.find('a', href=True)
url = a['href']
title = a.string
description = ''
pubdate = div_date.string.rstrip().lstrip()
summary = div.find('span', attrs={'class':'lead'})
desc = summary.find('a', href=True)
if desc:
desc.extract()
description = self.tag_to_string(summary, use_alt=False)
description = description.rstrip().lstrip()
feed = key if key is not None else 'Duzy Format'
if not articles.has_key(feed):
articles[feed] = []
if description != '': # skip articles that are just pictures
articles[feed].append(
dict(title=title, url=url, date=pubdate,
description=description,
content=''))
ans = [(key, articles[key])]
return ans
def append_page(self, soup, appendtag, position):
pager = soup.find('div',attrs={'id':'Str'})
if pager:
#look for an 'a' element whose text contains 'nast' (next); exit if not found
list = pager.findAll('a')
for elem in list:
if 'nast' in elem.string:
nexturl = elem['href']
soup2 = self.index_to_soup('http://warszawa.gazeta.pl' + nexturl)
texttag = soup2.find('div', attrs={'id':'artykul'})
newpos = len(texttag.contents)
self.append_page(soup2,texttag,newpos)
texttag.extract()
appendtag.insert(position,texttag)
def preprocess_html(self, soup):
self.append_page(soup, soup.body, 3)
# finally remove some tags
pager = soup.find('div',attrs={'id':'Str'})
if pager:
pager.extract()
pager = soup.find('div',attrs={'class':'tylko_int'})
if pager:
pager.extract()
return soup

View File

@@ -0,0 +1,57 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
from calibre.web.feeds.news import BasicNewsRecipe
class WysokieObcasyRecipe(BasicNewsRecipe):
__author__ = u'Artur Stachecki <artur.stachecki@gmail.com>'
language = 'pl'
version = 1
title = u'Wysokie Obcasy'
publisher = 'Agora SA'
description = u'Serwis sobotniego dodatku do Gazety Wyborczej'
category='magazine'
language = 'pl'
publication_type = 'magazine'
cover_url=''
remove_empty_feeds= True
no_stylesheets=True
oldest_article = 7
max_articles_per_feed = 100000
recursions = 0
no_stylesheets = True
remove_javascript = True
simultaneous_downloads = 5
keep_only_tags =[]
keep_only_tags.append(dict(name = 'div', attrs = {'id' : 'article'}))
remove_tags =[]
remove_tags.append(dict(name = 'img'))
remove_tags.append(dict(name = 'p', attrs = {'class' : 'info'}))
extra_css = '''
body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
h1{text-align: left;}
'''
feeds = [
('Wszystkie Artykuly', 'feed://www.wysokieobcasy.pl/pub/rss/wysokieobcasy.xml'),
]
def print_version(self,url):
baseURL='http://www.wysokieobcasy.pl/wysokie-obcasy'
segments = url.split(',')
subPath= '/2029020,'
articleURL1 = segments[1]
articleURL2 = segments[2]
printVerString=articleURL1 + ',' + articleURL2
s= baseURL + subPath + printVerString + '.html'
return s
def get_cover_url(self):
soup = self.index_to_soup('http://www.wysokieobcasy.pl/wysokie-obcasy/0,0.html')
self.cover_url = soup.find(attrs={'class':'holder_cr'}).find('img')['src']
return getattr(self, 'cover_url', self.cover_url)
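
For illustration, print_version() above rewrites an article URL by splitting it on commas and re-joining the two numeric id segments under the /2029020, print path. A sketch of the expected behaviour, using a made-up article URL:

# Made-up article URL, to illustrate the print_version() rewrite:
url = 'http://www.wysokieobcasy.pl/wysokie-obcasy/1,96856,13685360,Tytul.html'
segments = url.split(',')  # ['...-obcasy/1', '96856', '13685360', 'Tytul.html']
print_url = ('http://www.wysokieobcasy.pl/wysokie-obcasy/2029020,'
             + segments[1] + ',' + segments[2] + '.html')
# -> http://www.wysokieobcasy.pl/wysokie-obcasy/2029020,96856,13685360.html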

View File

@@ -390,7 +390,6 @@
<xsl:output method = "xml"/>
<xsl:key name="style-types" match="rtf:paragraph-definition" use="@style-number"/>
@@ -415,13 +414,11 @@
</xsl:template>
<xsl:template match="rtf:page-break">
<xsl:element name="br">
<xsl:attribute name="style">page-break-after:always</xsl:attribute>
</xsl:element>
<br style = "page-break-after:always"/>
</xsl:template>
<xsl:template match="rtf:hardline-break">
<xsl:element name="br"/>
<br/>
</xsl:template>
<xsl:template match="rtf:rtf-definition|rtf:font-table|rtf:color-table|rtf:style-table|rtf:page-definition|rtf:list-table|rtf:override-table|rtf:override-list|rtf:list-text"/>
@@ -472,9 +469,7 @@
</xsl:template>
<xsl:template match="rtf:pict">
<xsl:element name="img">
<xsl:attribute name="src"><xsl:value-of select="@num" /></xsl:attribute>
</xsl:element>
<img src = "{@num}"/>
</xsl:template>
<xsl:template match="*">

View File

@@ -47,6 +47,10 @@ binary_includes = [
'/usr/lib/libgthread-2.0.so.0',
'/usr/lib/libpng14.so.14',
'/usr/lib/libexslt.so.0',
# Ensure that libimobiledevice is compiled against openssl, not gnutls
'/usr/lib/libimobiledevice.so.3',
'/usr/lib/libusbmuxd.so.2',
'/usr/lib/libplist.so.1',
MAGICK_PREFIX+'/lib/libMagickWand.so.5',
MAGICK_PREFIX+'/lib/libMagickCore.so.5',
'/usr/lib/libgcrypt.so.11',

View File

@@ -399,7 +399,8 @@ class Py2App(object):
@flush
def add_fontconfig(self):
info('\nAdding fontconfig')
for x in ('fontconfig.1', 'freetype.6', 'expat.1'):
for x in ('fontconfig.1', 'freetype.6', 'expat.1',
'plist.1', 'usbmuxd.2', 'imobiledevice.3'):
src = os.path.join(SW, 'lib', 'lib'+x+'.dylib')
self.install_dylib(src)
dst = os.path.join(self.resources_dir, 'fonts')

View File

@@ -12,13 +12,13 @@ msgstr ""
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
"devel@lists.alioth.debian.org>\n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
"PO-Revision-Date: 2013-03-27 13:07+0000\n"
"PO-Revision-Date: 2013-03-28 13:01+0000\n"
"Last-Translator: Ferran Rius <frius64@hotmail.com>\n"
"Language-Team: Catalan <linux@softcatala.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2013-03-28 04:41+0000\n"
"X-Launchpad-Export-Date: 2013-03-29 04:36+0000\n"
"X-Generator: Launchpad (build 16546)\n"
"Language: ca\n"
@@ -1884,7 +1884,7 @@ msgstr "Awera"
#. name for aws
msgid "Awyu; South"
msgstr "Awyu meridional"
msgstr "Awyu; meridional"
#. name for awt
msgid "Araweté"
@@ -1892,7 +1892,7 @@ msgstr "Araweté"
#. name for awu
msgid "Awyu; Central"
msgstr "Awyu central"
msgstr "Awyu; Central"
#. name for awv
msgid "Awyu; Jair"
@@ -4052,7 +4052,7 @@ msgstr "Buginès"
#. name for buh
msgid "Bunu; Younuo"
msgstr "Bunu; Younuo"
msgstr "Bunu; Younou"
#. name for bui
msgid "Bongili"
@@ -4308,7 +4308,7 @@ msgstr "Bwa"
#. name for bwx
msgid "Bunu; Bu-Nao"
msgstr "Bunu; Bu-Nao"
msgstr "Bunu; Bu Nao"
#. name for bwy
msgid "Bwamu; Cwi"
@@ -19804,7 +19804,7 @@ msgstr "Minoà"
#. name for omo
msgid "Utarmbung"
msgstr ""
msgstr "Utarmbung"
#. name for omp
msgid "Manipuri; Old"
@@ -20344,7 +20344,7 @@ msgstr "Pear"
#. name for pcc
msgid "Bouyei"
msgstr ""
msgstr "Buyí"
#. name for pcd
msgid "Picard"
@@ -20456,11 +20456,11 @@ msgstr "Pengo"
#. name for peh
msgid "Bonan"
msgstr ""
msgstr "Bonan"
#. name for pei
msgid "Chichimeca-Jonaz"
msgstr ""
msgstr "Chichimec"
#. name for pej
msgid "Pomo; Northern"
@@ -20484,7 +20484,7 @@ msgstr "Persa Antic"
#. name for pep
msgid "Kunja"
msgstr ""
msgstr "Kunja"
#. name for peq
msgid "Pomo; Southern"
@@ -20536,7 +20536,7 @@ msgstr "Pagi"
#. name for pgk
msgid "Rerep"
msgstr ""
msgstr "Rerep"
#. name for pgl
msgid "Irish; Primitive"
@@ -20624,7 +20624,7 @@ msgstr "Pima Baix"
#. name for pib
msgid "Yine"
msgstr ""
msgstr "Yine"
#. name for pic
msgid "Pinji"
@@ -20660,7 +20660,7 @@ msgstr "Pijao"
#. name for pil
msgid "Yom"
msgstr ""
msgstr "Yom"
#. name for pim
msgid "Powhatan"
@@ -20760,7 +20760,7 @@ msgstr "Llenguatge de signes pakistaní"
#. name for pkt
msgid "Maleng"
msgstr ""
msgstr "Maleng"
#. name for pku
msgid "Paku"
@@ -20768,7 +20768,7 @@ msgstr "Paku"
#. name for pla
msgid "Miani"
msgstr ""
msgstr "Miani"
#. name for plb
msgid "Polonombauk"
@@ -20804,7 +20804,7 @@ msgstr "Polci"
#. name for plk
msgid "Shina; Kohistani"
msgstr ""
msgstr "Shina; Kohistani"
#. name for pll
msgid "Palaung; Shwe"
@@ -20852,7 +20852,7 @@ msgstr "Palawà; Brooke"
#. name for ply
msgid "Bolyu"
msgstr ""
msgstr "Bolyu"
#. name for plz
msgid "Paluan"
@@ -20896,7 +20896,7 @@ msgstr "Algonquí Carolina"
#. name for pml
msgid "Lingua Franca"
msgstr ""
msgstr "Aljamia"
#. name for pmm
msgid "Pomo"
@@ -20924,7 +20924,7 @@ msgstr "Piemontès"
#. name for pmt
msgid "Tuamotuan"
msgstr ""
msgstr "Tuamotu"
#. name for pmu
msgid "Panjabi; Mirpur"
@@ -20972,7 +20972,7 @@ msgstr "Penrhyn"
#. name for pni
msgid "Aoheng"
msgstr ""
msgstr "Aoheng"
#. name for pnm
msgid "Punan Batu 1"
@@ -21008,7 +21008,7 @@ msgstr "Pontic"
#. name for pnu
msgid "Bunu; Jiongnai"
msgstr ""
msgstr "Bunu; Jiongnai"
#. name for pnv
msgid "Pinigura"
@@ -21100,7 +21100,7 @@ msgstr "Potavatomi"
#. name for pov
msgid "Crioulo; Upper Guinea"
msgstr ""
msgstr "Crioll guineà"
#. name for pow
msgid "Popoloca; San Felipe Otlaltepec"
@@ -21128,7 +21128,7 @@ msgstr "Paipai"
#. name for ppk
msgid "Uma"
msgstr ""
msgstr "Uma"
#. name for ppl
msgid "Pipil"
@@ -21144,7 +21144,7 @@ msgstr "Papapana"
#. name for ppo
msgid "Folopa"
msgstr ""
msgstr "Folopa"
#. name for ppp
msgid "Pelende"
@@ -21180,7 +21180,7 @@ msgstr "Malecite-Passamaquoddy"
#. name for prb
msgid "Lua'"
msgstr ""
msgstr "Lua"
#. name for prc
msgid "Parachi"
@@ -21220,7 +21220,7 @@ msgstr "Llenguatge de signes peruà"
#. name for prm
msgid "Kibiri"
msgstr ""
msgstr "Kibiri"
#. name for prn
msgid "Prasuni"
@@ -21272,7 +21272,7 @@ msgstr "Llenguatge de signes de Providencia"
#. name for psa
msgid "Awyu; Asue"
msgstr ""
msgstr "Awyu; Asue"
#. name for psc
msgid "Persian Sign Language"
@@ -21328,7 +21328,7 @@ msgstr "Llenguatge de signes portuguès"
#. name for pss
msgid "Kaulong"
msgstr ""
msgstr "Kaulong"
#. name for pst
msgid "Pashto; Central"
@@ -21376,11 +21376,11 @@ msgstr "Pìamatsina"
#. name for ptt
msgid "Enrekang"
msgstr ""
msgstr "Enrekang"
#. name for ptu
msgid "Bambam"
msgstr ""
msgstr "Bambam"
#. name for ptv
msgid "Port Vato"
@@ -29584,7 +29584,7 @@ msgstr ""
#. name for yir
msgid "Awyu; North"
msgstr ""
msgstr "Awyu; Septentrional"
#. name for yis
msgid "Yis"

View File

@@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = u'calibre'
numeric_version = (0, 9, 25)
numeric_version = (0, 9, 26)
__version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"

View File

@@ -757,9 +757,10 @@ from calibre.ebooks.metadata.sources.isbndb import ISBNDB
from calibre.ebooks.metadata.sources.overdrive import OverDrive
from calibre.ebooks.metadata.sources.douban import Douban
from calibre.ebooks.metadata.sources.ozon import Ozon
# from calibre.ebooks.metadata.sources.google_images import GoogleImages
from calibre.ebooks.metadata.sources.google_images import GoogleImages
from calibre.ebooks.metadata.sources.big_book_search import BigBookSearch
plugins += [GoogleBooks, Amazon, Edelweiss, OpenLibrary, ISBNDB, OverDrive, Douban, Ozon]
plugins += [GoogleBooks, GoogleImages, Amazon, Edelweiss, OpenLibrary, ISBNDB, OverDrive, Douban, Ozon, BigBookSearch]
# }}}

View File

@@ -91,7 +91,7 @@ def restore_plugin_state_to_default(plugin_or_name):
config['enabled_plugins'] = ep
default_disabled_plugins = set([
'Overdrive', 'Douban Books', 'OZON.ru', 'Edelweiss', 'Google Images',
'Overdrive', 'Douban Books', 'OZON.ru', 'Edelweiss', 'Google Images', 'Big Book Search',
])
def is_disabled(plugin):

View File

@@ -31,7 +31,7 @@ msprefs.defaults['find_first_edition_date'] = False
# Google covers are often poor quality (scans/errors) but they have high
# resolution, so they trump covers from better sources. So make sure they
# are only used if no other covers are found.
msprefs.defaults['cover_priorities'] = {'Google':2, 'Google Images':2}
msprefs.defaults['cover_priorities'] = {'Google':2, 'Google Images':2, 'Big Book Search':2}
def create_log(ostream=None):
from calibre.utils.logging import ThreadSafeLog, FileStream
@@ -429,6 +429,40 @@ class Source(Plugin):
mi.tags = list(map(fixcase, mi.tags))
mi.isbn = check_isbn(mi.isbn)
def download_multiple_covers(self, title, authors, urls, get_best_cover, timeout, result_queue, abort, log, prefs_name='max_covers'):
if not urls:
log('No images found for title: %r and authors: %r'%(title, authors))
return
from threading import Thread
import time
if prefs_name:
urls = urls[:self.prefs[prefs_name]]
if get_best_cover:
urls = urls[:1]
log('Downloading %d covers'%len(urls))
workers = [Thread(target=self.download_image, args=(u, timeout, log, result_queue)) for u in urls]
for w in workers:
w.daemon = True
w.start()
alive = True
start_time = time.time()
while alive and not abort.is_set() and time.time() - start_time < timeout:
alive = False
for w in workers:
if w.is_alive():
alive = True
break
abort.wait(0.1)
def download_image(self, url, timeout, log, result_queue):
try:
ans = self.browser.open_novisit(url, timeout=timeout).read()
result_queue.put((self, ans))
log('Downloaded cover from: %s'%url)
except Exception:
self.log.exception('Failed to download cover from: %r'%url)
# }}}
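
A rough sketch of how a cover source plugin might call the new download_multiple_covers() helper from its download_cover() implementation, mirroring the GoogleImages refactor later in this commit (get_image_urls here is a hypothetical scraping helper, not part of the base class):

class SomeCoverSource(Source):  # illustrative subclass
    can_get_multiple_covers = True

    def download_cover(self, log, result_queue, abort, title=None,
                       authors=None, identifiers={}, timeout=30,
                       get_best_cover=False):
        urls = self.get_image_urls(title, authors)  # hypothetical helper
        self.download_multiple_covers(title, authors, urls, get_best_cover,
                                      timeout, result_queue, abort, log)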
# Metadata API {{{

View File

@@ -0,0 +1,58 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.ebooks.metadata.sources.base import Source, Option
def get_urls(br, tokens):
from urllib import quote_plus
from mechanize import Request
from lxml import html
escaped = [quote_plus(x.encode('utf-8')) for x in tokens if x and x.strip()]
q = b'+'.join(escaped)
url = 'http://bigbooksearch.com/books/'+q
br.open(url).read()
req = Request('http://bigbooksearch.com/query.php?SearchIndex=books&Keywords=%s&ItemPage=1'%q)
req.add_header('X-Requested-With', 'XMLHttpRequest')
req.add_header('Referer', url)
raw = br.open(req).read()
root = html.fromstring(raw.decode('utf-8'))
urls = [i.get('src') for i in root.xpath('//img[@src]')]
return urls
class BigBookSearch(Source):
name = 'Big Book Search'
description = _('Downloads multiple book covers from Amazon. Useful to find alternate covers.')
capabilities = frozenset(['cover'])
config_help_message = _('Configure the Big Book Search plugin')
can_get_multiple_covers = True
options = (Option('max_covers', 'number', 5, _('Maximum number of covers to get'),
_('The maximum number of covers to process from the search result')),
)
supports_gzip_transfer_encoding = True
def download_cover(self, log, result_queue, abort,
title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):
if not title:
return
br = self.browser
tokens = tuple(self.get_title_tokens(title)) + tuple(self.get_author_tokens(authors))
urls = get_urls(br, tokens)
self.download_multiple_covers(title, authors, urls, get_best_cover, timeout, result_queue, abort, log)
def test():
from calibre import browser
import pprint
br = browser()
urls = get_urls(br, ['consider', 'phlebas', 'banks'])
pprint.pprint(urls)
if __name__ == '__main__':
test()

View File

@@ -18,12 +18,13 @@ from calibre.utils.magick.draw import Image, save_cover_data_to
class Worker(Thread):
def __init__(self, plugin, abort, title, authors, identifiers, timeout, rq):
def __init__(self, plugin, abort, title, authors, identifiers, timeout, rq, get_best_cover=False):
Thread.__init__(self)
self.daemon = True
self.plugin = plugin
self.abort = abort
self.get_best_cover = get_best_cover
self.buf = BytesIO()
self.log = create_log(self.buf)
self.title, self.authors, self.identifiers = (title, authors,
@@ -37,7 +38,7 @@ class Worker(Thread):
try:
if self.plugin.can_get_multiple_covers:
self.plugin.download_cover(self.log, self.rq, self.abort,
title=self.title, authors=self.authors, get_best_cover=True,
title=self.title, authors=self.authors, get_best_cover=self.get_best_cover,
identifiers=self.identifiers, timeout=self.timeout)
else:
self.plugin.download_cover(self.log, self.rq, self.abort,
@@ -72,7 +73,7 @@ def process_result(log, result):
return (plugin, width, height, fmt, data)
def run_download(log, results, abort,
title=None, authors=None, identifiers={}, timeout=30):
title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):
'''
Run the cover download, putting results into the queue :param:`results`.
@@ -89,7 +90,7 @@ def run_download(log, results, abort,
plugins = [p for p in metadata_plugins(['cover']) if p.is_configured()]
rq = Queue()
workers = [Worker(p, abort, title, authors, identifiers, timeout, rq) for p
workers = [Worker(p, abort, title, authors, identifiers, timeout, rq, get_best_cover=get_best_cover) for p
in plugins]
for w in workers:
w.start()
@@ -163,7 +164,7 @@ def download_cover(log,
abort = Event()
run_download(log, rq, abort, title=title, authors=authors,
identifiers=identifiers, timeout=timeout)
identifiers=identifiers, timeout=timeout, get_best_cover=True)
results = []
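
A rough usage sketch for the new get_best_cover flag on run_download(), with the queue and abort-event setup as in download_cover() above (the title and author values are placeholders):

# Sketch: ask every configured plugin for only its best cover.
from Queue import Queue
from threading import Event

rq, abort = Queue(), Event()
run_download(create_log(), rq, abort, title='Some Title',
             authors=['Some Author'], get_best_cover=True)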

View File

@@ -39,39 +39,11 @@ class GoogleImages(Source):
title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):
if not title:
return
from threading import Thread
import time
timeout = max(60, timeout) # Needs at least a minute
title = ' '.join(self.get_title_tokens(title))
author = ' '.join(self.get_author_tokens(authors))
urls = self.get_image_urls(title, author, log, abort, timeout)
if not urls:
log('No images found in Google for, title: %r and authors: %r'%(title, author))
return
urls = urls[:self.prefs['max_covers']]
if get_best_cover:
urls = urls[:1]
workers = [Thread(target=self.download_image, args=(url, timeout, log, result_queue)) for url in urls]
for w in workers:
w.daemon = True
w.start()
alive = True
start_time = time.time()
while alive and not abort.is_set() and time.time() - start_time < timeout:
alive = False
for w in workers:
if w.is_alive():
alive = True
break
abort.wait(0.1)
def download_image(self, url, timeout, log, result_queue):
try:
ans = self.browser.open_novisit(url, timeout=timeout).read()
result_queue.put((self, ans))
log('Downloaded cover from: %s'%url)
except Exception:
self.log.exception('Failed to download cover from: %r'%url)
self.download_multiple_covers(title, authors, urls, get_best_cover, timeout, result_queue, abort, log)
def get_image_urls(self, title, author, log, abort, timeout):
from calibre.utils.ipc.simple_worker import fork_job, WorkerError

View File

@@ -262,6 +262,35 @@ def from_links(container):
toc.remove(child)
return toc
def find_text(node):
LIMIT = 200
pat = re.compile(r'\s+')
for child in node:
if isinstance(child, etree._Element):
text = xml2text(child).strip()
text = pat.sub(' ', text)
if len(text) < 1:
continue
if len(text) > LIMIT:
# Look for less text in a child of this node, recursively
ntext = find_text(child)
return ntext or (text[:LIMIT] + '...')
else:
return text
def from_files(container):
toc = TOC()
for spinepath in container.spine_items:
name = container.abspath_to_name(spinepath)
root = container.parsed(name)
body = XPath('//h:body')(root)
if not body:
continue
text = find_text(body[0])
if text:
toc.add(text, name)
return toc
def add_id(container, name, loc):
root = container.parsed(name)
body = root.xpath('//*[local-name()="body"]')[0]
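
The new from_files() pairs naturally with the existing container/commit machinery; a rough usage sketch, assuming an EPUB opened with get_container() (the path is illustrative):

# Sketch: build a per-file ToC for an EPUB and write it back.
from calibre.ebooks.oeb.polish.container import get_container
from calibre.ebooks.oeb.polish.toc import from_files, commit_toc

container = get_container('/tmp/book.epub')  # illustrative path
toc = from_files(container)  # one entry per spine file, titled by its first text
if len(toc) > 0:
    commit_toc(container, toc)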

View File

@@ -333,8 +333,8 @@ class OEBReader(object):
guide = self.oeb.guide
manifest = self.oeb.manifest
for elem in xpath(opf, '/o2:package/o2:guide/o2:reference'):
href = elem.get('href')
path = urlnormalize(urldefrag(href)[0])
ref_href = elem.get('href')
path = urlnormalize(urldefrag(ref_href)[0])
if path not in manifest.hrefs:
corrected_href = None
for href in manifest.hrefs:
@@ -342,12 +342,12 @@ class OEBReader(object):
corrected_href = href
break
if corrected_href is None:
self.logger.warn(u'Guide reference %r not found' % href)
self.logger.warn(u'Guide reference %r not found' % ref_href)
continue
href = corrected_href
ref_href = corrected_href
typ = elem.get('type')
if typ not in guide:
guide.add(typ, elem.get('title'), href)
guide.add(typ, elem.get('title'), ref_href)
def _find_ncx(self, opf):
result = xpath(opf, '/o2:package/o2:spine/@toc')

View File

@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
store_version = 2 # Needed for dynamic plugin loading
store_version = 3 # Needed for dynamic plugin loading
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
@@ -18,13 +18,26 @@ from calibre import browser
from calibre.gui2 import open_url
from calibre.gui2.store.search_result import SearchResult
class AmazonDEKindleStore(StorePlugin):
'''
For comments on the implementation, please see amazon_plugin.py
'''
aff_id = {'tag': 'charhale0a-21'}
store_link = ('http://www.amazon.de/gp/redirect.html?ie=UTF8&site-redirect=de'
'&tag=%(tag)s&linkCode=ur2&camp=1638&creative=19454'
'&location=http://www.amazon.de/ebooks-kindle/b?node=530886031')
store_link_details = ('http://www.amazon.de/gp/redirect.html?ie=UTF8'
'&location=http://www.amazon.de/dp/%(asin)s&site-redirect=de'
'&tag=%(tag)s&linkCode=ur2&camp=1638&creative=6742')
search_url = 'http://www.amazon.de/s/?url=search-alias%3Ddigital-text&field-keywords='
# This class is copy/pasted from amazon_uk_plugin. Do not modify it in any
# other amazon EU plugin. Be sure to paste it into all other amazon EU plugins
# when modified.
author_article = 'von '
and_word = ' und '
# ---- Copy from here to end
class AmazonEUBase(StorePlugin):
'''
For comments on the implementation, please see amazon_plugin.py
'''
@@ -108,20 +121,3 @@ class AmazonEUBase(StorePlugin):
def get_details(self, search_result, timeout):
pass
class AmazonDEKindleStore(AmazonEUBase):
'''
For comments on the implementation, please see amazon_plugin.py
'''
aff_id = {'tag': 'charhale0a-21'}
store_link = ('http://www.amazon.de/gp/redirect.html?ie=UTF8&site-redirect=de'
'&tag=%(tag)s&linkCode=ur2&camp=1638&creative=19454'
'&location=http://www.amazon.de/ebooks-kindle/b?node=530886031')
store_link_details = ('http://www.amazon.de/gp/redirect.html?ie=UTF8'
'&location=http://www.amazon.de/dp/%(asin)s&site-redirect=de'
'&tag=%(tag)s&linkCode=ur2&camp=1638&creative=6742')
search_url = 'http://www.amazon.de/s/?url=search-alias%3Ddigital-text&field-keywords='
author_article = 'von '
and_word = ' und '

View File

@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
store_version = 2 # Needed for dynamic plugin loading
store_version = 3 # Needed for dynamic plugin loading
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
@@ -18,12 +18,25 @@ from calibre import browser
from calibre.gui2 import open_url
from calibre.gui2.store.search_result import SearchResult
class AmazonESKindleStore(StorePlugin):
'''
For comments on the implementation, please see amazon_plugin.py
'''
# This class is copy/pasted from amazon_uk_plugin. Do not modify it in any
# other amazon EU plugin. Be sure to paste it into all other amazon EU plugins
# when modified.
aff_id = {'tag': 'charhale09-21'}
store_link = ('http://www.amazon.es/ebooks-kindle/b?_encoding=UTF8&'
'node=827231031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3626&creative=24790')
store_link_details = ('http://www.amazon.es/gp/redirect.html?ie=UTF8&'
'location=http://www.amazon.es/dp/%(asin)s&tag=%(tag)s'
'&linkCode=ur2&camp=3626&creative=24790')
search_url = 'http://www.amazon.es/s/?url=search-alias%3Ddigital-text&field-keywords='
author_article = 'de '
and_word = ' y '
# ---- Copy from here to end
class AmazonEUBase(StorePlugin):
'''
For comments on the implementation, please see amazon_plugin.py
'''
@@ -107,19 +120,3 @@ class AmazonEUBase(StorePlugin):
def get_details(self, search_result, timeout):
pass
class AmazonESKindleStore(AmazonEUBase):
'''
For comments on the implementation, please see amazon_plugin.py
'''
aff_id = {'tag': 'charhale09-21'}
store_link = ('http://www.amazon.es/ebooks-kindle/b?_encoding=UTF8&'
'node=827231031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3626&creative=24790')
store_link_details = ('http://www.amazon.es/gp/redirect.html?ie=UTF8&'
'location=http://www.amazon.es/dp/%(asin)s&tag=%(tag)s'
'&linkCode=ur2&camp=3626&creative=24790')
search_url = 'http://www.amazon.es/s/?url=search-alias%3Ddigital-text&field-keywords='
author_article = 'de '
and_word = ' y '

View File

@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
store_version = 2 # Needed for dynamic plugin loading
store_version = 3 # Needed for dynamic plugin loading
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
@@ -18,13 +18,22 @@ from calibre import browser
from calibre.gui2 import open_url
from calibre.gui2.store.search_result import SearchResult
class AmazonFRKindleStore(StorePlugin):
'''
For comments on the implementation, please see amazon_plugin.py
'''
aff_id = {'tag': 'charhale-21'}
store_link = 'http://www.amazon.fr/livres-kindle/b?ie=UTF8&node=695398031&ref_=sa_menu_kbo1&_encoding=UTF8&tag=%(tag)s&linkCode=ur2&camp=1642&creative=19458' % aff_id
store_link_details = 'http://www.amazon.fr/gp/redirect.html?ie=UTF8&location=http://www.amazon.fr/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=1634&creative=6738'
search_url = 'http://www.amazon.fr/s/?url=search-alias%3Ddigital-text&field-keywords='
# This class is copy/pasted from amazon_uk_plugin. Do not modify it in any
# other amazon EU plugin. Be sure to paste it into all other amazon EU plugins
# when modified.
author_article = 'de '
and_word = ' et '
# ---- Copy from here to end
class AmazonEUBase(StorePlugin):
'''
For comments on the implementation, please see amazon_plugin.py
'''
@@ -108,16 +117,3 @@ class AmazonEUBase(StorePlugin):
def get_details(self, search_result, timeout):
pass
class AmazonFRKindleStore(AmazonEUBase):
'''
For comments on the implementation, please see amazon_plugin.py
'''
aff_id = {'tag': 'charhale-21'}
store_link = 'http://www.amazon.fr/livres-kindle/b?ie=UTF8&node=695398031&ref_=sa_menu_kbo1&_encoding=UTF8&tag=%(tag)s&linkCode=ur2&camp=1642&creative=19458' % aff_id
store_link_details = 'http://www.amazon.fr/gp/redirect.html?ie=UTF8&location=http://www.amazon.fr/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=1634&creative=6738'
search_url = 'http://www.amazon.fr/s/?url=search-alias%3Ddigital-text&field-keywords='
author_article = 'de '
and_word = ' et '

View File

@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
store_version = 2 # Needed for dynamic plugin loading
store_version = 3 # Needed for dynamic plugin loading
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
@@ -18,12 +18,25 @@ from calibre import browser
from calibre.gui2 import open_url
from calibre.gui2.store.search_result import SearchResult
class AmazonITKindleStore(StorePlugin):
'''
For comments on the implementation, please see amazon_plugin.py
'''
# This class is copy/pasted from amazon_uk_plugin. Do not modify it in any
# other amazon EU plugin. Be sure to paste it into all other amazon EU plugins
# when modified.
aff_id = {'tag': 'httpcharles07-21'}
store_link = ('http://www.amazon.it/ebooks-kindle/b?_encoding=UTF8&'
'node=827182031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3370&creative=23322')
store_link_details = ('http://www.amazon.it/gp/redirect.html?ie=UTF8&'
'location=http://www.amazon.it/dp/%(asin)s&tag=%(tag)s&'
'linkCode=ur2&camp=3370&creative=23322')
search_url = 'http://www.amazon.it/s/?url=search-alias%3Ddigital-text&field-keywords='
author_article = 'di '
and_word = ' e '
# ---- Copy from here to end
class AmazonEUBase(StorePlugin):
'''
For comments on the implementation, please see amazon_plugin.py
'''
@@ -106,20 +119,3 @@ class AmazonEUBase(StorePlugin):
def get_details(self, search_result, timeout):
pass
class AmazonITKindleStore(AmazonEUBase):
'''
For comments on the implementation, please see amazon_plugin.py
'''
aff_id = {'tag': 'httpcharles07-21'}
store_link = ('http://www.amazon.it/ebooks-kindle/b?_encoding=UTF8&'
'node=827182031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3370&creative=23322')
store_link_details = ('http://www.amazon.it/gp/redirect.html?ie=UTF8&'
'location=http://www.amazon.it/dp/%(asin)s&tag=%(tag)s&'
'linkCode=ur2&camp=3370&creative=23322')
search_url = 'http://www.amazon.it/s/?url=search-alias%3Ddigital-text&field-keywords='
author_article = 'di '
and_word = ' e '

View File

@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
store_version = 2 # Needed for dynamic plugin loading
store_version = 3 # Needed for dynamic plugin loading
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
@@ -19,11 +19,28 @@ from calibre.gui2.store import StorePlugin
from calibre.gui2.store.search_result import SearchResult
# This class is copy/pasted from amazon_uk_plugin. Do not modify it in any
# other amazon EU plugin. Be sure to paste it into all other amazon EU plugins
# when modified.
class AmazonEUBase(StorePlugin):
class AmazonUKKindleStore(StorePlugin):
aff_id = {'tag': 'calcharles-21'}
store_link = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&'
'location=http://www.amazon.co.uk/Kindle-eBooks/b?'
'ie=UTF8&node=341689031&ref_=sa_menu_kbo2&tag=%(tag)s&'
'linkCode=ur2&camp=1634&creative=19450')
store_link_details = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&'
'location=http://www.amazon.co.uk/dp/%(asin)s&tag=%(tag)s&'
'linkCode=ur2&camp=1634&creative=6738')
search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords='
author_article = 'by '
and_word = ' and '
# This code is copy/pasted from here into the other amazon EU plugins. Do not
# modify it in any other amazon EU plugin. Be sure to paste it into all
# other amazon EU plugins when modified.
# ---- Copy from here to end
'''
For comments on the implementation, please see amazon_plugin.py
'''
@@ -107,18 +124,3 @@ class AmazonEUBase(StorePlugin):
def get_details(self, search_result, timeout):
pass
class AmazonUKKindleStore(AmazonEUBase):
aff_id = {'tag': 'calcharles-21'}
store_link = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&'
'location=http://www.amazon.co.uk/Kindle-eBooks/b?'
'ie=UTF8&node=341689031&ref_=sa_menu_kbo2&tag=%(tag)s&'
'linkCode=ur2&camp=1634&creative=19450')
store_link_details = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&'
'location=http://www.amazon.co.uk/dp/%(asin)s&tag=%(tag)s&'
'linkCode=ur2&camp=1634&creative=6738')
search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords='
author_article = 'by '
and_word = ' and '

View File

@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
store_version = 1 # Needed for dynamic plugin loading
store_version = 2 # Needed for dynamic plugin loading
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
@@ -24,8 +24,8 @@ from calibre.gui2.store.web_store_dialog import WebStoreDialog
class WaterstonesUKStore(BasicStoreConfig, StorePlugin):
def open(self, parent=None, detail_item=None, external=False):
url = 'http://clkuk.tradedoubler.com/click?p=51196&a=1951604&g=19333484'
url_details = 'http://clkuk.tradedoubler.com/click?p(51196)a(1951604)g(16460516)url({0})'
url = 'http://www.awin1.com/awclick.php?mid=3787&id=120917'
url_details = 'http://www.awin1.com/cread.php?awinmid=3787&awinaffid=120917&clickref=&p={0}'
if external or self.config.get('open_external', False):
if detail_item:
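
The new awin1.com links keep the same {0} placeholder contract as the old TradeDoubler ones, presumably filled with str.format; an illustrative example (the product URL is made up):

# Illustrative: how the detail-link placeholder gets filled.
url_details = ('http://www.awin1.com/cread.php?awinmid=3787'
               '&awinaffid=120917&clickref=&p={0}')
detail_item = 'http://www.waterstones.com/waterstonesweb/products/9780000000000'
print(url_details.format(detail_item))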

View File

@@ -18,7 +18,7 @@ from PyQt4.Qt import (QPushButton, QFrame, QVariant, QMenu, QInputDialog,
from calibre.ebooks.oeb.polish.container import get_container, AZW3Container
from calibre.ebooks.oeb.polish.toc import (
get_toc, add_id, TOC, commit_toc, from_xpaths, from_links)
get_toc, add_id, TOC, commit_toc, from_xpaths, from_links, from_files)
from calibre.gui2 import Application, error_dialog, gprefs
from calibre.gui2.progress_indicator import ProgressIndicator
from calibre.gui2.toc.location import ItemEdit
@@ -126,6 +126,7 @@ class ItemView(QFrame): # {{{
go_to_root = pyqtSignal()
create_from_xpath = pyqtSignal(object)
create_from_links = pyqtSignal()
create_from_files = pyqtSignal()
flatten_toc = pyqtSignal()
def __init__(self, parent):
@@ -183,6 +184,15 @@ class ItemView(QFrame): # {{{
)))
l.addWidget(b)
self.cfb = b = QPushButton(_('Generate ToC from &files'))
b.clicked.connect(self.create_from_files)
b.setToolTip(textwrap.fill(_(
'Generate a Table of Contents from individual files in the book.'
' Each entry in the ToC will point to the start of the file, the'
' text of the entry will be the "first line" of text from the file.'
)))
l.addWidget(b)
self.xpb = b = QPushButton(_('Generate ToC from &XPath'))
b.clicked.connect(self.create_from_user_xpath)
b.setToolTip(textwrap.fill(_(
@@ -577,6 +587,7 @@ class TOCView(QWidget): # {{{
i.add_new_item.connect(self.add_new_item)
i.create_from_xpath.connect(self.create_from_xpath)
i.create_from_links.connect(self.create_from_links)
i.create_from_files.connect(self.create_from_files)
i.flatten_item.connect(self.flatten_item)
i.flatten_toc.connect(self.flatten_toc)
i.go_to_root.connect(self.go_to_root)
@@ -778,6 +789,14 @@ class TOCView(QWidget): # {{{
_('No links were found that could be added to the Table of Contents.'), show=True)
self.insert_toc_fragment(toc)
def create_from_files(self):
toc = from_files(self.ebook)
if len(toc) == 0:
return error_dialog(self, _('No items found'),
_('No files were found that could be added to the Table of Contents.'), show=True)
self.insert_toc_fragment(toc)
# }}}
class TOCEditor(QDialog): # {{{

View File

@@ -54,7 +54,7 @@ def get_parser(usage):
def get_db(dbpath, options):
global do_notify
if options.library_path is not None:
dbpath = options.library_path
dbpath = os.path.expanduser(options.library_path)
if dbpath is None:
raise ValueError('No saved library path, either run the GUI or use the'
' --with-library option')
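
With os.path.expanduser() in place, a tilde in --with-library now resolves to the user's home directory, for example:

# Illustrative: '~' in --with-library now expands to the home directory.
import os
print(os.path.expanduser('~/Calibre Library'))
# -> /home/user/Calibre Library (platform dependent)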

43 file diffs suppressed because they are too large.

Some files were not shown because too many files have changed in this diff.