Merge from trunk
@@ -40,6 +40,7 @@ recipes/.gitignore
recipes/README.md
recipes/icon_checker.py
recipes/readme_updater.py
recipes/garfield.recipe
recipes/katalog_egazeciarz.recipe
recipes/tv_axnscifi.recipe
recipes/tv_comedycentral.recipe
@@ -63,6 +64,7 @@ recipes/tv_tvppolonia.recipe
recipes/tv_tvpuls.recipe
recipes/tv_viasathistory.recipe
recipes/icons/katalog_egazeciarz.png
recipes/icons/garfield.png
recipes/icons/tv_axnscifi.png
recipes/icons/tv_comedycentral.png
recipes/icons/tv_discoveryscience.png
@@ -20,6 +20,58 @@
# new recipes:
# - title:

- version: 0.9.26
  date: 2013-04-05

  new features:
    - title: "PDF Output: Allow using templates to create arbitrary headers and footers. Look under PDF Output in the conversion dialog for this feature."

    - title: "ToC Editor: Allow generating the ToC directly from individual files inside the ebook. Useful for EPUBs that have individual chapters in single files."
      tickets: [1163520]

    - title: "ToC Editor: Add buttons to indent/unindent the current entry"

    - title: "ToC Editor: Right-click menu to perform various useful actions on entries in the ToC"

    - title: "Column icons: Allow use of wide images as column icons"

    - title: "Add USB ids for the Palm Pre2 and Samsung Galaxy phone to the device drivers"
      tickets: [1162293,1163115]

  bug fixes:
    - title: "PDF Output: Fix generating page numbers causing links to not work."
      tickets: [1162573]

    - title: "Wrong filename output in error message when 'Guide reference not found'"
      tickets: [1163659]

    - title: "Get Books: Update Amazon, Barnes & Noble, Waterstones and Gutenberg store plugins for website change"

    - title: "PDF Output: Fix 1 pixel wide left and top margins on the cover page for some PDF conversions due to incorrect rounding."
      tickets: [1162054]

    - title: "ToC Editor: Fix drag and drop of multiple items resulting in the dropped items being in random order sometimes."
      tickets: [1161999]

  improved recipes:
    - Financial Times UK
    - Sing Tao Daily
    - Apple Daily
    - A List Apart
    - Business Week
    - Harpers printed edition
    - Harvard Business Review

  new recipes:
    - title: AM730
      author: Eddie Lau

    - title: Arret sur images
      author: Francois D

    - title: Diario de Noticias
      author: Jose Pinto

- version: 0.9.25
  date: 2013-03-29
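The PDF header/footer entry in 0.9.26 above is template driven. A minimal sketch of exercising it from Python, assuming calibre's ebook-convert is on PATH and that the matching switches are named --pdf-header-template/--pdf-footer-template with the _TITLE_/_PAGENUM_ placeholders (these names are assumptions, not taken from this diff):

```python
# Hedged sketch: drive the new PDF header/footer templates via ebook-convert.
# Option names and placeholders are assumed to match the 0.9.26 feature above.
import subprocess

subprocess.check_call([
    'ebook-convert', 'book.epub', 'book.pdf',
    '--pdf-header-template', '<p style="text-align:center">_TITLE_</p>',
    '--pdf-footer-template', '<p style="text-align:right">Page _PAGENUM_</p>',
])
```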
recipes/am730.recipe (new file, 290 lines)
@@ -0,0 +1,290 @@
# vim:fileencoding=UTF-8
from __future__ import unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2013, Eddie Lau'
__Date__ = ''
__HiResImg__ = True

'''
Change Log:
2013/03/30 -- first version
'''

from calibre import (__appname__, force_unicode, strftime)
from calibre.utils.date import now as nowf
import os, datetime, re
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
from calibre.utils.localization import canonicalize_lang

class AppleDaily(BasicNewsRecipe):
    title = u'AM730'
    __author__ = 'Eddie Lau'
    publisher = 'AM730'
    oldest_article = 1
    max_articles_per_feed = 100
    auto_cleanup = False
    language = 'zh'
    encoding = 'utf-8'
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    description = 'http://www.am730.com.hk'
    category = 'Chinese, News, Hong Kong'
    masthead_url = 'http://www.am730.com.hk/images/logo.jpg'

    extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} div[id=articleHeader] {font-size:200%; text-align:left; font-weight:bold;} photocaption {font-size:50%; margin-left:auto; margin-right:auto;}'
    keep_only_tags = [dict(name='div', attrs={'id':'articleHeader'}),
                      dict(name='div', attrs={'class':'thecontent wordsnap'}),
                      dict(name='a', attrs={'class':'lightboximg'})]
    remove_tags = [dict(name='img', attrs={'src':'/images/am730_article_logo.jpg'}),
                   dict(name='img', attrs={'src':'/images/am_endmark.gif'})]

    def get_dtlocal(self):
        dt_utc = datetime.datetime.utcnow()
        # convert UTC to local hk time - at HKT 6am, all news are available
        return dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(6.0/24)

    def get_fetchdate(self):
        if __Date__ != '':
            return __Date__
        else:
            return self.get_dtlocal().strftime("%Y%m%d")

    def get_fetchformatteddate(self):
        if __Date__ != '':
            return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8]
        else:
            return self.get_dtlocal().strftime("%Y-%m-%d")

    def get_fetchyear(self):
        if __Date__ != '':
            return __Date__[0:4]
        else:
            return self.get_dtlocal().strftime("%Y")

    def get_fetchmonth(self):
        if __Date__ != '':
            return __Date__[4:6]
        else:
            return self.get_dtlocal().strftime("%m")

    def get_fetchday(self):
        if __Date__ != '':
            return __Date__[6:8]
        else:
            return self.get_dtlocal().strftime("%d")

    # Note: does not work with custom date given by __Date__
    def get_weekday(self):
        return self.get_dtlocal().weekday()

    def populate_article_metadata(self, article, soup, first):
        if first and hasattr(self, 'add_toc_thumbnail'):
            picdiv = soup.find('img')
            if picdiv is not None:
                self.add_toc_thumbnail(article, picdiv['src'])

    def parse_index(self):
        feeds = []
        soup = self.index_to_soup('http://www.am730.com.hk/')
        ul = soup.find(attrs={'class':'nav-section'})
        sectionList = []
        for li in ul.findAll('li'):
            a = 'http://www.am730.com.hk/' + li.find('a', href=True).get('href', False)
            title = li.find('a').get('title', False).strip()
            sectionList.append((title, a))
        for title, url in sectionList:
            articles = self.parse_section(url)
            if articles:
                feeds.append((title, articles))
        return feeds

    def parse_section(self, url):
        soup = self.index_to_soup(url)
        items = soup.findAll(attrs={'style':'padding-bottom: 15px;'})
        current_articles = []
        for item in items:
            a = item.find(attrs={'class':'t6 f14'}).find('a', href=True)
            articlelink = 'http://www.am730.com.hk/' + a.get('href', True)
            title = self.tag_to_string(a)
            description = self.tag_to_string(item.find(attrs={'class':'t3 f14'}))
            current_articles.append({'title': title, 'url': articlelink, 'description': description})
        return current_articles

    def preprocess_html(self, soup):
        multia = soup.findAll('a')
        for a in multia:
            if a is not None:
                image = a.find('img')
                if image is not None:
                    if __HiResImg__:
                        image['src'] = image.get('src').replace('/thumbs/', '/')
                    caption = image.get('alt')
                    tag = Tag(soup, "photo", [])
                    tag2 = Tag(soup, "photocaption", [])
                    tag.insert(0, image)
                    if caption is not None:
                        tag2.insert(0, caption)
                    tag.insert(1, tag2)
                    a.replaceWith(tag)
        return soup

    def create_opf(self, feeds, dir=None):
        if dir is None:
            dir = self.output_dir
        title = self.short_title()
        if self.output_profile.periodical_date_in_title:
            title += strftime(self.timefmt)
        mi = MetaInformation(title, [__appname__])
        mi.publisher = __appname__
        mi.author_sort = __appname__
        if self.publication_type:
            mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
        mi.timestamp = nowf()
        article_titles, aseen = [], set()
        for f in feeds:
            for a in f:
                if a.title and a.title not in aseen:
                    aseen.add(a.title)
                    article_titles.append(force_unicode(a.title, 'utf-8'))

        mi.comments = self.description
        if not isinstance(mi.comments, unicode):
            mi.comments = mi.comments.decode('utf-8', 'replace')
        mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' +
                '\n\n'.join(article_titles))

        language = canonicalize_lang(self.language)
        if language is not None:
            mi.language = language
        # This one affects the pub date shown in kindle title
        #mi.pubdate = nowf()
        # now appears to need the time field to be > 12.00noon as well
        mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
        opf_path = os.path.join(dir, 'index.opf')
        ncx_path = os.path.join(dir, 'index.ncx')

        opf = OPFCreator(dir, mi)
        # Add mastheadImage entry to <guide> section
        mp = getattr(self, 'masthead_path', None)
        if mp is not None and os.access(mp, os.R_OK):
            from calibre.ebooks.metadata.opf2 import Guide
            ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
            ref.type = 'masthead'
            ref.title = 'Masthead Image'
            opf.guide.append(ref)

        manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
        manifest.append(os.path.join(dir, 'index.html'))
        manifest.append(os.path.join(dir, 'index.ncx'))

        # Get cover
        cpath = getattr(self, 'cover_path', None)
        if cpath is None:
            pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
            if self.default_cover(pf):
                cpath = pf.name
        if cpath is not None and os.access(cpath, os.R_OK):
            opf.cover = cpath
            manifest.append(cpath)

        # Get masthead
        mpath = getattr(self, 'masthead_path', None)
        if mpath is not None and os.access(mpath, os.R_OK):
            manifest.append(mpath)

        opf.create_manifest_from_files_in(manifest)
        for mani in opf.manifest:
            if mani.path.endswith('.ncx'):
                mani.id = 'ncx'
            if mani.path.endswith('mastheadImage.jpg'):
                mani.id = 'masthead-image'

        entries = ['index.html']
        toc = TOC(base_path=dir)
        self.play_order_counter = 0
        self.play_order_map = {}

        def feed_index(num, parent):
            f = feeds[num]
            for j, a in enumerate(f):
                if getattr(a, 'downloaded', False):
                    adir = 'feed_%d/article_%d/'%(num, j)
                    auth = a.author
                    if not auth:
                        auth = None
                    desc = a.text_summary
                    if not desc:
                        desc = None
                    else:
                        desc = self.description_limiter(desc)
                    tt = a.toc_thumbnail if a.toc_thumbnail else None
                    entries.append('%sindex.html'%adir)
                    po = self.play_order_map.get(entries[-1], None)
                    if po is None:
                        self.play_order_counter += 1
                        po = self.play_order_counter
                    parent.add_item('%sindex.html'%adir, None,
                            a.title if a.title else _('Untitled Article'),
                            play_order=po, author=auth,
                            description=desc, toc_thumbnail=tt)
                    last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
                    for sp in a.sub_pages:
                        prefix = os.path.commonprefix([opf_path, sp])
                        relp = sp[len(prefix):]
                        entries.append(relp.replace(os.sep, '/'))
                        last = sp

                    if os.path.exists(last):
                        with open(last, 'rb') as fi:
                            src = fi.read().decode('utf-8')
                        soup = BeautifulSoup(src)
                        body = soup.find('body')
                        if body is not None:
                            prefix = '/'.join('..' for i in range(2*len(re.findall(r'link\d+', last))))
                            templ = self.navbar.generate(True, num, j, len(f),
                                    not self.has_single_feed,
                                    a.orig_url, __appname__, prefix=prefix,
                                    center=self.center_navbar)
                            elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
                            body.insert(len(body.contents), elem)
                            with open(last, 'wb') as fi:
                                fi.write(unicode(soup).encode('utf-8'))
        if len(feeds) == 0:
            raise Exception('All feeds are empty, aborting.')

        if len(feeds) > 1:
            for i, f in enumerate(feeds):
                entries.append('feed_%d/index.html'%i)
                po = self.play_order_map.get(entries[-1], None)
                if po is None:
                    self.play_order_counter += 1
                    po = self.play_order_counter
                auth = getattr(f, 'author', None)
                if not auth:
                    auth = None
                desc = getattr(f, 'description', None)
                if not desc:
                    desc = None
                feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
                    f.title, play_order=po, description=desc, author=auth))

        else:
            entries.append('feed_%d/index.html'%0)
            feed_index(0, toc)

        for i, p in enumerate(entries):
            entries[i] = os.path.join(dir, p.replace('/', os.sep))
        opf.create_spine(entries)
        opf.set_toc(toc)

        with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
            opf.render(opf_file, ncx_file)
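The get_fetch* helpers in the AM730 recipe above derive the edition date by shifting UTC to Hong Kong time (UTC+8) and then stepping back six hours, so the date only rolls over once the 6am HKT publication window has passed. A standalone sketch of that arithmetic:

```python
# Standalone sketch of the edition-date logic used by the recipe above:
# shift UTC to HKT (UTC+8), then subtract 6 hours so the date rolls over
# at 6am local time, when the day's articles are expected to be complete.
import datetime

def edition_date(utc_now=None):
    if utc_now is None:
        utc_now = datetime.datetime.utcnow()
    shifted = utc_now + datetime.timedelta(hours=8) - datetime.timedelta(hours=6)
    return shifted.strftime('%Y%m%d')

# 21:55 UTC on 2013-04-04 is 05:55 HKT on the 5th, still the April 4 edition:
print(edition_date(datetime.datetime(2013, 4, 4, 21, 55)))  # '20130404'
```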
@@ -1,161 +1,275 @@
# -*- coding: utf-8 -*-
import re
# vim:fileencoding=UTF-8
from __future__ import unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2013, Eddie Lau'
__Date__ = ''

from calibre import (__appname__, force_unicode, strftime)
from calibre.utils.date import now as nowf
import os, datetime, re
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
from calibre.utils.localization import canonicalize_lang

class AppleDaily(BasicNewsRecipe):

    title = u'蘋果日報'
    __author__ = u'蘋果日報'
    __publisher__ = u'蘋果日報'
    description = u'蘋果日報'
    masthead_url = 'http://hk.apple.nextmedia.com/template/common/header/2009/images/atnextheader_logo_appledaily.gif'
    language = 'zh_TW'
    encoding = 'UTF-8'
    timefmt = ' [%a, %d %b, %Y]'
    needs_subscription = False
    title = u'蘋果日報 (香港)'
    __author__ = 'Eddie Lau'
    publisher = '蘋果日報'
    oldest_article = 1
    max_articles_per_feed = 100
    auto_cleanup = False
    language = 'zh'
    encoding = 'utf-8'
    auto_cleanup = False
    remove_javascript = True
    remove_tags_before = dict(name=['ul', 'h1'])
    remove_tags_after = dict(name='form')
    remove_tags = [dict(attrs={'class':['articleTools', 'post-tools', 'side_tool', 'nextArticleLink clearfix']}),
                   dict(id=['footer', 'toolsRight', 'articleInline', 'navigation', 'archive', 'side_search', 'blog_sidebar', 'side_tool', 'side_index']),
                   dict(name=['script', 'noscript', 'style', 'form'])]
    use_embedded_content = False
    no_stylesheets = True
    extra_css = '''
        @font-face {font-family: "uming", serif, sans-serif; src: url(res:///usr/share/fonts/truetype/arphic/uming.ttc); }\n
        body {margin-right: 8pt; font-family: 'uming', serif;}
        h1 {font-family: 'uming', serif, sans-serif}
        '''
    #extra_css = 'h1 {font: sans-serif large;}\n.byline {font:monospace;}'
    description = 'http://hkm.appledaily.com/'
    category = 'Chinese, News, Hong Kong'
    masthead_url = 'http://upload.wikimedia.org/wikipedia/zh/c/cf/AppleDailyLogo1.png'

    preprocess_regexps = [
        (re.compile(r'img.php?server=(?P<server>[^&]+)&path=(?P<path>[^&]+).*', re.DOTALL|re.IGNORECASE),
         lambda match: 'http://' + match.group('server') + '/' + match.group('path')),
    ]
    extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} h1 {font-size:200%; text-align:left; font-weight:bold;} p[class=video-caption] {font-size:50%; margin-left:auto; margin-right:auto;}'
    keep_only_tags = [dict(name='div', attrs={'id':'content-article'})]
    remove_tags = [dict(name='div', attrs={'class':'prev-next-btn'}),
                   dict(name='p', attrs={'class':'next'})]

    def get_dtlocal(self):
        dt_utc = datetime.datetime.utcnow()
        # convert UTC to local hk time - at HKT 6am, all news are available
        return dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(6.0/24)

    def get_fetchdate(self):
        if __Date__ != '':
            return __Date__
        else:
            return self.get_dtlocal().strftime("%Y%m%d")

    def get_fetchformatteddate(self):
        if __Date__ != '':
            return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8]
        else:
            return self.get_dtlocal().strftime("%Y-%m-%d")

    def get_fetchyear(self):
        if __Date__ != '':
            return __Date__[0:4]
        else:
            return self.get_dtlocal().strftime("%Y")

    def get_fetchmonth(self):
        if __Date__ != '':
            return __Date__[4:6]
        else:
            return self.get_dtlocal().strftime("%m")

    def get_fetchday(self):
        if __Date__ != '':
            return __Date__[6:8]
        else:
            return self.get_dtlocal().strftime("%d")

    # Note: does not work with custom date given by __Date__
    def get_weekday(self):
        return self.get_dtlocal().weekday()

    def get_cover_url(self):
        return 'http://hk.apple.nextmedia.com/template/common/header/2009/images/atnextheader_logo_appledaily.gif'

    #def get_browser(self):
    #    br = BasicNewsRecipe.get_browser(self)
    #    if self.username is not None and self.password is not None:
    #        br.open('http://www.nytimes.com/auth/login')
    #        br.select_form(name='login')
    #        br['USERID'] = self.username
    #        br['PASSWORD'] = self.password
    #        br.submit()
    #    return br

    def preprocess_html(self, soup):
        #process all the images
        for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
            iurl = tag['src']
            #print 'checking image: ' + iurl

            #img\.php?server\=(?P<server>[^&]+)&path=(?P<path>[^&]+)
            p = re.compile(r'img\.php\?server=(?P<server>[^&]+)&path=(?P<path>[^&]+)', re.DOTALL|re.IGNORECASE)

            m = p.search(iurl)

            if m is not None:
                iurl = 'http://' + m.group('server') + '/' + m.group('path')
                #print 'working! new url: ' + iurl
                tag['src'] = iurl
            #else:
            #    print 'not good'

        for tag in soup.findAll(lambda tag: tag.name.lower()=='a' and tag.has_key('href')):
            iurl = tag['href']
            #print 'checking image: ' + iurl

            #img\.php?server\=(?P<server>[^&]+)&path=(?P<path>[^&]+)
            p = re.compile(r'img\.php\?server=(?P<server>[^&]+)&path=(?P<path>[^&]+)', re.DOTALL|re.IGNORECASE)

            m = p.search(iurl)

            if m is not None:
                iurl = 'http://' + m.group('server') + '/' + m.group('path')
                #print 'working! new url: ' + iurl
                tag['href'] = iurl
            #else:
            #    print 'not good'

        return soup
        soup = self.index_to_soup('http://hkm.appledaily.com/')
        cover = soup.find(attrs={'class':'top-news'}).get('src', False)
        br = BasicNewsRecipe.get_browser(self)
        try:
            br.open(cover)
        except:
            cover = None
        return cover

    def populate_article_metadata(self, article, soup, first):
        if first and hasattr(self, 'add_toc_thumbnail'):
            picdiv = soup.find('img')
            if picdiv is not None:
                self.add_toc_thumbnail(article, picdiv['src'])

    def parse_index(self):
        base = 'http://news.hotpot.hk/fruit'
        soup = self.index_to_soup('http://news.hotpot.hk/fruit/index.php')
        feeds = []
        soup = self.index_to_soup('http://hkm.appledaily.com/')
        ul = soup.find(attrs={'class':'menu'})
        sectionList = []
        for li in ul.findAll('li'):
            a = 'http://hkm.appledaily.com/' + li.find('a', href=True).get('href', False)
            title = li.find('a', text=True).strip()
            if title != u'動新聞':
                sectionList.append((title, a))
        for title, url in sectionList:
            articles = self.parse_section(url)
            if articles:
                feeds.append((title, articles))
        return feeds

    #def feed_title(div):
    #    return ''.join(div.findAll(text=True, recursive=False)).strip()
    def parse_section(self, url):
        soup = self.index_to_soup(url)
        ul = soup.find(attrs={'class':'list'})
        current_articles = []
        for li in ul.findAll('li'):
            a = li.find('a', href=True)
            title = li.find('p', text=True).strip()
            if a is not None:
                current_articles.append({'title': title, 'url': 'http://hkm.appledaily.com/' + a.get('href', False)})
            pass
        return current_articles

        articles = {}
        key = None
        ans = []
        for div in soup.findAll('li'):
            key = div.find(text=True, recursive=True)
            #if key == u'豪情':
            #    continue
    def create_opf(self, feeds, dir=None):
        if dir is None:
            dir = self.output_dir
        title = self.short_title()
        if self.output_profile.periodical_date_in_title:
            title += strftime(self.timefmt)
        mi = MetaInformation(title, [__appname__])
        mi.publisher = __appname__
        mi.author_sort = __appname__
        if self.publication_type:
            mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
        mi.timestamp = nowf()
        article_titles, aseen = [], set()
        for f in feeds:
            for a in f:
                if a.title and a.title not in aseen:
                    aseen.add(a.title)
                    article_titles.append(force_unicode(a.title, 'utf-8'))

        print 'section=' + key
        mi.comments = self.description
        if not isinstance(mi.comments, unicode):
            mi.comments = mi.comments.decode('utf-8', 'replace')
        mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' +
                '\n\n'.join(article_titles))

        articles[key] = []
        language = canonicalize_lang(self.language)
        if language is not None:
            mi.language = language
        # This one affects the pub date shown in kindle title
        #mi.pubdate = nowf()
        # now appears to need the time field to be > 12.00noon as well
        mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
        opf_path = os.path.join(dir, 'index.opf')
        ncx_path = os.path.join(dir, 'index.ncx')

        ans.append(key)
        opf = OPFCreator(dir, mi)
        # Add mastheadImage entry to <guide> section
        mp = getattr(self, 'masthead_path', None)
        if mp is not None and os.access(mp, os.R_OK):
            from calibre.ebooks.metadata.opf2 import Guide
            ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
            ref.type = 'masthead'
            ref.title = 'Masthead Image'
            opf.guide.append(ref)

        a = div.find('a', href=True)
        manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
        manifest.append(os.path.join(dir, 'index.html'))
        manifest.append(os.path.join(dir, 'index.ncx'))

        if not a:
            continue
        # Get cover
        cpath = getattr(self, 'cover_path', None)
        if cpath is None:
            pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
            if self.default_cover(pf):
                cpath = pf.name
        if cpath is not None and os.access(cpath, os.R_OK):
            opf.cover = cpath
            manifest.append(cpath)

        url = base + '/' + a['href']
        print 'url=' + url
        # Get masthead
        mpath = getattr(self, 'masthead_path', None)
        if mpath is not None and os.access(mpath, os.R_OK):
            manifest.append(mpath)

        if not articles.has_key(key):
            articles[key] = []
        opf.create_manifest_from_files_in(manifest)
        for mani in opf.manifest:
            if mani.path.endswith('.ncx'):
                mani.id = 'ncx'
            if mani.path.endswith('mastheadImage.jpg'):
                mani.id = 'masthead-image'

        entries = ['index.html']
        toc = TOC(base_path=dir)
        self.play_order_counter = 0
        self.play_order_map = {}

        def feed_index(num, parent):
            f = feeds[num]
            for j, a in enumerate(f):
                if getattr(a, 'downloaded', False):
                    adir = 'feed_%d/article_%d/'%(num, j)
                    auth = a.author
                    if not auth:
                        auth = None
                    desc = a.text_summary
                    if not desc:
                        desc = None
                    else:
                        # sub page
                        subSoup = self.index_to_soup(url)
                        desc = self.description_limiter(desc)
                    tt = a.toc_thumbnail if a.toc_thumbnail else None
                    entries.append('%sindex.html'%adir)
                    po = self.play_order_map.get(entries[-1], None)
                    if po is None:
                        self.play_order_counter += 1
                        po = self.play_order_counter
                    parent.add_item('%sindex.html'%adir, None,
                            a.title if a.title else _('Untitled Article'),
                            play_order=po, author=auth,
                            description=desc, toc_thumbnail=tt)
                    last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
                    for sp in a.sub_pages:
                        prefix = os.path.commonprefix([opf_path, sp])
                        relp = sp[len(prefix):]
                        entries.append(relp.replace(os.sep, '/'))
                        last = sp

                    for subDiv in subSoup.findAll('li'):
                        subA = subDiv.find('a', href=True)
                        subTitle = subDiv.find(text=True, recursive=True)
                        subUrl = base + '/' + subA['href']
                    if os.path.exists(last):
                        with open(last, 'rb') as fi:
                            src = fi.read().decode('utf-8')
                        soup = BeautifulSoup(src)
                        body = soup.find('body')
                        if body is not None:
                            prefix = '/'.join('..' for i in range(2*len(re.findall(r'link\d+', last))))
                            templ = self.navbar.generate(True, num, j, len(f),
                                    not self.has_single_feed,
                                    a.orig_url, __appname__, prefix=prefix,
                                    center=self.center_navbar)
                            elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
                            body.insert(len(body.contents), elem)
                            with open(last, 'wb') as fi:
                                fi.write(unicode(soup).encode('utf-8'))
        if len(feeds) == 0:
            raise Exception('All feeds are empty, aborting.')

        print 'subUrl' + subUrl
        if len(feeds) > 1:
            for i, f in enumerate(feeds):
                entries.append('feed_%d/index.html'%i)
                po = self.play_order_map.get(entries[-1], None)
                if po is None:
                    self.play_order_counter += 1
                    po = self.play_order_counter
                auth = getattr(f, 'author', None)
                if not auth:
                    auth = None
                desc = getattr(f, 'description', None)
                if not desc:
                    desc = None
                feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
                    f.title, play_order=po, description=desc, author=auth))

        articles[key].append(
            dict(title=subTitle,
                 url=subUrl,
                 date='',
                 description='',
                 content=''))
        else:
            entries.append('feed_%d/index.html'%0)
            feed_index(0, toc)

        for i, p in enumerate(entries):
            entries[i] = os.path.join(dir, p.replace('/', os.sep))
        opf.create_spine(entries)
        opf.set_toc(toc)

        # elif div['class'] in ['story', 'story headline']:
        #     a = div.find('a', href=True)
        #     if not a:
        #         continue
        #     url = re.sub(r'\?.*', '', a['href'])
        #     url += '?pagewanted=all'
        #     title = self.tag_to_string(a, use_alt=True).strip()
        #     description = ''
        #     pubdate = strftime('%a, %d %b')
        #     summary = div.find(True, attrs={'class':'summary'})
        #     if summary:
        #         description = self.tag_to_string(summary, use_alt=False)
        #
        #     feed = key if key is not None else 'Uncategorized'
        #     if not articles.has_key(feed):
        #         articles[feed] = []
        #     if not 'podcasts' in url:
        #         articles[feed].append(
        #             dict(title=title, url=url, date=pubdate,
        #                  description=description,
        #                  content=''))
        # ans = self.sort_index_by(ans, {'The Front Page':-1, 'Dining In, Dining Out':1, 'Obituaries':2})
        ans = [(unicode(key), articles[key]) for key in ans if articles.has_key(key)]
        return ans

        with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
            opf.render(opf_file, ncx_file)
@@ -9,14 +9,14 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
    __author__ = 'Dave Asbury'
    cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/161987_9010212100_2035706408_n.jpg'
    oldest_article = 2
    max_articles_per_feed = 12
    max_articles_per_feed = 20
    linearize_tables = True
    remove_empty_feeds = True
    remove_javascript = True
    no_stylesheets = True
    auto_cleanup = True
    language = 'en_GB'

    compress_news_images = True
    cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/161987_9010212100_2035706408_n.jpg'

    masthead_url = 'http://www.trinitymirror.com/images/birminghampost-logo.gif'
@@ -37,68 +37,15 @@ class BusinessWeek(BasicNewsRecipe):
        , 'language' : language
        }

    #remove_tags = [
    #    dict(attrs={'class':'inStory'})
    #    ,dict(name=['meta','link','iframe','base','embed','object','table','th','tr','td'])
    #    ,dict(attrs={'id':['inset','videoDisplay']})
    #]
    #keep_only_tags = [dict(name='div', attrs={'id':['story-body','storyBody']})]
    remove_attributes = ['lang']
    match_regexps = [r'http://www.businessweek.com/.*_page_[1-9].*']

    feeds = [
        (u'Top Stories', u'http://www.businessweek.com/topStories/rss/topStories.rss'),
        (u'Top News', u'http://www.businessweek.com/rss/bwdaily.rss'),
        (u'Asia', u'http://www.businessweek.com/rss/asia.rss'),
        (u'Autos', u'http://www.businessweek.com/rss/autos/index.rss'),
        (u'Classic Cars', u'http://rss.businessweek.com/bw_rss/classiccars'),
        (u'Hybrids', u'http://rss.businessweek.com/bw_rss/hybrids'),
        (u'Europe', u'http://www.businessweek.com/rss/europe.rss'),
        (u'Auto Reviews', u'http://rss.businessweek.com/bw_rss/autoreviews'),
        (u'Innovation & Design', u'http://www.businessweek.com/rss/innovate.rss'),
        (u'Architecture', u'http://www.businessweek.com/rss/architecture.rss'),
        (u'Brand Equity', u'http://www.businessweek.com/rss/brandequity.rss'),
        (u'Auto Design', u'http://www.businessweek.com/rss/carbuff.rss'),
        (u'Game Room', u'http://rss.businessweek.com/bw_rss/gameroom'),
        (u'Technology', u'http://www.businessweek.com/rss/technology.rss'),
        (u'Investing', u'http://rss.businessweek.com/bw_rss/investor'),
        (u'Small Business', u'http://www.businessweek.com/rss/smallbiz.rss'),
        (u'Careers', u'http://rss.businessweek.com/bw_rss/careers'),
        (u'B-Schools', u'http://www.businessweek.com/rss/bschools.rss'),
        (u'Magazine Selections', u'http://www.businessweek.com/rss/magazine.rss'),
        (u'CEO Guide to Tech', u'http://www.businessweek.com/rss/ceo_guide_tech.rss'),
        (u'Top Stories', u'http://www.businessweek.com/feeds/most-popular.rss'),
    ]

    def get_article_url(self, article):
        url = article.get('guid', None)
        if 'podcasts' in url:
            return None
        if 'surveys' in url:
            return None
        if 'images' in url:
            return None
        if 'feedroom' in url:
            return None
        if '/magazine/toc/' in url:
            return None
        rurl, sep, rest = url.rpartition('?')
        if rurl:
            return rurl
        return rest

    def print_version(self, url):
        if '/news/' in url or '/blog/' in url:
            return url
        rurl = url.replace('http://www.businessweek.com/', 'http://www.businessweek.com/print/')
        return rurl.replace('/investing/', '/investor/')
        soup = self.index_to_soup(url)
        prntver = soup.find('li', attrs={'class':'print tracked'})
        rurl = prntver.find('a', href=True)['href']
        return rurl

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for alink in soup.findAll('a'):
            if alink.string is not None:
                tstr = alink.string
                alink.replaceWith(tstr)
        return soup
@@ -1,3 +1,4 @@
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from collections import OrderedDict

@@ -39,7 +40,7 @@ class BusinessWeekMagazine(BasicNewsRecipe):
            title = self.tag_to_string(div.a).strip()
            url = div.a['href']
            soup0 = self.index_to_soup(url)
            urlprint = soup0.find('li', attrs={'class':'print tracked'}).a['href']
            urlprint = soup0.find('a', attrs={'href':re.compile('.*printer.*')})['href']
            articles.append({'title':title, 'url':urlprint, 'description':'', 'date':''})

@@ -56,7 +57,7 @@ class BusinessWeekMagazine(BasicNewsRecipe):
            title = self.tag_to_string(div.a).strip()
            url = div.a['href']
            soup0 = self.index_to_soup(url)
            urlprint = soup0.find('li', attrs={'class':'print tracked'}).a['href']
            urlprint = soup0.find('a', attrs={'href':re.compile('.*printer.*')})['href']
            articles.append({'title':title, 'url':urlprint, 'description':desc, 'date':''})

        if articles:
@@ -7,13 +7,14 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
    #cover_url = 'http://www.countryfile.com/sites/default/files/imagecache/160px_wide/cover/2_1.jpg'
    __author__ = 'Dave Asbury'
    description = 'The official website of Countryfile Magazine'
    # last updated 8/12/12
    # last updated 19/10/12
    language = 'en_GB'
    oldest_article = 30
    max_articles_per_feed = 25
    remove_empty_feeds = True
    no_stylesheets = True
    auto_cleanup = True
    compress_news_images = True
    ignore_duplicate_articles = {'title', 'url'}
    #articles_are_obfuscated = True
    #article_already_exists = False
@@ -13,9 +13,9 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):

    masthead_url = 'http://www.nmauk.co.uk/nma/images/daily_mirror.gif'

    compress_news_images = True
    oldest_article = 1
    max_articles_per_feed = 1
    max_articles_per_feed = 12
    remove_empty_feeds = True
    remove_javascript = True
    no_stylesheets = True
recipes/diario_de_noticias.recipe (new file, 23 lines)
@@ -0,0 +1,23 @@
# vim:fileencoding=UTF-8

from __future__ import unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1365070687(BasicNewsRecipe):
    title = 'Diário de Notícias'
    oldest_article = 7
    language = 'pt'
    __author__ = 'Jose Pinto'
    max_articles_per_feed = 100
    keep_only_tags = [dict(name='div', attrs={'id':'cln-esqmid'})]
    remove_tags = [dict(name='table', attrs={'class':'TabFerramentasInf'})]

    feeds = [(u'Portugal', u'http://feeds.dn.pt/DN-Portugal'),
             (u'Globo', u'http://feeds.dn.pt/DN-Globo'),
             (u'Economia', u'http://feeds.dn.pt/DN-Economia'),
             (u'Ci\xeancia', u'http://feeds.dn.pt/DN-Ciencia'),
             (u'Artes', u'http://feeds.dn.pt/DN-Artes'),
             (u'TV & Media', u'http://feeds.dn.pt/DN-Media'),
             (u'Opini\xe3o', u'http://feeds.dn.pt/DN-Opiniao'),
             (u'Pessoas', u'http://feeds.dn.pt/DN-Pessoas')]
@@ -12,12 +12,6 @@ class EsensjaRSS(BasicNewsRecipe):
    language = 'pl'
    encoding = 'utf-8'
    INDEX = 'http://www.esensja.pl'
    extra_css = '''.t-title {font-size: x-large; font-weight: bold; text-align: left}
        .t-author {font-size: x-small; text-align: left}
        .t-title2 {font-size: x-small; font-style: italic; text-align: left}
        .text {font-size: small; text-align: left}
        .annot-ref {font-style: italic; text-align: left}
        '''
    cover_url = ''
    masthead_url = 'http://esensja.pl/img/wrss.gif'
    use_embedded_content = False
@@ -8,6 +8,7 @@ import datetime
from calibre.ptempfile import PersistentTemporaryFile
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
from collections import OrderedDict

class FinancialTimes(BasicNewsRecipe):
    title = 'Financial Times (UK)'

@@ -105,29 +106,30 @@ class FinancialTimes(BasicNewsRecipe):
        return articles

    def parse_index(self):
        feeds = []
        feeds = OrderedDict()
        soup = self.index_to_soup(self.INDEX)
        dates = self.tag_to_string(soup.find('div', attrs={'class':'btm-links'}).find('div'))
        self.timefmt = ' [%s]'%dates
        wide = soup.find('div', attrs={'class':'wide'})
        if not wide:
            return feeds
        allsections = wide.findAll(attrs={'class':lambda x: x and 'footwell' in x.split()})
        if not allsections:
            return feeds
        count = 0
        for item in allsections:
            count = count + 1
            if self.test and count > 2:
                return feeds
            fitem = item.h3
            if not fitem:
                fitem = item.h4
            ftitle = self.tag_to_string(fitem)
            self.report_progress(0, _('Fetching feed')+' %s...'%(ftitle))
            feedarts = self.get_artlinks(item.ul)
            feeds.append((ftitle, feedarts))
        return feeds
        #dates = self.tag_to_string(soup.find('div', attrs={'class':'btm-links'}).find('div'))
        #self.timefmt = ' [%s]'%dates
        section_title = 'Untitled'

        for column in soup.findAll('div', attrs = {'class':'feedBoxes clearfix'}):
            for section in column.findAll('div', attrs = {'class':'feedBox'}):
                sectiontitle = self.tag_to_string(section.find('h4'))
                if '...' not in sectiontitle: section_title = sectiontitle
                for article in section.ul.findAll('li'):
                    articles = []
                    title = self.tag_to_string(article.a)
                    url = article.a['href']
                    articles.append({'title':title, 'url':url, 'description':'', 'date':''})

                    if articles:
                        if section_title not in feeds:
                            feeds[section_title] = []
                        feeds[section_title] += articles

        ans = [(key, val) for key, val in feeds.iteritems()]
        return ans

    def preprocess_html(self, soup):
        items = ['promo-box','promo-title',

@@ -177,6 +179,3 @@ class FinancialTimes(BasicNewsRecipe):
            tfile.close()
            self.temp_files.append(tfile)
            return tfile.name

    def cleanup(self):
        self.browser.open('https://registration.ft.com/registration/login/logout?location=')
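The parse_index rewrite above swaps the flat feed list for an OrderedDict keyed by section title, so articles scraped from repeated section boxes merge under one heading while first-seen section order is preserved. A minimal sketch of that accumulation pattern:

```python
# Minimal sketch of the section-grouping pattern in the new parse_index:
# articles accumulate under their section title, in first-seen order.
from collections import OrderedDict

feeds = OrderedDict()
scraped = [('World', 'story A'), ('UK', 'story B'), ('World', 'story C')]
for section_title, title in scraped:
    if section_title not in feeds:
        feeds[section_title] = []
    feeds[section_title].append({'title': title, 'url': '', 'description': '', 'date': ''})

ans = [(key, val) for key, val in feeds.items()]
# [('World', [story A, story C]), ('UK', [story B])]
```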
recipes/forbes_pl.recipe (new file, 53 lines)
@@ -0,0 +1,53 @@
#!/usr/bin/env python

__license__ = 'GPL v3'

from calibre.web.feeds.news import BasicNewsRecipe
import datetime
import re

class forbes_pl(BasicNewsRecipe):
    title = u'Forbes.pl'
    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
    language = 'pl'
    description = u'Biznes, finanse, gospodarka, strategie, wiadomości gospodarcze, analizy finasowe i strategiczne.'
    oldest_article = 1
    index = 'http://www.forbes.pl'
    cover_url = 'http://www.forbes.pl/resources/front/images/logo.png'
    max_articles_per_feed = 100
    extra_css = '.Block-Photo {float:left; max-width: 300px; margin-right: 5px;}'
    preprocess_regexps = [(re.compile(ur'<p>(<strong>)?(Czytaj|Zobacz) (też|także):.*?</p>', re.DOTALL), lambda match: ''), (re.compile(ur'<strong>Zobacz:.*?</strong>', re.DOTALL), lambda match: '')]
    remove_javascript = True
    no_stylesheets = True
    now = datetime.datetime.now()
    yesterday = now - datetime.timedelta(hours=24)
    yesterday = yesterday.strftime("%d.%m.%Y %H:%M:%S")
    pages_count = 4
    keep_only_tags = [dict(attrs={'class':['Block-Node Content-Article ', 'Block-Node Content-Article piano-closed']})]
    remove_tags = [dict(attrs={'class':['Keywords Styled', 'twitter-share-button', 'Block-List-Related Block-List']})]

    feeds = [(u'Wszystkie', 'http://www.forbes.pl/rss')]

    '''def preprocess_html(self, soup):
        self.append_page(soup, soup.body)
        return soup

    def append_page(self, soup, appendtag):
        cleanup = False
        nexturl = appendtag.find('a', attrs={'class':'next'})
        if nexturl:
            cleanup = True
        while nexturl:
            soup2 = self.index_to_soup(self.index + nexturl['href'])
            nexturl = soup2.find('a', attrs={'class':'next'})
            pagetext = soup2.findAll(id='article-body-wrapper')
            if not pagetext:
                pagetext = soup2.findAll(attrs={'class':'Article-Entry Styled'})
            for comment in pagetext.findAll(text=lambda text:isinstance(text, Comment)):
                comment.extract()
            pos = len(appendtag.contents)
            appendtag.insert(pos, pagetext)
        if cleanup:
            for r in appendtag.findAll(attrs={'class':'paginator'}):
                r.extract()'''
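Note that the commented-out append_page helper above would fail if re-enabled as written: soup2.findAll returns a result list, which has no findAll method of its own, and inserting that list into appendtag would also misbehave. A corrected standalone sketch of the intended follow-the-next-link pagination, assuming the same selectors apply:

```python
# Corrected sketch of the commented-out pagination helper above: follow the
# 'next' links, strip HTML comments from each page body (a single element,
# not a ResultSet), and append it to the first page.
from calibre.ebooks.BeautifulSoup import Comment

def append_pages(recipe, index, appendtag):
    nexturl = appendtag.find('a', attrs={'class': 'next'})
    had_next = nexturl is not None
    while nexturl:
        soup2 = recipe.index_to_soup(index + nexturl['href'])
        nexturl = soup2.find('a', attrs={'class': 'next'})
        pagetext = soup2.find(id='article-body-wrapper')
        if pagetext is None:
            pagetext = soup2.find(attrs={'class': 'Article-Entry Styled'})
        if pagetext is None:
            break  # neither selector matched; stop rather than insert nothing
        for comment in pagetext.findAll(text=lambda text: isinstance(text, Comment)):
            comment.extract()
        appendtag.insert(len(appendtag.contents), pagetext)
    if had_next:
        for r in appendtag.findAll(attrs={'class': 'paginator'}):
            r.extract()
```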
recipes/galaxys_edge.recipe (new file, 108 lines)
@@ -0,0 +1,108 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'

from calibre.web.feeds.news import BasicNewsRecipe

class GalaxyEdge(BasicNewsRecipe):
    title = u'The Galaxy\'s Edge'
    language = 'en'

    oldest_article = 7
    __author__ = 'Krittika Goyal'
    no_stylesheets = True

    auto_cleanup = True

    #keep_only_tags = [dict(id='content')]
    #remove_tags = [dict(attrs={'class':['article-links', 'breadcr']}),
    #               dict(id=['email-section', 'right-column', 'printfooter', 'topover',
    #                   'slidebox', 'th_footer'])]

    extra_css = '.photo-caption { font-size: smaller }'

    def parse_index(self):
        soup = self.index_to_soup('http://www.galaxysedge.com/')
        main = soup.find('table', attrs={'width':'911'})
        toc = main.find('td', attrs={'width':'225'})

        current_section = None
        current_articles = []
        feeds = []
        c = 0
        for x in toc.findAll(['p']):
            c = c+1
            if c == 5:
                if current_articles and current_section:
                    feeds.append((current_section, current_articles))
                edwo = x.find('a')
                current_section = self.tag_to_string(edwo)
                current_articles = []
                self.log('\tFound section:', current_section)
                title = self.tag_to_string(edwo)
                url = edwo.get('href', True)
                url = 'http://www.galaxysedge.com/'+url
                print(title)
                print(c)
                if not url or not title:
                    continue
                self.log('\t\tFound article:', title)
                self.log('\t\t\t', url)
                current_articles.append({'title': title, 'url':url,
                    'description':'', 'date':''})
            elif c > 5:
                current_section = self.tag_to_string(x.find('b'))
                current_articles = []
                self.log('\tFound section:', current_section)
                for y in x.findAll('a'):
                    title = self.tag_to_string(y)
                    url = y.get('href', True)
                    url = 'http://www.galaxysedge.com/'+url
                    print(title)
                    if not url or not title:
                        continue
                    self.log('\t\tFound article:', title)
                    self.log('\t\t\t', url)
                    current_articles.append({'title': title, 'url':url,
                        'description':'', 'date':''})
        if current_articles and current_section:
            feeds.append((current_section, current_articles))

        return feeds

    #def preprocess_raw_html(self, raw, url):
    #    return raw.replace('<body><p>', '<p>').replace('</p></body>', '</p>')

    #def postprocess_html(self, soup, first_fetch):
    #    for t in soup.findAll(['table', 'tr', 'td','center']):
    #        t.name = 'div'
    #    return soup

    #def parse_index(self):
    #    today = time.strftime('%Y-%m-%d')
    #    soup = self.index_to_soup(
    #        'http://www.thehindu.com/todays-paper/tp-index/?date=' + today)
    #    div = soup.find(id='left-column')
    #    feeds = []
    #    current_section = None
    #    current_articles = []
    #    for x in div.findAll(['h3', 'div']):
    #        if current_section and x.get('class', '') == 'tpaper':
    #            a = x.find('a', href=True)
    #            if a is not None:
    #                current_articles.append({'url':a['href']+'?css=print',
    #                    'title':self.tag_to_string(a), 'date': '',
    #                    'description':''})
    #        if x.name == 'h3':
    #            if current_section and current_articles:
    #                feeds.append((current_section, current_articles))
    #            current_section = self.tag_to_string(x)
    #            current_articles = []
    #    return feeds
@@ -10,7 +10,7 @@ krakow.gazeta.pl
from calibre.web.feeds.news import BasicNewsRecipe

class gw_krakow(BasicNewsRecipe):
    title = u'Gazeta.pl Kraków'
    title = u'Gazeta Wyborcza Kraków'
    __author__ = 'teepel <teepel44@gmail.com> based on GW from fenuks'
    language = 'pl'
    description = u'Wiadomości z Krakowa na portalu Gazeta.pl.'

@@ -5,7 +5,7 @@ import string
from calibre.web.feeds.news import BasicNewsRecipe

class GazetaPlSzczecin(BasicNewsRecipe):
    title = u'Gazeta.pl Szczecin'
    title = u'Gazeta Wyborcza Szczecin'
    description = u'Wiadomości ze Szczecina na portalu Gazeta.pl.'
    __author__ = u'Michał Szkutnik'
    __license__ = u'GPL v3'

@@ -10,7 +10,7 @@ warszawa.gazeta.pl
from calibre.web.feeds.news import BasicNewsRecipe

class gw_wawa(BasicNewsRecipe):
    title = u'Gazeta.pl Warszawa'
    title = u'Gazeta Wyborcza Warszawa'
    __author__ = 'teepel <teepel44@gmail.com> based on GW from fenuks'
    language = 'pl'
    description = 'Wiadomości z Warszawy na portalu Gazeta.pl.'

@@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Comment

class Gazeta_Wyborcza(BasicNewsRecipe):
    title = u'Gazeta.pl'
    title = u'Gazeta Wyborcza'
    __author__ = 'fenuks, Artur Stachecki'
    language = 'pl'
    description = 'Wiadomości z Polski i ze świata. Serwisy tematyczne i lokalne w 20 miastach.'
@@ -1,6 +1,4 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
from datetime import date, timedelta

class HBR(BasicNewsRecipe):

@@ -11,23 +9,18 @@ class HBR(BasicNewsRecipe):
    timefmt = ' [%B %Y]'
    language = 'en'
    no_stylesheets = True
    # recipe_disabled = ('hbr.org has started requiring the use of javascript'
    #     ' to log into their website. This is unsupported in calibre, so'
    #     ' this recipe has been disabled. If you would like to see '
    #     ' HBR supported in calibre, contact hbr.org and ask them'
    #     ' to provide a javascript free login method.')

    LOGIN_URL = 'https://hbr.org/login?request_url=/'
    LOGOUT_URL = 'https://hbr.org/logout?request_url=/'

    INDEX = 'http://hbr.org/archive-toc/BR'
    INDEX = 'http://hbr.org'

    keep_only_tags = [dict(name='div', id='pageContainer')]
    remove_tags = [dict(id=['mastheadContainer', 'magazineHeadline',
        'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn',
        'todayOnHBRListWidget', 'mostWidget', 'keepUpWithHBR',
        'mailingListTout', 'partnerCenter', 'pageFooter',
        'superNavHeadContainer', 'hbrDisqus',
        'superNavHeadContainer', 'hbrDisqus', 'article-toolbox',
        'articleToolbarTop', 'articleToolbarBottom', 'articleToolbarRD']),
        dict(name='iframe')]
    extra_css = '''

@@ -57,22 +50,6 @@ class HBR(BasicNewsRecipe):
        if url.endswith('/ar/1'):
            return url[:-1]+'pr'

    def hbr_get_toc(self):
        # return self.index_to_soup(open('/t/toc.html').read())
        today = date.today()
        future = today + timedelta(days=30)
        past = today - timedelta(days=30)
        for x in [x.strftime('%y%m') for x in (future, today, past)]:
            url = self.INDEX + x
            soup = self.index_to_soup(url)
            if (not soup.find(text='Issue Not Found') and not soup.find(
                text="We're Sorry. There was an error processing your request")
                    and 'Exception: java.io.FileNotFoundException' not in
                    unicode(soup)):
                return soup
        raise Exception('Could not find current issue')

    def hbr_parse_toc(self, soup):
        feeds = []
        current_section = None

@@ -105,23 +82,19 @@ class HBR(BasicNewsRecipe):

            articles.append({'title':title, 'url':url, 'description':desc,
                'date':''})

        if current_section is not None and articles:
            feeds.append((current_section, articles))
        return feeds

    def parse_index(self):
        soup = self.hbr_get_toc()
        # open('/t/hbr.html', 'wb').write(unicode(soup).encode('utf-8'))
        soup0 = self.index_to_soup('http://hbr.org/magazine')
        datencover = soup0.find('ul', attrs={'id':'magazineArchiveCarousel'}).findAll('li')[-1]
        # find date & cover
        self.cover_url = datencover.img['src']
        dates = self.tag_to_string(datencover.img['alt'])
        self.timefmt = u' [%s]'%dates
        soup = self.index_to_soup(self.INDEX + soup0.find('div', attrs = {'class':'magazine_page'}).a['href'])
        feeds = self.hbr_parse_toc(soup)
        return feeds

    def get_cover_url(self):
        cover_url = None
        index = 'http://hbr.org/current'
        soup = self.index_to_soup(index)
        link_item = soup.find('img', alt=re.compile("Current Issue"), src=True)

        if link_item:
            cover_url = 'http://hbr.org' + link_item['src']

        return cover_url
recipes/icons/forbes_pl.png (new binary file, 1.2 KiB)
(four modified icon files, 802 B -> 294 B each; filenames not shown in this view)
recipes/icons/slashdot.png (new binary file, 250 B)
recipes/icons/sportowefakty.png (new binary file, 511 B)
recipes/icons/wysokie_obcasy.png (new binary file, 205 B)
@@ -1,33 +1,23 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
# vim:fileencoding=UTF-8
from __future__ import unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe

class AListApart(BasicNewsRecipe):
    __author__ = u'Marc Busqué <marc@lamarciana.com>'
    __author__ = 'Marc Busqué <marc@lamarciana.com>'
    __url__ = 'http://www.lamarciana.com'
    __version__ = '1.0'
    __version__ = '2.0'
    __license__ = 'GPL v3'
    __copyright__ = u'2012, Marc Busqué <marc@lamarciana.com>'
    __copyright__ = '2012, Marc Busqué <marc@lamarciana.com>'
    title = u'A List Apart'
    description = u'A List Apart Magazine (ISSN: 1534-0295) explores the design, development, and meaning of web content, with a special focus on web standards and best practices.'
    description = u'A List Apart Magazine (ISSN: 1534-0295) explores the design, development, and meaning of web content, with a special focus on web standards and best practices. This recipe retrieves articles and columns.'
    language = 'en'
    tags = 'web development, software'
    oldest_article = 120
    remove_empty_feeds = True
    no_stylesheets = True
    encoding = 'utf8'
    cover_url = u'http://alistapart.com/pix/alalogo.gif'
    keep_only_tags = [
        dict(name='div', attrs={'id': 'content'})
    ]
    remove_tags = [
        dict(name='ul', attrs={'id': 'metastuff'}),
        dict(name='div', attrs={'class': 'discuss'}),
        dict(name='div', attrs={'class': 'discuss'}),
        dict(name='div', attrs={'id': 'learnmore'}),
    ]
    remove_attributes = ['border', 'cellspacing', 'align', 'cellpadding', 'colspan', 'valign', 'vspace', 'hspace', 'alt', 'width', 'height']
    extra_css = u'img {max-width: 100%; display: block; margin: auto;} #authorbio img {float: left; margin-right: 2%;}'
    extra_css = u'img {max-width: 100%; display: block; margin: auto;}'

    feeds = [
        (u'A List Apart', u'http://www.alistapart.com/site/rss'),
        (u'A List Apart', u'http://feeds.feedburner.com/alistapart/abridged'),
    ]
@@ -6,10 +6,10 @@ import time

class AdvancedUserRecipe1306097511(BasicNewsRecipe):
    title = u'Metro UK'
    description = 'News as provided by The Metro -UK'
    description = 'News from The Metro, UK'
    #timefmt = ''
    __author__ = 'fleclerc & Dave Asbury'
    #last update 20/1/13
    __author__ = 'Dave Asbury'
    #last update 4/4/13
    #cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/276636_117118184990145_2132092232_n.jpg'

    cover_url = 'https://twimg0-a.akamaihd.net/profile_images/1638332595/METRO_LETTERS-01.jpg'

@@ -22,7 +22,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):

    language = 'en_GB'
    masthead_url = 'http://e-edition.metro.co.uk/images/metro_logo.gif'

    compress_news_images = True
    def parse_index(self):
        articles = {}
        key = None
@@ -12,6 +12,7 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
    max_articles_per_feed = 20
    #auto_cleanup = True
    language = 'en_GB'
    compress_news_images = True

    def get_cover_url(self):
        soup = self.index_to_soup('http://www.nme.com/component/subscribe')
@ -1,30 +1,30 @@
# vim:fileencoding=UTF-8
from __future__ import unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2011, Eddie Lau'
__copyright__ = '2011-2013, Eddie Lau'

# data source: normal, mobile
__Source__ = 'mobile'
# please replace the following "True" with "False". (Default: True)
__MakePeriodical__ = True
# Turn below to True if your device supports display of CJK titles (Default: False)
__UseChineseTitle__ = False
__UseChineseTitle__ = True
# Set it to False if you want to skip images (Default: True)
__KeepImages__ = True
# Set it to True if you want to include a summary in Kindle's article view (Default: False)
__IncludeSummary__ = False
__IncludeSummary__ = True
# Set it to True if you want thumbnail images in Kindle's article view (Default: True)
__IncludeThumbnails__ = True


'''
Change Log:
2013/03/31 -- fix cover retrieval code and heading size, and remove &nbsp; in summary
2011/12/29 -- first version done
TODO:
* use alternative source at http://m.singtao.com/index.php
'''

from calibre.utils.date import now as nowf
import os, datetime, re
from datetime import date
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
from calibre.ebooks.BeautifulSoup import BeautifulSoup
@ -41,7 +41,7 @@ class STHKRecipe(BasicNewsRecipe):
    title = 'Sing Tao Daily - Hong Kong'
    description = 'Hong Kong Chinese Newspaper (http://singtao.com)'
    category = 'Chinese, News, Hong Kong'
    extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} td[class=caption] {font-size:50%;} td[class=bodyhead]{font-weight:bold; font-size:150%;} td[class=stmobheadline]{font-weight:bold; font-size:150%;}'
    extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} td[class=caption] {font-size:50%;} td[class=bodyhead]{font-weight:bold; font-size:150%;} td[class=stmobheadline]{font-weight:bold; font-size:200%;}'
    masthead_url = 'http://upload.wikimedia.org/wikipedia/en/d/dd/Singtao-usa.png'
    if __Source__ == 'normal':
        keep_only_tags = [dict(name='td', attrs={'class':['bodyhead','bodytext']})]
@ -96,17 +96,13 @@ class STHKRecipe(BasicNewsRecipe):
        return self.get_dtlocal().strftime("%d")

    def get_cover_url(self):
        #cover = 'http://singtao.com/media/a/a(2660).jpg' # for 2011/12/29
        base = 2660
        todaydate = date(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()))
        diff = todaydate - date(2011, 12, 29)
        base = base + int(diff.total_seconds()/(3600*24))
        cover = 'http://singtao.com/media/a/a(' + str(base) +').jpg'
        soup = self.index_to_soup('http://m.singtao.com/')
        cover = soup.find(attrs={'class':'special'}).get('src', False)
        br = BasicNewsRecipe.get_browser(self)
        try:
            br.open(cover)
        except:
            cover = 'http://singtao.com/images/stlogo.gif'
            cover = None
        return cover

    def parse_index(self):
@ -293,7 +289,7 @@ class STHKRecipe(BasicNewsRecipe):
        textFound = False
        for p in paras:
            if not textFound:
                summary_candidate = self.tag_to_string(p).strip()
                summary_candidate = self.tag_to_string(p).strip().replace('&nbsp;', '')
                if len(summary_candidate) > 0:
                    summary_candidate = summary_candidate.replace(u'(\u661f\u5cf6\u65e5\u5831\u5831\u9053)', '', 1)
                    article.summary = article.text_summary = summary_candidate
@ -489,3 +485,4 @@ class STHKRecipe(BasicNewsRecipe):



|
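Note on the cover hunk above: the old code computed the cover image number by date arithmetic from a fixed baseline, which drifts as soon as the numbering skips a day; the new code scrapes the mobile homepage and returns None on failure, so calibre falls back to its auto-generated cover instead of a stale logo. A minimal sketch of the same scrape-with-fallback pattern (the class name is illustrative, not part of the merge):

from calibre.web.feeds.recipes import BasicNewsRecipe

class MobileSiteCover(BasicNewsRecipe):  # illustrative subclass
    title = 'Example'

    def get_cover_url(self):
        soup = self.index_to_soup('http://m.singtao.com/')
        cover = soup.find(attrs={'class': 'special'}).get('src', False)
        br = BasicNewsRecipe.get_browser(self)
        try:
            br.open(cover)      # verify the image URL actually resolves
        except Exception:
            cover = None        # None lets calibre generate a default cover
        return cover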
70
recipes/sportowefakty.recipe
Normal file
@ -0,0 +1,70 @@
#!/usr/bin/env python

__license__ = 'GPL v3'

import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.utils.magick import Image

class sportowefakty(BasicNewsRecipe):
    title = u'SportoweFakty'
    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>, Tomasz Długosz <tomek3d@gmail.com>'
    language = 'pl'
    description = u'Najważniejsze informacje sportowe z kraju i ze świata, relacje, komentarze, wywiady, zdjęcia!'
    oldest_article = 1
    masthead_url='http://www.sportowefakty.pl/images/logo.png'
    max_articles_per_feed = 100
    simultaneous_downloads = 5
    use_embedded_content=False
    remove_javascript=True
    no_stylesheets=True
    ignore_duplicate_articles = {'title', 'url'}

    keep_only_tags = [dict(attrs = {'class' : 'box-article'})]
    remove_tags =[]
    remove_tags.append(dict(attrs = {'class' : re.compile(r'^newsStream')}))
    remove_tags.append(dict(attrs = {'target' : '_blank'}))

    feeds = [
        (u'Piłka Nożna', u'http://www.sportowefakty.pl/pilka-nozna/index.rss'),
        (u'Koszykówka', u'http://www.sportowefakty.pl/koszykowka/index.rss'),
        (u'Żużel', u'http://www.sportowefakty.pl/zuzel/index.rss'),
        (u'Siatkówka', u'http://www.sportowefakty.pl/siatkowka/index.rss'),
        (u'Zimowe', u'http://www.sportowefakty.pl/zimowe/index.rss'),
        (u'Hokej', u'http://www.sportowefakty.pl/hokej/index.rss'),
        (u'Moto', u'http://www.sportowefakty.pl/moto/index.rss'),
        (u'Tenis', u'http://www.sportowefakty.pl/tenis/index.rss')
    ]

    def get_article_url(self, article):
        link = article.get('link', None)
        if 'utm_source' in link:
            return link.split('?utm')[0]
        else:
            return link

    def print_version(self, url):
        print_url = url + '/drukuj'
        return print_url

    def preprocess_html(self, soup):
        head = soup.find('h1')
        if 'Fotorelacja' in self.tag_to_string(head):
            return None
        else:
            for alink in soup.findAll('a'):
                if alink.string is not None:
                    tstr = alink.string
                    alink.replaceWith(tstr)
            return soup

    def postprocess_html(self, soup, first):
        for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
            iurl = tag['src']
            img = Image()
            img.open(iurl)
            if img < 0:
                raise RuntimeError('Out of memory')
            img.type = "GrayscaleType"
            img.save(iurl)
        return soup

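The get_article_url override in the new recipe strips Google Analytics tracking parameters so that ignore_duplicate_articles compares canonical URLs. The same logic as a self-contained sketch (the example URL is hypothetical):

def strip_tracking(link):
    # Cut the query string at the first utm_* parameter so duplicate
    # detection sees the same canonical URL for every syndicated copy.
    if link and 'utm_source' in link:
        return link.split('?utm')[0]
    return link

# Hypothetical example:
# strip_tracking('http://www.sportowefakty.pl/zuzel/artykul.html?utm_source=rss&utm_medium=feed')
#   -> 'http://www.sportowefakty.pl/zuzel/artykul.html'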
@ -20,7 +20,7 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
    no_stylesheets = True

    ignore_duplicate_articles = {'title','url'}

    compress_news_images = True

    extra_css = '''
        body{ text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}

@ -6,17 +6,62 @@ __license__ = 'GPL v3'
www.canada.com
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe

from calibre.ebooks.BeautifulSoup import Tag, BeautifulStoneSoup


class TimesColonist(BasicNewsRecipe):

    # Customization -- remove sections you don't want.
    # If your e-reader is an e-ink Kindle and your output profile is
    # set properly this recipe will not include images because the
    # resulting file is too large. If you have one of these and want
    # images you can set kindle_omit_images = False
    # and remove sections (typically the e-ink Kindles will
    # work with about a dozen of these, but your mileage may vary).

    kindle_omit_images = True

    section_list = [
        ('','Web Front Page'),
        ('news/','News Headlines'),
        ('news/b-c/','BC News'),
        ('news/national/','National News'),
        ('news/world/','World News'),
        ('opinion/','Opinion'),
        ('opinion/letters/','Letters'),
        ('business/','Business'),
        ('business/money/','Money'),
        ('business/technology/','Technology'),
        ('business/working/','Working'),
        ('sports/','Sports'),
        ('sports/hockey/','Hockey'),
        ('sports/football/','Football'),
        ('sports/basketball/','Basketball'),
        ('sports/golf/','Golf'),
        ('entertainment/','entertainment'),
        ('entertainment/go/','Go!'),
        ('entertainment/music/','Music'),
        ('entertainment/books/','Books'),
        ('entertainment/Movies/','Movies'),
        ('entertainment/television/','Television'),
        ('life/','Life'),
        ('life/health/','Health'),
        ('life/travel/','Travel'),
        ('life/driving/','Driving'),
        ('life/homes/','Homes'),
        ('life/food-drink/','Food & Drink')
    ]

    title = u'Victoria Times Colonist'
    url_prefix = 'http://www.timescolonist.com'
    description = u'News from Victoria, BC'
    fp_tag = 'CAN_TC'

    masthead_url = 'http://www.timescolonist.com/gmg/img/global/logoTimesColonist.png'


    url_list = []
    language = 'en_CA'
    __author__ = 'Nick Redding'
@ -29,15 +74,21 @@ class TimesColonist(BasicNewsRecipe):
    .caption { font-size: xx-small; font-style: italic; font-weight: normal; }
    '''
    keep_only_tags = [dict(name='div', attrs={'class':re.compile('main.content')})]
    remove_tags = [{'class':'comments'},

    def __init__(self, options, log, progress_reporter):
        self.remove_tags = [{'class':'comments'},
                            {'id':'photocredit'},
                            dict(name='div', attrs={'class':re.compile('top.controls')}),
                            dict(name='div', attrs={'class':re.compile('^comments')}),
                            dict(name='div', attrs={'class':re.compile('social')}),
                            dict(name='div', attrs={'class':re.compile('tools')}),
                            dict(name='div', attrs={'class':re.compile('bottom.tools')}),
                            dict(name='div', attrs={'class':re.compile('window')}),
                            dict(name='div', attrs={'class':re.compile('related.news.element')})]

        print("PROFILE NAME = "+options.output_profile.short_name)
        if self.kindle_omit_images and options.output_profile.short_name in ['kindle', 'kindle_dx', 'kindle_pw']:
            self.remove_tags.append(dict(name='div', attrs={'class':re.compile('image-container')}))
        BasicNewsRecipe.__init__(self, options, log, progress_reporter)

    def get_cover_url(self):
        from datetime import timedelta, date
@ -122,7 +173,6 @@ class TimesColonist(BasicNewsRecipe):
    def preprocess_html(self,soup):
        byline = soup.find('p',attrs={'class':re.compile('ancillary')})
        if byline is not None:
            byline.find('a')
            authstr = self.tag_to_string(byline,False)
            authstr = re.sub('/ *Times Colonist','/',authstr, flags=re.IGNORECASE)
            authstr = re.sub('BY */','',authstr, flags=re.IGNORECASE)
@ -149,9 +199,10 @@ class TimesColonist(BasicNewsRecipe):
        atag = htag.a
        if atag is not None:
            url = atag['href']
            #print("Checking "+url)
            if atag['href'].startswith('/'):
                url = self.url_prefix+atag['href']
            url = url.strip()
            # print("Checking >>"+url+'<<\n\r')
            if url.startswith('/'):
                url = self.url_prefix+url
            if url in self.url_list:
                return
            self.url_list.append(url)
@ -171,10 +222,10 @@ class TimesColonist(BasicNewsRecipe):
        if dtag is not None:
            description = self.tag_to_string(dtag,False)
        article_list.append(dict(title=title,url=url,date='',description=description,author='',content=''))
        #print(sectitle+title+": description = "+description+" URL="+url)
        print(sectitle+title+": description = "+description+" URL="+url+'\n\r')

    def add_section_index(self,ans,securl,sectitle):
        print("Add section url="+self.url_prefix+'/'+securl)
        print("Add section url="+self.url_prefix+'/'+securl+'\n\r')
        try:
            soup = self.index_to_soup(self.url_prefix+'/'+securl)
        except:
@ -193,33 +244,7 @@ class TimesColonist(BasicNewsRecipe):

    def parse_index(self):
        ans = []
        ans = self.add_section_index(ans,'','Web Front Page')
        ans = self.add_section_index(ans,'news/','News Headlines')
        ans = self.add_section_index(ans,'news/b-c/','BC News')
        ans = self.add_section_index(ans,'news/national/','Natioanl News')
        ans = self.add_section_index(ans,'news/world/','World News')
        ans = self.add_section_index(ans,'opinion/','Opinion')
        ans = self.add_section_index(ans,'opinion/letters/','Letters')
        ans = self.add_section_index(ans,'business/','Business')
        ans = self.add_section_index(ans,'business/money/','Money')
        ans = self.add_section_index(ans,'business/technology/','Technology')
        ans = self.add_section_index(ans,'business/working/','Working')
        ans = self.add_section_index(ans,'sports/','Sports')
        ans = self.add_section_index(ans,'sports/hockey/','Hockey')
        ans = self.add_section_index(ans,'sports/football/','Football')
        ans = self.add_section_index(ans,'sports/basketball/','Basketball')
        ans = self.add_section_index(ans,'sports/golf/','Golf')
        ans = self.add_section_index(ans,'entertainment/','entertainment')
        ans = self.add_section_index(ans,'entertainment/go/','Go!')
        ans = self.add_section_index(ans,'entertainment/music/','Music')
        ans = self.add_section_index(ans,'entertainment/books/','Books')
        ans = self.add_section_index(ans,'entertainment/Movies/','movies')
        ans = self.add_section_index(ans,'entertainment/television/','Television')
        ans = self.add_section_index(ans,'life/','Life')
        ans = self.add_section_index(ans,'life/health/','Health')
        ans = self.add_section_index(ans,'life/travel/','Travel')
        ans = self.add_section_index(ans,'life/driving/','Driving')
        ans = self.add_section_index(ans,'life/homes/','Homes')
        ans = self.add_section_index(ans,'life/food-drink/','Food & Drink')
        for (url,title) in self.section_list:
            ans = self.add_section_index(ans,url,title)
        return ans


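The Times Colonist refactor above does two things: it replaces the hard-coded parse_index section calls with a data-driven section_list, and it moves remove_tags from a class attribute into __init__, because the conversion options (and hence the output profile) only exist at construction time. The construction-time pattern in isolation, as a sketch (class and section names are illustrative):

import re
from calibre.web.feeds.news import BasicNewsRecipe

class ProfileAwareRecipe(BasicNewsRecipe):  # illustrative subclass
    title = 'Example'
    kindle_omit_images = True

    def __init__(self, options, log, progress_reporter):
        # Build remove_tags per run: output_profile is only known here.
        self.remove_tags = [{'class': 'comments'}]
        if self.kindle_omit_images and options.output_profile.short_name in (
                'kindle', 'kindle_dx', 'kindle_pw'):
            self.remove_tags.append(
                dict(name='div', attrs={'class': re.compile('image-container')}))
        BasicNewsRecipe.__init__(self, options, log, progress_reporter)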
@ -1,144 +0,0 @@
#!/usr/bin/env python

from calibre.web.feeds.recipes import BasicNewsRecipe

class GazetaWyborczaDuzyForma(BasicNewsRecipe):
    cover_url = 'http://bi.gazeta.pl/im/8/5415/m5415058.gif'
    title = u"Gazeta Wyborcza Duzy Format"
    __author__ = 'ravcio - rlelusz[at]gmail.com'
    description = u"Articles from Gazeta's website"
    language = 'pl'
    max_articles_per_feed = 50 #you can increase it even up to maybe 600, should still work
    recursions = 0
    encoding = 'iso-8859-2'
    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = False

    keep_only_tags = [
        dict(name='div', attrs={'id':['k1']})
    ]

    remove_tags = [
        dict(name='div', attrs={'class':['zdjM', 'rel_video', 'zdjP', 'rel_box', 'index mod_zi_dolStrony']})
        ,dict(name='div', attrs={'id':['source', 'banP4', 'article_toolbar', 'rel', 'inContext_disabled']})
        ,dict(name='ul', attrs={'id':['articleToolbar']})
        ,dict(name='img', attrs={'class':['brand']})
        ,dict(name='h5', attrs={'class':['author']})
        ,dict(name='h6', attrs={'class':['date']})
        ,dict(name='p', attrs={'class':['txt_upl']})
    ]

    remove_tags_after = [
        dict(name='div', attrs={'id':['Str']}) #page-number navigator
    ]

    def load_article_links(self, url, count):
        print '--- load_article_links', url, count

        #page with links to articles
        soup = self.index_to_soup(url)

        #table with articles
        list = soup.find('div', attrs={'class':'GWdalt'})

        #single articles (link, title, ...)
        links = list.findAll('div', attrs={'class':['GWdaltE']})

        if len(links) < count:
            #load links to more articles...

            #find the 'next page' link
            pages_nav = list.find('div', attrs={'class':'pages'})
            next = pages_nav.find('a', attrs={'class':'next'})
            if next:
                print 'next=', next['href']
                url = 'http://wyborcza.pl' + next['href']
                #e.g. url = 'http://wyborcza.pl/0,75480.html?str=2'

                older_links = self.load_article_links(url, count - len(links))
                links.extend(older_links)

        return links

    #produce list of articles to download
    def parse_index(self):
        print '--- parse_index'

        max_articles = 8000
        links = self.load_article_links('http://wyborcza.pl/0,75480.html', max_articles)

        ans = []
        key = None
        articles = {}

        key = 'Uncategorized'
        articles[key] = []

        for div_art in links:
            div_date = div_art.find('div', attrs={'class':'kL'})
            div = div_art.find('div', attrs={'class':'kR'})

            a = div.find('a', href=True)

            url = a['href']
            title = a.string
            description = ''
            pubdate = div_date.string.rstrip().lstrip()
            summary = div.find('span', attrs={'class':'lead'})

            desc = summary.find('a', href=True)
            if desc:
                desc.extract()

            description = self.tag_to_string(summary, use_alt=False)
            description = description.rstrip().lstrip()

            feed = key if key is not None else 'Duzy Format'

            if not articles.has_key(feed):
                articles[feed] = []

            if description != '': # skip picture-only articles
                articles[feed].append(
                    dict(title=title, url=url, date=pubdate,
                        description=description,
                        content=''))

        ans = [(key, articles[key])]
        return ans

    def append_page(self, soup, appendtag, position):
        pager = soup.find('div',attrs={'id':'Str'})
        if pager:
            #look for an 'a' element containing 'nast' (next page); exit if not found
            list = pager.findAll('a')

            for elem in list:
                if 'nast' in elem.string:
                    nexturl = elem['href']

                    soup2 = self.index_to_soup('http://warszawa.gazeta.pl' + nexturl)

                    texttag = soup2.find('div', attrs={'id':'artykul'})

                    newpos = len(texttag.contents)
                    self.append_page(soup2,texttag,newpos)
                    texttag.extract()
                    appendtag.insert(position,texttag)

    def preprocess_html(self, soup):
        self.append_page(soup, soup.body, 3)

        # finally remove some tags
        pager = soup.find('div',attrs={'id':'Str'})
        if pager:
            pager.extract()

        pager = soup.find('div',attrs={'class':'tylko_int'})
        if pager:
            pager.extract()

        return soup
57
recipes/wysokie_obcasy.recipe
Normal file
@ -0,0 +1,57 @@
#!/usr/bin/env python
__license__ = 'GPL v3'

from calibre.web.feeds.news import BasicNewsRecipe

class WysokieObcasyRecipe(BasicNewsRecipe):
    __author__ = u'Artur Stachecki <artur.stachecki@gmail.com>'
    language = 'pl'
    version = 1

    title = u'Wysokie Obcasy'
    publisher = 'Agora SA'
    description = u'Serwis sobotniego dodatku do Gazety Wyborczej'
    category='magazine'
    language = 'pl'
    publication_type = 'magazine'
    cover_url=''
    remove_empty_feeds= True
    no_stylesheets=True
    oldest_article = 7
    max_articles_per_feed = 100000
    recursions = 0

    no_stylesheets = True
    remove_javascript = True
    simultaneous_downloads = 5

    keep_only_tags =[]
    keep_only_tags.append(dict(name = 'div', attrs = {'id' : 'article'}))

    remove_tags =[]
    remove_tags.append(dict(name = 'img'))
    remove_tags.append(dict(name = 'p', attrs = {'class' : 'info'}))

    extra_css = '''
        body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
        h1{text-align: left;}
    '''

    feeds = [
        ('Wszystkie Artykuly', 'feed://www.wysokieobcasy.pl/pub/rss/wysokieobcasy.xml'),
    ]

    def print_version(self,url):
        baseURL='http://www.wysokieobcasy.pl/wysokie-obcasy'
        segments = url.split(',')
        subPath= '/2029020,'
        articleURL1 = segments[1]
        articleURL2 = segments[2]
        printVerString=articleURL1 + ',' + articleURL2
        s= baseURL + subPath + printVerString + '.html'
        return s

    def get_cover_url(self):
        soup = self.index_to_soup('http://www.wysokieobcasy.pl/wysokie-obcasy/0,0.html')
        self.cover_url = soup.find(attrs={'class':'holder_cr'}).find('img')['src']
        return getattr(self, 'cover_url', self.cover_url)
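print_version above rebuilds the article URL around a fixed print-view path segment, keeping only the two numeric IDs from the original address. A hypothetical walk-through (the numeric IDs below are made up to show the shape of the rewrite, not taken from the site):

url = 'http://www.wysokieobcasy.pl/wysokie-obcasy/1,96856,13685360,Tytul.html'
segments = url.split(',')   # ['...wysokie-obcasy/1', '96856', '13685360', 'Tytul.html']
print_url = ('http://www.wysokieobcasy.pl/wysokie-obcasy/2029020,'
             + segments[1] + ',' + segments[2] + '.html')
# -> 'http://www.wysokieobcasy.pl/wysokie-obcasy/2029020,96856,13685360.html'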
@ -390,7 +390,6 @@


    <xsl:output method = "xml"/>

    <xsl:key name="style-types" match="rtf:paragraph-definition" use="@style-number"/>


@ -415,13 +414,11 @@
    </xsl:template>

    <xsl:template match="rtf:page-break">
        <xsl:element name="br">
            <xsl:attribute name="style">page-break-after:always</xsl:attribute>
        </xsl:element>
        <br style = "page-break-after:always"/>
    </xsl:template>

    <xsl:template match="rtf:hardline-break">
        <xsl:element name="br"/>
        <br/>
    </xsl:template>

    <xsl:template match="rtf:rtf-definition|rtf:font-table|rtf:color-table|rtf:style-table|rtf:page-definition|rtf:list-table|rtf:override-table|rtf:override-list|rtf:list-text"/>
@ -472,9 +469,7 @@
    </xsl:template>

    <xsl:template match="rtf:pict">
        <xsl:element name="img">
            <xsl:attribute name="src"><xsl:value-of select="@num" /></xsl:attribute>
        </xsl:element>
        <img src = "{@num}"/>
    </xsl:template>

    <xsl:template match="*">

@ -47,6 +47,10 @@ binary_includes = [
    '/usr/lib/libgthread-2.0.so.0',
    '/usr/lib/libpng14.so.14',
    '/usr/lib/libexslt.so.0',
    # Ensure that libimobiledevice is compiled against openssl, not gnutls
    '/usr/lib/libimobiledevice.so.3',
    '/usr/lib/libusbmuxd.so.2',
    '/usr/lib/libplist.so.1',
    MAGICK_PREFIX+'/lib/libMagickWand.so.5',
    MAGICK_PREFIX+'/lib/libMagickCore.so.5',
    '/usr/lib/libgcrypt.so.11',

@ -399,7 +399,8 @@ class Py2App(object):
    @flush
    def add_fontconfig(self):
        info('\nAdding fontconfig')
        for x in ('fontconfig.1', 'freetype.6', 'expat.1'):
        for x in ('fontconfig.1', 'freetype.6', 'expat.1',
                  'plist.1', 'usbmuxd.2', 'imobiledevice.3'):
            src = os.path.join(SW, 'lib', 'lib'+x+'.dylib')
            self.install_dylib(src)
        dst = os.path.join(self.resources_dir, 'fonts')

@ -12,13 +12,13 @@ msgstr ""
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
"devel@lists.alioth.debian.org>\n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
"PO-Revision-Date: 2013-03-27 13:07+0000\n"
"PO-Revision-Date: 2013-03-28 13:01+0000\n"
"Last-Translator: Ferran Rius <frius64@hotmail.com>\n"
"Language-Team: Catalan <linux@softcatala.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2013-03-28 04:41+0000\n"
"X-Launchpad-Export-Date: 2013-03-29 04:36+0000\n"
"X-Generator: Launchpad (build 16546)\n"
"Language: ca\n"

@ -1884,7 +1884,7 @@ msgstr "Awera"

#. name for aws
msgid "Awyu; South"
msgstr "Awyu meridional"
msgstr "Awyu; meridional"

#. name for awt
msgid "Araweté"
@ -1892,7 +1892,7 @@ msgstr "Araweté"

#. name for awu
msgid "Awyu; Central"
msgstr "Awyu central"
msgstr "Awyu; Central"

#. name for awv
msgid "Awyu; Jair"
@ -4052,7 +4052,7 @@ msgstr "Buginès"

#. name for buh
msgid "Bunu; Younuo"
msgstr "Bunu; Younuo"
msgstr "Bunu; Younou"

#. name for bui
msgid "Bongili"
@ -4308,7 +4308,7 @@ msgstr "Bwa"

#. name for bwx
msgid "Bunu; Bu-Nao"
msgstr "Bunu; Bu-Nao"
msgstr "Bunu; Bu Nao"

#. name for bwy
msgid "Bwamu; Cwi"
@ -19804,7 +19804,7 @@ msgstr "Minoà"

#. name for omo
msgid "Utarmbung"
msgstr ""
msgstr "Utarmbung"

#. name for omp
msgid "Manipuri; Old"
@ -20344,7 +20344,7 @@ msgstr "Pear"

#. name for pcc
msgid "Bouyei"
msgstr ""
msgstr "Buyí"

#. name for pcd
msgid "Picard"
@ -20456,11 +20456,11 @@ msgstr "Pengo"

#. name for peh
msgid "Bonan"
msgstr ""
msgstr "Bonan"

#. name for pei
msgid "Chichimeca-Jonaz"
msgstr ""
msgstr "Chichimec"

#. name for pej
msgid "Pomo; Northern"
@ -20484,7 +20484,7 @@ msgstr "Persa Antic"

#. name for pep
msgid "Kunja"
msgstr ""
msgstr "Kunja"

#. name for peq
msgid "Pomo; Southern"
@ -20536,7 +20536,7 @@ msgstr "Pagi"

#. name for pgk
msgid "Rerep"
msgstr ""
msgstr "Rerep"

#. name for pgl
msgid "Irish; Primitive"
@ -20624,7 +20624,7 @@ msgstr "Pima Baix"

#. name for pib
msgid "Yine"
msgstr ""
msgstr "Yine"

#. name for pic
msgid "Pinji"
@ -20660,7 +20660,7 @@ msgstr "Pijao"

#. name for pil
msgid "Yom"
msgstr ""
msgstr "Yom"

#. name for pim
msgid "Powhatan"
@ -20760,7 +20760,7 @@ msgstr "Llenguatge de signes pakistaní"

#. name for pkt
msgid "Maleng"
msgstr ""
msgstr "Maleng"

#. name for pku
msgid "Paku"
@ -20768,7 +20768,7 @@ msgstr "Paku"

#. name for pla
msgid "Miani"
msgstr ""
msgstr "Miani"

#. name for plb
msgid "Polonombauk"
@ -20804,7 +20804,7 @@ msgstr "Polci"

#. name for plk
msgid "Shina; Kohistani"
msgstr ""
msgstr "Shina; Kohistani"

#. name for pll
msgid "Palaung; Shwe"
@ -20852,7 +20852,7 @@ msgstr "Palawà; Brooke"

#. name for ply
msgid "Bolyu"
msgstr ""
msgstr "Bolyu"

#. name for plz
msgid "Paluan"
@ -20896,7 +20896,7 @@ msgstr "Algonquí Carolina"

#. name for pml
msgid "Lingua Franca"
msgstr ""
msgstr "Aljamia"

#. name for pmm
msgid "Pomo"
@ -20924,7 +20924,7 @@ msgstr "Piemontès"

#. name for pmt
msgid "Tuamotuan"
msgstr ""
msgstr "Tuamotu"

#. name for pmu
msgid "Panjabi; Mirpur"
@ -20972,7 +20972,7 @@ msgstr "Penrhyn"

#. name for pni
msgid "Aoheng"
msgstr ""
msgstr "Aoheng"

#. name for pnm
msgid "Punan Batu 1"
@ -21008,7 +21008,7 @@ msgstr "Pontic"

#. name for pnu
msgid "Bunu; Jiongnai"
msgstr ""
msgstr "Bunu; Jiongnai"

#. name for pnv
msgid "Pinigura"
@ -21100,7 +21100,7 @@ msgstr "Potavatomi"

#. name for pov
msgid "Crioulo; Upper Guinea"
msgstr ""
msgstr "Crioll guineà"

#. name for pow
msgid "Popoloca; San Felipe Otlaltepec"
@ -21128,7 +21128,7 @@ msgstr "Paipai"

#. name for ppk
msgid "Uma"
msgstr ""
msgstr "Uma"

#. name for ppl
msgid "Pipil"
@ -21144,7 +21144,7 @@ msgstr "Papapana"

#. name for ppo
msgid "Folopa"
msgstr ""
msgstr "Folopa"

#. name for ppp
msgid "Pelende"
@ -21180,7 +21180,7 @@ msgstr "Malecite-Passamaquoddy"

#. name for prb
msgid "Lua'"
msgstr ""
msgstr "Lua"

#. name for prc
msgid "Parachi"
@ -21220,7 +21220,7 @@ msgstr "Llenguatge de signes peruà"

#. name for prm
msgid "Kibiri"
msgstr ""
msgstr "Kibiri"

#. name for prn
msgid "Prasuni"
@ -21272,7 +21272,7 @@ msgstr "Llenguatge de signes de Providencia"

#. name for psa
msgid "Awyu; Asue"
msgstr ""
msgstr "Awyu; Asue"

#. name for psc
msgid "Persian Sign Language"
@ -21328,7 +21328,7 @@ msgstr "Llenguatge de signes portuguès"

#. name for pss
msgid "Kaulong"
msgstr ""
msgstr "Kaulong"

#. name for pst
msgid "Pashto; Central"
@ -21376,11 +21376,11 @@ msgstr "Pìamatsina"

#. name for ptt
msgid "Enrekang"
msgstr ""
msgstr "Enrekang"

#. name for ptu
msgid "Bambam"
msgstr ""
msgstr "Bambam"

#. name for ptv
msgid "Port Vato"
@ -29584,7 +29584,7 @@ msgstr ""

#. name for yir
msgid "Awyu; North"
msgstr ""
msgstr "Awyu; Septentrional"

#. name for yis
msgid "Yis"

@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = u'calibre'
numeric_version = (0, 9, 25)
numeric_version = (0, 9, 26)
__version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"

@ -757,9 +757,10 @@ from calibre.ebooks.metadata.sources.isbndb import ISBNDB
from calibre.ebooks.metadata.sources.overdrive import OverDrive
from calibre.ebooks.metadata.sources.douban import Douban
from calibre.ebooks.metadata.sources.ozon import Ozon
# from calibre.ebooks.metadata.sources.google_images import GoogleImages
from calibre.ebooks.metadata.sources.google_images import GoogleImages
from calibre.ebooks.metadata.sources.big_book_search import BigBookSearch

plugins += [GoogleBooks, Amazon, Edelweiss, OpenLibrary, ISBNDB, OverDrive, Douban, Ozon]
plugins += [GoogleBooks, GoogleImages, Amazon, Edelweiss, OpenLibrary, ISBNDB, OverDrive, Douban, Ozon, BigBookSearch]

# }}}

@ -91,7 +91,7 @@ def restore_plugin_state_to_default(plugin_or_name):
    config['enabled_plugins'] = ep

default_disabled_plugins = set([
    'Overdrive', 'Douban Books', 'OZON.ru', 'Edelweiss', 'Google Images',
    'Overdrive', 'Douban Books', 'OZON.ru', 'Edelweiss', 'Google Images', 'Big Book Search',
])

def is_disabled(plugin):

@ -31,7 +31,7 @@ msprefs.defaults['find_first_edition_date'] = False
# Google covers are often poor quality (scans/errors) but they have high
# resolution, so they trump covers from better sources. So make sure they
# are only used if no other covers are found.
msprefs.defaults['cover_priorities'] = {'Google':2, 'Google Images':2}
msprefs.defaults['cover_priorities'] = {'Google':2, 'Google Images':2, 'Big Book Search':2}

def create_log(ostream=None):
    from calibre.utils.logging import ThreadSafeLog, FileStream
@ -429,6 +429,40 @@ class Source(Plugin):
        mi.tags = list(map(fixcase, mi.tags))
        mi.isbn = check_isbn(mi.isbn)

    def download_multiple_covers(self, title, authors, urls, get_best_cover, timeout, result_queue, abort, log, prefs_name='max_covers'):
        if not urls:
            log('No images found for, title: %r and authors: %r'%(title, authors))
            return
        from threading import Thread
        import time
        if prefs_name:
            urls = urls[:self.prefs[prefs_name]]
        if get_best_cover:
            urls = urls[:1]
        log('Downloading %d covers'%len(urls))
        workers = [Thread(target=self.download_image, args=(u, timeout, log, result_queue)) for u in urls]
        for w in workers:
            w.daemon = True
            w.start()
        alive = True
        start_time = time.time()
        while alive and not abort.is_set() and time.time() - start_time < timeout:
            alive = False
            for w in workers:
                if w.is_alive():
                    alive = True
                    break
            abort.wait(0.1)

    def download_image(self, url, timeout, log, result_queue):
        try:
            ans = self.browser.open_novisit(url, timeout=timeout).read()
            result_queue.put((self, ans))
            log('Downloaded cover from: %s'%url)
        except Exception:
            self.log.exception('Failed to download cover from: %r'%url)


    # }}}

    # Metadata API {{{

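The new Source.download_multiple_covers helper factors out the fan-out every multi-cover plugin needs: cap the URL list via the plugin's max_covers option, truncate to one URL when only the best cover is wanted, spawn a daemon thread per image and poll with the abort event until the workers finish or the timeout expires. A plugin with can_get_multiple_covers = True now only has to collect URLs; a sketch of a minimal caller (plugin name and URL list are hypothetical):

from calibre.ebooks.metadata.sources.base import Source, Option

class ExampleCovers(Source):  # hypothetical plugin, for illustration only
    name = 'Example Covers'
    capabilities = frozenset(['cover'])
    can_get_multiple_covers = True
    options = (Option('max_covers', 'number', 5, _('Maximum number of covers to get'),
               _('The maximum number of covers to process from the search result')),)

    def download_cover(self, log, result_queue, abort, title=None, authors=None,
                       identifiers={}, timeout=30, get_best_cover=False):
        urls = ['http://example.com/cover1.jpg']  # placeholder: collect real URLs here
        # Thread management, the max_covers cap and the get_best_cover
        # truncation all happen inside the shared helper:
        self.download_multiple_covers(title, authors, urls, get_best_cover,
                                      timeout, result_queue, abort, log)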
58
src/calibre/ebooks/metadata/sources/big_book_search.py
Normal file
@ -0,0 +1,58 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

from calibre.ebooks.metadata.sources.base import Source, Option

def get_urls(br, tokens):
    from urllib import quote_plus
    from mechanize import Request
    from lxml import html
    escaped = [quote_plus(x.encode('utf-8')) for x in tokens if x and x.strip()]
    q = b'+'.join(escaped)
    url = 'http://bigbooksearch.com/books/'+q
    br.open(url).read()
    req = Request('http://bigbooksearch.com/query.php?SearchIndex=books&Keywords=%s&ItemPage=1'%q)
    req.add_header('X-Requested-With', 'XMLHttpRequest')
    req.add_header('Referer', url)
    raw = br.open(req).read()
    root = html.fromstring(raw.decode('utf-8'))
    urls = [i.get('src') for i in root.xpath('//img[@src]')]
    return urls

class BigBookSearch(Source):

    name = 'Big Book Search'
    description = _('Downloads multiple book covers from Amazon. Useful to find alternate covers.')
    capabilities = frozenset(['cover'])
    config_help_message = _('Configure the Big Book Search plugin')
    can_get_multiple_covers = True
    options = (Option('max_covers', 'number', 5, _('Maximum number of covers to get'),
               _('The maximum number of covers to process from the search result')),
    )
    supports_gzip_transfer_encoding = True

    def download_cover(self, log, result_queue, abort,
            title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):
        if not title:
            return
        br = self.browser
        tokens = tuple(self.get_title_tokens(title)) + tuple(self.get_author_tokens(authors))
        urls = get_urls(br, tokens)
        self.download_multiple_covers(title, authors, urls, get_best_cover, timeout, result_queue, abort, log)

def test():
    from calibre import browser
    import pprint
    br = browser()
    urls = get_urls(br, ['consider', 'phlebas', 'banks'])
    pprint.pprint(urls)

if __name__ == '__main__':
    test()

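get_urls drives bigbooksearch.com the way the site's own JavaScript does: fetch the search page first (to establish the session), then replay the AJAX query with the X-Requested-With and Referer headers and scrape the <img> sources out of the returned fragment. The bundled test() shows the intended use; the same check can be run by hand, e.g. (a sketch, assuming a calibre source checkout on the import path):

from calibre import browser
from calibre.ebooks.metadata.sources.big_book_search import get_urls

br = browser()
for url in get_urls(br, ['consider', 'phlebas', 'banks']):
    print(url)  # one image URL per cover candidate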
@ -18,12 +18,13 @@ from calibre.utils.magick.draw import Image, save_cover_data_to

class Worker(Thread):

    def __init__(self, plugin, abort, title, authors, identifiers, timeout, rq):
    def __init__(self, plugin, abort, title, authors, identifiers, timeout, rq, get_best_cover=False):
        Thread.__init__(self)
        self.daemon = True

        self.plugin = plugin
        self.abort = abort
        self.get_best_cover = get_best_cover
        self.buf = BytesIO()
        self.log = create_log(self.buf)
        self.title, self.authors, self.identifiers = (title, authors,
@ -37,7 +38,7 @@ class Worker(Thread):
        try:
            if self.plugin.can_get_multiple_covers:
                self.plugin.download_cover(self.log, self.rq, self.abort,
                        title=self.title, authors=self.authors, get_best_cover=True,
                        title=self.title, authors=self.authors, get_best_cover=self.get_best_cover,
                        identifiers=self.identifiers, timeout=self.timeout)
            else:
                self.plugin.download_cover(self.log, self.rq, self.abort,
@ -72,7 +73,7 @@ def process_result(log, result):
    return (plugin, width, height, fmt, data)

def run_download(log, results, abort,
        title=None, authors=None, identifiers={}, timeout=30):
        title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):
    '''
    Run the cover download, putting results into the queue :param:`results`.

@ -89,7 +90,7 @@ def run_download(log, results, abort,
    plugins = [p for p in metadata_plugins(['cover']) if p.is_configured()]

    rq = Queue()
    workers = [Worker(p, abort, title, authors, identifiers, timeout, rq) for p
    workers = [Worker(p, abort, title, authors, identifiers, timeout, rq, get_best_cover=get_best_cover) for p
            in plugins]
    for w in workers:
        w.start()
@ -163,7 +164,7 @@ def download_cover(log,
    abort = Event()

    run_download(log, rq, abort, title=title, authors=authors,
            identifiers=identifiers, timeout=timeout)
            identifiers=identifiers, timeout=timeout, get_best_cover=True)

    results = []

@ -39,39 +39,11 @@ class GoogleImages(Source):
            title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):
        if not title:
            return
        from threading import Thread
        import time
        timeout = max(60, timeout) # Needs at least a minute
        title = ' '.join(self.get_title_tokens(title))
        author = ' '.join(self.get_author_tokens(authors))
        urls = self.get_image_urls(title, author, log, abort, timeout)
        if not urls:
            log('No images found in Google for, title: %r and authors: %r'%(title, author))
            return
        urls = urls[:self.prefs['max_covers']]
        if get_best_cover:
            urls = urls[:1]
        workers = [Thread(target=self.download_image, args=(url, timeout, log, result_queue)) for url in urls]
        for w in workers:
            w.daemon = True
            w.start()
        alive = True
        start_time = time.time()
        while alive and not abort.is_set() and time.time() - start_time < timeout:
            alive = False
            for w in workers:
                if w.is_alive():
                    alive = True
                    break
            abort.wait(0.1)

    def download_image(self, url, timeout, log, result_queue):
        try:
            ans = self.browser.open_novisit(url, timeout=timeout).read()
            result_queue.put((self, ans))
            log('Downloaded cover from: %s'%url)
        except Exception:
            self.log.exception('Failed to download cover from: %r'%url)
        self.download_multiple_covers(title, authors, urls, get_best_cover, timeout, result_queue, abort, log)

    def get_image_urls(self, title, author, log, abort, timeout):
        from calibre.utils.ipc.simple_worker import fork_job, WorkerError

@ -262,6 +262,35 @@ def from_links(container):
            toc.remove(child)
    return toc

def find_text(node):
    LIMIT = 200
    pat = re.compile(r'\s+')
    for child in node:
        if isinstance(child, etree._Element):
            text = xml2text(child).strip()
            text = pat.sub(' ', text)
            if len(text) < 1:
                continue
            if len(text) > LIMIT:
                # Look for less text in a child of this node, recursively
                ntext = find_text(child)
                return ntext or (text[:LIMIT] + '...')
            else:
                return text

def from_files(container):
    toc = TOC()
    for spinepath in container.spine_items:
        name = container.abspath_to_name(spinepath)
        root = container.parsed(name)
        body = XPath('//h:body')(root)
        if not body:
            continue
        text = find_text(body[0])
        if text:
            toc.add(text, name)
    return toc

def add_id(container, name, loc):
    root = container.parsed(name)
    body = root.xpath('//*[local-name()="body"]')[0]

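from_files backs the ToC-editor feature added in this release: one entry per spine item, titled with the first reasonable run of text that find_text can extract (over-long blocks recurse into children and are truncated to 200 characters). A sketch of driving it directly (the book path is illustrative, and the title/dest attribute names follow the TOC nodes used elsewhere in this module):

from calibre.ebooks.oeb.polish.container import get_container
from calibre.ebooks.oeb.polish.toc import from_files

container = get_container('book.epub')   # illustrative path
toc = from_files(container)
for entry in toc:                         # one entry per spine file with usable text
    print(entry.title, '->', entry.dest)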
@ -333,8 +333,8 @@ class OEBReader(object):
        guide = self.oeb.guide
        manifest = self.oeb.manifest
        for elem in xpath(opf, '/o2:package/o2:guide/o2:reference'):
            href = elem.get('href')
            path = urlnormalize(urldefrag(href)[0])
            ref_href = elem.get('href')
            path = urlnormalize(urldefrag(ref_href)[0])
            if path not in manifest.hrefs:
                corrected_href = None
                for href in manifest.hrefs:
@ -342,12 +342,12 @@ class OEBReader(object):
                        corrected_href = href
                        break
                if corrected_href is None:
                    self.logger.warn(u'Guide reference %r not found' % href)
                    self.logger.warn(u'Guide reference %r not found' % ref_href)
                    continue
                href = corrected_href
                ref_href = corrected_href
            typ = elem.get('type')
            if typ not in guide:
                guide.add(typ, elem.get('title'), href)
                guide.add(typ, elem.get('title'), ref_href)

    def _find_ncx(self, opf):
        result = xpath(opf, '/o2:package/o2:spine/@toc')

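The rename above fixes a classic variable-shadowing bug: the original code used href both for the guide reference being processed and as the loop variable scanning manifest.hrefs, so by the time the 'Guide reference %r not found' warning fired, href held the last manifest entry rather than the missing reference. Distilled to a few lines (names are illustrative):

href = 'missing-chapter.html'                  # the guide reference being checked
for href in ('text/a.html', 'text/b.html'):    # reusing the name clobbers it
    pass
print('Guide reference %r not found' % href)   # reports 'text/b.html', not the real one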
@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-

from __future__ import (unicode_literals, division, absolute_import, print_function)
store_version = 2 # Needed for dynamic plugin loading
store_version = 3 # Needed for dynamic plugin loading

__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
@ -18,13 +18,26 @@ from calibre import browser
from calibre.gui2 import open_url
from calibre.gui2.store.search_result import SearchResult

class AmazonDEKindleStore(StorePlugin):
    '''
    For comments on the implementation, please see amazon_plugin.py
    '''

    aff_id = {'tag': 'charhale0a-21'}
    store_link = ('http://www.amazon.de/gp/redirect.html?ie=UTF8&site-redirect=de'
                  '&tag=%(tag)s&linkCode=ur2&camp=1638&creative=19454'
                  '&location=http://www.amazon.de/ebooks-kindle/b?node=530886031')
    store_link_details = ('http://www.amazon.de/gp/redirect.html?ie=UTF8'
                          '&location=http://www.amazon.de/dp/%(asin)s&site-redirect=de'
                          '&tag=%(tag)s&linkCode=ur2&camp=1638&creative=6742')
    search_url = 'http://www.amazon.de/s/?url=search-alias%3Ddigital-text&field-keywords='

    # This class is copy/pasted from amazon_uk_plugin. Do not modify it in any
    # other amazon EU plugin. Be sure to paste it into all other amazon EU plugins
    # when modified.
    author_article = 'von '

    and_word = ' und '

    # ---- Copy from here to end

class AmazonEUBase(StorePlugin):
    '''
    For comments on the implementation, please see amazon_plugin.py
    '''
@ -108,20 +121,3 @@ class AmazonEUBase(StorePlugin):
    def get_details(self, search_result, timeout):
        pass

class AmazonDEKindleStore(AmazonEUBase):
    '''
    For comments on the implementation, please see amazon_plugin.py
    '''

    aff_id = {'tag': 'charhale0a-21'}
    store_link = ('http://www.amazon.de/gp/redirect.html?ie=UTF8&site-redirect=de'
                  '&tag=%(tag)s&linkCode=ur2&camp=1638&creative=19454'
                  '&location=http://www.amazon.de/ebooks-kindle/b?node=530886031')
    store_link_details = ('http://www.amazon.de/gp/redirect.html?ie=UTF8'
                          '&location=http://www.amazon.de/dp/%(asin)s&site-redirect=de'
                          '&tag=%(tag)s&linkCode=ur2&camp=1638&creative=6742')
    search_url = 'http://www.amazon.de/s/?url=search-alias%3Ddigital-text&field-keywords='

    author_article = 'von '

    and_word = ' und '
@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-

from __future__ import (unicode_literals, division, absolute_import, print_function)
store_version = 2 # Needed for dynamic plugin loading
store_version = 3 # Needed for dynamic plugin loading

__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
@ -18,12 +18,25 @@ from calibre import browser
from calibre.gui2 import open_url
from calibre.gui2.store.search_result import SearchResult

class AmazonESKindleStore(StorePlugin):
    '''
    For comments on the implementation, please see amazon_plugin.py
    '''

    # This class is copy/pasted from amazon_uk_plugin. Do not modify it in any
    # other amazon EU plugin. Be sure to paste it into all other amazon EU plugins
    # when modified.
    aff_id = {'tag': 'charhale09-21'}
    store_link = ('http://www.amazon.es/ebooks-kindle/b?_encoding=UTF8&'
                  'node=827231031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3626&creative=24790')
    store_link_details = ('http://www.amazon.es/gp/redirect.html?ie=UTF8&'
                          'location=http://www.amazon.es/dp/%(asin)s&tag=%(tag)s'
                          '&linkCode=ur2&camp=3626&creative=24790')
    search_url = 'http://www.amazon.es/s/?url=search-alias%3Ddigital-text&field-keywords='

    author_article = 'de '

    and_word = ' y '

    # ---- Copy from here to end

class AmazonEUBase(StorePlugin):
    '''
    For comments on the implementation, please see amazon_plugin.py
    '''
@ -107,19 +120,3 @@ class AmazonEUBase(StorePlugin):
    def get_details(self, search_result, timeout):
        pass

class AmazonESKindleStore(AmazonEUBase):
    '''
    For comments on the implementation, please see amazon_plugin.py
    '''

    aff_id = {'tag': 'charhale09-21'}
    store_link = ('http://www.amazon.es/ebooks-kindle/b?_encoding=UTF8&'
                  'node=827231031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3626&creative=24790')
    store_link_details = ('http://www.amazon.es/gp/redirect.html?ie=UTF8&'
                          'location=http://www.amazon.es/dp/%(asin)s&tag=%(tag)s'
                          '&linkCode=ur2&camp=3626&creative=24790')
    search_url = 'http://www.amazon.es/s/?url=search-alias%3Ddigital-text&field-keywords='

    author_article = 'de '

    and_word = ' y '
@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-

from __future__ import (unicode_literals, division, absolute_import, print_function)
store_version = 2 # Needed for dynamic plugin loading
store_version = 3 # Needed for dynamic plugin loading

__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
@ -18,13 +18,22 @@ from calibre import browser
from calibre.gui2 import open_url
from calibre.gui2.store.search_result import SearchResult

class AmazonFRKindleStore(StorePlugin):
    '''
    For comments on the implementation, please see amazon_plugin.py
    '''

    aff_id = {'tag': 'charhale-21'}
    store_link = 'http://www.amazon.fr/livres-kindle/b?ie=UTF8&node=695398031&ref_=sa_menu_kbo1&_encoding=UTF8&tag=%(tag)s&linkCode=ur2&camp=1642&creative=19458' % aff_id
    store_link_details = 'http://www.amazon.fr/gp/redirect.html?ie=UTF8&location=http://www.amazon.fr/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=1634&creative=6738'
    search_url = 'http://www.amazon.fr/s/?url=search-alias%3Ddigital-text&field-keywords='

    # This class is copy/pasted from amazon_uk_plugin. Do not modify it in any
    # other amazon EU plugin. Be sure to paste it into all other amazon EU plugins
    # when modified.
    author_article = 'de '

    and_word = ' et '

    # ---- Copy from here to end

class AmazonEUBase(StorePlugin):
    '''
    For comments on the implementation, please see amazon_plugin.py
    '''
@ -108,16 +117,3 @@ class AmazonEUBase(StorePlugin):
    def get_details(self, search_result, timeout):
        pass

class AmazonFRKindleStore(AmazonEUBase):
    '''
    For comments on the implementation, please see amazon_plugin.py
    '''

    aff_id = {'tag': 'charhale-21'}
    store_link = 'http://www.amazon.fr/livres-kindle/b?ie=UTF8&node=695398031&ref_=sa_menu_kbo1&_encoding=UTF8&tag=%(tag)s&linkCode=ur2&camp=1642&creative=19458' % aff_id
    store_link_details = 'http://www.amazon.fr/gp/redirect.html?ie=UTF8&location=http://www.amazon.fr/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=1634&creative=6738'
    search_url = 'http://www.amazon.fr/s/?url=search-alias%3Ddigital-text&field-keywords='

    author_article = 'de '

    and_word = ' et '

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-

from __future__ import (unicode_literals, division, absolute_import, print_function)
store_version = 2 # Needed for dynamic plugin loading
store_version = 3 # Needed for dynamic plugin loading

__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
@ -18,12 +18,25 @@ from calibre import browser
from calibre.gui2 import open_url
from calibre.gui2.store.search_result import SearchResult

class AmazonITKindleStore(StorePlugin):
    '''
    For comments on the implementation, please see amazon_plugin.py
    '''

    # This class is copy/pasted from amazon_uk_plugin. Do not modify it in any
    # other amazon EU plugin. Be sure to paste it into all other amazon EU plugins
    # when modified.
    aff_id = {'tag': 'httpcharles07-21'}
    store_link = ('http://www.amazon.it/ebooks-kindle/b?_encoding=UTF8&'
                  'node=827182031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3370&creative=23322')
    store_link_details = ('http://www.amazon.it/gp/redirect.html?ie=UTF8&'
                          'location=http://www.amazon.it/dp/%(asin)s&tag=%(tag)s&'
                          'linkCode=ur2&camp=3370&creative=23322')
    search_url = 'http://www.amazon.it/s/?url=search-alias%3Ddigital-text&field-keywords='

    author_article = 'di '

    and_word = ' e '

    # ---- Copy from here to end

class AmazonEUBase(StorePlugin):
    '''
    For comments on the implementation, please see amazon_plugin.py
    '''
@ -106,20 +119,3 @@ class AmazonEUBase(StorePlugin):

    def get_details(self, search_result, timeout):
        pass

class AmazonITKindleStore(AmazonEUBase):
    '''
    For comments on the implementation, please see amazon_plugin.py
    '''

    aff_id = {'tag': 'httpcharles07-21'}
    store_link = ('http://www.amazon.it/ebooks-kindle/b?_encoding=UTF8&'
                  'node=827182031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3370&creative=23322')
    store_link_details = ('http://www.amazon.it/gp/redirect.html?ie=UTF8&'
                          'location=http://www.amazon.it/dp/%(asin)s&tag=%(tag)s&'
                          'linkCode=ur2&camp=3370&creative=23322')
    search_url = 'http://www.amazon.it/s/?url=search-alias%3Ddigital-text&field-keywords='

    author_article = 'di '

    and_word = ' e '
@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-

from __future__ import (unicode_literals, division, absolute_import, print_function)
store_version = 2 # Needed for dynamic plugin loading
store_version = 3 # Needed for dynamic plugin loading

__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
@ -19,11 +19,28 @@ from calibre.gui2.store import StorePlugin
from calibre.gui2.store.search_result import SearchResult


# This class is copy/pasted from amazon_uk_plugin. Do not modify it in any
# other amazon EU plugin. Be sure to paste it into all other amazon EU plugins
# when modified.

class AmazonEUBase(StorePlugin):
class AmazonUKKindleStore(StorePlugin):
    aff_id = {'tag': 'calcharles-21'}
    store_link = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&'
                  'location=http://www.amazon.co.uk/Kindle-eBooks/b?'
                  'ie=UTF8&node=341689031&ref_=sa_menu_kbo2&tag=%(tag)s&'
                  'linkCode=ur2&camp=1634&creative=19450')
    store_link_details = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&'
                          'location=http://www.amazon.co.uk/dp/%(asin)s&tag=%(tag)s&'
                          'linkCode=ur2&camp=1634&creative=6738')
    search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords='

    author_article = 'by '

    and_word = ' and '

    # This code is copy/pasted from here to the other amazon EU plugins. Do not
    # modify it in any other amazon EU plugin. Be sure to paste it into all
    # other amazon EU plugins when modified.

    # ---- Copy from here to end

    '''
    For comments on the implementation, please see amazon_plugin.py
    '''
@ -107,18 +124,3 @@ class AmazonEUBase(StorePlugin):
    def get_details(self, search_result, timeout):
        pass

class AmazonUKKindleStore(AmazonEUBase):
    aff_id = {'tag': 'calcharles-21'}
    store_link = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&'
                  'location=http://www.amazon.co.uk/Kindle-eBooks/b?'
                  'ie=UTF8&node=341689031&ref_=sa_menu_kbo2&tag=%(tag)s&'
                  'linkCode=ur2&camp=1634&creative=19450')
    store_link_details = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&'
                          'location=http://www.amazon.co.uk/dp/%(asin)s&tag=%(tag)s&'
                          'linkCode=ur2&camp=1634&creative=6738')
    search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords='

    author_article = 'by '

    and_word = ' and '

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-

from __future__ import (unicode_literals, division, absolute_import, print_function)
store_version = 1 # Needed for dynamic plugin loading
store_version = 2 # Needed for dynamic plugin loading

__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
@ -24,8 +24,8 @@ from calibre.gui2.store.web_store_dialog import WebStoreDialog
class WaterstonesUKStore(BasicStoreConfig, StorePlugin):

    def open(self, parent=None, detail_item=None, external=False):
        url = 'http://clkuk.tradedoubler.com/click?p=51196&a=1951604&g=19333484'
        url_details = 'http://clkuk.tradedoubler.com/click?p(51196)a(1951604)g(16460516)url({0})'
        url = 'http://www.awin1.com/awclick.php?mid=3787&id=120917'
        url_details = 'http://www.awin1.com/cread.php?awinmid=3787&awinaffid=120917&clickref=&p={0}'

        if external or self.config.get('open_external', False):
            if detail_item:

@ -18,7 +18,7 @@ from PyQt4.Qt import (QPushButton, QFrame, QVariant, QMenu, QInputDialog,

from calibre.ebooks.oeb.polish.container import get_container, AZW3Container
from calibre.ebooks.oeb.polish.toc import (
    get_toc, add_id, TOC, commit_toc, from_xpaths, from_links)
    get_toc, add_id, TOC, commit_toc, from_xpaths, from_links, from_files)
from calibre.gui2 import Application, error_dialog, gprefs
from calibre.gui2.progress_indicator import ProgressIndicator
from calibre.gui2.toc.location import ItemEdit
@ -126,6 +126,7 @@ class ItemView(QFrame): # {{{
    go_to_root = pyqtSignal()
    create_from_xpath = pyqtSignal(object)
    create_from_links = pyqtSignal()
    create_from_files = pyqtSignal()
    flatten_toc = pyqtSignal()

    def __init__(self, parent):
@ -183,6 +184,15 @@ class ItemView(QFrame): # {{{
            )))
        l.addWidget(b)

        self.cfb = b = QPushButton(_('Generate ToC from &files'))
        b.clicked.connect(self.create_from_files)
        b.setToolTip(textwrap.fill(_(
            'Generate a Table of Contents from individual files in the book.'
            ' Each entry in the ToC will point to the start of the file, the'
            ' text of the entry will be the "first line" of text from the file.'
        )))
        l.addWidget(b)

        self.xpb = b = QPushButton(_('Generate ToC from &XPath'))
        b.clicked.connect(self.create_from_user_xpath)
        b.setToolTip(textwrap.fill(_(
@ -577,6 +587,7 @@ class TOCView(QWidget): # {{{
        i.add_new_item.connect(self.add_new_item)
        i.create_from_xpath.connect(self.create_from_xpath)
        i.create_from_links.connect(self.create_from_links)
        i.create_from_files.connect(self.create_from_files)
        i.flatten_item.connect(self.flatten_item)
        i.flatten_toc.connect(self.flatten_toc)
        i.go_to_root.connect(self.go_to_root)
@ -778,6 +789,14 @@ class TOCView(QWidget): # {{{
            _('No links were found that could be added to the Table of Contents.'), show=True)
        self.insert_toc_fragment(toc)

    def create_from_files(self):
        toc = from_files(self.ebook)
        if len(toc) == 0:
            return error_dialog(self, _('No items found'),
                _('No files were found that could be added to the Table of Contents.'), show=True)
        self.insert_toc_fragment(toc)


# }}}

class TOCEditor(QDialog): # {{{

@ -54,7 +54,7 @@ def get_parser(usage):
def get_db(dbpath, options):
    global do_notify
    if options.library_path is not None:
        dbpath = options.library_path
        dbpath = os.path.expanduser(options.library_path)
    if dbpath is None:
        raise ValueError('No saved library path, either run the GUI or use the'
            ' --with-library option')
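The one-line change above runs --library-path through os.path.expanduser, so values like '~/Books' (quoted in a shell, or passed from a script where the shell never expands the tilde) resolve to a real path before the database is opened:

import os.path

# Before this change '~/Books' was used verbatim and the library open failed.
print(os.path.expanduser('~/Books'))   # e.g. '/home/user/Books'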