Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)

Commit f6fee32395: sync with Kovid's branch

@@ -20,6 +20,58 @@
# new recipes:
#   - title:

- version: 0.9.26
  date: 2013-04-05

  new features:
    - title: "PDF Output: Allow using templates to create arbitrary headers and footers. Look under PDF Output in the conversion dialog for this feature."

    - title: "ToC Editor: Allow generating the ToC directly from individual files inside the ebook. Useful for EPUBs that have individual chapters in single files."
      tickets: [1163520]

    - title: "ToC Editor: Add buttons to indent/unindent the current entry"

    - title: "ToC Editor: Right-click menu to perform various useful actions on entries in the ToC"

    - title: "Column icons: Allow use of wide images as column icons"

    - title: "Add USB ids for the Palm Pre2 and Samsung Galaxy phone to the device drivers"
      tickets: [1162293,1163115]

  bug fixes:
    - title: "PDF Output: Fix generating page numbers causing links to not work."
      tickets: [1162573]

    - title: "Wrong filename output in error message when 'Guide reference not found'"
      tickets: [1163659]

    - title: "Get Books: Update Amazon, Barnes & Noble, Waterstones and Gutenberg store plugins for website change"

    - title: "PDF Output: Fix 1 pixel wide left and top margins on the cover page for some PDF conversions due to incorrect rounding."
      tickets: [1162054]

    - title: "ToC Editor: Fix drag and drop of multiple items resulting in the dropped items being in random order sometimes."
      tickets: [1161999]

  improved recipes:
    - Financial Times UK
    - Sing Tao Daily
    - Apple Daily
    - A List Apart
    - Business Week
    - Harpers printed edition
    - Harvard Business Review

  new recipes:
    - title: AM730
      author: Eddie Lau

    - title: Arret sur images
      author: Francois D

    - title: Diario de Noticias
      author: Jose Pinto

- version: 0.9.25
  date: 2013-03-29
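The header/footer templates mentioned in the first new feature are short HTML snippets entered under PDF Output in the conversion dialog. A minimal sketch of a footer template is shown below; the _TITLE_ and _PAGENUM_ placeholder names are assumptions about calibre's PDF template variables, not something this commit confirms, so check the option documentation before relying on them.

    <p style="text-align:center; font-size:smaller">_TITLE_ - page _PAGENUM_</p>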
recipes/am730.recipe (new file, 290 lines)
@@ -0,0 +1,290 @@
# vim:fileencoding=UTF-8
from __future__ import unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2013, Eddie Lau'
__Date__ = ''
__HiResImg__ = True

'''
Change Log:
2013/03/30 -- first version
'''

from calibre import (__appname__, force_unicode, strftime)
from calibre.utils.date import now as nowf
import os, datetime, re
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
from calibre.utils.localization import canonicalize_lang


class AppleDaily(BasicNewsRecipe):
    title = u'AM730'
    __author__ = 'Eddie Lau'
    publisher = 'AM730'
    oldest_article = 1
    max_articles_per_feed = 100
    auto_cleanup = False
    language = 'zh'
    encoding = 'utf-8'
    auto_cleanup = False
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    description = 'http://www.am730.com.hk'
    category = 'Chinese, News, Hong Kong'
    masthead_url = 'http://www.am730.com.hk/images/logo.jpg'

    extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} div[id=articleHeader] {font-size:200%; text-align:left; font-weight:bold;} photocaption {font-size:50%; margin-left:auto; margin-right:auto;}'
    keep_only_tags = [dict(name='div', attrs={'id':'articleHeader'}),
                      dict(name='div', attrs={'class':'thecontent wordsnap'}),
                      dict(name='a', attrs={'class':'lightboximg'})]
    remove_tags = [dict(name='img', attrs={'src':'/images/am730_article_logo.jpg'}),
                   dict(name='img', attrs={'src':'/images/am_endmark.gif'})]

    def get_dtlocal(self):
        dt_utc = datetime.datetime.utcnow()
        # convert UTC to local hk time - at HKT 6am, all news are available
        return dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(6.0/24)

    def get_fetchdate(self):
        if __Date__ <> '':
            return __Date__
        else:
            return self.get_dtlocal().strftime("%Y%m%d")

    def get_fetchformatteddate(self):
        if __Date__ <> '':
            return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8]
        else:
            return self.get_dtlocal().strftime("%Y-%m-%d")

    def get_fetchyear(self):
        if __Date__ <> '':
            return __Date__[0:4]
        else:
            return self.get_dtlocal().strftime("%Y")

    def get_fetchmonth(self):
        if __Date__ <> '':
            return __Date__[4:6]
        else:
            return self.get_dtlocal().strftime("%m")

    def get_fetchday(self):
        if __Date__ <> '':
            return __Date__[6:8]
        else:
            return self.get_dtlocal().strftime("%d")

    # Note: does not work with custom date given by __Date__
    def get_weekday(self):
        return self.get_dtlocal().weekday()

    def populate_article_metadata(self, article, soup, first):
        if first and hasattr(self, 'add_toc_thumbnail'):
            picdiv = soup.find('img')
            if picdiv is not None:
                self.add_toc_thumbnail(article, picdiv['src'])

    def parse_index(self):
        feeds = []
        soup = self.index_to_soup('http://www.am730.com.hk/')
        ul = soup.find(attrs={'class':'nav-section'})
        sectionList = []
        for li in ul.findAll('li'):
            a = 'http://www.am730.com.hk/' + li.find('a', href=True).get('href', False)
            title = li.find('a').get('title', False).strip()
            sectionList.append((title, a))
        for title, url in sectionList:
            articles = self.parse_section(url)
            if articles:
                feeds.append((title, articles))
        return feeds

    def parse_section(self, url):
        soup = self.index_to_soup(url)
        items = soup.findAll(attrs={'style':'padding-bottom: 15px;'})
        current_articles = []
        for item in items:
            a = item.find(attrs={'class':'t6 f14'}).find('a', href=True)
            articlelink = 'http://www.am730.com.hk/' + a.get('href', True)
            title = self.tag_to_string(a)
            description = self.tag_to_string(item.find(attrs={'class':'t3 f14'}))
            current_articles.append({'title': title, 'url': articlelink, 'description': description})
        return current_articles

    def preprocess_html(self, soup):
        multia = soup.findAll('a')
        for a in multia:
            if not (a == None):
                image = a.find('img')
                if not (image == None):
                    if __HiResImg__:
                        image['src'] = image.get('src').replace('/thumbs/', '/')
                    caption = image.get('alt')
                    tag = Tag(soup, "photo", [])
                    tag2 = Tag(soup, "photocaption", [])
                    tag.insert(0, image)
                    if not caption == None:
                        tag2.insert(0, caption)
                    tag.insert(1, tag2)
                    a.replaceWith(tag)
        return soup

    def create_opf(self, feeds, dir=None):
        if dir is None:
            dir = self.output_dir
        title = self.short_title()
        if self.output_profile.periodical_date_in_title:
            title += strftime(self.timefmt)
        mi = MetaInformation(title, [__appname__])
        mi.publisher = __appname__
        mi.author_sort = __appname__
        if self.publication_type:
            mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
        mi.timestamp = nowf()
        article_titles, aseen = [], set()
        for f in feeds:
            for a in f:
                if a.title and a.title not in aseen:
                    aseen.add(a.title)
                    article_titles.append(force_unicode(a.title, 'utf-8'))

        mi.comments = self.description
        if not isinstance(mi.comments, unicode):
            mi.comments = mi.comments.decode('utf-8', 'replace')
        mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' +
                '\n\n'.join(article_titles))

        language = canonicalize_lang(self.language)
        if language is not None:
            mi.language = language
        # This one affects the pub date shown in kindle title
        #mi.pubdate = nowf()
        # now appears to need the time field to be > 12.00noon as well
        mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
        opf_path = os.path.join(dir, 'index.opf')
        ncx_path = os.path.join(dir, 'index.ncx')

        opf = OPFCreator(dir, mi)
        # Add mastheadImage entry to <guide> section
        mp = getattr(self, 'masthead_path', None)
        if mp is not None and os.access(mp, os.R_OK):
            from calibre.ebooks.metadata.opf2 import Guide
            ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
            ref.type = 'masthead'
            ref.title = 'Masthead Image'
            opf.guide.append(ref)

        manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
        manifest.append(os.path.join(dir, 'index.html'))
        manifest.append(os.path.join(dir, 'index.ncx'))

        # Get cover
        cpath = getattr(self, 'cover_path', None)
        if cpath is None:
            pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
            if self.default_cover(pf):
                cpath = pf.name
        if cpath is not None and os.access(cpath, os.R_OK):
            opf.cover = cpath
            manifest.append(cpath)

        # Get masthead
        mpath = getattr(self, 'masthead_path', None)
        if mpath is not None and os.access(mpath, os.R_OK):
            manifest.append(mpath)

        opf.create_manifest_from_files_in(manifest)
        for mani in opf.manifest:
            if mani.path.endswith('.ncx'):
                mani.id = 'ncx'
            if mani.path.endswith('mastheadImage.jpg'):
                mani.id = 'masthead-image'

        entries = ['index.html']
        toc = TOC(base_path=dir)
        self.play_order_counter = 0
        self.play_order_map = {}

        def feed_index(num, parent):
            f = feeds[num]
            for j, a in enumerate(f):
                if getattr(a, 'downloaded', False):
                    adir = 'feed_%d/article_%d/'%(num, j)
                    auth = a.author
                    if not auth:
                        auth = None
                    desc = a.text_summary
                    if not desc:
                        desc = None
                    else:
                        desc = self.description_limiter(desc)
                    tt = a.toc_thumbnail if a.toc_thumbnail else None
                    entries.append('%sindex.html'%adir)
                    po = self.play_order_map.get(entries[-1], None)
                    if po is None:
                        self.play_order_counter += 1
                        po = self.play_order_counter
                    parent.add_item('%sindex.html'%adir, None,
                            a.title if a.title else _('Untitled Article'),
                            play_order=po, author=auth,
                            description=desc, toc_thumbnail=tt)
                    last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
                    for sp in a.sub_pages:
                        prefix = os.path.commonprefix([opf_path, sp])
                        relp = sp[len(prefix):]
                        entries.append(relp.replace(os.sep, '/'))
                        last = sp

                    if os.path.exists(last):
                        with open(last, 'rb') as fi:
                            src = fi.read().decode('utf-8')
                        soup = BeautifulSoup(src)
                        body = soup.find('body')
                        if body is not None:
                            prefix = '/'.join('..' for i in range(2*len(re.findall(r'link\d+', last))))
                            templ = self.navbar.generate(True, num, j, len(f),
                                            not self.has_single_feed,
                                            a.orig_url, __appname__, prefix=prefix,
                                            center=self.center_navbar)
                            elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
                            body.insert(len(body.contents), elem)
                            with open(last, 'wb') as fi:
                                fi.write(unicode(soup).encode('utf-8'))

        if len(feeds) == 0:
            raise Exception('All feeds are empty, aborting.')

        if len(feeds) > 1:
            for i, f in enumerate(feeds):
                entries.append('feed_%d/index.html'%i)
                po = self.play_order_map.get(entries[-1], None)
                if po is None:
                    self.play_order_counter += 1
                    po = self.play_order_counter
                auth = getattr(f, 'author', None)
                if not auth:
                    auth = None
                desc = getattr(f, 'description', None)
                if not desc:
                    desc = None
                feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
                    f.title, play_order=po, description=desc, author=auth))
        else:
            entries.append('feed_%d/index.html'%0)
            feed_index(0, toc)

        for i, p in enumerate(entries):
            entries[i] = os.path.join(dir, p.replace('/', os.sep))
        opf.create_spine(entries)
        opf.set_toc(toc)

        with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
            opf.render(opf_file, ncx_file)
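A recipe file like the one above can be smoke-tested from the command line before it is added to the builtin collection: --test restricts the download to a couple of articles per feed and -vv prints the network activity. The output filename below is arbitrary.

    ebook-convert am730.recipe am730.epub --test -vv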
@@ -1,161 +1,275 @@
# -*- coding: utf-8 -*-
|
# vim:fileencoding=UTF-8
|
||||||
import re
|
from __future__ import unicode_literals
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2013, Eddie Lau'
|
||||||
|
__Date__ = ''
|
||||||
|
|
||||||
|
from calibre import (__appname__, force_unicode, strftime)
|
||||||
|
from calibre.utils.date import now as nowf
|
||||||
|
import os, datetime, re
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
from contextlib import nested
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
|
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||||
|
from calibre.ebooks.metadata.toc import TOC
|
||||||
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
|
from calibre.utils.localization import canonicalize_lang
|
||||||
|
|
||||||
class AppleDaily(BasicNewsRecipe):
|
class AppleDaily(BasicNewsRecipe):
|
||||||
|
title = u'蘋果日報 (香港)'
|
||||||
title = u'蘋果日報'
|
__author__ = 'Eddie Lau'
|
||||||
__author__ = u'蘋果日報'
|
publisher = '蘋果日報'
|
||||||
__publisher__ = u'蘋果日報'
|
oldest_article = 1
|
||||||
description = u'蘋果日報'
|
max_articles_per_feed = 100
|
||||||
masthead_url = 'http://hk.apple.nextmedia.com/template/common/header/2009/images/atnextheader_logo_appledaily.gif'
|
auto_cleanup = False
|
||||||
language = 'zh_TW'
|
language = 'zh'
|
||||||
encoding = 'UTF-8'
|
encoding = 'utf-8'
|
||||||
timefmt = ' [%a, %d %b, %Y]'
|
auto_cleanup = False
|
||||||
needs_subscription = False
|
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
remove_tags_before = dict(name=['ul', 'h1'])
|
use_embedded_content = False
|
||||||
remove_tags_after = dict(name='form')
|
|
||||||
remove_tags = [dict(attrs={'class':['articleTools', 'post-tools', 'side_tool', 'nextArticleLink clearfix']}),
|
|
||||||
dict(id=['footer', 'toolsRight', 'articleInline', 'navigation', 'archive', 'side_search', 'blog_sidebar', 'side_tool', 'side_index']),
|
|
||||||
dict(name=['script', 'noscript', 'style', 'form'])]
|
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
extra_css = '''
|
description = 'http://hkm.appledaily.com/'
|
||||||
@font-face {font-family: "uming", serif, sans-serif; src: url(res:///usr/share/fonts/truetype/arphic/uming.ttc); }\n
|
category = 'Chinese, News, Hong Kong'
|
||||||
body {margin-right: 8pt; font-family: 'uming', serif;}
|
masthead_url = 'http://upload.wikimedia.org/wikipedia/zh/c/cf/AppleDailyLogo1.png'
|
||||||
h1 {font-family: 'uming', serif, sans-serif}
|
|
||||||
'''
|
|
||||||
#extra_css = 'h1 {font: sans-serif large;}\n.byline {font:monospace;}'
|
|
||||||
|
|
||||||
preprocess_regexps = [
|
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} h1 {font-size:200%; text-align:left; font-weight:bold;} p[class=video-caption] {font-size:50%; margin-left:auto; margin-right:auto;}'
|
||||||
(re.compile(r'img.php?server=(?P<server>[^&]+)&path=(?P<path>[^&]+).*', re.DOTALL|re.IGNORECASE),
|
keep_only_tags = [dict(name='div', attrs={'id':'content-article'})]
|
||||||
lambda match: 'http://' + match.group('server') + '/' + match.group('path')),
|
remove_tags = [dict(name='div', attrs={'class':'prev-next-btn'}),
|
||||||
]
|
dict(name='p', attrs={'class':'next'})]
|
||||||
|
|
||||||
|
def get_dtlocal(self):
|
||||||
|
dt_utc = datetime.datetime.utcnow()
|
||||||
|
# convert UTC to local hk time - at HKT 6am, all news are available
|
||||||
|
return dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(6.0/24)
|
||||||
|
|
||||||
|
def get_fetchdate(self):
|
||||||
|
if __Date__ <> '':
|
||||||
|
return __Date__
|
||||||
|
else:
|
||||||
|
return self.get_dtlocal().strftime("%Y%m%d")
|
||||||
|
|
||||||
|
def get_fetchformatteddate(self):
|
||||||
|
if __Date__ <> '':
|
||||||
|
return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8]
|
||||||
|
else:
|
||||||
|
return self.get_dtlocal().strftime("%Y-%m-%d")
|
||||||
|
|
||||||
|
def get_fetchyear(self):
|
||||||
|
if __Date__ <> '':
|
||||||
|
return __Date__[0:4]
|
||||||
|
else:
|
||||||
|
return self.get_dtlocal().strftime("%Y")
|
||||||
|
|
||||||
|
def get_fetchmonth(self):
|
||||||
|
if __Date__ <> '':
|
||||||
|
return __Date__[4:6]
|
||||||
|
else:
|
||||||
|
return self.get_dtlocal().strftime("%m")
|
||||||
|
|
||||||
|
def get_fetchday(self):
|
||||||
|
if __Date__ <> '':
|
||||||
|
return __Date__[6:8]
|
||||||
|
else:
|
||||||
|
return self.get_dtlocal().strftime("%d")
|
||||||
|
|
||||||
|
# Note: does not work with custom date given by __Date__
|
||||||
|
def get_weekday(self):
|
||||||
|
return self.get_dtlocal().weekday()
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
return 'http://hk.apple.nextmedia.com/template/common/header/2009/images/atnextheader_logo_appledaily.gif'
|
soup = self.index_to_soup('http://hkm.appledaily.com/')
|
||||||
|
cover = soup.find(attrs={'class':'top-news'}).get('src', False)
|
||||||
|
br = BasicNewsRecipe.get_browser(self)
|
||||||
#def get_browser(self):
|
try:
|
||||||
#br = BasicNewsRecipe.get_browser(self)
|
br.open(cover)
|
||||||
#if self.username is not None and self.password is not None:
|
except:
|
||||||
# br.open('http://www.nytimes.com/auth/login')
|
cover = None
|
||||||
# br.select_form(name='login')
|
return cover
|
||||||
# br['USERID'] = self.username
|
|
||||||
# br['PASSWORD'] = self.password
|
|
||||||
# br.submit()
|
|
||||||
#return br
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
#process all the images
|
|
||||||
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
|
|
||||||
iurl = tag['src']
|
|
||||||
#print 'checking image: ' + iurl
|
|
||||||
|
|
||||||
#img\.php?server\=(?P<server>[^&]+)&path=(?P<path>[^&]+)
|
|
||||||
p = re.compile(r'img\.php\?server=(?P<server>[^&]+)&path=(?P<path>[^&]+)', re.DOTALL|re.IGNORECASE)
|
|
||||||
|
|
||||||
m = p.search(iurl)
|
|
||||||
|
|
||||||
if m is not None:
|
|
||||||
iurl = 'http://' + m.group('server') + '/' + m.group('path')
|
|
||||||
#print 'working! new url: ' + iurl
|
|
||||||
tag['src'] = iurl
|
|
||||||
#else:
|
|
||||||
#print 'not good'
|
|
||||||
|
|
||||||
for tag in soup.findAll(lambda tag: tag.name.lower()=='a' and tag.has_key('href')):
|
|
||||||
iurl = tag['href']
|
|
||||||
#print 'checking image: ' + iurl
|
|
||||||
|
|
||||||
#img\.php?server\=(?P<server>[^&]+)&path=(?P<path>[^&]+)
|
|
||||||
p = re.compile(r'img\.php\?server=(?P<server>[^&]+)&path=(?P<path>[^&]+)', re.DOTALL|re.IGNORECASE)
|
|
||||||
|
|
||||||
m = p.search(iurl)
|
|
||||||
|
|
||||||
if m is not None:
|
|
||||||
iurl = 'http://' + m.group('server') + '/' + m.group('path')
|
|
||||||
#print 'working! new url: ' + iurl
|
|
||||||
tag['href'] = iurl
|
|
||||||
#else:
|
|
||||||
#print 'not good'
|
|
||||||
|
|
||||||
return soup
|
|
||||||
|
|
||||||
|
def populate_article_metadata(self, article, soup, first):
|
||||||
|
if first and hasattr(self, 'add_toc_thumbnail'):
|
||||||
|
picdiv = soup.find('img')
|
||||||
|
if picdiv is not None:
|
||||||
|
self.add_toc_thumbnail(article,picdiv['src'])
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
base = 'http://news.hotpot.hk/fruit'
|
feeds = []
|
||||||
soup = self.index_to_soup('http://news.hotpot.hk/fruit/index.php')
|
soup = self.index_to_soup('http://hkm.appledaily.com/')
|
||||||
|
ul = soup.find(attrs={'class':'menu'})
|
||||||
|
sectionList = []
|
||||||
|
for li in ul.findAll('li'):
|
||||||
|
a = 'http://hkm.appledaily.com/' + li.find('a', href=True).get('href', False)
|
||||||
|
title = li.find('a', text=True).strip()
|
||||||
|
if not title == u'動新聞':
|
||||||
|
sectionList.append((title, a))
|
||||||
|
for title, url in sectionList:
|
||||||
|
articles = self.parse_section(url)
|
||||||
|
if articles:
|
||||||
|
feeds.append((title, articles))
|
||||||
|
return feeds
|
||||||
|
|
||||||
#def feed_title(div):
|
def parse_section(self, url):
|
||||||
# return ''.join(div.findAll(text=True, recursive=False)).strip()
|
soup = self.index_to_soup(url)
|
||||||
|
ul = soup.find(attrs={'class':'list'})
|
||||||
|
current_articles = []
|
||||||
|
for li in ul.findAll('li'):
|
||||||
|
a = li.find('a', href=True)
|
||||||
|
title = li.find('p', text=True).strip()
|
||||||
|
if a is not None:
|
||||||
|
current_articles.append({'title': title, 'url':'http://hkm.appledaily.com/' + a.get('href', False)})
|
||||||
|
pass
|
||||||
|
return current_articles
|
||||||
|
|
||||||
articles = {}
|
def create_opf(self, feeds, dir=None):
|
||||||
key = None
|
if dir is None:
|
||||||
ans = []
|
dir = self.output_dir
|
||||||
for div in soup.findAll('li'):
|
title = self.short_title()
|
||||||
key = div.find(text=True, recursive=True);
|
if self.output_profile.periodical_date_in_title:
|
||||||
#if key == u'豪情':
|
title += strftime(self.timefmt)
|
||||||
# continue;
|
mi = MetaInformation(title, [__appname__])
|
||||||
|
mi.publisher = __appname__
|
||||||
|
mi.author_sort = __appname__
|
||||||
|
if self.publication_type:
|
||||||
|
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
|
||||||
|
mi.timestamp = nowf()
|
||||||
|
article_titles, aseen = [], set()
|
||||||
|
for f in feeds:
|
||||||
|
for a in f:
|
||||||
|
if a.title and a.title not in aseen:
|
||||||
|
aseen.add(a.title)
|
||||||
|
article_titles.append(force_unicode(a.title, 'utf-8'))
|
||||||
|
|
||||||
print 'section=' + key
|
mi.comments = self.description
|
||||||
|
if not isinstance(mi.comments, unicode):
|
||||||
|
mi.comments = mi.comments.decode('utf-8', 'replace')
|
||||||
|
mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' +
|
||||||
|
'\n\n'.join(article_titles))
|
||||||
|
|
||||||
articles[key] = []
|
language = canonicalize_lang(self.language)
|
||||||
|
if language is not None:
|
||||||
|
mi.language = language
|
||||||
|
# This one affects the pub date shown in kindle title
|
||||||
|
#mi.pubdate = nowf()
|
||||||
|
# now appears to need the time field to be > 12.00noon as well
|
||||||
|
mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
|
||||||
|
opf_path = os.path.join(dir, 'index.opf')
|
||||||
|
ncx_path = os.path.join(dir, 'index.ncx')
|
||||||
|
|
||||||
ans.append(key)
|
opf = OPFCreator(dir, mi)
|
||||||
|
# Add mastheadImage entry to <guide> section
|
||||||
|
mp = getattr(self, 'masthead_path', None)
|
||||||
|
if mp is not None and os.access(mp, os.R_OK):
|
||||||
|
from calibre.ebooks.metadata.opf2 import Guide
|
||||||
|
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
|
||||||
|
ref.type = 'masthead'
|
||||||
|
ref.title = 'Masthead Image'
|
||||||
|
opf.guide.append(ref)
|
||||||
|
|
||||||
a = div.find('a', href=True)
|
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
|
||||||
|
manifest.append(os.path.join(dir, 'index.html'))
|
||||||
|
manifest.append(os.path.join(dir, 'index.ncx'))
|
||||||
|
|
||||||
if not a:
|
# Get cover
|
||||||
continue
|
cpath = getattr(self, 'cover_path', None)
|
||||||
|
if cpath is None:
|
||||||
|
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
|
||||||
|
if self.default_cover(pf):
|
||||||
|
cpath = pf.name
|
||||||
|
if cpath is not None and os.access(cpath, os.R_OK):
|
||||||
|
opf.cover = cpath
|
||||||
|
manifest.append(cpath)
|
||||||
|
|
||||||
url = base + '/' + a['href']
|
# Get masthead
|
||||||
print 'url=' + url
|
mpath = getattr(self, 'masthead_path', None)
|
||||||
|
if mpath is not None and os.access(mpath, os.R_OK):
|
||||||
|
manifest.append(mpath)
|
||||||
|
|
||||||
if not articles.has_key(key):
|
opf.create_manifest_from_files_in(manifest)
|
||||||
articles[key] = []
|
for mani in opf.manifest:
|
||||||
else:
|
if mani.path.endswith('.ncx'):
|
||||||
# sub page
|
mani.id = 'ncx'
|
||||||
subSoup = self.index_to_soup(url)
|
if mani.path.endswith('mastheadImage.jpg'):
|
||||||
|
mani.id = 'masthead-image'
|
||||||
|
|
||||||
for subDiv in subSoup.findAll('li'):
|
entries = ['index.html']
|
||||||
subA = subDiv.find('a', href=True)
|
toc = TOC(base_path=dir)
|
||||||
subTitle = subDiv.find(text=True, recursive=True)
|
self.play_order_counter = 0
|
||||||
subUrl = base + '/' + subA['href']
|
self.play_order_map = {}
|
||||||
|
|
||||||
print 'subUrl' + subUrl
|
|
||||||
|
|
||||||
articles[key].append(
|
|
||||||
dict(title=subTitle,
|
|
||||||
url=subUrl,
|
|
||||||
date='',
|
|
||||||
description='',
|
|
||||||
content=''))
|
|
||||||
|
|
||||||
|
|
||||||
# elif div['class'] in ['story', 'story headline']:
|
def feed_index(num, parent):
|
||||||
# a = div.find('a', href=True)
|
f = feeds[num]
|
||||||
# if not a:
|
for j, a in enumerate(f):
|
||||||
# continue
|
if getattr(a, 'downloaded', False):
|
||||||
# url = re.sub(r'\?.*', '', a['href'])
|
adir = 'feed_%d/article_%d/'%(num, j)
|
||||||
# url += '?pagewanted=all'
|
auth = a.author
|
||||||
# title = self.tag_to_string(a, use_alt=True).strip()
|
if not auth:
|
||||||
# description = ''
|
auth = None
|
||||||
# pubdate = strftime('%a, %d %b')
|
desc = a.text_summary
|
||||||
# summary = div.find(True, attrs={'class':'summary'})
|
if not desc:
|
||||||
# if summary:
|
desc = None
|
||||||
# description = self.tag_to_string(summary, use_alt=False)
|
else:
|
||||||
#
|
desc = self.description_limiter(desc)
|
||||||
# feed = key if key is not None else 'Uncategorized'
|
tt = a.toc_thumbnail if a.toc_thumbnail else None
|
||||||
# if not articles.has_key(feed):
|
entries.append('%sindex.html'%adir)
|
||||||
# articles[feed] = []
|
po = self.play_order_map.get(entries[-1], None)
|
||||||
# if not 'podcasts' in url:
|
if po is None:
|
||||||
# articles[feed].append(
|
self.play_order_counter += 1
|
||||||
# dict(title=title, url=url, date=pubdate,
|
po = self.play_order_counter
|
||||||
# description=description,
|
parent.add_item('%sindex.html'%adir, None,
|
||||||
# content=''))
|
a.title if a.title else _('Untitled Article'),
|
||||||
# ans = self.sort_index_by(ans, {'The Front Page':-1, 'Dining In, Dining Out':1, 'Obituaries':2})
|
play_order=po, author=auth,
|
||||||
ans = [(unicode(key), articles[key]) for key in ans if articles.has_key(key)]
|
description=desc, toc_thumbnail=tt)
|
||||||
return ans
|
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
|
||||||
|
for sp in a.sub_pages:
|
||||||
|
prefix = os.path.commonprefix([opf_path, sp])
|
||||||
|
relp = sp[len(prefix):]
|
||||||
|
entries.append(relp.replace(os.sep, '/'))
|
||||||
|
last = sp
|
||||||
|
|
||||||
|
if os.path.exists(last):
|
||||||
|
with open(last, 'rb') as fi:
|
||||||
|
src = fi.read().decode('utf-8')
|
||||||
|
soup = BeautifulSoup(src)
|
||||||
|
body = soup.find('body')
|
||||||
|
if body is not None:
|
||||||
|
prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
|
||||||
|
templ = self.navbar.generate(True, num, j, len(f),
|
||||||
|
not self.has_single_feed,
|
||||||
|
a.orig_url, __appname__, prefix=prefix,
|
||||||
|
center=self.center_navbar)
|
||||||
|
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
|
||||||
|
body.insert(len(body.contents), elem)
|
||||||
|
with open(last, 'wb') as fi:
|
||||||
|
fi.write(unicode(soup).encode('utf-8'))
|
||||||
|
if len(feeds) == 0:
|
||||||
|
raise Exception('All feeds are empty, aborting.')
|
||||||
|
|
||||||
|
if len(feeds) > 1:
|
||||||
|
for i, f in enumerate(feeds):
|
||||||
|
entries.append('feed_%d/index.html'%i)
|
||||||
|
po = self.play_order_map.get(entries[-1], None)
|
||||||
|
if po is None:
|
||||||
|
self.play_order_counter += 1
|
||||||
|
po = self.play_order_counter
|
||||||
|
auth = getattr(f, 'author', None)
|
||||||
|
if not auth:
|
||||||
|
auth = None
|
||||||
|
desc = getattr(f, 'description', None)
|
||||||
|
if not desc:
|
||||||
|
desc = None
|
||||||
|
feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
|
||||||
|
f.title, play_order=po, description=desc, author=auth))
|
||||||
|
|
||||||
|
else:
|
||||||
|
entries.append('feed_%d/index.html'%0)
|
||||||
|
feed_index(0, toc)
|
||||||
|
|
||||||
|
for i, p in enumerate(entries):
|
||||||
|
entries[i] = os.path.join(dir, p.replace('/', os.sep))
|
||||||
|
opf.create_spine(entries)
|
||||||
|
opf.set_toc(toc)
|
||||||
|
|
||||||
|
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
|
||||||
|
opf.render(opf_file, ncx_file)
|
||||||
|
|
||||||
|
@@ -9,14 +9,14 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
     __author__ = 'Dave Asbury'
     cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/161987_9010212100_2035706408_n.jpg'
     oldest_article = 2
-    max_articles_per_feed = 12
+    max_articles_per_feed = 20
     linearize_tables = True
     remove_empty_feeds = True
     remove_javascript = True
     no_stylesheets = True
     auto_cleanup = True
     language = 'en_GB'
+    compress_news_images = True
     cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/161987_9010212100_2035706408_n.jpg'

     masthead_url = 'http://www.trinitymirror.com/images/birminghampost-logo.gif'
@@ -1,3 +1,4 @@
+import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
 from collections import OrderedDict

@@ -39,7 +40,7 @@ class BusinessWeekMagazine(BasicNewsRecipe):
             title=self.tag_to_string(div.a).strip()
             url=div.a['href']
             soup0 = self.index_to_soup(url)
-            urlprint=soup0.find('li', attrs={'class':'print tracked'}).a['href']
+            urlprint=soup0.find('a', attrs={'href':re.compile('.*printer.*')})['href']
             articles.append({'title':title, 'url':urlprint, 'description':'', 'date':''})

@@ -56,7 +57,7 @@ class BusinessWeekMagazine(BasicNewsRecipe):
             title=self.tag_to_string(div.a).strip()
             url=div.a['href']
             soup0 = self.index_to_soup(url)
-            urlprint=soup0.find('li', attrs={'class':'print tracked'}).a['href']
+            urlprint=soup0.find('a', attrs={'href':re.compile('.*printer.*')})['href']
             articles.append({'title':title, 'url':urlprint, 'description':desc, 'date':''})

         if articles:
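The Business Week fix above stops looking for a 'print tracked' list item and instead matches the print link by URL pattern. A small self-contained sketch of that BeautifulSoup idiom, using invented markup:

    import re
    from calibre.ebooks.BeautifulSoup import BeautifulSoup

    html = '<div><a href="/articles/123/printer_friendly">Print</a></div>'  # invented example markup
    soup = BeautifulSoup(html)
    # first <a> whose href matches the pattern, as the updated recipe does
    link = soup.find('a', attrs={'href': re.compile('.*printer.*')})
    print link['href']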
@@ -7,13 +7,14 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
     #cover_url = 'http://www.countryfile.com/sites/default/files/imagecache/160px_wide/cover/2_1.jpg'
     __author__ = 'Dave Asbury'
     description = 'The official website of Countryfile Magazine'
-    # last updated 8/12/12
+    # last updated 19/10/12
     language = 'en_GB'
     oldest_article = 30
     max_articles_per_feed = 25
     remove_empty_feeds = True
     no_stylesheets = True
     auto_cleanup = True
+    compress_news_images = True
     ignore_duplicate_articles = {'title', 'url'}
     #articles_are_obfuscated = True
     #article_already_exists = False
@@ -13,9 +13,9 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):

     masthead_url = 'http://www.nmauk.co.uk/nma/images/daily_mirror.gif'

+    compress_news_images = True
     oldest_article = 1
-    max_articles_per_feed = 1
+    max_articles_per_feed = 12
     remove_empty_feeds = True
     remove_javascript = True
     no_stylesheets = True
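Several of the recipe tweaks in this commit simply add compress_news_images = True, which asks the news download system to recompress fetched images so the finished book stays small. The related knobs that appear later in this same commit (in the New Yorker recipe) are collected below as an illustrative sketch; the recipe name and values are examples, not recommendations.

    from calibre.web.feeds.news import BasicNewsRecipe

    class ExampleRecipe(BasicNewsRecipe):
        title = 'Example'                       # hypothetical recipe, for illustration only
        compress_news_images = True             # recompress downloaded images
        compress_news_images_auto_size = 8      # same value the New Yorker recipe in this commit uses
        scale_news_images_to_device = False     # do not rescale to the output profile's screen
        scale_news_images = (768, 1024)         # cap image dimensions instead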
recipes/diario_de_noticias.recipe (new file, 23 lines)
@@ -0,0 +1,23 @@
# vim:fileencoding=UTF-8

from __future__ import unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1365070687(BasicNewsRecipe):
    title = 'Diário de Notícias'
    oldest_article = 7
    language = 'pt'
    __author__ = 'Jose Pinto'
    max_articles_per_feed = 100
    keep_only_tags = [dict(name='div', attrs={'id':'cln-esqmid'}) ]
    remove_tags = [ dict(name='table', attrs={'class':'TabFerramentasInf'}) ]

    feeds = [(u'Portugal', u'http://feeds.dn.pt/DN-Portugal'),
             (u'Globo', u'http://feeds.dn.pt/DN-Globo'),
             (u'Economia', u'http://feeds.dn.pt/DN-Economia'),
             (u'Ci\xeancia', u'http://feeds.dn.pt/DN-Ciencia'),
             (u'Artes', u'http://feeds.dn.pt/DN-Artes'),
             (u'TV & Media', u'http://feeds.dn.pt/DN-Media'),
             (u'Opini\xe3o', u'http://feeds.dn.pt/DN-Opiniao'),
             (u'Pessoas', u'http://feeds.dn.pt/DN-Pessoas')
            ]
recipes/economia.recipe (new file, 17 lines)
@@ -0,0 +1,17 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1314326622(BasicNewsRecipe):
    title = u'Economia'
    __author__ = 'Manish Bhattarai'
    description = 'Economia - Intelligence & Insight for ICAEW Members'
    language = 'en_GB'
    oldest_article = 7
    max_articles_per_feed = 25
    masthead_url = 'http://economia.icaew.com/~/media/Images/Design%20Images/Economia_Red_website.ashx'
    cover_url = 'http://economia.icaew.com/~/media/Images/Design%20Images/Economia_Red_website.ashx'
    no_stylesheets = True
    remove_empty_feeds = True
    remove_tags_before = dict(id='content')
    remove_tags_after = dict(id='stars-wrapper')
    remove_tags = [dict(attrs={'class':['floatR', 'sharethis', 'rating clearfix']})]
    feeds = [(u'News', u'http://feedity.com/icaew-com/VlNTVFRa.rss'),(u'Business', u'http://feedity.com/icaew-com/VlNTVFtS.rss'),(u'People', u'http://feedity.com/icaew-com/VlNTVFtX.rss'),(u'Opinion', u'http://feedity.com/icaew-com/VlNTVFtW.rss'),(u'Finance', u'http://feedity.com/icaew-com/VlNTVFtV.rss')]
@@ -8,6 +8,7 @@ import datetime
 from calibre.ptempfile import PersistentTemporaryFile
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
+from collections import OrderedDict


 class FinancialTimes(BasicNewsRecipe):
     title = 'Financial Times (UK)'
@@ -93,7 +94,7 @@ class FinancialTimes(BasicNewsRecipe):
            try:
                urlverified = self.browser.open_novisit(url).geturl() # resolve redirect.
            except:
                continue
            title = self.tag_to_string(item)
            date = strftime(self.timefmt)
            articles.append({
|
|||||||
return articles
|
return articles
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
feeds = []
|
feeds = OrderedDict()
|
||||||
soup = self.index_to_soup(self.INDEX)
|
soup = self.index_to_soup(self.INDEX)
|
||||||
dates= self.tag_to_string(soup.find('div', attrs={'class':'btm-links'}).find('div'))
|
#dates= self.tag_to_string(soup.find('div', attrs={'class':'btm-links'}).find('div'))
|
||||||
self.timefmt = ' [%s]'%dates
|
#self.timefmt = ' [%s]'%dates
|
||||||
wide = soup.find('div',attrs={'class':'wide'})
|
section_title = 'Untitled'
|
||||||
if not wide:
|
|
||||||
return feeds
|
for column in soup.findAll('div', attrs = {'class':'feedBoxes clearfix'}):
|
||||||
allsections = wide.findAll(attrs={'class':lambda x: x and 'footwell' in x.split()})
|
for section in column. findAll('div', attrs = {'class':'feedBox'}):
|
||||||
if not allsections:
|
sectiontitle=self.tag_to_string(section.find('h4'))
|
||||||
return feeds
|
if '...' not in sectiontitle: section_title=sectiontitle
|
||||||
count = 0
|
for article in section.ul.findAll('li'):
|
||||||
for item in allsections:
|
articles = []
|
||||||
count = count + 1
|
title=self.tag_to_string(article.a)
|
||||||
if self.test and count > 2:
|
url=article.a['href']
|
||||||
return feeds
|
articles.append({'title':title, 'url':url, 'description':'', 'date':''})
|
||||||
fitem = item.h3
|
|
||||||
if not fitem:
|
if articles:
|
||||||
fitem = item.h4
|
if section_title not in feeds:
|
||||||
ftitle = self.tag_to_string(fitem)
|
feeds[section_title] = []
|
||||||
self.report_progress(0, _('Fetching feed')+' %s...'%(ftitle))
|
feeds[section_title] += articles
|
||||||
feedarts = self.get_artlinks(item.ul)
|
|
||||||
feeds.append((ftitle,feedarts))
|
|
||||||
return feeds
|
ans = [(key, val) for key, val in feeds.iteritems()]
|
||||||
|
return ans
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
items = ['promo-box','promo-title',
|
items = ['promo-box','promo-title',
|
||||||
@@ -174,9 +176,6 @@ class FinancialTimes(BasicNewsRecipe):
             count += 1
             tfile = PersistentTemporaryFile('_fa.html')
             tfile.write(html)
             tfile.close()
             self.temp_files.append(tfile)
             return tfile.name
-
-    def cleanup(self):
-        self.browser.open('https://registration.ft.com/registration/login/logout?location=')
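The reworked Financial Times parse_index above collects articles into an OrderedDict keyed by section title, so sections come out in the order they are first seen on the index page. A stripped-down sketch of the same grouping pattern with invented data:

    from collections import OrderedDict

    # (section, title) pairs in the order they appear on the page -- invented data
    found = [('World', 'Article A'), ('Companies', 'Article B'), ('World', 'Article C')]

    feeds = OrderedDict()
    for section_title, title in found:
        if section_title not in feeds:
            feeds[section_title] = []
        feeds[section_title].append({'title': title, 'url': '', 'description': '', 'date': ''})

    # calibre's parse_index must return a list of (section, list-of-articles) tuples
    ans = [(key, val) for key, val in feeds.items()]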
@@ -5,7 +5,6 @@ __license__ = 'GPL v3'
 from calibre.web.feeds.news import BasicNewsRecipe
 import datetime
 import re
-from calibre.ebooks.BeautifulSoup import Comment


 class forbes_pl(BasicNewsRecipe):
     title = u'Forbes.pl'
@@ -26,9 +25,9 @@ class forbes_pl(BasicNewsRecipe):
     pages_count = 4
     keep_only_tags = [dict(attrs={'class':['Block-Node Content-Article ', 'Block-Node Content-Article piano-closed']})]
     remove_tags = [dict(attrs={'class':['Keywords Styled', 'twitter-share-button', 'Block-List-Related Block-List']})]

     feeds = [(u'Wszystkie', 'http://www.forbes.pl/rss')]

     '''def preprocess_html(self, soup):
        self.append_page(soup, soup.body)
        return soup
@@ -51,4 +50,4 @@ class forbes_pl(BasicNewsRecipe):
        appendtag.insert(pos, pagetext)
        if cleanup:
            for r in appendtag.findAll(attrs={'class':'paginator'}):
                r.extract()'''
recipes/galaxys_edge.recipe (new file, 108 lines)
@@ -0,0 +1,108 @@
from __future__ import with_statement

__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'

from calibre.web.feeds.news import BasicNewsRecipe


class GalaxyEdge(BasicNewsRecipe):
    title = u'The Galaxy\'s Edge'
    language = 'en'

    oldest_article = 7
    __author__ = 'Krittika Goyal'
    no_stylesheets = True

    auto_cleanup = True

    #keep_only_tags = [dict(id='content')]
    #remove_tags = [dict(attrs={'class':['article-links', 'breadcr']}),
        #dict(id=['email-section', 'right-column', 'printfooter', 'topover',
        #'slidebox', 'th_footer'])]

    extra_css = '.photo-caption { font-size: smaller }'

    def parse_index(self):
        soup = self.index_to_soup('http://www.galaxysedge.com/')
        main = soup.find('table', attrs={'width':'911'})
        toc = main.find('td', attrs={'width':'225'})

        current_section = None
        current_articles = []
        feeds = []
        c = 0
        for x in toc.findAll(['p']):
            c = c+1
            if c == 5:
                if current_articles and current_section:
                    feeds.append((current_section, current_articles))
                edwo = x.find('a')
                current_section = self.tag_to_string(edwo)
                current_articles = []
                self.log('\tFound section:', current_section)
                title = self.tag_to_string(edwo)
                url = edwo.get('href', True)
                url = 'http://www.galaxysedge.com/'+url
                print(title)
                print(c)
                if not url or not title:
                    continue
                self.log('\t\tFound article:', title)
                self.log('\t\t\t', url)
                current_articles.append({'title': title, 'url':url,
                    'description':'', 'date':''})
            elif c>5:
                current_section = self.tag_to_string(x.find('b'))
                current_articles = []
                self.log('\tFound section:', current_section)
                for y in x.findAll('a'):
                    title = self.tag_to_string(y)
                    url = y.get('href', True)
                    url = 'http://www.galaxysedge.com/'+url
                    print(title)
                    if not url or not title:
                        continue
                    self.log('\t\tFound article:', title)
                    self.log('\t\t\t', url)
                    current_articles.append({'title': title, 'url':url,
                        'description':'', 'date':''})
        if current_articles and current_section:
            feeds.append((current_section, current_articles))

        return feeds


    #def preprocess_raw_html(self, raw, url):
        #return raw.replace('<body><p>', '<p>').replace('</p></body>', '</p>')

    #def postprocess_html(self, soup, first_fetch):
        #for t in soup.findAll(['table', 'tr', 'td','center']):
            #t.name = 'div'
        #return soup

    #def parse_index(self):
        #today = time.strftime('%Y-%m-%d')
        #soup = self.index_to_soup(
            #'http://www.thehindu.com/todays-paper/tp-index/?date=' + today)
        #div = soup.find(id='left-column')
        #feeds = []
        #current_section = None
        #current_articles = []
        #for x in div.findAll(['h3', 'div']):
            #if current_section and x.get('class', '') == 'tpaper':
                #a = x.find('a', href=True)
                #if a is not None:
                    #current_articles.append({'url':a['href']+'?css=print',
                        #'title':self.tag_to_string(a), 'date': '',
                        #'description':''})
            #if x.name == 'h3':
                #if current_section and current_articles:
                    #feeds.append((current_section, current_articles))
                #current_section = self.tag_to_string(x)
                #current_articles = []
        #return feeds
@@ -1,6 +1,4 @@
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
import re
|
|
||||||
from datetime import date, timedelta
|
|
||||||
|
|
||||||
class HBR(BasicNewsRecipe):
|
class HBR(BasicNewsRecipe):
|
||||||
|
|
||||||
@ -11,23 +9,18 @@ class HBR(BasicNewsRecipe):
|
|||||||
timefmt = ' [%B %Y]'
|
timefmt = ' [%B %Y]'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
# recipe_disabled = ('hbr.org has started requiring the use of javascript'
|
|
||||||
# ' to log into their website. This is unsupported in calibre, so'
|
|
||||||
# ' this recipe has been disabled. If you would like to see '
|
|
||||||
# ' HBR supported in calibre, contact hbr.org and ask them'
|
|
||||||
# ' to provide a javascript free login method.')
|
|
||||||
|
|
||||||
LOGIN_URL = 'https://hbr.org/login?request_url=/'
|
LOGIN_URL = 'https://hbr.org/login?request_url=/'
|
||||||
LOGOUT_URL = 'https://hbr.org/logout?request_url=/'
|
LOGOUT_URL = 'https://hbr.org/logout?request_url=/'
|
||||||
|
|
||||||
INDEX = 'http://hbr.org/archive-toc/BR'
|
INDEX = 'http://hbr.org'
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', id='pageContainer')]
|
keep_only_tags = [dict(name='div', id='pageContainer')]
|
||||||
remove_tags = [dict(id=['mastheadContainer', 'magazineHeadline',
|
remove_tags = [dict(id=['mastheadContainer', 'magazineHeadline',
|
||||||
'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn',
|
'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn',
|
||||||
'todayOnHBRListWidget', 'mostWidget', 'keepUpWithHBR',
|
'todayOnHBRListWidget', 'mostWidget', 'keepUpWithHBR',
|
||||||
'mailingListTout', 'partnerCenter', 'pageFooter',
|
'mailingListTout', 'partnerCenter', 'pageFooter',
|
||||||
'superNavHeadContainer', 'hbrDisqus',
|
'superNavHeadContainer', 'hbrDisqus', 'article-toolbox',
|
||||||
'articleToolbarTop', 'articleToolbarBottom', 'articleToolbarRD']),
|
'articleToolbarTop', 'articleToolbarBottom', 'articleToolbarRD']),
|
||||||
dict(name='iframe')]
|
dict(name='iframe')]
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
@ -57,22 +50,6 @@ class HBR(BasicNewsRecipe):
|
|||||||
if url.endswith('/ar/1'):
|
if url.endswith('/ar/1'):
|
||||||
return url[:-1]+'pr'
|
return url[:-1]+'pr'
|
||||||
|
|
||||||
def hbr_get_toc(self):
|
|
||||||
# return self.index_to_soup(open('/t/toc.html').read())
|
|
||||||
|
|
||||||
today = date.today()
|
|
||||||
future = today + timedelta(days=30)
|
|
||||||
past = today - timedelta(days=30)
|
|
||||||
for x in [x.strftime('%y%m') for x in (future, today, past)]:
|
|
||||||
url = self.INDEX + x
|
|
||||||
soup = self.index_to_soup(url)
|
|
||||||
if (not soup.find(text='Issue Not Found') and not soup.find(
|
|
||||||
text="We're Sorry. There was an error processing your request")
|
|
||||||
and 'Exception: java.io.FileNotFoundException' not in
|
|
||||||
unicode(soup)):
|
|
||||||
return soup
|
|
||||||
raise Exception('Could not find current issue')
|
|
||||||
|
|
||||||
def hbr_parse_toc(self, soup):
|
def hbr_parse_toc(self, soup):
|
||||||
feeds = []
|
feeds = []
|
||||||
current_section = None
|
current_section = None
|
||||||
@ -105,23 +82,19 @@ class HBR(BasicNewsRecipe):
|
|||||||
|
|
||||||
articles.append({'title':title, 'url':url, 'description':desc,
|
articles.append({'title':title, 'url':url, 'description':desc,
|
||||||
'date':''})
|
'date':''})
|
||||||
|
|
||||||
|
if current_section is not None and articles:
|
||||||
|
feeds.append((current_section, articles))
|
||||||
return feeds
|
return feeds
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
soup = self.hbr_get_toc()
|
soup0 = self.index_to_soup('http://hbr.org/magazine')
|
||||||
# open('/t/hbr.html', 'wb').write(unicode(soup).encode('utf-8'))
|
datencover = soup0.find('ul', attrs={'id':'magazineArchiveCarousel'}).findAll('li')[-1]
|
||||||
|
#find date & cover
|
||||||
|
self.cover_url=datencover.img['src']
|
||||||
|
dates=self.tag_to_string(datencover.img['alt'])
|
||||||
|
self.timefmt = u' [%s]'%dates
|
||||||
|
soup = self.index_to_soup(self.INDEX + soup0.find('div', attrs = {'class':'magazine_page'}).a['href'])
|
||||||
feeds = self.hbr_parse_toc(soup)
|
feeds = self.hbr_parse_toc(soup)
|
||||||
return feeds
|
return feeds
|
||||||
|
|
||||||
def get_cover_url(self):
|
|
||||||
cover_url = None
|
|
||||||
index = 'http://hbr.org/current'
|
|
||||||
soup = self.index_to_soup(index)
|
|
||||||
link_item = soup.find('img', alt=re.compile("Current Issue"), src=True)
|
|
||||||
|
|
||||||
if link_item:
|
|
||||||
cover_url = 'http://hbr.org' + link_item['src']
|
|
||||||
|
|
||||||
return cover_url
|
|
||||||
|
|
||||||
|
|
||||||
|
@@ -1,33 +1,23 @@
-# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+# vim:fileencoding=UTF-8
+from __future__ import unicode_literals
 from calibre.web.feeds.news import BasicNewsRecipe

 class AListApart (BasicNewsRecipe):
-    __author__ = u'Marc Busqué <marc@lamarciana.com>'
+    __author__ = 'Marc Busqué <marc@lamarciana.com>'
     __url__ = 'http://www.lamarciana.com'
-    __version__ = '1.0'
+    __version__ = '2.0'
     __license__ = 'GPL v3'
-    __copyright__ = u'2012, Marc Busqué <marc@lamarciana.com>'
+    __copyright__ = '2012, Marc Busqué <marc@lamarciana.com>'
     title = u'A List Apart'
-    description = u'A List Apart Magazine (ISSN: 1534-0295) explores the design, development, and meaning of web content, with a special focus on web standards and best practices.'
+    description = u'A List Apart Magazine (ISSN: 1534-0295) explores the design, development, and meaning of web content, with a special focus on web standards and best practices. This recipe retrieve articles and columns.'
     language = 'en'
     tags = 'web development, software'
     oldest_article = 120
     remove_empty_feeds = True
-    no_stylesheets = True
     encoding = 'utf8'
     cover_url = u'http://alistapart.com/pix/alalogo.gif'
-    keep_only_tags = [
-        dict(name='div', attrs={'id': 'content'})
-    ]
-    remove_tags = [
-        dict(name='ul', attrs={'id': 'metastuff'}),
-        dict(name='div', attrs={'class': 'discuss'}),
-        dict(name='div', attrs={'class': 'discuss'}),
-        dict(name='div', attrs={'id': 'learnmore'}),
-    ]
-    remove_attributes = ['border', 'cellspacing', 'align', 'cellpadding', 'colspan', 'valign', 'vspace', 'hspace', 'alt', 'width', 'height']
-    extra_css = u'img {max-width: 100%; display: block; margin: auto;} #authorbio img {float: left; margin-right: 2%;}'
+    extra_css = u'img {max-width: 100%; display: block; margin: auto;}'

     feeds = [
-        (u'A List Apart', u'http://www.alistapart.com/site/rss'),
+        (u'A List Apart', u'http://feeds.feedburner.com/alistapart/abridged'),
     ]
@@ -6,10 +6,10 @@ import time

 class AdvancedUserRecipe1306097511(BasicNewsRecipe):
     title = u'Metro UK'
-    description = 'News as provided by The Metro -UK'
+    description = 'News from The Metro, UK'
     #timefmt = ''
-    __author__ = 'fleclerc & Dave Asbury'
-    #last update 20/1/13
+    __author__ = 'Dave Asbury'
+    #last update 4/4/13
     #cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/276636_117118184990145_2132092232_n.jpg'

     cover_url = 'https://twimg0-a.akamaihd.net/profile_images/1638332595/METRO_LETTERS-01.jpg'
@@ -22,7 +22,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):

     language = 'en_GB'
     masthead_url = 'http://e-edition.metro.co.uk/images/metro_logo.gif'
+    compress_news_images = True
     def parse_index(self):
         articles = {}
         key = None
@@ -1,64 +1,44 @@
|
#!/usr/bin/env python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008-2013, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
newyorker.com
|
|
||||||
'''
|
|
||||||
|
|
||||||
|
'''
|
||||||
|
www.canada.com
|
||||||
|
'''
|
||||||
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
|
||||||
|
|
||||||
class NewYorker(BasicNewsRecipe):
|
class NewYorker(BasicNewsRecipe):
|
||||||
title = 'The New Yorker'
|
|
||||||
__author__ = 'Darko Miletic'
|
|
||||||
description = 'The best of US journalism'
|
|
||||||
oldest_article = 15
|
|
||||||
language = 'en'
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
no_stylesheets = True
|
|
||||||
use_embedded_content = False
|
|
||||||
publisher = 'Conde Nast Publications'
|
|
||||||
category = 'news, politics, USA'
|
|
||||||
encoding = 'cp1252'
|
|
||||||
publication_type = 'magazine'
|
|
||||||
masthead_url = 'http://www.newyorker.com/css/i/hed/logo.gif'
|
|
||||||
extra_css = """
|
|
||||||
body {font-family: "Times New Roman",Times,serif}
|
|
||||||
.articleauthor{color: #9F9F9F;
|
|
||||||
font-family: Arial, sans-serif;
|
|
||||||
font-size: small;
|
|
||||||
text-transform: uppercase}
|
|
||||||
.rubric,.dd,h6#credit{color: #CD0021;
|
|
||||||
font-family: Arial, sans-serif;
|
|
||||||
font-size: small;
|
|
||||||
text-transform: uppercase}
|
|
||||||
.descender:first-letter{display: inline; font-size: xx-large; font-weight: bold}
|
|
||||||
.dd,h6#credit{color: gray}
|
|
||||||
.c{display: block}
|
|
||||||
.caption,h2#articleintro{font-style: italic}
|
|
||||||
.caption{font-size: small}
|
|
||||||
"""
|
|
||||||
|
|
||||||
conversion_options = {
|
|
||||||
'comment' : description
|
|
||||||
, 'tags' : category
|
|
||||||
, 'publisher' : publisher
|
|
||||||
, 'language' : language
|
|
||||||
}
|
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'pagebody'})]
|
title = u'New Yorker Magazine'
|
||||||
remove_tags = [
|
newyorker_prefix = 'http://m.newyorker.com'
|
||||||
dict(name=['meta','iframe','base','link','embed','object'])
|
description = u'Content from the New Yorker website'
|
||||||
,dict(attrs={'class':['utils','socialUtils','articleRailLinks','icons','social-utils-top','entry-keywords','entry-categories','utilsPrintEmail'] })
|
fp_tag = 'CAN_TC'
|
||||||
,dict(attrs={'id':['show-header','show-footer'] })
|
|
||||||
]
|
|
||||||
remove_tags_after = dict(attrs={'class':'entry-content'})
|
|
||||||
remove_attributes = ['lang']
|
|
||||||
feeds = [(u'The New Yorker', u'http://www.newyorker.com/services/mrss/feeds/everything.xml')]
|
|
||||||
|
|
||||||
def print_version(self, url):
|
masthead_url = 'http://www.newyorker.com/images/elements/print/newyorker_printlogo.gif'
|
||||||
return url + '?printable=true¤tPage=all'
|
|
||||||
|
|
||||||
def image_url_processor(self, baseurl, url):
|
compress_news_images = True
|
||||||
return url.strip()
|
compress_news_images_auto_size = 8
|
||||||
|
scale_news_images_to_device = False
|
||||||
|
scale_news_images = (768, 1024)
|
||||||
|
|
||||||
|
url_list = []
|
||||||
|
language = 'en'
|
||||||
|
__author__ = 'Nick Redding'
|
||||||
|
no_stylesheets = True
|
||||||
|
timefmt = ' [%b %d]'
|
||||||
|
encoding = 'utf-8'
|
||||||
|
extra_css = '''
|
||||||
|
.byline { font-size:xx-small; font-weight: bold;}
|
||||||
|
h3 { margin-bottom: 6px; }
|
||||||
|
.caption { font-size: xx-small; font-style: italic; font-weight: normal; }
|
||||||
|
'''
|
||||||
|
keep_only_tags = [dict(name='div', attrs={'id':re.compile('pagebody')})]
|
||||||
|
|
||||||
|
remove_tags = [{'class':'socialUtils'},{'class':'entry-keywords'}]
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
cover_url = "http://www.newyorker.com/images/covers/1925/1925_02_21_p233.jpg"
|
cover_url = "http://www.newyorker.com/images/covers/1925/1925_02_21_p233.jpg"
|
||||||
@ -68,13 +48,233 @@ class NewYorker(BasicNewsRecipe):
|
|||||||
cover_url = 'http://www.newyorker.com' + cover_item.div.img['src'].strip()
|
cover_url = 'http://www.newyorker.com' + cover_item.div.img['src'].strip()
|
||||||
return cover_url
|
return cover_url
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def fixChars(self,string):
|
||||||
for item in soup.findAll(style=True):
|
# Replace lsquo (\x91)
|
||||||
del item['style']
|
fixed = re.sub("\x91","‘",string)
|
||||||
auth = soup.find(attrs={'id':'articleauthor'})
|
# Replace rsquo (\x92)
|
||||||
if auth:
|
fixed = re.sub("\x92","’",fixed)
|
||||||
alink = auth.find('a')
|
# Replace ldquo (\x93)
|
||||||
if alink and alink.string is not None:
|
fixed = re.sub("\x93","“",fixed)
|
||||||
txt = alink.string
|
# Replace rdquo (\x94)
|
||||||
alink.replaceWith(txt)
|
fixed = re.sub("\x94","”",fixed)
|
||||||
|
# Replace ndash (\x96)
|
||||||
|
fixed = re.sub("\x96","–",fixed)
|
||||||
|
# Replace mdash (\x97)
|
||||||
|
fixed = re.sub("\x97","—",fixed)
|
||||||
|
fixed = re.sub("’","’",fixed)
|
||||||
|
return fixed
|
||||||
|
|
||||||
|
def massageNCXText(self, description):
|
||||||
|
# Kindle TOC descriptions won't render certain characters
|
||||||
|
if description:
|
||||||
|
massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
|
||||||
|
# Replace '&' with '&'
|
||||||
|
massaged = re.sub("&","&", massaged)
|
||||||
|
return self.fixChars(massaged)
|
||||||
|
else:
|
||||||
|
return description
|
||||||
|
|
||||||
|
def populate_article_metadata(self, article, soup, first):
|
||||||
|
if first:
|
||||||
|
picdiv = soup.find('body').find('img')
|
||||||
|
if picdiv is not None:
|
||||||
|
self.add_toc_thumbnail(article,re.sub(r'links\\link\d+\\','',picdiv['src']))
|
||||||
|
xtitle = article.text_summary.strip()
|
||||||
|
if len(xtitle) == 0:
|
||||||
|
desc = soup.find('meta',attrs={'property':'og:description'})
|
||||||
|
if desc is not None:
|
||||||
|
article.summary = article.text_summary = desc['content']
|
||||||
|
shortparagraph = ""
|
||||||
|
## try:
|
||||||
|
if len(article.text_summary.strip()) == 0:
|
||||||
|
articlebodies = soup.findAll('div',attrs={'class':'entry-content'})
|
||||||
|
if articlebodies:
|
||||||
|
for articlebody in articlebodies:
|
||||||
|
if articlebody:
|
||||||
|
paras = articlebody.findAll('p')
|
||||||
|
for p in paras:
|
||||||
|
refparagraph = self.massageNCXText(self.tag_to_string(p,use_alt=False)).strip()
|
||||||
|
#account for blank paragraphs and short paragraphs by appending them to longer ones
|
||||||
|
if len(refparagraph) > 0:
|
||||||
|
if len(refparagraph) > 70: #approximately one line of text
|
||||||
|
newpara = shortparagraph + refparagraph
|
||||||
|
article.summary = article.text_summary = newpara.strip()
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
shortparagraph = refparagraph + " "
|
||||||
|
if shortparagraph.strip().find(" ") == -1 and not shortparagraph.strip().endswith(":"):
|
||||||
|
shortparagraph = shortparagraph + "- "
|
||||||
|
else:
|
||||||
|
article.summary = article.text_summary = self.massageNCXText(article.text_summary)
|
||||||
|
## except:
|
||||||
|
## self.log("Error creating article descriptions")
|
||||||
|
## return
|
||||||
|
|
||||||
|
|
||||||
|
def strip_anchors(self,soup):
|
||||||
|
paras = soup.findAll(True)
|
||||||
|
for para in paras:
|
||||||
|
aTags = para.findAll('a')
|
||||||
|
for a in aTags:
|
||||||
|
if a.img is None:
|
||||||
|
a.replaceWith(a.renderContents().decode('cp1252','replace'))
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
def preprocess_html(self,soup):
|
||||||
|
dateline = soup.find('div','published')
|
||||||
|
byline = soup.find('div','byline')
|
||||||
|
title = soup.find('h1','entry-title')
|
||||||
|
if title is None:
|
||||||
|
return self.strip_anchors(soup)
|
||||||
|
if byline is None:
|
||||||
|
title.append(dateline)
|
||||||
|
return self.strip_anchors(soup)
|
||||||
|
byline.append(dateline)
|
||||||
|
return self.strip_anchors(soup)
|
||||||
|
|
||||||
|
def load_global_nav(self,soup):
|
||||||
|
seclist = []
|
||||||
|
ul = soup.find('ul',attrs={'id':re.compile('global-nav-menu')})
|
||||||
|
if ul is not None:
|
||||||
|
for li in ul.findAll('li'):
|
||||||
|
if li.a is not None:
|
||||||
|
securl = li.a['href']
|
||||||
|
if securl != '/' and securl != '/magazine' and securl.startswith('/'):
|
||||||
|
seclist.append((self.tag_to_string(li.a),self.newyorker_prefix+securl))
|
||||||
|
return seclist
|
||||||
|
|
||||||
|
def exclude_url(self,url):
|
||||||
|
if url in self.url_list:
|
||||||
|
return True
|
||||||
|
if not url.endswith('html'):
|
||||||
|
return True
|
||||||
|
if 'goings-on-about-town-app' in url:
|
||||||
|
return True
|
||||||
|
if 'something-to-be-thankful-for' in url:
|
||||||
|
return True
|
||||||
|
if '/shouts/' in url:
|
||||||
|
return True
|
||||||
|
if 'out-loud' in url:
|
||||||
|
return True
|
||||||
|
if '/rss/' in url:
|
||||||
|
return True
|
||||||
|
if '/video-' in url:
|
||||||
|
return True
|
||||||
|
self.url_list.append(url)
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def load_index_page(self,soup):
|
||||||
|
article_list = []
|
||||||
|
for div in soup.findAll('div',attrs={'class':re.compile('^rotator')}):
|
||||||
|
h2 = div.h2
|
||||||
|
if h2 is not None:
|
||||||
|
a = h2.a
|
||||||
|
if a is not None:
|
||||||
|
url = a['href']
|
||||||
|
if not self.exclude_url(url):
|
||||||
|
if url.startswith('/'):
|
||||||
|
url = self.newyorker_prefix+url
|
||||||
|
byline = h2.span
|
||||||
|
if byline is not None:
|
||||||
|
author = self.tag_to_string(byline)
|
||||||
|
if author.startswith('by '):
|
||||||
|
author.replace('by ','')
|
||||||
|
byline.extract()
|
||||||
|
else:
|
||||||
|
author = ''
|
||||||
|
if h2.br is not None:
|
||||||
|
h2.br.replaceWith(' ')
|
||||||
|
title = self.tag_to_string(h2)
|
||||||
|
desc = div.find(attrs={'class':['rotator-ad-body','feature-blurb-text']})
|
||||||
|
if desc is not None:
|
||||||
|
description = self.tag_to_string(desc)
|
||||||
|
else:
|
||||||
|
description = ''
|
||||||
|
article_list.append(dict(title=title,url=url,date='',description=description,author=author,content=''))
|
||||||
|
ul = div.find('ul','feature-blurb-links')
|
||||||
|
if ul is not None:
|
||||||
|
for li in ul.findAll('li'):
|
||||||
|
a = li.a
|
||||||
|
if a is not None:
|
||||||
|
url = a['href']
|
||||||
|
if not self.exclude_url(url):
|
||||||
|
if url.startswith('/'):
|
||||||
|
url = self.newyorker_prefix+url
|
||||||
|
if a.br is not None:
|
||||||
|
a.br.replaceWith(' ')
|
||||||
|
title = '>>'+self.tag_to_string(a)
|
||||||
|
article_list.append(dict(title=title,url=url,date='',description='',author='',content=''))
|
||||||
|
for h3 in soup.findAll('h3','header'):
|
||||||
|
a = h3.a
|
||||||
|
if a is not None:
|
||||||
|
url = a['href']
|
||||||
|
if not self.exclude_url(url):
|
||||||
|
if url.startswith('/'):
|
||||||
|
url = self.newyorker_prefix+url
|
||||||
|
byline = h3.span
|
||||||
|
if byline is not None:
|
||||||
|
author = self.tag_to_string(byline)
|
||||||
|
if author.startswith('by '):
|
||||||
|
author = author.replace('by ','')
|
||||||
|
byline.extract()
|
||||||
|
else:
|
||||||
|
author = ''
|
||||||
|
if h3.br is not None:
|
||||||
|
h3.br.replaceWith(' ')
|
||||||
|
title = self.tag_to_string(h3).strip()
|
||||||
|
article_list.append(dict(title=title,url=url,date='',description='',author=author,content=''))
|
||||||
|
return article_list
|
||||||
|
|
||||||
|
def load_global_section(self,securl):
|
||||||
|
article_list = []
|
||||||
|
try:
|
||||||
|
soup = self.index_to_soup(securl)
|
||||||
|
except:
|
||||||
|
return article_list
|
||||||
|
if '/blogs/' not in securl:
|
||||||
|
return self.load_index_page(soup)
|
||||||
|
for div in soup.findAll('div',attrs={'id':re.compile('^entry')}):
|
||||||
|
h3 = div.h3
|
||||||
|
if h3 is not None:
|
||||||
|
a = h3.a
|
||||||
|
if a is not None:
|
||||||
|
url = a['href']
|
||||||
|
if not self.exclude_url(url):
|
||||||
|
if url.startswith('/'):
|
||||||
|
url = self.newyorker_prefix+url
|
||||||
|
if h3.br is not None:
|
||||||
|
h3.br.replaceWith(' ')
|
||||||
|
title = self.tag_to_string(h3)
|
||||||
|
article_list.append(dict(title=title,url=url,date='',description='',author='',content=''))
|
||||||
|
return article_list
|
||||||
|
|
||||||
|
def filter_ans(self, ans) :
|
||||||
|
total_article_count = 0
|
||||||
|
idx = 0
|
||||||
|
idx_max = len(ans)-1
|
||||||
|
while idx <= idx_max:
|
||||||
|
if True: #self.verbose
|
||||||
|
self.log("Section %s: %d articles" % (ans[idx][0], len(ans[idx][1])) )
|
||||||
|
for article in ans[idx][1]:
|
||||||
|
total_article_count += 1
|
||||||
|
if True: #self.verbose
|
||||||
|
self.log("\t%-40.40s... \t%-60.60s..." % (article['title'].encode('cp1252','replace'),
|
||||||
|
article['url'].replace('http://m.newyorker.com','').encode('cp1252','replace')))
|
||||||
|
idx = idx+1
|
||||||
|
self.log( "Queued %d articles" % total_article_count )
|
||||||
|
return ans
|
||||||
|
|
||||||
|
|
||||||
|
def parse_index(self):
|
||||||
|
ans = []
|
||||||
|
try:
|
||||||
|
soup = self.index_to_soup(self.newyorker_prefix)
|
||||||
|
except:
|
||||||
|
return ans
|
||||||
|
seclist = self.load_global_nav(soup)
|
||||||
|
ans.append(('Front Page',self.load_index_page(soup)))
|
||||||
|
for (sectitle,securl) in seclist:
|
||||||
|
ans.append((sectitle,self.load_global_section(securl)))
|
||||||
|
return self.filter_ans(ans)
|
||||||
|
|
||||||
|
@@ -12,6 +12,7 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
 max_articles_per_feed = 20
 #auto_cleanup = True
 language = 'en_GB'
+compress_news_images = True

 def get_cover_url(self):
 soup = self.index_to_soup('http://www.nme.com/component/subscribe')
@@ -27,7 +28,7 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
 br.open_novisit(cov2)
 cover_url = str(cov2)
 except:
 cover_url = 'http://tawanda3000.files.wordpress.com/2011/02/nme-logo.jpg'
 return cover_url

 masthead_url = 'http://tawanda3000.files.wordpress.com/2011/02/nme-logo.jpg'
@@ -1,30 +1,30 @@
+# vim:fileencoding=UTF-8
+from __future__ import unicode_literals
 __license__ = 'GPL v3'
-__copyright__ = '2011, Eddie Lau'
+__copyright__ = '2011-2013, Eddie Lau'

 # data source: normal, mobile
 __Source__ = 'mobile'
 # please replace the following "True" with "False". (Default: True)
 __MakePeriodical__ = True
 # Turn below to True if your device supports display of CJK titles (Default: False)
-__UseChineseTitle__ = False
+__UseChineseTitle__ = True
 # Set it to False if you want to skip images (Default: True)
 __KeepImages__ = True
 # Set it to True if you want to include a summary in Kindle's article view (Default: False)
-__IncludeSummary__ = False
+__IncludeSummary__ = True
 # Set it to True if you want thumbnail images in Kindle's article view (Default: True)
 __IncludeThumbnails__ = True


 '''
 Change Log:
+2013/03/31 -- fix cover retrieval code and heading size, and remove &nbsp; in summary
 2011/12/29 -- first version done
-TODO:
-* use alternative source at http://m.singtao.com/index.php
 '''

 from calibre.utils.date import now as nowf
 import os, datetime, re
-from datetime import date
 from calibre.web.feeds.recipes import BasicNewsRecipe
 from contextlib import nested
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
@@ -41,7 +41,7 @@ class STHKRecipe(BasicNewsRecipe):
 title = 'Sing Tao Daily - Hong Kong'
 description = 'Hong Kong Chinese Newspaper (http://singtao.com)'
 category = 'Chinese, News, Hong Kong'
-extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} td[class=caption] {font-size:50%;} td[class=bodyhead]{font-weight:bold; font-size:150%;} td[class=stmobheadline]{font-weight:bold; font-size:150%;}'
+extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} td[class=caption] {font-size:50%;} td[class=bodyhead]{font-weight:bold; font-size:150%;} td[class=stmobheadline]{font-weight:bold; font-size:200%;}'
 masthead_url = 'http://upload.wikimedia.org/wikipedia/en/d/dd/Singtao-usa.png'
 if __Source__ == 'normal':
 keep_only_tags = [dict(name='td', attrs={'class':['bodyhead','bodytext']})]
@@ -96,17 +96,13 @@ class STHKRecipe(BasicNewsRecipe):
 return self.get_dtlocal().strftime("%d")

 def get_cover_url(self):
-#cover = 'http://singtao.com/media/a/a(2660).jpg' # for 2011/12/29
-base = 2660
-todaydate = date(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()))
-diff = todaydate - date(2011, 12, 29)
-base = base + int(diff.total_seconds()/(3600*24))
-cover = 'http://singtao.com/media/a/a(' + str(base) +').jpg'
+soup = self.index_to_soup('http://m.singtao.com/')
+cover = soup.find(attrs={'class':'special'}).get('src', False)
 br = BasicNewsRecipe.get_browser(self)
 try:
 br.open(cover)
 except:
-cover = 'http://singtao.com/images/stlogo.gif'
+cover = None
 return cover

 def parse_index(self):
@@ -289,11 +285,11 @@ class STHKRecipe(BasicNewsRecipe):
 # the text may or may not be enclosed in <p></p> tag
 paras = articlebody.findAll('p')
 if not paras:
 paras = articlebody
 textFound = False
 for p in paras:
 if not textFound:
-summary_candidate = self.tag_to_string(p).strip()
+summary_candidate = self.tag_to_string(p).strip().replace(' ', '')
 if len(summary_candidate) > 0:
 summary_candidate = summary_candidate.replace(u'(\u661f\u5cf6\u65e5\u5831\u5831\u9053)', '', 1)
 article.summary = article.text_summary = summary_candidate
@@ -489,3 +485,4 @@ class STHKRecipe(BasicNewsRecipe):


+
@@ -2,6 +2,7 @@

 __license__ = 'GPL v3'

+import re
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.utils.magick import Image

@@ -20,7 +20,7 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
 no_stylesheets = True

 ignore_duplicate_articles = {'title','url'}
+compress_news_images = True

 extra_css = '''
 body{ text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
@@ -36,47 +36,21 @@ class TheOnion(BasicNewsRecipe):
 , 'publisher': publisher
 , 'language' : language
 }
+keep_only_tags = [dict(name='article', attrs={'class':'full-article'})]
-keep_only_tags = [
-dict(name='h2', attrs={'class':['section_title','title']})
-,dict(attrs={'class':['main_image','meta','article_photo_lead','article_body']})
-,dict(attrs={'id':['entries']})
-]
-remove_attributes=['lang','rel']
-remove_tags_after = dict(attrs={'class':['article_body','feature_content']})
 remove_tags = [
-dict(name=['object','link','iframe','base','meta'])
-,dict(name='div', attrs={'class':['toolbar_side','graphical_feature','toolbar_bottom']})
-,dict(name='div', attrs={'id':['recent_slider','sidebar','pagination','related_media']})
-]
+dict(name=['nav', 'aside', 'section', 'meta']),
+{'attrs':{'class':lambda x: x and ('share-tools' in x or 'ad-zone' in x)}},
+]


 feeds = [
 (u'Daily' , u'http://feeds.theonion.com/theonion/daily' )
 ,(u'Sports' , u'http://feeds.theonion.com/theonion/sports' )
 ]

-def get_article_url(self, article):
-artl = BasicNewsRecipe.get_article_url(self, article)
-if artl.startswith('http://www.theonion.com/audio/'):
-artl = None
-return artl
+def preprocess_html(self, soup, *args):
+for img in soup.findAll('img', attrs={'data-src':True}):
+if img['data-src']:
+img['src'] = img['data-src']

-def preprocess_html(self, soup):
-for item in soup.findAll(style=True):
-del item['style']
-for item in soup.findAll('a'):
-limg = item.find('img')
-if item.string is not None:
-str = item.string
-item.replaceWith(str)
-else:
-if limg:
-item.name = 'div'
-item.attrs = []
-if not limg.has_key('alt'):
-limg['alt'] = 'image'
-else:
-str = self.tag_to_string(item)
-item.replaceWith(str)
 return soup

recipes/universe_today.recipe (new file, 17 lines)
@@ -0,0 +1,17 @@
from calibre.web.feeds.news import BasicNewsRecipe

class UniverseToday(BasicNewsRecipe):
    title = u'Universe Today'
    language = 'en'
    description = u'Space and astronomy news.'
    __author__ = 'seird'
    publisher = u'universetoday.com'
    category = 'science, astronomy, news, rss'
    oldest_article = 7
    max_articles_per_feed = 40
    auto_cleanup = True
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True

    feeds = [(u'Universe Today', u'http://feeds.feedburner.com/universetoday/pYdq')]
@@ -6,17 +6,62 @@ __license__ = 'GPL v3'
 www.canada.com
 '''
 import re
-from calibre.web.feeds.recipes import BasicNewsRecipe
+from calibre.web.feeds.news import BasicNewsRecipe

 from calibre.ebooks.BeautifulSoup import Tag, BeautifulStoneSoup


 class TimesColonist(BasicNewsRecipe):

+# Customization -- remove sections you don't want.
+# If your e-reader is an e-ink Kindle and your output profile is
+# set properly this recipe will not include images because the
+# resulting file is too large. If you have one of these and want
+# images you can set kindle_omit_images = False
+# and remove sections (typically the e-ink Kindles will
+# work with about a dozen of these, but your mileage may vary).
+
+kindle_omit_images = True
+
+section_list = [
+('','Web Front Page'),
+('news/','News Headlines'),
+('news/b-c/','BC News'),
+('news/national/','National News'),
+('news/world/','World News'),
+('opinion/','Opinion'),
+('opinion/letters/','Letters'),
+('business/','Business'),
+('business/money/','Money'),
+('business/technology/','Technology'),
+('business/working/','Working'),
+('sports/','Sports'),
+('sports/hockey/','Hockey'),
+('sports/football/','Football'),
+('sports/basketball/','Basketball'),
+('sports/golf/','Golf'),
+('entertainment/','entertainment'),
+('entertainment/go/','Go!'),
+('entertainment/music/','Music'),
+('entertainment/books/','Books'),
+('entertainment/Movies/','Movies'),
+('entertainment/television/','Television'),
+('life/','Life'),
+('life/health/','Health'),
+('life/travel/','Travel'),
+('life/driving/','Driving'),
+('life/homes/','Homes'),
+('life/food-drink/','Food & Drink')
+]
+
 title = u'Victoria Times Colonist'
 url_prefix = 'http://www.timescolonist.com'
 description = u'News from Victoria, BC'
 fp_tag = 'CAN_TC'

+masthead_url = 'http://www.timescolonist.com/gmg/img/global/logoTimesColonist.png'

 url_list = []
 language = 'en_CA'
 __author__ = 'Nick Redding'
@@ -29,15 +74,21 @@ class TimesColonist(BasicNewsRecipe):
 .caption { font-size: xx-small; font-style: italic; font-weight: normal; }
 '''
 keep_only_tags = [dict(name='div', attrs={'class':re.compile('main.content')})]
-remove_tags = [{'class':'comments'},
-{'id':'photocredit'},
-dict(name='div', attrs={'class':re.compile('top.controls')}),
-dict(name='div', attrs={'class':re.compile('social')}),
-dict(name='div', attrs={'class':re.compile('tools')}),
-dict(name='div', attrs={'class':re.compile('bottom.tools')}),
-dict(name='div', attrs={'class':re.compile('window')}),
-dict(name='div', attrs={'class':re.compile('related.news.element')})]

+def __init__(self, options, log, progress_reporter):
+self.remove_tags = [{'class':'comments'},
+{'id':'photocredit'},
+dict(name='div', attrs={'class':re.compile('top.controls')}),
+dict(name='div', attrs={'class':re.compile('^comments')}),
+dict(name='div', attrs={'class':re.compile('social')}),
+dict(name='div', attrs={'class':re.compile('tools')}),
+dict(name='div', attrs={'class':re.compile('bottom.tools')}),
+dict(name='div', attrs={'class':re.compile('window')}),
+dict(name='div', attrs={'class':re.compile('related.news.element')})]
+print("PROFILE NAME = "+options.output_profile.short_name)
+if self.kindle_omit_images and options.output_profile.short_name in ['kindle', 'kindle_dx', 'kindle_pw']:
+self.remove_tags.append(dict(name='div', attrs={'class':re.compile('image-container')}))
+BasicNewsRecipe.__init__(self, options, log, progress_reporter)
+
 def get_cover_url(self):
 from datetime import timedelta, date
@@ -122,7 +173,6 @@ class TimesColonist(BasicNewsRecipe):
 def preprocess_html(self,soup):
 byline = soup.find('p',attrs={'class':re.compile('ancillary')})
 if byline is not None:
-byline.find('a')
 authstr = self.tag_to_string(byline,False)
 authstr = re.sub('/ *Times Colonist','/',authstr, flags=re.IGNORECASE)
 authstr = re.sub('BY */','',authstr, flags=re.IGNORECASE)
@@ -149,9 +199,10 @@ class TimesColonist(BasicNewsRecipe):
 atag = htag.a
 if atag is not None:
 url = atag['href']
-#print("Checking "+url)
-if atag['href'].startswith('/'):
-url = self.url_prefix+atag['href']
+url = url.strip()
+# print("Checking >>"+url+'<<\n\r')
+if url.startswith('/'):
+url = self.url_prefix+url
 if url in self.url_list:
 return
 self.url_list.append(url)
@@ -171,10 +222,10 @@ class TimesColonist(BasicNewsRecipe):
 if dtag is not None:
 description = self.tag_to_string(dtag,False)
 article_list.append(dict(title=title,url=url,date='',description=description,author='',content=''))
-#print(sectitle+title+": description = "+description+" URL="+url)
+print(sectitle+title+": description = "+description+" URL="+url+'\n\r')

 def add_section_index(self,ans,securl,sectitle):
-print("Add section url="+self.url_prefix+'/'+securl)
+print("Add section url="+self.url_prefix+'/'+securl+'\n\r')
 try:
 soup = self.index_to_soup(self.url_prefix+'/'+securl)
 except:
@@ -193,33 +244,7 @@ class TimesColonist(BasicNewsRecipe):

 def parse_index(self):
 ans = []
-ans = self.add_section_index(ans,'','Web Front Page')
-ans = self.add_section_index(ans,'news/','News Headlines')
-ans = self.add_section_index(ans,'news/b-c/','BC News')
-ans = self.add_section_index(ans,'news/national/','Natioanl News')
-ans = self.add_section_index(ans,'news/world/','World News')
-ans = self.add_section_index(ans,'opinion/','Opinion')
-ans = self.add_section_index(ans,'opinion/letters/','Letters')
-ans = self.add_section_index(ans,'business/','Business')
-ans = self.add_section_index(ans,'business/money/','Money')
-ans = self.add_section_index(ans,'business/technology/','Technology')
-ans = self.add_section_index(ans,'business/working/','Working')
-ans = self.add_section_index(ans,'sports/','Sports')
-ans = self.add_section_index(ans,'sports/hockey/','Hockey')
-ans = self.add_section_index(ans,'sports/football/','Football')
-ans = self.add_section_index(ans,'sports/basketball/','Basketball')
-ans = self.add_section_index(ans,'sports/golf/','Golf')
-ans = self.add_section_index(ans,'entertainment/','entertainment')
-ans = self.add_section_index(ans,'entertainment/go/','Go!')
-ans = self.add_section_index(ans,'entertainment/music/','Music')
-ans = self.add_section_index(ans,'entertainment/books/','Books')
-ans = self.add_section_index(ans,'entertainment/Movies/','movies')
-ans = self.add_section_index(ans,'entertainment/television/','Television')
-ans = self.add_section_index(ans,'life/','Life')
-ans = self.add_section_index(ans,'life/health/','Health')
-ans = self.add_section_index(ans,'life/travel/','Travel')
-ans = self.add_section_index(ans,'life/driving/','Driving')
-ans = self.add_section_index(ans,'life/homes/','Homes')
-ans = self.add_section_index(ans,'life/food-drink/','Food & Drink')
+for (url,title) in self.section_list:
+ans = self.add_section_index(ans,url,title)
 return ans

@@ -1,6 +1,5 @@
 #!/usr/bin/env python
 __license__ = 'GPL v3'
-import re

 from calibre.web.feeds.news import BasicNewsRecipe

@@ -51,8 +50,8 @@ class WysokieObcasyRecipe(BasicNewsRecipe):
 printVerString=articleURL1 + ',' + articleURL2
 s= baseURL + subPath + printVerString + '.html'
 return s

 def get_cover_url(self):
 soup = self.index_to_soup('http://www.wysokieobcasy.pl/wysokie-obcasy/0,0.html')
 self.cover_url = soup.find(attrs={'class':'holder_cr'}).find('img')['src']
 return getattr(self, 'cover_url', self.cover_url)
@@ -357,7 +357,7 @@
 <xsl:apply-templates/>
 </xsl:template>

 <xsl:template match="rtf:table">
 <xsl:element name="table">
 <xsl:attribute name="id">
 <xsl:value-of select="generate-id(.)"/>
@@ -390,7 +390,6 @@


 <xsl:output method = "xml"/>

 <xsl:key name="style-types" match="rtf:paragraph-definition" use="@style-number"/>


@@ -415,13 +414,11 @@
 </xsl:template>

 <xsl:template match="rtf:page-break">
-<xsl:element name="br">
-<xsl:attribute name="style">page-break-after:always</xsl:attribute>
-</xsl:element>
+<br style = "page-break-after:always"/>
 </xsl:template>

 <xsl:template match="rtf:hardline-break">
-<xsl:element name="br"/>
+<br/>
 </xsl:template>

 <xsl:template match="rtf:rtf-definition|rtf:font-table|rtf:color-table|rtf:style-table|rtf:page-definition|rtf:list-table|rtf:override-table|rtf:override-list|rtf:list-text"/>
@@ -445,7 +442,7 @@
 </xsl:template>

 <xsl:template match = "rtf:field-block">
 <xsl:apply-templates/>
 </xsl:template>

 <xsl:template match = "rtf:field[@type='hyperlink']">
@@ -472,9 +469,7 @@
 </xsl:template>

 <xsl:template match="rtf:pict">
-<xsl:element name="img">
-<xsl:attribute name="src"><xsl:value-of select="@num" /></xsl:attribute>
-</xsl:element>
+<img src = "{@num}"/>
 </xsl:template>

 <xsl:template match="*">
@@ -47,6 +47,10 @@ binary_includes = [
 '/usr/lib/libgthread-2.0.so.0',
 '/usr/lib/libpng14.so.14',
 '/usr/lib/libexslt.so.0',
+# Ensure that libimobiledevice is compiled against openssl, not gnutls
+'/usr/lib/libimobiledevice.so.3',
+'/usr/lib/libusbmuxd.so.2',
+'/usr/lib/libplist.so.1',
 MAGICK_PREFIX+'/lib/libMagickWand.so.5',
 MAGICK_PREFIX+'/lib/libMagickCore.so.5',
 '/usr/lib/libgcrypt.so.11',
@@ -399,7 +399,8 @@ class Py2App(object):
 @flush
 def add_fontconfig(self):
 info('\nAdding fontconfig')
-for x in ('fontconfig.1', 'freetype.6', 'expat.1'):
+for x in ('fontconfig.1', 'freetype.6', 'expat.1',
+'plist.1', 'usbmuxd.2', 'imobiledevice.3'):
 src = os.path.join(SW, 'lib', 'lib'+x+'.dylib')
 self.install_dylib(src)
 dst = os.path.join(self.resources_dir, 'fonts')
@@ -12,13 +12,13 @@ msgstr ""
 "Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
 "devel@lists.alioth.debian.org>\n"
 "POT-Creation-Date: 2011-11-25 14:01+0000\n"
-"PO-Revision-Date: 2013-03-27 13:07+0000\n"
+"PO-Revision-Date: 2013-03-28 13:01+0000\n"
 "Last-Translator: Ferran Rius <frius64@hotmail.com>\n"
 "Language-Team: Catalan <linux@softcatala.org>\n"
 "MIME-Version: 1.0\n"
 "Content-Type: text/plain; charset=UTF-8\n"
 "Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2013-03-28 04:41+0000\n"
+"X-Launchpad-Export-Date: 2013-03-29 04:36+0000\n"
 "X-Generator: Launchpad (build 16546)\n"
 "Language: ca\n"

@@ -1884,7 +1884,7 @@ msgstr "Awera"

 #. name for aws
 msgid "Awyu; South"
-msgstr "Awyu meridional"
+msgstr "Awyu; meridional"

 #. name for awt
 msgid "Araweté"
@@ -1892,7 +1892,7 @@ msgstr "Araweté"

 #. name for awu
 msgid "Awyu; Central"
-msgstr "Awyu central"
+msgstr "Awyu; Central"

 #. name for awv
 msgid "Awyu; Jair"
@@ -4052,7 +4052,7 @@ msgstr "Buginès"

 #. name for buh
 msgid "Bunu; Younuo"
-msgstr "Bunu; Younuo"
+msgstr "Bunu; Younou"

 #. name for bui
 msgid "Bongili"
@@ -4308,7 +4308,7 @@ msgstr "Bwa"

 #. name for bwx
 msgid "Bunu; Bu-Nao"
-msgstr "Bunu; Bu-Nao"
+msgstr "Bunu; Bu Nao"

 #. name for bwy
 msgid "Bwamu; Cwi"
@@ -19804,7 +19804,7 @@ msgstr "Minoà"

 #. name for omo
 msgid "Utarmbung"
-msgstr ""
+msgstr "Utarmbung"

 #. name for omp
 msgid "Manipuri; Old"
@@ -20344,7 +20344,7 @@ msgstr "Pear"

 #. name for pcc
 msgid "Bouyei"
-msgstr ""
+msgstr "Buyí"

 #. name for pcd
 msgid "Picard"
@@ -20456,11 +20456,11 @@ msgstr "Pengo"

 #. name for peh
 msgid "Bonan"
-msgstr ""
+msgstr "Bonan"

 #. name for pei
 msgid "Chichimeca-Jonaz"
-msgstr ""
+msgstr "Chichimec"

 #. name for pej
 msgid "Pomo; Northern"
@@ -20484,7 +20484,7 @@ msgstr "Persa Antic"

 #. name for pep
 msgid "Kunja"
-msgstr ""
+msgstr "Kunja"

 #. name for peq
 msgid "Pomo; Southern"
@@ -20536,7 +20536,7 @@ msgstr "Pagi"

 #. name for pgk
 msgid "Rerep"
-msgstr ""
+msgstr "Rerep"

 #. name for pgl
 msgid "Irish; Primitive"
@@ -20624,7 +20624,7 @@ msgstr "Pima Baix"

 #. name for pib
 msgid "Yine"
-msgstr ""
+msgstr "Yine"

 #. name for pic
 msgid "Pinji"
@@ -20660,7 +20660,7 @@ msgstr "Pijao"

 #. name for pil
 msgid "Yom"
-msgstr ""
+msgstr "Yom"

 #. name for pim
 msgid "Powhatan"
@@ -20760,7 +20760,7 @@ msgstr "Llenguatge de signes pakistaní"

 #. name for pkt
 msgid "Maleng"
-msgstr ""
+msgstr "Maleng"

 #. name for pku
 msgid "Paku"
@@ -20768,7 +20768,7 @@ msgstr "Paku"

 #. name for pla
 msgid "Miani"
-msgstr ""
+msgstr "Miani"

 #. name for plb
 msgid "Polonombauk"
@@ -20804,7 +20804,7 @@ msgstr "Polci"

 #. name for plk
 msgid "Shina; Kohistani"
-msgstr ""
+msgstr "Shina; Kohistani"

 #. name for pll
 msgid "Palaung; Shwe"
@@ -20852,7 +20852,7 @@ msgstr "Palawà; Brooke"

 #. name for ply
 msgid "Bolyu"
-msgstr ""
+msgstr "Bolyu"

 #. name for plz
 msgid "Paluan"
@@ -20896,7 +20896,7 @@ msgstr "Algonquí Carolina"

 #. name for pml
 msgid "Lingua Franca"
-msgstr ""
+msgstr "Aljamia"

 #. name for pmm
 msgid "Pomo"
@@ -20924,7 +20924,7 @@ msgstr "Piemontès"

 #. name for pmt
 msgid "Tuamotuan"
-msgstr ""
+msgstr "Tuamotu"

 #. name for pmu
 msgid "Panjabi; Mirpur"
@@ -20972,7 +20972,7 @@ msgstr "Penrhyn"

 #. name for pni
 msgid "Aoheng"
-msgstr ""
+msgstr "Aoheng"

 #. name for pnm
 msgid "Punan Batu 1"
@@ -21008,7 +21008,7 @@ msgstr "Pontic"

 #. name for pnu
 msgid "Bunu; Jiongnai"
-msgstr ""
+msgstr "Bunu; Jiongnai"

 #. name for pnv
 msgid "Pinigura"
@@ -21100,7 +21100,7 @@ msgstr "Potavatomi"

 #. name for pov
 msgid "Crioulo; Upper Guinea"
-msgstr ""
+msgstr "Crioll guineà"

 #. name for pow
 msgid "Popoloca; San Felipe Otlaltepec"
@@ -21128,7 +21128,7 @@ msgstr "Paipai"

 #. name for ppk
 msgid "Uma"
-msgstr ""
+msgstr "Uma"

 #. name for ppl
 msgid "Pipil"
@@ -21144,7 +21144,7 @@ msgstr "Papapana"

 #. name for ppo
 msgid "Folopa"
-msgstr ""
+msgstr "Folopa"

 #. name for ppp
 msgid "Pelende"
@@ -21180,7 +21180,7 @@ msgstr "Malecite-Passamaquoddy"

 #. name for prb
 msgid "Lua'"
-msgstr ""
+msgstr "Lua"

 #. name for prc
 msgid "Parachi"
@@ -21220,7 +21220,7 @@ msgstr "Llenguatge de signes peruà"

 #. name for prm
 msgid "Kibiri"
-msgstr ""
+msgstr "Kibiri"

 #. name for prn
 msgid "Prasuni"
@@ -21272,7 +21272,7 @@ msgstr "Llenguatge de signes de Providencia"

 #. name for psa
 msgid "Awyu; Asue"
-msgstr ""
+msgstr "Awyu; Asue"

 #. name for psc
 msgid "Persian Sign Language"
@@ -21328,7 +21328,7 @@ msgstr "Llenguatge de signes portuguès"

 #. name for pss
 msgid "Kaulong"
-msgstr ""
+msgstr "Kaulong"

 #. name for pst
 msgid "Pashto; Central"
@@ -21376,11 +21376,11 @@ msgstr "Pìamatsina"

 #. name for ptt
 msgid "Enrekang"
-msgstr ""
+msgstr "Enrekang"

 #. name for ptu
 msgid "Bambam"
-msgstr ""
+msgstr "Bambam"

 #. name for ptv
 msgid "Port Vato"
@@ -29584,7 +29584,7 @@ msgstr ""

 #. name for yir
 msgid "Awyu; North"
-msgstr ""
+msgstr "Awyu; Septentrional"

 #. name for yis
 msgid "Yis"
@@ -4,7 +4,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 __appname__ = u'calibre'
-numeric_version = (0, 9, 25)
+numeric_version = (0, 9, 26)
 __version__ = u'.'.join(map(unicode, numeric_version))
 __author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"

@@ -757,9 +757,10 @@ from calibre.ebooks.metadata.sources.isbndb import ISBNDB
 from calibre.ebooks.metadata.sources.overdrive import OverDrive
 from calibre.ebooks.metadata.sources.douban import Douban
 from calibre.ebooks.metadata.sources.ozon import Ozon
-# from calibre.ebooks.metadata.sources.google_images import GoogleImages
+from calibre.ebooks.metadata.sources.google_images import GoogleImages
+from calibre.ebooks.metadata.sources.big_book_search import BigBookSearch

-plugins += [GoogleBooks, Amazon, Edelweiss, OpenLibrary, ISBNDB, OverDrive, Douban, Ozon]
+plugins += [GoogleBooks, GoogleImages, Amazon, Edelweiss, OpenLibrary, ISBNDB, OverDrive, Douban, Ozon, BigBookSearch]

 # }}}

@@ -91,7 +91,7 @@ def restore_plugin_state_to_default(plugin_or_name):
 config['enabled_plugins'] = ep

 default_disabled_plugins = set([
-'Overdrive', 'Douban Books', 'OZON.ru', 'Edelweiss', 'Google Images',
+'Overdrive', 'Douban Books', 'OZON.ru', 'Edelweiss', 'Google Images', 'Big Book Search',
 ])

 def is_disabled(plugin):
@@ -97,6 +97,12 @@ class TXTInput(InputFormatPlugin):
 if not ienc:
 ienc = 'utf-8'
 log.debug('No input encoding specified and could not auto detect using %s' % ienc)
+# Remove BOM from start of txt as its presence can confuse markdown
+import codecs
+for bom in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE, codecs.BOM_UTF8, codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
+if txt.startswith(bom):
+txt = txt[len(bom):]
+break
 txt = txt.decode(ienc, 'replace')

 # Replace entities
@@ -132,7 +132,7 @@ class Worker(Thread): # Get details {{{
 text()="Détails sur le produit" or \
 text()="Detalles del producto" or \
 text()="Detalhes do produto" or \
-text()="登録情報"]/../div[@class="content"]
+starts-with(text(), "登録情報")]/../div[@class="content"]
 '''
 # Editor: is for Spanish
 self.publisher_xpath = '''
@@ -235,6 +235,12 @@ class Worker(Thread): # Get details {{{
 msg = 'Failed to parse amazon details page: %r'%self.url
 self.log.exception(msg)
 return
+if self.domain == 'jp':
+for a in root.xpath('//a[@href]'):
+if 'black-curtain-redirect.html' in a.get('href'):
+self.url = 'http://amazon.co.jp'+a.get('href')
+self.log('Black curtain redirect found, following')
+return self.get_details()

 errmsg = root.xpath('//*[@id="errorMessage"]')
 if errmsg:
@@ -252,8 +258,8 @@ class Worker(Thread): # Get details {{{
 self.log.exception('Error parsing asin for url: %r'%self.url)
 asin = None
 if self.testing:
-import tempfile
+import tempfile, uuid
-with tempfile.NamedTemporaryFile(prefix=asin + '_',
+with tempfile.NamedTemporaryFile(prefix=(asin or str(uuid.uuid4()))+ '_',
 suffix='.html', delete=False) as f:
 f.write(raw)
 print ('Downloaded html for', asin, 'saved in', f.name)
@@ -499,7 +505,7 @@ class Worker(Thread): # Get details {{{
 def parse_language(self, pd):
 for x in reversed(pd.xpath(self.language_xpath)):
 if x.tail:
-raw = x.tail.strip()
+raw = x.tail.strip().partition(',')[0].strip()
 ans = self.lang_map.get(raw, None)
 if ans:
 return ans
@@ -1004,6 +1010,11 @@ if __name__ == '__main__': # tests {{{
 ] # }}}

 jp_tests = [ # {{{
+( # Adult filtering test
+{'identifiers':{'isbn':'4799500066'}},
+[title_test(u'Bitch Trap'),]
+),
+
 ( # isbn -> title, authors
 {'identifiers':{'isbn': '9784101302720' }},
 [title_test(u'精霊の守り人',
@ -31,7 +31,7 @@ msprefs.defaults['find_first_edition_date'] = False
|
|||||||
# Google covers are often poor quality (scans/errors) but they have high
|
# Google covers are often poor quality (scans/errors) but they have high
|
||||||
# resolution, so they trump covers from better sources. So make sure they
|
# resolution, so they trump covers from better sources. So make sure they
|
||||||
# are only used if no other covers are found.
|
# are only used if no other covers are found.
|
||||||
msprefs.defaults['cover_priorities'] = {'Google':2, 'Google Images':2}
|
msprefs.defaults['cover_priorities'] = {'Google':2, 'Google Images':2, 'Big Book Search':2}
|
||||||
|
|
||||||
def create_log(ostream=None):
|
def create_log(ostream=None):
|
||||||
from calibre.utils.logging import ThreadSafeLog, FileStream
|
from calibre.utils.logging import ThreadSafeLog, FileStream
|
||||||
@@ -429,6 +429,40 @@ class Source(Plugin):
         mi.tags = list(map(fixcase, mi.tags))
         mi.isbn = check_isbn(mi.isbn)

+    def download_multiple_covers(self, title, authors, urls, get_best_cover, timeout, result_queue, abort, log, prefs_name='max_covers'):
+        if not urls:
+            log('No images found for, title: %r and authors: %r'%(title, authors))
+            return
+        from threading import Thread
+        import time
+        if prefs_name:
+            urls = urls[:self.prefs[prefs_name]]
+        if get_best_cover:
+            urls = urls[:1]
+        log('Downloading %d covers'%len(urls))
+        workers = [Thread(target=self.download_image, args=(u, timeout, log, result_queue)) for u in urls]
+        for w in workers:
+            w.daemon = True
+            w.start()
+        alive = True
+        start_time = time.time()
+        while alive and not abort.is_set() and time.time() - start_time < timeout:
+            alive = False
+            for w in workers:
+                if w.is_alive():
+                    alive = True
+                    break
+            abort.wait(0.1)
+
+    def download_image(self, url, timeout, log, result_queue):
+        try:
+            ans = self.browser.open_novisit(url, timeout=timeout).read()
+            result_queue.put((self, ans))
+            log('Downloaded cover from: %s'%url)
+        except Exception:
+            self.log.exception('Failed to download cover from: %r'%url)
+
     # }}}

     # Metadata API {{{
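A minimal sketch (not from this commit) of how a cover source opts into the new helper: declare can_get_multiple_covers and hand the scraped URLs to download_multiple_covers(). The Big Book Search plugin added below, and the slimmed-down Google Images plugin later in this diff, both follow this shape.

from calibre.ebooks.metadata.sources.base import Source

class ExampleCovers(Source):  # hypothetical plugin, for illustration only
    name = 'Example Covers'
    capabilities = frozenset(['cover'])
    can_get_multiple_covers = True

    def download_cover(self, log, result_queue, abort, title=None,
            authors=None, identifiers={}, timeout=30, get_best_cover=False):
        urls = ['http://example.com/cover1.jpg']  # normally scraped from a website
        self.download_multiple_covers(title, authors, urls, get_best_cover,
                timeout, result_queue, abort, log)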
58  src/calibre/ebooks/metadata/sources/big_book_search.py  Normal file
@@ -0,0 +1,58 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2013, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+from calibre.ebooks.metadata.sources.base import Source, Option
+
+def get_urls(br, tokens):
+    from urllib import quote_plus
+    from mechanize import Request
+    from lxml import html
+    escaped = [quote_plus(x.encode('utf-8')) for x in tokens if x and x.strip()]
+    q = b'+'.join(escaped)
+    url = 'http://bigbooksearch.com/books/'+q
+    br.open(url).read()
+    req = Request('http://bigbooksearch.com/query.php?SearchIndex=books&Keywords=%s&ItemPage=1'%q)
+    req.add_header('X-Requested-With', 'XMLHttpRequest')
+    req.add_header('Referer', url)
+    raw = br.open(req).read()
+    root = html.fromstring(raw.decode('utf-8'))
+    urls = [i.get('src') for i in root.xpath('//img[@src]')]
+    return urls
+
+class BigBookSearch(Source):
+
+    name = 'Big Book Search'
+    description = _('Downloads multiple book covers from Amazon. Useful to find alternate covers.')
+    capabilities = frozenset(['cover'])
+    config_help_message = _('Configure the Big Book Search plugin')
+    can_get_multiple_covers = True
+    options = (Option('max_covers', 'number', 5, _('Maximum number of covers to get'),
+                      _('The maximum number of covers to process from the search result')),
+    )
+    supports_gzip_transfer_encoding = True
+
+    def download_cover(self, log, result_queue, abort,
+            title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):
+        if not title:
+            return
+        br = self.browser
+        tokens = tuple(self.get_title_tokens(title)) + tuple(self.get_author_tokens(authors))
+        urls = get_urls(br, tokens)
+        self.download_multiple_covers(title, authors, urls, get_best_cover, timeout, result_queue, abort, log)
+
+def test():
+    from calibre import browser
+    import pprint
+    br = browser()
+    urls = get_urls(br, ['consider', 'phlebas', 'banks'])
+    pprint.pprint(urls)
+
+if __name__ == '__main__':
+    test()
@@ -18,12 +18,13 @@ from calibre.utils.magick.draw import Image, save_cover_data_to

 class Worker(Thread):

-    def __init__(self, plugin, abort, title, authors, identifiers, timeout, rq):
+    def __init__(self, plugin, abort, title, authors, identifiers, timeout, rq, get_best_cover=False):
         Thread.__init__(self)
         self.daemon = True

         self.plugin = plugin
         self.abort = abort
+        self.get_best_cover = get_best_cover
         self.buf = BytesIO()
         self.log = create_log(self.buf)
         self.title, self.authors, self.identifiers = (title, authors,

@@ -37,7 +38,7 @@ class Worker(Thread):
         try:
             if self.plugin.can_get_multiple_covers:
                 self.plugin.download_cover(self.log, self.rq, self.abort,
-                        title=self.title, authors=self.authors, get_best_cover=True,
+                        title=self.title, authors=self.authors, get_best_cover=self.get_best_cover,
                         identifiers=self.identifiers, timeout=self.timeout)
             else:
                 self.plugin.download_cover(self.log, self.rq, self.abort,

@@ -72,7 +73,7 @@ def process_result(log, result):
     return (plugin, width, height, fmt, data)

 def run_download(log, results, abort,
-        title=None, authors=None, identifiers={}, timeout=30):
+        title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):
     '''
     Run the cover download, putting results into the queue :param:`results`.

@@ -89,7 +90,7 @@ def run_download(log, results, abort,
     plugins = [p for p in metadata_plugins(['cover']) if p.is_configured()]

     rq = Queue()
-    workers = [Worker(p, abort, title, authors, identifiers, timeout, rq) for p
+    workers = [Worker(p, abort, title, authors, identifiers, timeout, rq, get_best_cover=get_best_cover) for p
             in plugins]
     for w in workers:
         w.start()

@@ -163,7 +164,7 @@ def download_cover(log,
     abort = Event()

     run_download(log, rq, abort, title=title, authors=authors,
-            identifiers=identifiers, timeout=timeout)
+            identifiers=identifiers, timeout=timeout, get_best_cover=True)

     results = []
@@ -106,6 +106,8 @@ class Worker(Thread): # {{{
         parts = pub.partition(':')[0::2]
         pub = parts[1] or parts[0]
         try:
+            if ', Ship Date:' in pub:
+                pub = pub.partition(', Ship Date:')[0]
             q = parse_only_date(pub, assume_utc=True)
             if q.year != UNDEFINED_DATE:
                 mi.pubdate = q
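A small sketch of the new guard with a hypothetical publisher/date string: anything after ', Ship Date:' is dropped before the value reaches parse_only_date().

pub = 'April 2013, Ship Date: May 2013'  # hypothetical value
if ', Ship Date:' in pub:
    pub = pub.partition(', Ship Date:')[0]
print(pub)  # 'April 2013'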
@@ -39,39 +39,11 @@ class GoogleImages(Source):
             title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):
         if not title:
             return
-        from threading import Thread
-        import time
         timeout = max(60, timeout) # Needs at least a minute
         title = ' '.join(self.get_title_tokens(title))
         author = ' '.join(self.get_author_tokens(authors))
         urls = self.get_image_urls(title, author, log, abort, timeout)
-        if not urls:
-            log('No images found in Google for, title: %r and authors: %r'%(title, author))
-            return
-        urls = urls[:self.prefs['max_covers']]
-        if get_best_cover:
-            urls = urls[:1]
-        workers = [Thread(target=self.download_image, args=(url, timeout, log, result_queue)) for url in urls]
-        for w in workers:
-            w.daemon = True
-            w.start()
-        alive = True
-        start_time = time.time()
-        while alive and not abort.is_set() and time.time() - start_time < timeout:
-            alive = False
-            for w in workers:
-                if w.is_alive():
-                    alive = True
-                    break
-            abort.wait(0.1)
-
-    def download_image(self, url, timeout, log, result_queue):
-        try:
-            ans = self.browser.open_novisit(url, timeout=timeout).read()
-            result_queue.put((self, ans))
-            log('Downloaded cover from: %s'%url)
-        except Exception:
-            self.log.exception('Failed to download cover from: %r'%url)
+        self.download_multiple_covers(title, authors, urls, get_best_cover, timeout, result_queue, abort, log)

     def get_image_urls(self, title, author, log, abort, timeout):
         from calibre.utils.ipc.simple_worker import fork_job, WorkerError
@@ -262,6 +262,35 @@ def from_links(container):
             toc.remove(child)
     return toc

+def find_text(node):
+    LIMIT = 200
+    pat = re.compile(r'\s+')
+    for child in node:
+        if isinstance(child, etree._Element):
+            text = xml2text(child).strip()
+            text = pat.sub(' ', text)
+            if len(text) < 1:
+                continue
+            if len(text) > LIMIT:
+                # Look for less text in a child of this node, recursively
+                ntext = find_text(child)
+                return ntext or (text[:LIMIT] + '...')
+            else:
+                return text
+
+def from_files(container):
+    toc = TOC()
+    for spinepath in container.spine_items:
+        name = container.abspath_to_name(spinepath)
+        root = container.parsed(name)
+        body = XPath('//h:body')(root)
+        if not body:
+            continue
+        text = find_text(body[0])
+        if text:
+            toc.add(text, name)
+    return toc
+
 def add_id(container, name, loc):
     root = container.parsed(name)
     body = root.xpath('//*[local-name()="body"]')[0]
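A minimal sketch of using the new from_files() helper outside the GUI, assuming a local EPUB path; get_container and commit_toc are the same names imported by the ToC editor later in this diff, and container.commit() is assumed to write the result back as in calibre's polish pipeline.

from calibre.ebooks.oeb.polish.container import get_container
from calibre.ebooks.oeb.polish.toc import from_files, commit_toc

container = get_container('/tmp/book.epub')  # hypothetical path
toc = from_files(container)                  # one entry per spine file with usable text
if len(toc):
    commit_toc(container, toc)
    container.commit()                       # assumed to persist the updated ToC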
@@ -333,8 +333,8 @@ class OEBReader(object):
         guide = self.oeb.guide
         manifest = self.oeb.manifest
         for elem in xpath(opf, '/o2:package/o2:guide/o2:reference'):
-            href = elem.get('href')
-            path = urlnormalize(urldefrag(href)[0])
+            ref_href = elem.get('href')
+            path = urlnormalize(urldefrag(ref_href)[0])
             if path not in manifest.hrefs:
                 corrected_href = None
                 for href in manifest.hrefs:

@@ -342,12 +342,12 @@ class OEBReader(object):
                         corrected_href = href
                         break
                 if corrected_href is None:
-                    self.logger.warn(u'Guide reference %r not found' % href)
+                    self.logger.warn(u'Guide reference %r not found' % ref_href)
                     continue
-                href = corrected_href
+                ref_href = corrected_href
             typ = elem.get('type')
             if typ not in guide:
-                guide.add(typ, elem.get('title'), href)
+                guide.add(typ, elem.get('title'), ref_href)

     def _find_ncx(self, opf):
         result = xpath(opf, '/o2:package/o2:spine/@toc')
@@ -180,5 +180,6 @@ class BorderParse:
         elif 'single' in border_style_list:
             new_border_dict[att] = 'single'
         else:
-            new_border_dict[att] = border_style_list[0]
+            if border_style_list:
+                new_border_dict[att] = border_style_list[0]
         return new_border_dict
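A trivial before/after sketch of the guard (att is a placeholder attribute name): indexing an empty style list raised IndexError, now the attribute is simply left unset.

border_style_list = []
new_border_dict = {}
att = 'border-style'  # placeholder
if border_style_list:  # the old code did border_style_list[0] unconditionally
    new_border_dict[att] = border_style_list[0]
print(new_border_dict)  # {}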
@@ -88,9 +88,7 @@ class StoreAction(InterfaceAction):
         if row == None:
             error_dialog(self.gui, _('Cannot search'), _('No book selected'), show=True)
             return
-
-        query = 'author:"%s"' % self._get_author(row)
-        self.search(query)
+        self.search({ 'author': self._get_author(row) })

     def _get_title(self, row):
         title = ''

@@ -107,18 +105,14 @@ class StoreAction(InterfaceAction):
         if row == None:
             error_dialog(self.gui, _('Cannot search'), _('No book selected'), show=True)
             return
-
-        query = 'title:"%s"' % self._get_title(row)
-        self.search(query)
+        self.search({ 'title': self._get_title(row) })

     def search_author_title(self):
         row = self._get_selected_row()
         if row == None:
             error_dialog(self.gui, _('Cannot search'), _('No book selected'), show=True)
             return
-
-        query = 'author:"%s" title:"%s"' % (self._get_author(row), self._get_title(row))
-        self.search(query)
+        self.search({ 'author': self._get_author(row), 'title': self._get_title(row) })

     def choose(self):
         from calibre.gui2.store.config.chooser.chooser_dialog import StoreChooserDialog
@@ -62,16 +62,20 @@ class SearchDialog(QDialog, Ui_Dialog):
         self.setup_store_checks()

         # Set the search query
+        if isinstance(query, (str, unicode)):
+            self.search_edit.setText(query)
+        elif isinstance(query, dict):
+            if 'author' in query:
+                self.search_author.setText(query['author'])
+            if 'title' in query:
+                self.search_title.setText(query['title'])
         # Title
-        self.search_title.setText(query)
         self.search_title.setSizeAdjustPolicy(QComboBox.AdjustToMinimumContentsLengthWithIcon)
         self.search_title.setMinimumContentsLength(25)
         # Author
-        self.search_author.setText(query)
         self.search_author.setSizeAdjustPolicy(QComboBox.AdjustToMinimumContentsLengthWithIcon)
         self.search_author.setMinimumContentsLength(25)
         # Keyword
-        self.search_edit.setText(query)
         self.search_edit.setSizeAdjustPolicy(QComboBox.AdjustToMinimumContentsLengthWithIcon)
         self.search_edit.setMinimumContentsLength(25)

@@ -408,7 +412,7 @@ class SearchDialog(QDialog, Ui_Dialog):
         self.save_state()

     def exec_(self):
-        if unicode(self.search_edit.text()).strip():
+        if unicode(self.search_edit.text()).strip() or unicode(self.search_title.text()).strip() or unicode(self.search_author.text()).strip():
             self.do_search()
         return QDialog.exec_(self)
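For reference, the two query shapes the dialog now accepts (the branches shown in the hunk above): a plain string goes into the keyword field, a dict fills the dedicated author and title fields.

query = 'dune frank herbert'                          # keyword search, as before
query = {'author': 'Frank Herbert', 'title': 'Dune'}  # as the store actions now build it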
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-

 from __future__ import (unicode_literals, division, absolute_import, print_function)
-store_version = 2 # Needed for dynamic plugin loading
+store_version = 3 # Needed for dynamic plugin loading

 __license__ = 'GPL 3'
 __copyright__ = '2011-2013, Tomasz Długosz <tomek3d@gmail.com>'

@@ -67,7 +67,7 @@ class NextoStore(BasicStoreConfig, StorePlugin):

                 cover_url = ''.join(data.xpath('.//img[@class="cover"]/@src'))
                 cover_url = re.sub(r'%2F', '/', cover_url)
-                cover_url = re.sub(r'\widthMax=120&heightMax=200', 'widthMax=64&heightMax=64', cover_url)
+                cover_url = re.sub(r'widthMax=120&heightMax=200', 'widthMax=64&heightMax=64', cover_url)
                 title = ''.join(data.xpath('.//a[@class="title"]/text()'))
                 title = re.sub(r' - ebook$', '', title)
                 formats = ', '.join(data.xpath('.//ul[@class="formats_available"]/li//b/text()'))

@@ -82,7 +82,7 @@ class NextoStore(BasicStoreConfig, StorePlugin):
                 counter -= 1

                 s = SearchResult()
-                s.cover_url = 'http://www.nexto.pl' + cover_url
+                s.cover_url = cover_url if cover_url[:4] == 'http' else 'http://www.nexto.pl' + cover_url
                 s.title = title.strip()
                 s.author = author.strip()
                 s.price = price
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-

 from __future__ import (unicode_literals, division, absolute_import, print_function)
-store_version = 2 # Needed for dynamic plugin loading
+store_version = 3 # Needed for dynamic plugin loading

 __license__ = 'GPL 3'
 __copyright__ = '2011-2013, Tomasz Długosz <tomek3d@gmail.com>'

@@ -41,7 +41,7 @@ class VirtualoStore(BasicStoreConfig, StorePlugin):
         url = 'http://virtualo.pl/?q=' + urllib.quote(query) + '&f=format_id:4,6,3'

         br = browser()
-        no_drm_pattern = re.compile("Znak wodny")
+        no_drm_pattern = re.compile(r'Znak wodny|Brak')

         counter = max_results
         with closing(br.open(url, timeout=timeout)) as f:

@@ -58,8 +58,8 @@ class VirtualoStore(BasicStoreConfig, StorePlugin):
                 cover_url = ''.join(data.xpath('.//div[@class="list_middle_left"]//a//img/@src'))
                 title = ''.join(data.xpath('.//div[@class="list_title list_text_left"]/a/text()'))
                 author = ', '.join(data.xpath('.//div[@class="list_authors list_text_left"]/a/text()'))
-                formats = [ form.split('_')[-1].replace('.png', '') for form in data.xpath('.//div[@style="width:55%;float:left;text-align:left;height:18px;"]//a/img/@src')]
-                nodrm = no_drm_pattern.search(''.join(data.xpath('.//div[@style="width:45%;float:right;text-align:right;height:18px;"]/div/div/text()')))
+                formats = [ form.split('_')[-1].replace('.png', '') for form in data.xpath('.//div[@style="width:55%;float:left;text-align:left;height:18px;"]//a/span/img/@src')]
+                nodrm = no_drm_pattern.search(''.join(data.xpath('.//div[@style="width:45%;float:right;text-align:right;height:18px;"]//span[@class="prompt_preview"]/text()')))

                 counter -= 1

@@ -70,6 +70,6 @@ class VirtualoStore(BasicStoreConfig, StorePlugin):
                 s.price = price + ' zł'
                 s.detail_item = 'http://virtualo.pl' + id.strip().split('http://')[0]
                 s.formats = ', '.join(formats).upper()
-                s.drm = SearchResult.DRM_UNLOCKED if nodrm else SearchResult.DRM_UNKNOWN
+                s.drm = SearchResult.DRM_UNLOCKED if nodrm else SearchResult.DRM_LOCKED
                 yield s
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-

 from __future__ import (unicode_literals, division, absolute_import, print_function)
-store_version = 1 # Needed for dynamic plugin loading
+store_version = 2 # Needed for dynamic plugin loading

 __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'

@@ -24,8 +24,8 @@ from calibre.gui2.store.web_store_dialog import WebStoreDialog
 class WaterstonesUKStore(BasicStoreConfig, StorePlugin):

     def open(self, parent=None, detail_item=None, external=False):
-        url = 'http://clkuk.tradedoubler.com/click?p=51196&a=1951604&g=19333484'
-        url_details = 'http://clkuk.tradedoubler.com/click?p(51196)a(1951604)g(16460516)url({0})'
+        url = 'http://www.awin1.com/awclick.php?mid=3787&id=120917'
+        url_details = 'http://www.awin1.com/cread.php?awinmid=3787&awinaffid=120917&clickref=&p={0}'

         if external or self.config.get('open_external', False):
             if detail_item:
@@ -18,7 +18,7 @@ from PyQt4.Qt import (QPushButton, QFrame, QVariant, QMenu, QInputDialog,

 from calibre.ebooks.oeb.polish.container import get_container, AZW3Container
 from calibre.ebooks.oeb.polish.toc import (
-    get_toc, add_id, TOC, commit_toc, from_xpaths, from_links)
+    get_toc, add_id, TOC, commit_toc, from_xpaths, from_links, from_files)
 from calibre.gui2 import Application, error_dialog, gprefs
 from calibre.gui2.progress_indicator import ProgressIndicator
 from calibre.gui2.toc.location import ItemEdit

@@ -126,6 +126,7 @@ class ItemView(QFrame): # {{{
     go_to_root = pyqtSignal()
     create_from_xpath = pyqtSignal(object)
     create_from_links = pyqtSignal()
+    create_from_files = pyqtSignal()
     flatten_toc = pyqtSignal()

     def __init__(self, parent):

@@ -183,6 +184,15 @@ class ItemView(QFrame): # {{{
             )))
         l.addWidget(b)

+        self.cfb = b = QPushButton(_('Generate ToC from &files'))
+        b.clicked.connect(self.create_from_files)
+        b.setToolTip(textwrap.fill(_(
+            'Generate a Table of Contents from individual files in the book.'
+            ' Each entry in the ToC will point to the start of the file, the'
+            ' text of the entry will be the "first line" of text from the file.'
+            )))
+        l.addWidget(b)
+
         self.xpb = b = QPushButton(_('Generate ToC from &XPath'))
         b.clicked.connect(self.create_from_user_xpath)
         b.setToolTip(textwrap.fill(_(

@@ -549,11 +559,11 @@ class TOCView(QWidget): # {{{
         b.setToolTip(_('Remove all selected entries'))
         b.clicked.connect(self.del_items)

-        self.left_button = b = QToolButton(self)
+        self.right_button = b = QToolButton(self)
         b.setIcon(QIcon(I('forward.png')))
         b.setIconSize(QSize(ICON_SIZE, ICON_SIZE))
         l.addWidget(b, 4, 3)
-        b.setToolTip(_('Unindent the current entry [Ctrl+Left]'))
+        b.setToolTip(_('Indent the current entry [Ctrl+Right]'))
         b.clicked.connect(self.tocw.move_right)

         self.down_button = b = QToolButton(self)

@@ -577,6 +587,7 @@ class TOCView(QWidget): # {{{
         i.add_new_item.connect(self.add_new_item)
         i.create_from_xpath.connect(self.create_from_xpath)
         i.create_from_links.connect(self.create_from_links)
+        i.create_from_files.connect(self.create_from_files)
         i.flatten_item.connect(self.flatten_item)
         i.flatten_toc.connect(self.flatten_toc)
         i.go_to_root.connect(self.go_to_root)

@@ -778,6 +789,14 @@ class TOCView(QWidget): # {{{
                 _('No links were found that could be added to the Table of Contents.'), show=True)
         self.insert_toc_fragment(toc)

+    def create_from_files(self):
+        toc = from_files(self.ebook)
+        if len(toc) == 0:
+            return error_dialog(self, _('No items found'),
+                _('No files were found that could be added to the Table of Contents.'), show=True)
+        self.insert_toc_fragment(toc)
+
     # }}}

 class TOCEditor(QDialog): # {{{

@@ -54,7 +54,7 @@ def get_parser(usage):
 def get_db(dbpath, options):
     global do_notify
     if options.library_path is not None:
-        dbpath = options.library_path
+        dbpath = os.path.expanduser(options.library_path)
     if dbpath is None:
         raise ValueError('No saved library path, either run the GUI or use the'
                 ' --with-library option')
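A one-line illustration of what the expanduser() change buys for the --with-library option: a leading ~ in the supplied path is now resolved before the database is opened.

import os
print(os.path.expanduser('~/Calibre Library'))  # e.g. /home/user/Calibre Library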
File diff suppressed because it is too large (this notice applied to a number of additional files in the commit).
Some files were not shown because too many files have changed in this diff.