mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 02:34:06 -04:00
Fix #2497 (Scientific American recipe not working)
This commit is contained in:
parent
3f40befc6d
commit
c410bb9ea9
@ -43,7 +43,7 @@ recipe_modules = ['recipe_' + r for r in (
|
||||
'seattle_times', 'scott_hanselman', 'coding_horror', 'twitchfilms',
|
||||
'stackoverflow', 'telepolis_artikel', 'zaobao', 'usnews',
|
||||
'straitstimes', 'index_hu', 'pcworld_hu', 'hrt', 'rts',
|
||||
'h1', 'h2', 'h3', 'phd_comics',
|
||||
'h1', 'h2', 'h3', 'phd_comics', 'woz_die',
|
||||
)]
|
||||
|
||||
import re, imp, inspect, time, os
|
||||
|
88
src/calibre/web/feeds/recipes/recipe_le_temps.py
Normal file
88
src/calibre/web/feeds/recipes/recipe_le_temps.py
Normal file
@ -0,0 +1,88 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class LeTemps(BasicNewsRecipe):
    """Recipe that downloads the RSS feeds published by letemps.ch."""

    title = u'Le Temps'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True

    # All selectors live in ONE list: assigning ``remove_tags`` several
    # times in a class body just rebinds the name, so only the last
    # assignment would take effect and the earlier selectors would be lost.
    remove_tags = [
        dict(name='div', attrs={'id':'footer'}),
        dict(name='div', attrs={'class':'box links'}),
        dict(name='script'),
    ]

    # One rule per line. Property names must never be wrapped across lines
    # ("font-" / "weight"): whitespace inside a property name makes the
    # declaration invalid CSS.
    extra_css = '''
        .heading {font-size: 13px; line-height: 15px; margin: 20px 0;}
        h2 {font-size: 24px; line-height: 25px; margin-bottom: 14px;}
        .author {font-size: 11px; margin: 0 0 5px 0;}
        .lead {font-weight: 700; margin: 10px 0;}
        p {margin: 0 0 10px 0;}
        '''

    # (feed title, feed URL) pairs.
    feeds = [
        ('Actualité',
         'http://www.letemps.ch/rss/site/'),
        ('Monde',
         'http://www.letemps.ch/rss/site/actualite/monde'),
        ('Suisse & Régions',
         'http://www.letemps.ch/rss/site/actualite/suisse_regions'),
        ('Sciences & Environnement',
         'http://www.letemps.ch/rss/site/actualite/sciences_environnement'),
        ('Société',
         'http://www.letemps.ch/rss/site/actualite/societe'),
        ('Economie & Finance',
         'http://www.letemps.ch/rss/site/economie_finance'),
        ('Economie & Finance - Finance',
         'http://www.letemps.ch/rss/site/economie_finance/finance'),
        ('Economie & Finance - Fonds de placement',
         'http://www.letemps.ch/rss/site/economie_finance/fonds_placement'),
        ('Economie & Finance - Carrières',
         'http://www.letemps.ch/rss/site/economie_finance/carrieres'),
        ('Culture',
         'http://www.letemps.ch/rss/site/culture'),
        ('Culture - Cinéma',
         'http://www.letemps.ch/rss/site/culture/cinema'),
        ('Culture - Musiques',
         'http://www.letemps.ch/rss/site/culture/musiques'),
        ('Culture - Scènes',
         'http://www.letemps.ch/rss/site/culture/scenes'),
        ('Culture - Arts plastiques',
         'http://www.letemps.ch/rss/site/culture/arts_plastiques'),
        ('Livres',
         'http://www.letemps.ch/rss/site/culture/livres'),
        ('Opinions',
         'http://www.letemps.ch/rss/site/opinions'),
        ('Opinions - Editoriaux',
         'http://www.letemps.ch/rss/site/opinions/editoriaux'),
        ('Opinions - Invités',
         'http://www.letemps.ch/rss/site/opinions/invites'),
        ('Opinions - Chroniques',
         'http://www.letemps.ch/rss/site/opinions/chroniques'),
        ('LifeStyle',
         'http://www.letemps.ch/rss/site/lifestyle'),
        ('LifeStyle - Luxe',
         'http://www.letemps.ch/rss/site/lifestyle/luxe'),
        ('LifeStyle - Horlogerie & Joaillerie',
         'http://www.letemps.ch/rss/site/lifestyle/horlogerie_joaillerie'),
        ('LifeStyle - Design',
         'http://www.letemps.ch/rss/site/lifestyle/design'),
        ('LifeStyle - Voyages',
         'http://www.letemps.ch/rss/site/lifestyle/voyages'),
        ('LifeStyle - Gastronomie',
         'http://www.letemps.ch/rss/site/lifestyle/gastronomie'),
        ('LifeStyle - Architecture & Immobilier',
         'http://www.letemps.ch/rss/site/lifestyle/architecture_immobilier'),
        ('LifeStyle - Automobile',
         'http://www.letemps.ch/rss/site/lifestyle/automobile'),
        ('Sports',
         'http://www.letemps.ch/rss/site/actualite/sports'),
    ]

    def print_version(self, url):
        """Return *url* with every 'Page' replaced by 'Facet/print'."""
        return url.replace('Page', 'Facet/print')
|
||||
|
||||
|
@ -7,7 +7,6 @@ __docformat__ = 'restructuredtext en'
|
||||
sciam.com
|
||||
'''
|
||||
import re
|
||||
from lxml import html
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class ScientificAmerican(BasicNewsRecipe):
|
||||
@ -20,101 +19,70 @@ class ScientificAmerican(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
remove_tags_before = dict(name='div', attrs={'class':'headline'})
|
||||
remove_tags_after = dict(id='article')
|
||||
remove_tags_after = dict(id=['article'])
|
||||
remove_tags = [
|
||||
dict(id=['sharetools', 'reddit']),
|
||||
dict(name='script'),
|
||||
{'class':['float_left', 'atools']},
|
||||
{"class": re.compile(r'also-in-this')}
|
||||
]
|
||||
html2lrf_options = ['--base-font-size', '8']
|
||||
recursions = 1
|
||||
match_regexps = [r'article.cfm.id=\S+page=(2|3|4|5|6|7|8|9|10|11|12|13|14)']
|
||||
# feeds = [
|
||||
# (u'Latest News', u'http://rss.sciam.com/ScientificAmerican-News'),
|
||||
# (u'Global', u'http://rss.sciam.com/ScientificAmerican-Global'),
|
||||
# (u'Health', u'http://rss.sciam.com/sciam/health'),
|
||||
# (u'Space', u'http://rss.sciam.com/sciam/space'),
|
||||
# (u'Technology', u'http://rss.sciam.com/sciam/technology'),
|
||||
# (u'Biology', u'http://rss.sciam.com/sciam/biology'),
|
||||
# (u'Mind & Brain', u'http://rss.sciam.com/sciam/mind-and-brain'),
|
||||
# (u"What's Next", u'http://rss.sciam.com/sciam/whats-next'),
|
||||
# (u'Archeology and Paleontology', u'http://www.sciam.com/page.cfm?section=rsscategory&alias=archaeology-and-paleontology'),
|
||||
# (u'Physics', u'http://www.sciam.com/page.cfm?section=rsscategory&alias=physics'),
|
||||
# (u'Math', u'http://rss.sciam.com/sciam/math'),
|
||||
# (u'History of Science', u'http://www.sciam.com/page.cfm?section=rsscategory&alias=history-of-science'),
|
||||
# (u'Chemistry', u'http://rss.sciam.com/sciam/chemistry'),
|
||||
# (u'Mind Matters', u'http://rss.sciam.com/ScientificAmerican-MindBlog')
|
||||
# ]
|
||||
#
|
||||
match_regexps = [r'article.cfm.id=\S+page=(2|3|4|5|6|7|8|9|10|11|12|13|14|15)']
|
||||
|
||||
def parse_index(self):
|
||||
src = self.browser.open('http://www.sciam.com/sciammag/').read()
|
||||
root = html.fromstring(src)
|
||||
self.cover_url = root.xpath('//img[re:match(@src, "cover_")]',
|
||||
namespaces={'re':'http://exslt.org/regular-expressions'}
|
||||
)[0].get('src')
|
||||
self.timefmt = ' [%s]'%(root.xpath('//div[@id = "magazine-month"]')[0].text)
|
||||
feeds = []
|
||||
features = []
|
||||
for a in root.xpath('//a[@href and @title = "Feature"]'):
|
||||
if not a.text.strip():
|
||||
continue
|
||||
soup = self.index_to_soup('http://www.scientificamerican.com/sciammag/')
|
||||
month = soup.find(id='magazine-month')
|
||||
self.timefmt = ' [%s]'%(self.tag_to_string(month))
|
||||
img = soup.find('img', alt='Scientific American Magazine', src=True)
|
||||
if img is not None:
|
||||
self.cover_url = img['src']
|
||||
features, feeds = [], []
|
||||
for p in soup.find(id='magazine-info').findAll('p') + \
|
||||
soup.find(id='magazine-info-more').findAll('p'):
|
||||
all_as = p.findAll('a', href=True)
|
||||
a = all_as[0]
|
||||
if a is None: continue
|
||||
desc = ''
|
||||
for s in p.find('span', attrs={'class':'sub'}):
|
||||
desc += self.tag_to_string(s)
|
||||
|
||||
article = {
|
||||
'url' : a.get('href'),
|
||||
'title' : u''.join(a.xpath('./text()')),
|
||||
'title' : self.tag_to_string(all_as[-1]),
|
||||
'date' : '',
|
||||
'description' : '',
|
||||
'description' : desc,
|
||||
}
|
||||
for s in a.itersiblings('span'):
|
||||
if s.get('class', '') == 'sub':
|
||||
article['description'] += u''.join(s.xpath('./text()')) + ' '
|
||||
features.append(article)
|
||||
if features:
|
||||
feeds.append(('Features', features))
|
||||
|
||||
departments = []
|
||||
for a in root.xpath('//a[@href and @class="title"]'):
|
||||
txt = u''.join(a.xpath('./text()')).strip()
|
||||
if not txt:
|
||||
section = []
|
||||
found = []
|
||||
title = None
|
||||
for x in soup.find(id='magazine-main_col1').findAll(['div', 'a']):
|
||||
if x.name == 'div':
|
||||
if section:
|
||||
feeds.append((title, section))
|
||||
title = self.tag_to_string(x)
|
||||
section = []
|
||||
else:
|
||||
if title is None or not a.get('href', False) or a.get('href', None) in found:
|
||||
continue
|
||||
article = {
|
||||
'url' : a.get('href'),
|
||||
'title' : txt,
|
||||
'date' : '',
|
||||
'description' : '',
|
||||
'url' : x['href'],
|
||||
'title' : self.tag_to_string(x),
|
||||
'date': '',
|
||||
'description': '',
|
||||
}
|
||||
p = a.getparent()
|
||||
p.remove(a)
|
||||
article['description'] = u''.join(p.xpath('./text()'))
|
||||
departments.append(article)
|
||||
section.append(article)
|
||||
if section:
|
||||
feeds.append((title, section))
|
||||
|
||||
feeds.append(('Departments', departments))
|
||||
opinion = []
|
||||
for a in root.xpath('//div[@id = "opinion"]//a[@href]'):
|
||||
txt = u''.join(a.xpath('./text()')).strip()
|
||||
if not txt:
|
||||
continue
|
||||
article = {
|
||||
'url' : a.get('href'),
|
||||
'title' : txt,
|
||||
'date' : '',
|
||||
'description' : '',
|
||||
}
|
||||
opinion.append(article)
|
||||
feeds.append(('Opinion', opinion))
|
||||
|
||||
ontheweb = []
|
||||
for a in root.xpath('//div[@id = "ontheweb"]//a[@href]'):
|
||||
txt = u''.join(a.xpath('./text()')).strip()
|
||||
if not txt:
|
||||
continue
|
||||
article = {
|
||||
'url' : a.get('href'),
|
||||
'title' : txt,
|
||||
'date' : '',
|
||||
'description' : '',
|
||||
}
|
||||
ontheweb.append(article)
|
||||
feeds.append(('On the web', ontheweb))
|
||||
articles = []
|
||||
for a in soup.find(id='opinion').findAll('a', href=True):
|
||||
articles.append({'url':a['href'], 'title':self.tag_to_string(a),
|
||||
'description':'', 'date':''})
|
||||
feeds.append(('Opinion', articles))
|
||||
|
||||
return feeds
|
||||
|
||||
|
46
src/calibre/web/feeds/recipes/recipe_woz_die.py
Normal file
46
src/calibre/web/feeds/recipes/recipe_woz_die.py
Normal file
@ -0,0 +1,46 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class WozDie(BasicNewsRecipe):
    """Recipe that downloads the headlines RSS feed published by woz.ch."""

    title = u'WOZ Die Wochenzeitung'
    oldest_article = 7
    max_articles_per_feed = 100
    language = _('German')
    no_stylesheets = True

    # All selectors live in ONE list: assigning ``remove_tags`` several
    # times in a class body just rebinds the name, so only the last
    # assignment would take effect and the earlier selectors would be lost.
    remove_tags = [
        dict(name='p', attrs={'class':'arrow_top'}),
        dict(name='p', attrs={'class':'bottom_right'}),
        dict(name='script'),
    ]

    # One rule per line. Property names must never be wrapped across lines
    # ("padding-" / "bottom"): whitespace inside a property name makes the
    # declaration invalid CSS.
    extra_css = '''
        #print_titel{vertical-align: bottom; text-align: left; color: #666666; background-color: white; padding-top: 30px; padding-bottom: 10px; border-bottom: 1px solid #999999;}
        #title{text-align: left; font-size: large; font-weight: 600; padding-top: 0px; padding-bottom: 6px;}
        h3 {text-align: left; font-size: large; font-weight: 600; padding-top: 0px; padding-bottom: 6px;}
        #lead{font-weight: 600; padding-bottom: 6px;}
        h2{font-weight: 600; padding-bottom: 6px;}
        #author{color: #666666; padding-top: 0px; padding-bottom: 0px;}
        h4{color: #666666; padding-top: 0px; padding-bottom: 0px;}
        #author2 {color: #666666; padding-top: 0px; padding-bottom: 0px;}
        .dotted_line {padding-top: 0px; margin-bottom: 18px; border-bottom: 1px dotted #666666;}
        .intro{margin: 0 auto; font-weight: 600; padding-bottom: 18px;}
        h5{margin: 0 auto; font-weight: 600; padding-bottom: 18px;}
        .intro2{margin: 0 auto; font-weight: 600;}
        .text{padding-bottom: 18px;}
        .subtitle{margin: 0 auto; font-weight: 600; padding-bottom: 10px;}
        .articletitle{margin: 0 auto; font-weight: 600; padding-bottom: 10px;}
        #content_infobox{margin-top: 20px; margin-left: 0px; margin-right: 0px; margin-bottom: 10px; text-align: left; border-bottom: 1px solid #999999;}
        .content_infobox_titel{padding-top: 6px; padding-bottom: 8px; padding-left: 8px; padding-right: 8px; font-weight: 600; border-top: 1px solid #999999; border-bottom: 1px dotted #999999;}
        .content_infobox_text{padding-top: 6px; padding-bottom: 12px; padding-left: 8px; padding-right: 8px;}
        .box_gray{padding-top: 4px; padding-left: 7px; padding-right: 7px; padding-bottom: 4px;}
        .box_white {padding-top: 4px; padding-left: 7px; padding-right: 7px; padding-bottom: 4px;}
        .content_infobox_mehr{margin-top: 20px; margin-left: 0px; margin-right: 0px; margin-bottom: 10px; text-align: left; width: 600px; border-bottom: 1px solid #999999;}
        '''

    # (feed title, feed URL) pairs.
    feeds = [
        ('WOZ Die Wochenzeitung - Headlines',
         'http://www.woz.ch/inhalt/headlinesRSS.php'),
    ]

    def print_version(self, url):
        """Return *url* with every 'rss/' replaced by 'print_'."""
        return url.replace('rss/', 'print_')
|
||||
|
Loading…
x
Reference in New Issue
Block a user