Added recipes: NSPM, NSPM in English, The Market Ticker, Tom's Hardware, Spiegel International (in English), Pescanik

Updated recipes: Vreme, B92, Politika, Novosti, Jutarnji, La Nacion, Clarin, Blic, Danas
This commit is contained in:
Kovid Goyal 2008-12-31 08:23:15 -08:00
parent b1eb16b876
commit fad5109de6
24 changed files with 326 additions and 59 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 330 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 318 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1016 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1016 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 811 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 607 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 992 B

View File

@ -19,7 +19,8 @@ recipe_modules = [
'clarin', 'financial_times', 'heise', 'le_monde', 'harpers', 'science_aas', 'clarin', 'financial_times', 'heise', 'le_monde', 'harpers', 'science_aas',
'science_news', 'the_nation', 'lrb', 'harpers_full', 'liberation', 'science_news', 'the_nation', 'lrb', 'harpers_full', 'liberation',
'linux_magazine', 'telegraph_uk', 'utne', 'sciencedaily', 'forbes', 'linux_magazine', 'telegraph_uk', 'utne', 'sciencedaily', 'forbes',
'time_magazine', 'endgadget', 'fudzilla', 'time_magazine', 'endgadget', 'fudzilla', 'nspm_int', 'nspm', 'pescanik',
'spiegel_int', 'themarketticker', 'tomshardware',
] ]
import re, imp, inspect, time, os import re, imp, inspect, time, os

View File

@ -5,6 +5,7 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
''' '''
b92.net b92.net
''' '''
import string,re import string,re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
@ -16,25 +17,29 @@ class B92(BasicNewsRecipe):
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
cover_url = 'http://static.b92.net/images/fp/logo.gif'
keep_only_tags = [ dict(name='div', attrs={'class':'sama_vest'}) ]
html2lrf_options = [
'--comment', description
, '--base-font-size', '10'
, '--category', 'news, Serbia'
, '--publisher', 'B92'
]
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
remove_tags_after = dict(name='div', attrs={'class':'gas'})
remove_tags = [
dict(name='div' , attrs={'class':'interaction clearfix' })
,dict(name='div' , attrs={'class':'gas' })
,dict(name='ul' , attrs={'class':'comment-nav' })
,dict(name='table', attrs={'class':'pages-navigation-form'})
]
feeds = [ feeds = [
(u'Vesti' , u'http://www.b92.net/info/rss/vesti.xml' ) (u'Vesti', u'http://www.b92.net/info/rss/vesti.xml')
,(u'Kultura' , u'http://www.b92.net/info/rss/kultura.xml' ) ,(u'Biz' , u'http://www.b92.net/info/rss/biz.xml' )
,(u'Automobili', u'http://www.b92.net/info/rss/automobili.xml') ,(u'Zivot', u'http://www.b92.net/info/rss/zivot.xml')
,(u'Zivot' , u'http://www.b92.net/info/rss/zivot.xml' ) ,(u'Sport', u'http://www.b92.net/info/rss/sport.xml')
,(u'Tehnopolis', u'http://www.b92.net/info/rss/tehnopolis.xml')
,(u'Biz' , u'http://www.b92.net/info/rss/biz.xml' )
] ]
def print_version(self, url): def print_version(self, url):
return url + '&version=print' main, sep, article_id = url.partition('nav_id=')
rmain, rsep, rrest = main.partition('.php?')
mrmain , rsepp, nnt = rmain.rpartition('/')
mprmain, rrsep, news_type = mrmain.rpartition('/')
nurl = 'http://www.b92.net/mobilni/' + news_type + '/index.php?nav_id=' + article_id
brbiz, biz, bizrest = rmain.partition('/biz/')
if biz:
nurl = 'http://www.b92.net/mobilni/biz/index.php?nav_id=' + article_id
return nurl

View File

@ -11,12 +11,18 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Blic(BasicNewsRecipe): class Blic(BasicNewsRecipe):
title = u'Blic' title = u'Blic'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'Vesti' description = 'Blic.rs online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja'
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
timefmt = ' [%A, %d %B, %Y]' cover_url = 'http://www.blic.rs/resources/images/header_back_tile.png'
html2lrf_options = [
'--comment', description
, '--base-font-size', '10'
, '--category', 'news, Serbia'
, '--publisher', 'Blic'
]
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

View File

@ -9,14 +9,19 @@ clarin.com
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Clarin(BasicNewsRecipe): class Clarin(BasicNewsRecipe):
title = u'Clarin' title = 'Clarin'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'Noticias de Argentina y mundo' description = 'Noticias de Argentina y mundo'
oldest_article = 2 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
use_embedded_content = False use_embedded_content = False
simultaneous_downloads = 1 cover_url = 'http://www.clarin.com/shared/v10/img/Hd/lg_Clarin.gif'
delay = 1 html2lrf_options = [
'--comment', description
, '--base-font-size', '10'
, '--category', 'news, Argentina'
, '--publisher', 'Grupo Clarin'
]
remove_tags = [ remove_tags = [
dict(name='a' , attrs={'class':'Imp' }) dict(name='a' , attrs={'class':'Imp' })

View File

@ -9,26 +9,33 @@ import string,re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Danas(BasicNewsRecipe): class Danas(BasicNewsRecipe):
title = u'Danas' title = 'Danas'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'Vesti' description = 'Dnevne novine sa vestima iz sveta, politike, ekonomije, kulture, sporta, Beograda, Novog Sada i cele Srbije.'
oldest_article = 7 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
timefmt = ' [%A, %d %B, %Y]' cover_url = 'http://www.danas.rs/images/basic/danas.gif'
html2lrf_options = [
'--comment', description
, '--base-font-size', '10'
, '--category', 'news, Serbia'
, '--publisher', 'Danas'
]
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
keep_only_tags = [ dict(name='div', attrs={'id':'left'}) ] keep_only_tags = [ dict(name='div', attrs={'id':'left'}) ]
remove_tags_after = dict(name='div', attrs={'id':'comments'})
remove_tags = [ remove_tags = [
dict(name='div', attrs={'class':'width_1_4'}) dict(name='div', attrs={'class':'width_1_4' })
,dict(name='div', attrs={'class':'metaClanka'}) ,dict(name='div', attrs={'class':'metaClanka' })
,dict(name='div', attrs={'id':'comments'}) ,dict(name='div', attrs={'id':'comments' })
,dict(name='div', attrs={'class':'baner'}) ,dict(name='div', attrs={'class':'baner' })
,dict(name='div', attrs={'class':'slikaClanka'})
] ]
feeds = [(u'Vesti', u'http://www.danas.rs/rss/rss.asp')]
def print_version(self, url):
feeds = [ (u'Vesti', u'http://www.danas.rs/rss/rss.asp')] return url + '&action=print'

View File

@ -11,18 +11,30 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup from calibre.ebooks.BeautifulSoup import BeautifulSoup
class Jutarnji(BasicNewsRecipe): class Jutarnji(BasicNewsRecipe):
title = u'Jutarnji' title = 'Jutarnji'
__author__ = u'Darko Miletic' __author__ = 'Darko Miletic'
description = u'Hrvatski portal' description = 'Online izdanje Jutarnjeg lista'
oldest_article = 7 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
simultaneous_downloads = 1
delay = 1
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
encoding = 'cp1250' encoding = 'cp1250'
cover_url = 'http://www.jutarnji.hr/EPHResources/Images/2008/06/05/jhrlogo.png'
html2lrf_options = [
'--comment', description
, '--base-font-size', '10'
, '--category', 'news, Croatia'
, '--publisher', 'Europapress holding d.o.o.'
]
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
remove_tags = [dict(name='embed')] remove_tags = [
dict(name='embed')
,dict(name='a', attrs={'class':'a11'})
,dict(name='hr')
]
feeds = [ feeds = [
(u'Naslovnica' , u'http://www.jutarnji.hr/rss' ) (u'Naslovnica' , u'http://www.jutarnji.hr/rss' )
@ -37,10 +49,11 @@ class Jutarnji(BasicNewsRecipe):
def print_version(self, url): def print_version(self, url):
main, split, rest = url.partition('.jl') main, split, rest = url.partition('.jl')
rmain, rsplit, rrest = main.rpartition(',') rmain, rsplit, rrest = main.rpartition(',')
return 'http://www.jutarnji.hr/ispis_clanka.jl?artid=' + rrest return u'http://www.jutarnji.hr/ispis_clanka.jl?artid=' + rrest
def preprocess_html(self, soup): def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">' mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
soup.head.insert(0,mtag) soup.head.insert(0,mtag)
soup.prettify()
return soup return soup

View File

@ -9,13 +9,20 @@ lanacion.com.ar
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Lanacion(BasicNewsRecipe): class Lanacion(BasicNewsRecipe):
title = u'La Nacion' title = 'La Nacion'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'Noticias de Argentina y el resto del mundo' description = 'Informacion actualizada las 24 horas, con noticias de Argentina y del mundo - Informate ya!'
oldest_article = 7 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
html2lrf_options = [
'--comment', description
, '--base-font-size', '10'
, '--category', 'news, Argentina'
, '--publisher', 'La Nacion SA'
]
keep_only_tags = [dict(name='div', attrs={'class':'nota floatFix'})] keep_only_tags = [dict(name='div', attrs={'class':'nota floatFix'})]
remove_tags = [ remove_tags = [
@ -38,3 +45,12 @@ class Lanacion(BasicNewsRecipe):
,(u'Ciencia/Salud' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=498' ) ,(u'Ciencia/Salud' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=498' )
,(u'Revista' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=494' ) ,(u'Revista' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=494' )
] ]
def get_cover_url(self):
    """Return the cover image URL scraped from the La Nacion front page.

    The front page is fetched with ``self.index_to_soup`` and searched for
    an ``<img>`` whose class is ``logo``.  When one is found, its ``src``
    value is appended to the site root and returned; otherwise the result
    is ``None``.
    """
    site_root = 'http://www.lanacion.com.ar'
    logo_tag = self.index_to_soup(site_root).find('img', attrs={'class':'logo'})
    if not logo_tag:
        return None
    # NOTE(review): assumes ``src`` is a site-relative path -- confirm.
    return site_root + logo_tag['src']

View File

@ -9,14 +9,19 @@ import string,re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Novosti(BasicNewsRecipe): class Novosti(BasicNewsRecipe):
title = u'Vecernje Novosti' title = 'Vecernje Novosti'
__author__ = u'Darko Miletic' __author__ = 'Darko Miletic'
description = u'Vesti' description = 'novosti, vesti, politika, dosije, drustvo, ekonomija, hronika, reportaze, svet, kultura, sport, beograd, regioni, mozaik, feljton, intrvju, pjer, fudbal, kosarka, podvig, arhiva, komentari, kolumne, srbija, republika srpska,Vecernje novosti'
oldest_article = 7 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
timefmt = ' [%A, %d %B, %Y]' html2lrf_options = [
'--comment', description
, '--base-font-size', '10'
, '--category', 'news, Serbia'
, '--publisher', 'Novosti AD'
]
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

View File

@ -18,7 +18,13 @@ class Nspm(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
INDEX = 'http://www.nspm.rs/?alphabet=l' INDEX = 'http://www.nspm.rs/?alphabet=l'
timefmt = ' [%A, %d %B, %Y]' cover_url = 'http://nspm.rs/templates/jsn_epic_pro/images/logol.jpg'
html2lrf_options = [
'--comment', description
, '--base-font-size', '10'
, '--category', 'news, politics, Serbia'
, '--publisher', 'IIC NSPM'
]
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

View File

@ -0,0 +1,37 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
'''
nspm.rs/nspm-in-english
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Nspm_int(BasicNewsRecipe):
    """Recipe for the English-language section of NSPM (nspm.rs)."""

    # Descriptive metadata.
    title       = 'NSPM in English'
    __author__  = 'Darko Miletic'
    description = 'Magazine dedicated to political theory and sociological research'
    cover_url   = 'http://nspm.rs/templates/jsn_epic_pro/images/logol.jpg'

    # Download settings.
    oldest_article        = 20
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False

    # Page that get_browser() opens once before handing the browser back.
    INDEX = 'http://www.nspm.rs/?alphabet=l'

    # ``description`` must already be bound here: it is reused as the comment.
    html2lrf_options = [
        '--comment', description,
        '--base-font-size', '10',
        '--category', 'news, politics, Serbia, english',
        '--publisher', 'IIC NSPM',
    ]

    # Keep only the main article container and drop the comment widget.
    keep_only_tags = [{'name': 'div', 'attrs': {'id': 'jsn-mainbody'}}]
    remove_tags    = [{'name': 'div', 'attrs': {'id': 'yvComment'}}]

    feeds = [
        (u'NSPM in English', u'http://nspm.rs/nspm-in-english/feed/rss.html'),
    ]

    def get_browser(self):
        """Return the base-class browser after it has requested ``INDEX``."""
        # NOTE(review): get_browser is invoked on the class without an
        # instance; this relies on the base class accepting that call form --
        # verify against BasicNewsRecipe.
        browser = BasicNewsRecipe.get_browser()
        # NOTE(review): presumably the ``?alphabet=l`` request selects the
        # Latin-alphabet rendering for the rest of the session -- confirm.
        browser.open(self.INDEX)
        return browser

View File

@ -0,0 +1,35 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
'''
pescanik.net
'''
import string,re
from calibre.web.feeds.news import BasicNewsRecipe
class Pescanik(BasicNewsRecipe):
    """Recipe for Pescanik (pescanik.net)."""

    # Descriptive metadata.
    title       = 'Pescanik'
    __author__  = 'Darko Miletic'
    description = 'Pescanik'
    cover_url   = "http://pescanik.net/templates/ja_teline/images/logo.png"

    # Download settings.
    oldest_article        = 7
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False

    # Output-conversion options.
    html2lrf_options  = ['--base-font-size', '10']
    html2epub_options = 'base_font_size = "10pt"'

    # Every U+0110 in the downloaded markup is replaced with U+00D0.
    preprocess_regexps = [
        (re.compile(u'\u0110'), lambda match: u'\u00D0'),
    ]

    # Cut everything after the article separator and drop the button cells.
    remove_tags_after = {'name': 'div', 'attrs': {'class': 'article_seperator'}}
    remove_tags       = [{'name': 'td', 'attrs': {'class': 'buttonheading'}}]

    feeds = [
        (u'Pescanik Online', u'http://pescanik.net/index.php?option=com_rd_rss&id=12'),
    ]

    def print_version(self, url):
        """Map an article URL to its ``index2.php`` pop-up variant.

        ``index.php`` is swapped for ``index2.php`` and the
        ``&pop=1&page=0`` query suffix is appended.
        """
        popup_url = url.replace(
            'http://pescanik.net/index.php',
            'http://pescanik.net/index2.php',
        )
        return popup_url + '&pop=1&page=0'

View File

@ -9,25 +9,30 @@ import string,re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Politika(BasicNewsRecipe): class Politika(BasicNewsRecipe):
title = u'Politika Online' title = 'Politika Online'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'Najstariji dnevni list na Balkanu' description = 'Najstariji dnevni list na Balkanu'
oldest_article = 7 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
extra_css = '.content_center_border {text-align: left;}' extra_css = '.content_center_border {text-align: left;}'
use_embedded_content = False use_embedded_content = False
timefmt = ' [%A, %d %B, %Y]' cover_url = 'http://www.politika.rs:8080/images/politika.gif'
html2lrf_options = [
'--comment', description
, '--base-font-size', '10'
, '--category', 'news, Serbia'
, '--publisher', 'POLITIKA NOVINE I MAGAZINI d.o.o.'
]
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
remove_tags_before = dict(name='div', attrs={'class':'content_center_border'}) keep_only_tags = [ dict(name='div', attrs={'class':'contentcenter'}) ]
remove_tags_after = dict(name='div', attrs={'class':'datum_item_details'}) remove_tags_after = dict(name='div', attrs={'class':'datum_item_details'})
feeds = [ feeds = [
(u'Politika' , u'http://www.politika.rs/rubrike/Politika/index.1.lt.xml' ) (u'Politika' , u'http://www.politika.rs/rubrike/Politika/index.1.lt.xml' )
,(u'Svet' , u'http://www.politika.rs/rubrike/Svet/index.1.lt.xml' ) ,(u'Svet' , u'http://www.politika.rs/rubrike/Svet/index.1.lt.xml' )
,(u'Redakcijski komentari', u'http://www.politika.rs/rubrike/redakcijski-komentari/index.1.lt.xml')
,(u'Pogledi' , u'http://www.politika.rs/pogledi/index.lt.xml' ) ,(u'Pogledi' , u'http://www.politika.rs/pogledi/index.lt.xml' )
,(u'Pogledi sa strane' , u'http://www.politika.rs/rubrike/Pogledi-sa-strane/index.1.lt.xml' ) ,(u'Pogledi sa strane' , u'http://www.politika.rs/rubrike/Pogledi-sa-strane/index.1.lt.xml' )
,(u'Tema dana' , u'http://www.politika.rs/rubrike/tema-dana/index.1.lt.xml' ) ,(u'Tema dana' , u'http://www.politika.rs/rubrike/tema-dana/index.1.lt.xml' )

View File

@ -0,0 +1,34 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
'''
spiegel.de
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Spiegel_int(BasicNewsRecipe):
    """Recipe for Spiegel Online International (spiegel.de, in English)."""

    # Descriptive metadata.
    title       = u'Spiegel Online International'
    __author__  = 'Darko Miletic'
    description = "News and POV from Europe's largest newsmagazine"
    cover_url   = 'http://www.spiegel.de/static/sys/v8/headlines/spiegelonline.gif'

    # Download settings.
    oldest_article        = 7
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False

    # ``description`` must already be bound here: it is reused as the comment.
    html2lrf_options = [
        '--comment', description,
        '--base-font-size', '10',
        '--category', 'news, politics, Germany',
        '--publisher', 'SPIEGEL ONLINE GmbH',
    ]

    # Everything after the article body container is discarded.
    remove_tags_after = {'name': 'div', 'attrs': {'id': 'spArticleBody'}}

    feeds = [
        (u'Spiegel Online', u'http://www.spiegel.de/schlagzeilen/rss/0,5291,676,00.xml'),
    ]

    def print_version(self, url):
        """Build the print URL by prefixing ``druck-`` to one URL field.

        The URL is split at its last two commas; the field between them
        gets the ``druck-`` prefix and the three pieces are joined back
        together with commas.
        """
        leading, _, trailing = url.rpartition(',')
        prefix, _, article_field = leading.rpartition(',')
        return ','.join([prefix, 'druck-' + article_field, trailing])

View File

@ -0,0 +1,23 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
'''
market-ticker.denninger.net
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Themarketticker(BasicNewsRecipe):
    """Recipe for The Market Ticker blog (market-ticker.denninger.net)."""

    # Descriptive metadata.
    title       = 'The Market Ticker'
    __author__  = 'Darko Miletic'
    description = 'Commentary On The Capital Markets'

    # Download settings; article content is taken from the feed itself.
    oldest_article        = 7
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = True

    # ``description`` must already be bound here: it is reused as the comment.
    html2lrf_options = [
        '--comment', description,
        '--category', 'blog,news,finances',
        '--base-font-size', '10',
    ]

    feeds = [
        (u'Posts', u'http://market-ticker.denninger.net/feeds/index.rss2'),
    ]

View File

@ -0,0 +1,58 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
'''
tomshardware.com
'''
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.web.feeds.recipes import BasicNewsRecipe
class Tomshardware(BasicNewsRecipe):
    """Recipe for Tom's Hardware US (tomshardware.com).

    The recipe is flagged as needing a subscription; when both a username
    and a password are set, ``get_browser`` submits them through the site's
    login form before any page is fetched.
    """

    # Descriptive metadata.
    title       = "Tom's Hardware US"
    __author__  = 'Darko Miletic'
    description = 'Hardware reviews and News'
    cover_url   = 'http://img.bestofmedia.com/img/tomshardware/design/tomshardware.jpg'

    # Download settings.
    no_stylesheets     = True
    needs_subscription = True

    # Site root and the login page opened by get_browser().
    INDEX = 'http://www.tomshardware.com'
    LOGIN = 'http://www.tomshardware.com/membres/?r=%2Fus%2F#loginForm'

    # ``description`` must already be bound here: it is reused as the comment.
    html2lrf_options = [
        '--comment', description,
        '--category', 'hardware,news',
        '--base-font-size', '10',
    ]

    # Drop the page header and any embedded <object> elements.
    remove_tags = [
        dict(name='div', attrs={'id':'header'}),
        dict(name='object'),
    ]

    feeds = [
        (u'Latest Articles', u'http://www.tomshardware.com/feeds/atom/tom-s-hardware-us,18-2.xml'),
        (u'Latest News'    , u'http://www.tomshardware.com/feeds/atom/tom-s-hardware-us,18-1.xml'),
    ]

    def get_browser(self):
        """Return a browser, logged in when credentials are available.

        The login form (named ``connexion``) is filled in and submitted
        only when both ``self.username`` and ``self.password`` are set.
        """
        # NOTE(review): get_browser is invoked on the class without an
        # instance; this relies on the base class accepting that call form --
        # verify against BasicNewsRecipe.
        br = BasicNewsRecipe.get_browser()
        if self.username is not None and self.password is not None:
            br.open(self.LOGIN)
            br.select_form(name='connexion')
            br['login'] = self.username
            br['mdp'] = self.password
            br.submit()
        return br

    def print_version(self, url):
        """Return the print-page URL for an article URL.

        The article id is the text between the last comma and the trailing
        ``.html``.  URLs whose path (before that comma) contains
        ``/reviews/`` map to ``review_print.php``; all others map to
        ``news_print.php``.
        """
        without_ext = url.rpartition('.html')[0]
        path_part, _, article_id = without_ext.rpartition(',')
        if '/reviews/' in path_part:
            return 'http://www.tomshardware.com/review_print.php?p1=' + article_id
        return 'http://www.tomshardware.com/news_print.php?p1=' + article_id

    def preprocess_html(self, soup):
        """Remove the ``onload`` attribute from ``<body>`` and return the soup.

        A document without a ``<body>`` element is returned unchanged
        instead of raising on the attribute deletion.
        """
        body = soup.body
        # ``soup.body`` is None when the page has no <body>; deleting an
        # item on None would raise TypeError and abort the article.
        if body is not None:
            del body['onload']
        return soup

View File

@ -15,13 +15,16 @@ class Vreme(BasicNewsRecipe):
title = 'Vreme' title = 'Vreme'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'Politicki Nedeljnik Srbije' description = 'Politicki Nedeljnik Srbije'
timefmt = ' [%A, %d %B, %Y]'
no_stylesheets = True no_stylesheets = True
simultaneous_downloads = 1 needs_subscription = True
delay = 1
needs_subscription = True
INDEX = 'http://www.vreme.com' INDEX = 'http://www.vreme.com'
LOGIN = 'http://www.vreme.com/account/index.php' LOGIN = 'http://www.vreme.com/account/index.php'
html2lrf_options = [
'--comment', description
, '--base-font-size', '10'
, '--category', 'news, politics, Serbia'
, '--publisher', 'Vreme d.o.o.'
]
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -66,3 +69,11 @@ class Vreme(BasicNewsRecipe):
def print_version(self, url): def print_version(self, url):
return url + '&print=yes' return url + '&print=yes'
def get_cover_url(self):
    """Return the current issue's cover image URL, or ``None`` if not found.

    The page at ``self.INDEX`` is fetched with ``self.index_to_soup`` and
    searched for an ``<img>`` whose ``alt`` text is
    ``Naslovna strana broja``.  When one is found, its ``src`` value is
    appended to ``self.INDEX`` and returned.
    """
    front_page = self.index_to_soup(self.INDEX)
    cover_tag = front_page.find('img', attrs={'alt':'Naslovna strana broja'})
    if not cover_tag:
        return None
    # NOTE(review): assumes ``src`` is a site-relative path -- confirm.
    return self.INDEX + cover_tag['src']