mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Add profiles for USA Today and Jutarnji
This commit is contained in:
parent
c86e9afe5a
commit
f570901c62
@ -32,11 +32,15 @@ from libprs500.ebooks.lrf.web.profiles.reuters import Reuters
|
|||||||
from libprs500.ebooks.lrf.web.profiles.atlantic import Atlantic
|
from libprs500.ebooks.lrf.web.profiles.atlantic import Atlantic
|
||||||
from libprs500.ebooks.lrf.web.profiles.ap import AssociatedPress
|
from libprs500.ebooks.lrf.web.profiles.ap import AssociatedPress
|
||||||
from libprs500.ebooks.lrf.web.profiles.newyorker import NewYorker
|
from libprs500.ebooks.lrf.web.profiles.newyorker import NewYorker
|
||||||
|
from libprs500.ebooks.lrf.web.profiles.jutarnji import Jutarnji
|
||||||
|
from libprs500.ebooks.lrf.web.profiles.usatoday import USAToday
|
||||||
|
|
||||||
builtin_profiles = [Atlantic, AssociatedPress, Barrons, BBC,
|
builtin_profiles = [Atlantic, AssociatedPress, Barrons, BBC,
|
||||||
ChristianScienceMonitor, CNN, Dilbert, Economist, FazNet,
|
ChristianScienceMonitor, CNN, Dilbert, Economist, FazNet,
|
||||||
JerusalemPost, Newsweek, NewYorker, NewYorkReviewOfBooks, NYTimes,
|
JerusalemPost, Jutarnji, Newsweek, NewYorker,
|
||||||
Portfolio, Reuters, SpiegelOnline, WallStreetJournal, ZeitNachrichten,
|
NewYorkReviewOfBooks, NYTimes, USAToday,
|
||||||
|
Portfolio, Reuters, SpiegelOnline, WallStreetJournal,
|
||||||
|
ZeitNachrichten,
|
||||||
]
|
]
|
||||||
|
|
||||||
available_profiles = [i.__module__.rpartition('.')[2] for i in builtin_profiles]
|
available_profiles = [i.__module__.rpartition('.')[2] for i in builtin_profiles]
|
44
src/libprs500/ebooks/lrf/web/profiles/jutarnji.py
Normal file
44
src/libprs500/ebooks/lrf/web/profiles/jutarnji.py
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
'''
|
||||||
|
Profile to download Jutarnji.hr by Valloric
|
||||||
|
'''
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from libprs500.ebooks.lrf.web.profiles import DefaultProfile
|
||||||
|
|
||||||
|
class Jutarnji(DefaultProfile):
    # Feed profile for the jutarnji.hr news site.  Everything below overrides
    # settings or hooks inherited from DefaultProfile.

    title = 'Jutarnji'
    max_recursions = 2
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 20
    html_description = True
    no_stylesheets = True

    # Each entry pairs a compiled pattern with a callable that returns the
    # replacement text for a match.
    # NOTE(review): presumably DefaultProfile applies these to the downloaded
    # HTML before conversion -- confirm against the base class.
    preprocess_regexps = [
        # Collapse everything from <body up to the "vijestnaslov" span.
        (re.compile(r'<body.*?<span class="vijestnaslov">',
                    re.DOTALL | re.IGNORECASE),
         lambda m: '<body><span class="vijestnaslov">'),
        # Collapse whatever sits between a closing </div> and the next </td>.
        (re.compile(r'</div>.*?</td>', re.DOTALL | re.IGNORECASE),
         lambda m: '</div></td>'),
        # Cut everything from the "addComment" anchor to the end of the body.
        (re.compile(r'<a name="addComment.*?</body>',
                    re.DOTALL | re.IGNORECASE),
         lambda m: '</body>'),
        # Remove <br> tags.
        (re.compile(r'<br>', re.DOTALL | re.IGNORECASE),
         lambda m: ''),
    ]

    ## Getting the print version

    def print_version(self, url):
        # The print-page URL is built from the slice of `url` that runs from
        # nine characters before its end to three characters before its end.
        # NOTE(review): presumably that slice is the article id expected by
        # the `artid` query parameter -- verify against real feed URLs.
        end = len(url)
        article_id = url[end - 9:end - 3]
        return 'http://www.jutarnji.hr/ispis_clanka.jl?artid=' + article_id

    ## Comment out the feeds you don't want retrieved, or add any new RSS
    ## feed URLs here. Feeds are sorted alphabetically when converted to LRF.
    ## If you want one of these at the top, put a space in front of its name.

    def get_feeds(self):
        # Returns a list of (feed name, RSS URL) pairs.
        feeds = [
            (' Naslovnica', 'http://www.jutarnji.hr/rss'),
            ('Sport', 'http://www.jutarnji.hr/sport/rss'),
            ('Novac', 'http://www.jutarnji.hr/novac/rss'),
            ('Kultura i zivot', 'http://www.jutarnji.hr/kultura_i_zivot/rss'),
            ('Automoto', 'http://www.jutarnji.hr/auto_moto/rss'),
            ('Hi-Tech', 'http://www.jutarnji.hr/kultura_i_zivot/hi-tech/rss'),
            ('Dom i nekretnine', 'http://www.jutarnji.hr/nekretnine/rss'),
        ]
        return feeds
|
43
src/libprs500/ebooks/lrf/web/profiles/usatoday.py
Normal file
43
src/libprs500/ebooks/lrf/web/profiles/usatoday.py
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
'''
|
||||||
|
Profile to download USA Today by Valloric
|
||||||
|
'''
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from libprs500.ebooks.lrf.web.profiles import DefaultProfile
|
||||||
|
|
||||||
|
class USAToday(DefaultProfile):
    # Feed profile for USA Today.  Everything below overrides settings or
    # hooks inherited from DefaultProfile.

    title = 'USA Today'
    max_recursions = 2
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 20
    html_description = True
    # no_stylesheets is not overridden here (the assignment was commented
    # out), so the value inherited from DefaultProfile applies.
    #no_stylesheets = True

    # Each entry pairs a compiled pattern with a callable that returns the
    # replacement text for a match.
    # NOTE(review): presumably DefaultProfile applies these to the downloaded
    # HTML before conversion -- confirm against the base class.
    preprocess_regexps = [
        # Collapse everything from <BODY up to the article-start marker.
        (re.compile(r'<BODY.*?<!--Article Goes Here-->',
                    re.DOTALL | re.IGNORECASE),
         lambda m: '<BODY>'),
        # Collapse everything from the article-end marker to </BODY>.
        (re.compile(r'<!--Article End-->.*?</BODY>',
                    re.DOTALL | re.IGNORECASE),
         lambda m: '</BODY>'),
    ]

    ## Getting the print version

    def print_version(self, url):
        # The article URL is appended unchanged as the `url` query parameter
        # of the clickability "printThis" endpoint.
        print_service = 'http://www.printthis.clickability.com/pt/printThis?clickMap=printThis&fb=Y&url='
        return print_service + url

    ## Comment out the feeds you don't want retrieved, or add any new RSS
    ## feed URLs here. Feeds are sorted alphabetically when converted to LRF.
    ## If you want one of these at the top, put a space in front of its name.

    def get_feeds(self):
        # Returns a list of (feed name, RSS URL) pairs.
        feeds = [
            (' Top Headlines', 'http://rssfeeds.usatoday.com/usatoday-NewsTopStories'),
            ('Sport Headlines', 'http://rssfeeds.usatoday.com/UsatodaycomSports-TopStories'),
            ('Tech Headlines', 'http://rssfeeds.usatoday.com/usatoday-TechTopStories'),
            ('Travel Headlines', 'http://rssfeeds.usatoday.com/UsatodaycomTravel-TopStories'),
            ('Money Headlines', 'http://rssfeeds.usatoday.com/UsatodaycomMoney-TopStories'),
            ('Entertainment Headlines', 'http://rssfeeds.usatoday.com/usatoday-LifeTopStories'),
            ('Weather Headlines', 'http://rssfeeds.usatoday.com/usatoday-WeatherTopStories'),
            ('Most Popular', 'http://rssfeeds.usatoday.com/Usatoday-MostViewedArticles'),
        ]
        return feeds
|
Loading…
x
Reference in New Issue
Block a user