mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Add profiles for USA Today and Jutarnji
This commit is contained in:
parent
c86e9afe5a
commit
f570901c62
@ -32,11 +32,15 @@ from libprs500.ebooks.lrf.web.profiles.reuters import Reuters
|
||||
from libprs500.ebooks.lrf.web.profiles.atlantic import Atlantic
|
||||
from libprs500.ebooks.lrf.web.profiles.ap import AssociatedPress
|
||||
from libprs500.ebooks.lrf.web.profiles.newyorker import NewYorker
|
||||
from libprs500.ebooks.lrf.web.profiles.jutarnji import Jutarnji
|
||||
from libprs500.ebooks.lrf.web.profiles.usatoday import USAToday
|
||||
|
||||
# Profile classes bundled with the package. Each class is listed exactly once
# so that available_profiles below contains no repeated names.
builtin_profiles = [
    Atlantic, AssociatedPress, Barrons, BBC,
    ChristianScienceMonitor, CNN, Dilbert, Economist, FazNet,
    JerusalemPost, Jutarnji, Newsweek, NewYorker,
    NewYorkReviewOfBooks, NYTimes, USAToday,
    Portfolio, Reuters, SpiegelOnline, WallStreetJournal,
    ZeitNachrichten,
]

# Last dotted component of each profile class's module path
# (e.g. '...profiles.usatoday' -> 'usatoday').
available_profiles = [i.__module__.rpartition('.')[2] for i in builtin_profiles]
|
44
src/libprs500/ebooks/lrf/web/profiles/jutarnji.py
Normal file
44
src/libprs500/ebooks/lrf/web/profiles/jutarnji.py
Normal file
@ -0,0 +1,44 @@
|
||||
'''
|
||||
Profile to download Jutarnji.hr by Valloric
|
||||
'''
|
||||
|
||||
import re
|
||||
|
||||
from libprs500.ebooks.lrf.web.profiles import DefaultProfile
|
||||
|
||||
class Jutarnji(DefaultProfile):

    # Settings overriding the DefaultProfile values for jutarnji.hr.
    title = 'Jutarnji'
    max_recursions = 2
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 20
    html_description = True
    no_stylesheets = True

    # (pattern, replacement-callable) pairs; all patterns are case-insensitive
    # and let '.' span newlines.
    preprocess_regexps = [
        # Drop everything between <body and the headline span.
        (re.compile(r'<body.*?<span class="vijestnaslov">', re.I | re.S),
         lambda match: '<body><span class="vijestnaslov">'),
        # Drop whatever sits between a closing div and the next closing cell.
        (re.compile(r'</div>.*?</td>', re.I | re.S),
         lambda match: '</div></td>'),
        # Drop everything from the "addComment" anchor to the end of the body.
        (re.compile(r'<a name="addComment.*?</body>', re.I | re.S),
         lambda match: '</body>'),
        # Remove <br> tags entirely.
        (re.compile(r'<br>', re.I | re.S),
         lambda match: ''),
    ]

    ## Getting the print version

    def print_version(self, url):
        # The article id is taken from the six characters that precede the
        # last three characters of the article URL.
        stop = len(url) - 3
        start = len(url) - 9
        article_id = url[start:stop]
        return 'http://www.jutarnji.hr/ispis_clanka.jl?artid=' + article_id

    ## Comment out the feeds you don't want retrieved.
    ## Or add any new RSS feed URLs here; feeds are sorted alphabetically when converted to LRF.
    ## If you want one of these at the top, prepend a space to its name.

    def get_feeds(self):
        # Each entry is a (feed title, RSS URL) pair.
        feeds = [
            (' Naslovnica', 'http://www.jutarnji.hr/rss'),
            ('Sport', 'http://www.jutarnji.hr/sport/rss'),
            ('Novac', 'http://www.jutarnji.hr/novac/rss'),
            ('Kultura i zivot', 'http://www.jutarnji.hr/kultura_i_zivot/rss'),
            ('Automoto', 'http://www.jutarnji.hr/auto_moto/rss'),
            ('Hi-Tech', 'http://www.jutarnji.hr/kultura_i_zivot/hi-tech/rss'),
            ('Dom i nekretnine', 'http://www.jutarnji.hr/nekretnine/rss'),
        ]
        return feeds
|
43
src/libprs500/ebooks/lrf/web/profiles/usatoday.py
Normal file
43
src/libprs500/ebooks/lrf/web/profiles/usatoday.py
Normal file
@ -0,0 +1,43 @@
|
||||
'''
|
||||
Profile to download USA Today, by Valloric
|
||||
'''
|
||||
|
||||
import re
|
||||
|
||||
from libprs500.ebooks.lrf.web.profiles import DefaultProfile
|
||||
|
||||
class USAToday(DefaultProfile):

    # Settings overriding the DefaultProfile values for USA Today.
    title = 'USA Today'
    max_recursions = 2
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 20
    html_description = True
    # Left disabled by the original author:
    #no_stylesheets = True

    # (pattern, replacement-callable) pairs; both patterns are case-insensitive
    # and let '.' span newlines.
    preprocess_regexps = [
        # Drop everything from <BODY up to the article start marker.
        (re.compile(r'<BODY.*?<!--Article Goes Here-->', re.I | re.S),
         lambda match: '<BODY>'),
        # Drop everything from the article end marker to the end of the body.
        (re.compile(r'<!--Article End-->.*?</BODY>', re.I | re.S),
         lambda match: '</BODY>'),
    ]

    ## Getting the print version

    def print_version(self, url):
        # The article URL is appended unchanged to the print-service prefix.
        prefix = 'http://www.printthis.clickability.com/pt/printThis?clickMap=printThis&fb=Y&url='
        return prefix + url

    ## Comment out the feeds you don't want retrieved.
    ## Or add any new RSS feed URLs here; feeds are sorted alphabetically when converted to LRF.
    ## If you want one of these at the top, prepend a space to its name.

    def get_feeds(self):
        # Each entry is a (feed title, RSS URL) pair.
        feeds = [
            (' Top Headlines', 'http://rssfeeds.usatoday.com/usatoday-NewsTopStories'),
            ('Sport Headlines', 'http://rssfeeds.usatoday.com/UsatodaycomSports-TopStories'),
            ('Tech Headlines', 'http://rssfeeds.usatoday.com/usatoday-TechTopStories'),
            ('Travel Headlines', 'http://rssfeeds.usatoday.com/UsatodaycomTravel-TopStories'),
            ('Money Headlines', 'http://rssfeeds.usatoday.com/UsatodaycomMoney-TopStories'),
            ('Entertainment Headlines', 'http://rssfeeds.usatoday.com/usatoday-LifeTopStories'),
            ('Weather Headlines', 'http://rssfeeds.usatoday.com/usatoday-WeatherTopStories'),
            ('Most Popular', 'http://rssfeeds.usatoday.com/Usatoday-MostViewedArticles'),
        ]
        return feeds
|
Loading…
x
Reference in New Issue
Block a user