diff --git a/src/libprs500/ebooks/lrf/web/__init__.py b/src/libprs500/ebooks/lrf/web/__init__.py index 5c6c9dbdaa..7b38f51867 100644 --- a/src/libprs500/ebooks/lrf/web/__init__.py +++ b/src/libprs500/ebooks/lrf/web/__init__.py @@ -32,11 +32,15 @@ from libprs500.ebooks.lrf.web.profiles.reuters import Reuters from libprs500.ebooks.lrf.web.profiles.atlantic import Atlantic from libprs500.ebooks.lrf.web.profiles.ap import AssociatedPress from libprs500.ebooks.lrf.web.profiles.newyorker import NewYorker +from libprs500.ebooks.lrf.web.profiles.jutarnji import Jutarnji +from libprs500.ebooks.lrf.web.profiles.usatoday import USAToday builtin_profiles = [Atlantic, AssociatedPress, Barrons, BBC, ChristianScienceMonitor, CNN, Dilbert, Economist, FazNet, - JerusalemPost, Newsweek, NewYorker, NewYorkReviewOfBooks, NYTimes, - Portfolio, Reuters, SpiegelOnline, WallStreetJournal, ZeitNachrichten, + JerusalemPost, Jutarnji, Newsweek, NewYorker, + NewYorkReviewOfBooks, NYTimes, USAToday, + Portfolio, Reuters, SpiegelOnline, WallStreetJournal, + ZeitNachrichten, ] available_profiles = [i.__module__.rpartition('.')[2] for i in builtin_profiles] \ No newline at end of file diff --git a/src/libprs500/ebooks/lrf/web/profiles/jutarnji.py b/src/libprs500/ebooks/lrf/web/profiles/jutarnji.py new file mode 100644 index 0000000000..3909e35a91 --- /dev/null +++ b/src/libprs500/ebooks/lrf/web/profiles/jutarnji.py @@ -0,0 +1,44 @@ +''' + Profile to download Jutarnji.hr by Valloric +''' + +import re + +from libprs500.ebooks.lrf.web.profiles import DefaultProfile + +class Jutarnji(DefaultProfile): + + title = 'Jutarnji' + max_recursions = 2 + timefmt = ' [%d %b %Y]' + max_articles_per_feed = 20 + html_description = True + no_stylesheets = True + + preprocess_regexps = [ + (re.compile(r'', re.IGNORECASE | re.DOTALL), lambda match : ''), + (re.compile(r'.*?', re.IGNORECASE | re.DOTALL), lambda match : ''), + (re.compile(r'