diff --git a/src/libprs500/ebooks/lrf/web/profiles/ap.py b/src/libprs500/ebooks/lrf/web/profiles/ap.py new file mode 100644 index 0000000000..d581767253 --- /dev/null +++ b/src/libprs500/ebooks/lrf/web/profiles/ap.py @@ -0,0 +1,38 @@ +import re +from libprs500.ebooks.lrf.web.profiles import DefaultProfile + + +class AssociatedPress(DefaultProfile): + + title = 'Associated Press' + max_recursions = 2 + max_articles_per_feed = 15 + html2lrf_options = ['--force-page-break-before-tag="chapter"'] + + + preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in +[ + (r'.*?' , lambda match : ''), + (r'.*?', lambda match : ''), + (r'.*?', lambda match : ''), + (r'.*?', lambda match : ''), + (r'.*?', lambda match : ''), + (r'
.*?

', lambda match : '

'), + (r'

', lambda match : '

'), + (r'Learn more about our Privacy Policy.*?', lambda match : ''), + ] + ] + + + + def get_feeds(self): + return [ ('AP Headlines', 'http://hosted.ap.org/lineups/TOPHEADS-rss_2.0.xml?SITE=ORAST&SECTION=HOME'), + ('AP US News', 'http://hosted.ap.org/lineups/USHEADS-rss_2.0.xml?SITE=CAVIC&SECTION=HOME'), + ('AP World News', 'http://hosted.ap.org/lineups/WORLDHEADS-rss_2.0.xml?SITE=SCAND&SECTION=HOME'), + ('AP Political News', 'http://hosted.ap.org/lineups/POLITICSHEADS-rss_2.0.xml?SITE=ORMED&SECTION=HOME'), + ('AP Washington News', 'http://hosted.ap.org/lineups/WASHINGTONHEADS-rss_2.0.xml?SITE=NYPLA&SECTION=HOME'), + ('AP Technology News', 'http://hosted.ap.org/lineups/TECHHEADS-rss_2.0.xml?SITE=NYWNE&SECTION=HOME'), + ('AP Health News', 'http://hosted.ap.org/lineups/HEALTHHEADS-rss_2.0.xml?SITE=FLDAY&SECTION=HOME'), + ('AP Science News', 'http://hosted.ap.org/lineups/SCIENCEHEADS-rss_2.0.xml?SITE=OHCIN&SECTION=HOME'), + ('AP Strange News', 'http://hosted.ap.org/lineups/STRANGEHEADS-rss_2.0.xml?SITE=WCNC&SECTION=HOME'), + ] \ No newline at end of file diff --git a/src/libprs500/ebooks/lrf/web/profiles/reuters.py b/src/libprs500/ebooks/lrf/web/profiles/reuters.py index 449138e76d..418d493841 100644 --- a/src/libprs500/ebooks/lrf/web/profiles/reuters.py +++ b/src/libprs500/ebooks/lrf/web/profiles/reuters.py @@ -30,7 +30,7 @@ class Reuters(DefaultProfile): ('World News', 'http://feeds.reuters.com/reuters/worldNews?format=xml'), ('Politics News', 'http://feeds.reuters.com/reuters/politicsNews?format=xml'), ('Science News', 'http://feeds.reuters.com/reuters/scienceNews?format=xml'), - ('Emviroment News', 'http://feeds.reuters.com/reuters/Environment?format=xml'), + ('Environment News', 'http://feeds.reuters.com/reuters/Environment?format=xml'), ('Technology News', 'http://feeds.reuters.com/reuters/technologyNews?format=xml'), ('Oddly Enough News', 'http://feeds.reuters.com/reuters/oddlyEnoughNews?format=xml') ]