mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Add profiles for Washington Post and United Press International
This commit is contained in:
parent
0c61a51a1b
commit
45d114bc32
@ -34,13 +34,15 @@ from libprs500.ebooks.lrf.web.profiles.ap import AssociatedPress
|
|||||||
from libprs500.ebooks.lrf.web.profiles.newyorker import NewYorker
|
from libprs500.ebooks.lrf.web.profiles.newyorker import NewYorker
|
||||||
from libprs500.ebooks.lrf.web.profiles.jutarnji import Jutarnji
|
from libprs500.ebooks.lrf.web.profiles.jutarnji import Jutarnji
|
||||||
from libprs500.ebooks.lrf.web.profiles.usatoday import USAToday
|
from libprs500.ebooks.lrf.web.profiles.usatoday import USAToday
|
||||||
|
from libprs500.ebooks.lrf.web.profiles.upi import UnitedPressInternational
|
||||||
|
from libprs500.ebooks.lrf.web.profiles.wash_post import WashingtonPost
|
||||||
|
|
||||||
builtin_profiles = [Atlantic, AssociatedPress, Barrons, BBC,
|
builtin_profiles = [Atlantic, AssociatedPress, Barrons, BBC,
|
||||||
ChristianScienceMonitor, CNN, Dilbert, Economist, FazNet,
|
ChristianScienceMonitor, CNN, Dilbert, Economist, FazNet,
|
||||||
JerusalemPost, Jutarnji, Newsweek, NewYorker,
|
JerusalemPost, Jutarnji, Newsweek, NewYorker,
|
||||||
NewYorkReviewOfBooks, NYTimes, USAToday,
|
NewYorkReviewOfBooks, NYTimes, UnitedPressInternational, USAToday,
|
||||||
Portfolio, Reuters, SpiegelOnline, WallStreetJournal,
|
Portfolio, Reuters, SpiegelOnline, WallStreetJournal,
|
||||||
ZeitNachrichten,
|
WashingtonPost, ZeitNachrichten,
|
||||||
]
|
]
|
||||||
|
|
||||||
available_profiles = [i.__module__.rpartition('.')[2] for i in builtin_profiles]
|
available_profiles = [i.__module__.rpartition('.')[2] for i in builtin_profiles]
|
36
src/libprs500/ebooks/lrf/web/profiles/upi.py
Normal file
36
src/libprs500/ebooks/lrf/web/profiles/upi.py
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
import re
|
||||||
|
from libprs500.ebooks.lrf.web.profiles import DefaultProfile
|
||||||
|
|
||||||
|
|
||||||
|
class UnitedPressInternational(DefaultProfile):
    """Feed profile for United Press International (www.upi.com).

    Declares the UPI NewsTrack RSS feeds to fetch, the regular-expression
    clean-ups applied to downloaded HTML, and how to turn an article URL
    into the URL of its print view.
    """

    title = 'United Press International'
    # NOTE(review): the limits below are presumably read by DefaultProfile /
    # the fetcher; their exact semantics are defined outside this class.
    max_recursions = 2
    max_articles_per_feed = 15
    # Extra option string handed to html2lrf: overrides the H1 styling.
    html2lrf_options = ['--override-css= "H1 {font-family: Arial; font-weight: bold; color: #000000; size: 10pt;}"']

    # (compiled pattern, replacement callable) pairs. Every pattern is
    # compiled case-insensitively and with DOTALL, so '.' also spans
    # newlines and a single rule can remove a multi-line region.
    preprocess_regexps = [
        (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
        [
            # Empty the document head.
            (r'<HEAD>.*?</HEAD>', lambda match: '<HEAD></HEAD>'),
            # Drop everything from the RSS sidebar up to the entries marker.
            (r'<div id="apple-rss-sidebar-background">.*?<!-- start Entries -->', lambda match: ''),
            # Drop everything after the content area, keeping the closing tag.
            (r'<!-- end apple-rss-content-area -->.*?</body>', lambda match: '</body>'),
            # Remove script elements.
            (r'<script.*?>.*?</script>', lambda match: ''),
            # Replace the page header (through the upi.com home link) with a
            # plain body tag that sets a small default font.
            (r'<body onload=.*?>.*?<a href="http://www.upi.com">', lambda match: '<body style="font: 8pt arial;">'),
            # Disabled rule, kept for reference:
            ##(r'<div class=\'headerDIV\'><h2><a style="color: #990000;" href="http://www.upi.com/NewsTrack/Top_News/">Top News</a></h2></div>.*?<br clear="all">', lambda match : ''),
            # Remove the remainder of the page starting at an external script.
            (r'<script src="http://www.g.*?>.*?</body>', lambda match: ''),
            # Shrink 16pt spans to 12pt.
            (r'<span style="font: 16pt arial', lambda match: '<span style="font: 12pt arial'),
        ]
    ]

    def get_feeds(self):
        """Return the feeds to download as a list of (title, URL) tuples."""
        return [
            ('Top Stories', 'http://www.upi.com/rss/NewsTrack/Top_News/'),
            ('Science', 'http://www.upi.com/rss/NewsTrack/Science/'),
            ('Health', 'http://www.upi.com/rss/NewsTrack/Health/'),
            ('Quirks', 'http://www.upi.com/rss/NewsTrack/Quirks/'),
        ]

    def print_version(self, url):
        """Return the print-view URL for the article at `url`.

        The print view lives in the 'print_view/' sub-path of the article
        URL. A separating slash is inserted when `url` does not already end
        with one, so the two path components are never fused together.
        """
        if not url.endswith('/'):
            url += '/'
        return url + 'print_view/'
|
44
src/libprs500/ebooks/lrf/web/profiles/wash_post.py
Normal file
44
src/libprs500/ebooks/lrf/web/profiles/wash_post.py
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
import re
|
||||||
|
from libprs500.ebooks.lrf.web.profiles import DefaultProfile
|
||||||
|
|
||||||
|
|
||||||
|
class WashingtonPost(DefaultProfile):
    """Feed profile for the Washington Post (www.washingtonpost.com).

    Declares the RSS feeds to fetch, the regular-expression clean-ups
    applied to downloaded HTML, and how to turn an article URL into the
    URL of its printer-friendly page.
    """

    title = 'Washington Post'
    # NOTE(review): the settings below are presumably read by DefaultProfile /
    # the fetcher; their exact semantics are defined outside this class.
    max_recursions = 2
    max_articles_per_feed = 20
    use_pubdate = False

    # (compiled pattern, replacement callable) pairs. Every pattern is
    # compiled case-insensitively and with DOTALL, so '.' also spans
    # newlines and a single rule can remove a multi-line region.
    preprocess_regexps = [
        (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
        [
            # Empty the document head.
            (r'<HEAD>.*?</HEAD>', lambda match: '<HEAD></HEAD>'),
            # Drop everything from the RSS sidebar up to the entries marker.
            (r'<div id="apple-rss-sidebar-background">.*?<!-- start Entries -->', lambda match: ''),
            # Drop everything after the content area, keeping the closing tag.
            (r'<!-- end apple-rss-content-area -->.*?</body>', lambda match: '</body>'),
            # Remove script elements.
            (r'<script.*?>.*?</script>', lambda match: ''),
            # Collapse everything between the body tag and the '.correction'
            # style rule into a bare body tag followed by a style element.
            (r'<body.*?>.*?.correction {', lambda match: '<body><style>.correction {'),
            # Cut the page off at the hidden pubDate span. The match runs
            # through '</body>', so the closing tag is put back (the previous
            # replacement emitted an opening '<body>' here, leaving the
            # document without a closing body tag).
            (r'<span class="display:none;" name="pubDate".*?>.*?</body>', lambda match: '</body>'),
        ]
    ]

    def get_feeds(self):
        """Return the feeds to download as a list of (title, URL) tuples."""
        return [
            ('Today\'s Highlights', 'http://www.washingtonpost.com/wp-dyn/rss/linkset/2005/03/24/LI2005032400102.xml'),
            ('Politics', 'http://www.washingtonpost.com/wp-dyn/rss/politics/index.xml'),
            # Host corrected: the URL previously read 'www.www.washingtonpost.com'.
            ('Nation', 'http://www.washingtonpost.com/wp-dyn/rss/nation/index.xml'),
            ('World', 'http://www.washingtonpost.com/wp-dyn/rss/world/index.xml'),
            ('Business', 'http://www.washingtonpost.com/wp-dyn/rss/business/index.xml'),
            ('Technology', 'http://www.washingtonpost.com/wp-dyn/rss/technology/index.xml'),
            ('Health', 'http://www.washingtonpost.com/wp-dyn/rss/health/index.xml'),
            ('Education', 'http://www.washingtonpost.com/wp-dyn/rss/education/index.xml'),
            ('Editorials', 'http://www.washingtonpost.com/wp-dyn/rss/linkset/2005/05/30/LI2005053000331.xml'),
        ]

    def print_version(self, url):
        """Return the printer-friendly URL for the article at `url`.

        Everything from the last '.' in `url` onward is replaced with
        '_pf.html' (so anything following that dot, such as a query string,
        is dropped as well).
        """
        stem = url.rpartition('.')[0]
        return stem + '_pf.html'
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user