mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix Washington Post recipe
This commit is contained in:
parent
ffee7f8da1
commit
d97aaefd2c
@ -91,7 +91,7 @@ class BasicNewsRecipe(object, LoggingInterface):
|
||||
#: If True stylesheets are not downloaded and processed
|
||||
no_stylesheets = False
|
||||
|
||||
#: Convenient flag to strip all javascripts tags from the downloaded HTML
|
||||
#: Convenient flag to strip all javascript tags from the downloaded HTML
|
||||
remove_javascript = True
|
||||
|
||||
#: If True the GUI will ask the user for a username and password
|
||||
|
@ -12,19 +12,7 @@ class WashingtonPost(BasicNewsRecipe):
|
||||
language = _('English')
|
||||
|
||||
|
||||
preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
|
||||
[
|
||||
(r'<HEAD>.*?</HEAD>' , lambda match : '<HEAD></HEAD>'),
|
||||
(r'<div id="apple-rss-sidebar-background">.*?<!-- start Entries -->', lambda match : ''),
|
||||
(r'<!-- end apple-rss-content-area -->.*?</body>', lambda match : '</body>'),
|
||||
(r'<script.*?>.*?</script>', lambda match : ''),
|
||||
(r'<body.*?>.*?.correction {', lambda match : '<body><style>.correction {'),
|
||||
(r'<span class="display:none;" name="pubDate".*?>.*?</body>', lambda match : '<body>'),
|
||||
|
||||
|
||||
]
|
||||
]
|
||||
|
||||
remove_javascript = True
|
||||
|
||||
|
||||
feeds = [ ('Today\'s Highlights', 'http://www.washingtonpost.com/wp-dyn/rss/linkset/2005/03/24/LI2005032400102.xml'),
|
||||
@ -37,9 +25,18 @@ class WashingtonPost(BasicNewsRecipe):
|
||||
('Education', 'http://www.washingtonpost.com/wp-dyn/rss/education/index.xml'),
|
||||
('Editorials', 'http://www.washingtonpost.com/wp-dyn/rss/linkset/2005/05/30/LI2005053000331.xml'),
|
||||
]
|
||||
|
||||
remove_tags = [{'id':['pfmnav', 'ArticleCommentsWrapper']}]
|
||||
|
||||
|
||||
def get_article_url(self, article):
|
||||
return article.get('feedburner_origlink', article.get('link', None))
|
||||
|
||||
def print_version(self, url):
|
||||
return (url.rpartition('.')[0] + '_pf.html')
|
||||
return url.rpartition('.')[0] + '_pf.html'
|
||||
|
||||
def postprocess_html(self, soup, first):
|
||||
for div in soup.findAll(name='div', style=re.compile('margin')):
|
||||
div['style'] = ''
|
||||
return soup
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user