mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix Washington Post recipe
This commit is contained in:
parent
ffee7f8da1
commit
d97aaefd2c
@ -91,7 +91,7 @@ class BasicNewsRecipe(object, LoggingInterface):
|
|||||||
#: If True stylesheets are not downloaded and processed
|
#: If True stylesheets are not downloaded and processed
|
||||||
no_stylesheets = False
|
no_stylesheets = False
|
||||||
|
|
||||||
#: Convenient flag to strip all javascripts tags from the downloaded HTML
|
#: Convenient flag to strip all javascript tags from the downloaded HTML
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
|
|
||||||
#: If True the GUI will ask the user for a username and password
|
#: If True the GUI will ask the user for a username and password
|
||||||
|
@ -12,19 +12,7 @@ class WashingtonPost(BasicNewsRecipe):
|
|||||||
language = _('English')
|
language = _('English')
|
||||||
|
|
||||||
|
|
||||||
preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
|
remove_javascript = True
|
||||||
[
|
|
||||||
(r'<HEAD>.*?</HEAD>' , lambda match : '<HEAD></HEAD>'),
|
|
||||||
(r'<div id="apple-rss-sidebar-background">.*?<!-- start Entries -->', lambda match : ''),
|
|
||||||
(r'<!-- end apple-rss-content-area -->.*?</body>', lambda match : '</body>'),
|
|
||||||
(r'<script.*?>.*?</script>', lambda match : ''),
|
|
||||||
(r'<body.*?>.*?.correction {', lambda match : '<body><style>.correction {'),
|
|
||||||
(r'<span class="display:none;" name="pubDate".*?>.*?</body>', lambda match : '<body>'),
|
|
||||||
|
|
||||||
|
|
||||||
]
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
feeds = [ ('Today\'s Highlights', 'http://www.washingtonpost.com/wp-dyn/rss/linkset/2005/03/24/LI2005032400102.xml'),
|
feeds = [ ('Today\'s Highlights', 'http://www.washingtonpost.com/wp-dyn/rss/linkset/2005/03/24/LI2005032400102.xml'),
|
||||||
@ -38,8 +26,17 @@ class WashingtonPost(BasicNewsRecipe):
|
|||||||
('Editorials', 'http://www.washingtonpost.com/wp-dyn/rss/linkset/2005/05/30/LI2005053000331.xml'),
|
('Editorials', 'http://www.washingtonpost.com/wp-dyn/rss/linkset/2005/05/30/LI2005053000331.xml'),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
remove_tags = [{'id':['pfmnav', 'ArticleCommentsWrapper']}]
|
||||||
|
|
||||||
|
|
||||||
|
def get_article_url(self, article):
|
||||||
|
return article.get('feedburner_origlink', article.get('link', None))
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return (url.rpartition('.')[0] + '_pf.html')
|
return url.rpartition('.')[0] + '_pf.html'
|
||||||
|
|
||||||
|
def postprocess_html(self, soup, first):
|
||||||
|
for div in soup.findAll(name='div', style=re.compile('margin')):
|
||||||
|
div['style'] = ''
|
||||||
|
return soup
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user