mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Counterpunch by O. Emmerson. Fixes #817967 (New recipe: counterpunch.com)
This commit is contained in:
parent
aa6a3929eb
commit
b4f0302a24
40
recipes/counterpunch.recipe
Normal file
40
recipes/counterpunch.recipe
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
import re
|
||||||
|
from lxml.html import parse
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Counterpunch(BasicNewsRecipe):
    '''
    Parses counterpunch.com for articles

    The front page is scraped directly: every ``<p class="style2">`` entry
    that is not site boilerplate (donation appeals, subscription notices,
    ...) is expected to hold an author name followed by a link to the
    article.
    '''
    title = 'Counterpunch'
    description = 'Daily political opinion from www.Counterpunch.com'
    language = 'en'
    __author__ = 'O. Emmerson'
    # Restrict downloaded pages to <td width="522"> elements (presumably the
    # table cell holding the article body -- verify if the layout changes).
    keep_only_tags = [dict(name='td', attrs={'width': '522'})]
    max_articles_per_feed = 10

    # Site root: used both as the index page and as the base for relative
    # article links.
    INDEX = 'http://www.counterpunch.com'

    # Front-page paragraphs containing any of these phrases are boilerplate,
    # not articles. Raw string: the pattern contains backslash escapes that
    # are invalid in an ordinary string literal.
    UNWANTED_TEXT = re.compile(
        r'Website\ of\ the|I\ urge\ you|Subscribe\ now|DONATE|\@asis\.com'
        r'|donation\ button|click\ over\ to\ our')

    def parse_index(self):
        '''
        Build the feed list for this recipe.

        :return: A list of ``(feed title, articles)`` tuples. It holds a
                 single 'Counterpunch' feed, or is empty when no articles
                 were found on the front page.
        '''
        feeds = []
        articles = self.parse_page(self.INDEX)
        if articles:
            feeds.append(('Counterpunch', articles))
        return feeds

    def parse_page(self, url):
        '''
        Extract the article list from the page at `url`.

        :param url: Address of the index page to scrape.
        :return: A list of ``{'title': ..., 'url': ...}`` dicts, one per
                 usable entry. Entries without a link, a link target or a
                 headline are skipped with a warning.
        '''
        # NOTE(review): lxml fetches the URL itself here, so the request
        # does not go through the recipe's own browser object.
        root = parse(url).getroot()
        selector = "html>body>table tr>td>p[class='style2']"
        articles = []
        for entry in root.cssselect(selector):
            if self.UNWANTED_TEXT.search(entry.text_content()):
                continue

            links = entry.cssselect('a')
            if not links:
                self.log.warn('Counterpunch: skipping entry without a link: '
                              '{0!r}'.format(entry.text_content()))
                continue
            link = links[0]

            href = (link.get('href') or '').strip()
            # text_content() also picks up headlines wrapped in child tags
            # (<b>, <i>, ...), for which link.text would be None.
            headline = link.text_content().strip()
            if not href or not headline:
                self.log.warn('Counterpunch: skipping entry with an empty '
                              'link or headline: '
                              '{0!r}'.format(entry.text_content()))
                continue

            # The author is the text preceding the link; it may be absent.
            author = (entry.text or '').strip()
            if author:
                headline = '{0} by {1}'.format(headline, author)

            # Only relative links need the site root prepended.
            if href.startswith(('http://', 'https://')):
                art_url = href
            else:
                art_url = self.INDEX + '/' + href.lstrip('/')

            articles.append({'title': headline, 'url': art_url})
        return articles
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user