mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
New recipe for Watching America by kwetal
This commit is contained in:
parent
ee9e45d50d
commit
00506bfe5f
96
resources/recipes/watchingamerica.recipe
Normal file
96
resources/recipes/watchingamerica.recipe
Normal file
@ -0,0 +1,96 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
|
||||
class WatchingAmericaRecipe(BasicNewsRecipe):
    """Recipe that builds a periodical from the Watching America news index.

    The index page at ``INDEX`` is scraped by hand in ``parse_index``
    (no RSS feeds are used), and every downloaded article page is reduced
    to its main content container in ``preprocess_html``.
    """

    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    language = 'en'
    version = 1

    title = u'Watching America'
    publisher = u'watchingamerica.com'
    category = u'News'
    description = u'Global opinion about the United States'

    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False

    no_stylesheets = True
    remove_javascript = True
    remove_attributes = ['style']

    # The selectors below target the 'main_content' class assigned in
    # preprocess_html and the site's own 'insideitro' class.
    extra_css = '''
                body{font-family:verdana,arial,helvetica,geneva,sans-serif ;}
                .main_content em {font-size: x-small; font-style: italic; color: #696969;}
                .main_content span strong {font-size: x-large; font-weight: bold;}
                .insideitro {font-size: xx-small; font-style: italic; color: #666666;}
                span {padding: 0em; margin: 0em;}
                '''

    # Page that lists the featured article and the two translation columns.
    INDEX = u'http://watchingamerica.com/News/'

    def parse_index(self):
        """Build the list of feeds from the index page.

        Returns a list of ``(feed_title, articles)`` tuples, where each
        article is a dict with the keys ``title``, ``date``, ``url`` and
        ``description``. A section is only included when its container is
        present on the page; links without an ``href`` are skipped.
        """
        answer = []

        soup = self.index_to_soup(self.INDEX)

        # Featured article: a single link inside the 'headzone' container.
        feature = soup.find('div', attrs={'id': 'headzone'})
        if feature is not None:
            link = feature.find('a', attrs={'class': 'feature'})
            url = link.get('href', None) if link is not None else None
            if url:
                pull = feature.find('h1', attrs={'class': 'pull'})
                summary = self.tag_to_string(pull) if pull is not None else u''
                article = {'title': self.tag_to_string(link), 'date': u'',
                           'url': url, 'description': summary}
                answer.append(('Feature', [article]))

        # The two headline columns carry the classes 'newscol1' and
        # 'newscol2'; feed_titles is ordered to match.
        feed_titles = ['Translations from the West', 'Translations from the East']
        for i, feed_title in enumerate(feed_titles, 1):
            div = soup.find('div', attrs={'class': 'newscol' + str(i)})
            if div is None:
                continue

            articles = []
            for link in div.findAll('a', attrs={'class': 'headline'}):
                url = link.get('href', None)
                if not url:
                    # Nothing to download for a headline without a target.
                    continue

                # The blurb, when present, is the <h3> following the link.
                h3 = link.findNextSibling('h3')
                summary = self.tag_to_string(h3) if h3 is not None else u''

                articles.append({'title': self.tag_to_string(link), 'date': u'',
                                 'url': url, 'description': summary})
            answer.append((feed_title, articles))

        return answer

    def preprocess_html(self, soup):
        """Reduce an article page to its main content container.

        The container is located as the parent of the first
        ``<p class="MsoNormal">``. It is renamed to
        ``<div class="main_content">``, stripped of its ``width``
        attribute and of the "show original text" link, and moved into a
        fresh, otherwise empty document that is returned.

        If no such paragraph exists the page layout is not the expected
        one, and ``soup`` is returned unchanged instead of failing.
        """
        marker = soup.find('p', attrs={'class': 'MsoNormal'})
        article = marker.parent if marker is not None else None
        if article is None:
            return soup

        freshSoup = self.get_fresh_soup(soup)

        article.name = 'div'
        del article['width']
        article['class'] = 'main_content'

        # Drop the element wrapping the link to the untranslated text.
        org = article.find('a', attrs={'href': '?SHOW_ORIGINAL_TEXT'})
        if org is not None:
            org.parent.extract()

        # In the intro, remove <br> tags and turn <strong>/<em> into <div>
        # elements.
        intro = article.find('span', attrs={'class': 'insideitro'})
        if intro is not None:
            for el in intro.findAll(['strong', 'em', 'br']):
                if el.name == 'br':
                    el.extract()
                else:
                    el.name = 'div'

        freshSoup.body.append(article)

        return freshSoup

    def get_fresh_soup(self, oldSoup):
        """Return a new empty HTML document carrying over ``oldSoup``'s title.

        The title is copied as plain text; a document without ``<head>``
        or ``<title>`` yields an empty title.
        """
        freshSoup = BeautifulSoup('<html><head><title></title></head><body></body></html>')
        head = oldSoup.head
        if head is not None and head.title is not None:
            freshSoup.head.title.append(self.tag_to_string(head.title))
        return freshSoup
|
Loading…
x
Reference in New Issue
Block a user