# Mirror of https://github.com/kovidgoyal/calibre.git
# Synced 2025-06-23 15:30:45 -04:00
# 56 lines, 2.0 KiB, plaintext
from calibre.web.feeds.news import BasicNewsRecipe

try:
    # Python 3 location of quote().
    from urllib.parse import quote
except ImportError:
    # Fallback used when urllib.parse is unavailable (Python 2 layout).
    from urllib import quote


class EconomiaMagazine(BasicNewsRecipe):
    """Calibre news recipe for Economia Magazine.

    The issue contents are discovered from the site front page (``BASE``):
    the cover image found there links to the issue page, and every
    ``articlePreview`` block on that page yields one section with its
    articles.
    """

    title = u'Economia Magazine'
    __author__ = 'Kovid Goyal'
    description = 'Economia - Intelligence & Insight for ICAEW Members'
    language = 'en_GB'
    # Site root; links scraped from the pages are appended to it verbatim.
    BASE = 'http://economia.icaew.com/'
    no_stylesheets = True

    # Elements kept from each article page: <h1> headings, <figure> elements
    # whose class list contains "figure", and anything with the class
    # "intro" or "articleCopy".
    keep_only_tags = [
        dict(name='h1'),
        dict(name='figure', attrs={
            'class': lambda x: x and 'figure' in x.split()}),
        dict(attrs={'class': 'intro articleCopy'.split()}),
    ]

    def image_url_processor(cls, baseurl, iurl):
        """Build the URL to download for an image reference.

        Args:
            cls: Receiver of the call. NOTE(review): the parameter is named
                ``cls`` but no ``@classmethod`` decorator is present on this
                definition -- confirm against the base class how it is bound.
            baseurl: Prefix the returned URL is built on.
            iurl: Image reference as found in the page; may be empty.

        Returns:
            ``baseurl`` followed by the percent-quoted ``iurl``, or
            ``baseurl + '404.jpeg'`` when ``iurl`` is empty.
        """
        if not iurl:
            # Nothing to quote: fall back to a fixed placeholder name.
            return baseurl + '404.jpeg'
        return baseurl + quote(iurl)

    def preprocess_raw_html(self, raw_html, url):
        """Return ``raw_html`` with every ``src=""`` occurrence removed.

        ``url`` is accepted but not used here.
        """
        return raw_html.replace('src=""', '')

    def parse_index(self):
        """Collect the sections and articles of the issue linked from BASE.

        Side effects: sets ``self.cover_url`` to the quoted cover image URL
        and ``self.timefmt`` to the issue page ``<title>`` text that precedes
        the first ``|``, wrapped as `` [..]``.

        Returns:
            A list of ``(section_title, articles)`` tuples. ``articles`` is a
            list of dicts with the keys ``title``, ``url`` and
            ``description``. Sections with no usable article are left out.

        Raises:
            ValueError: If the front page has no image whose ``src`` contains
                ``'Magazine covers'``; the issue page is reached through that
                image's parent link, so nothing can be indexed without it.
        """
        front_page = self.index_to_soup(self.BASE)
        img = front_page.find(
            'img', src=lambda x: x and 'Magazine covers' in x)
        if img is None:
            raise ValueError(
                'Could not find the magazine cover image on ' + self.BASE)

        # The src is encoded to UTF-8 bytes before quoting so that non-ASCII
        # characters in the image path are percent-escaped.
        self.cover_url = self.BASE + quote(img['src'].encode('utf-8'))

        soup = self.index_to_soup(self.BASE + img.parent['href'])
        issue_name = self.tag_to_string(
            soup.find('title')).split('|')[0].strip()
        self.timefmt = ' [%s]' % issue_name

        ans = []
        for div in soup.findAll('div', attrs={'class': 'articlePreview'}):
            section_title = self.tag_to_string(div.find('h2')).strip()
            self.log('Found section:', section_title)

            articles = []
            for li in div.findAll('li'):
                h3 = li.find('h3')
                a = h3.find('a', href=True) if h3 is not None else None
                if a is None:
                    # A list item without a linked heading has no article URL;
                    # skip it instead of aborting the whole index.
                    continue
                title = self.tag_to_string(h3)
                url = self.BASE + a['href']
                self.log('\t', title, 'at', url)
                articles.append({
                    'title': title,
                    'url': url,
                    'description': self.tag_to_string(li.find('p')),
                })

            if articles:
                ans.append((section_title, articles))
        return ans