mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #7210 (Download News)
This commit is contained in:
parent
0bff8a7d5a
commit
7d7757ab93
@ -1,74 +1,43 @@
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
# Recipe for the New Zealand Herald, declared as a subclass of BasicNewsRecipe.
# NOTE(review): the lines below come from a rendered diff, so removed and
# added lines appear side by side; confirm against the real recipe file.
class NewZealandHerald(BasicNewsRecipe):
|
||||
|
||||
title = 'New Zealand Herald'
|
||||
# NOTE(review): __author__ is assigned twice; as plain Python the second
# assignment below overrides this one.
__author__ = 'Krittika Goyal'
|
||||
__author__ = 'Kovid Goyal'
|
||||
description = 'Daily news'
|
||||
# strftime-style pattern (day, abbreviated month, year) in square brackets.
timefmt = ' [%d %b, %Y]'
|
||||
language = 'en_NZ'
|
||||
# presumably measured in days; confirm against the BasicNewsRecipe docs.
oldest_article = 2.5
|
||||
|
||||
no_stylesheets = True
|
||||
# Tag matchers keyed on the 'class' attribute of <div> elements.
remove_tags_before = dict(name='div', attrs={'class':'contentContainer left eight'})
|
||||
remove_tags_after = dict(name='div', attrs={'class':'callToAction'})
|
||||
# Tag matchers for elements to drop: every <iframe>, plus <div> elements
# whose class is one of the listed values.
remove_tags = [
    dict(name='iframe'),
    dict(name='div', attrs={'class': [
        'sectionHeader',
        'tools',
        'callToAction',
        'contentContainer right two nopad relatedColumn',
    ]}),
    #dict(name='div', attrs={'id':['shareContainer']}),
    #dict(name='form', attrs={'onsubmit':"return verifySearch(this.w,'Keyword, citation, or #author')"}),
    #dict(name='table', attrs={'cellspacing':'0'}),
]
|
||||
# (section title, RSS feed URL) pairs, one per section.
# The 'Technology' entry previously lacked the comma between title and URL,
# so the two string literals were concatenated into a single string instead
# of forming a 2-tuple.
feeds = [
    ('Business',
     'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000003.xml'),
    ('World',
     'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000002.xml'),
    ('National',
     'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000001.xml'),
    ('Entertainment',
     'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_001501119.xml'),
    ('Travel',
     'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000007.xml'),
    ('Opinion',
     'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000466.xml'),
    ('Life & Style',
     'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000006.xml'),
    ('Technology',
     'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000005.xml'),
    ('Sport',
     'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000004.xml'),
    ('Motoring',
     'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000009.xml'),
    ('Property',
     'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000008.xml'),
]
|
||||
|
||||
def preprocess_html(self, soup):
    """Detach the first <table> element from *soup*, if any, and return *soup*."""
    first_table = soup.find('table')
    if first_table is None:
        # Nothing to strip; hand the document back untouched.
        return soup
    first_table.extract()
    return soup
|
||||
|
||||
# Get the articles listed in a section
|
||||
def nz_parse_section(self, url):
    """Collect the articles listed on the section page at *url*.

    The page is fetched with ``self.index_to_soup``. Inside the element with
    class ``col-300 categoryList`` the first ``link-list-heading`` element is
    located; every ``linkList`` element that follows it is scanned for
    ``<li><a href=...>`` links, stopping at the next ``link-list-heading``.

    Returns a list of dicts with the keys ``title``, ``url``, ``description``
    and ``date`` (the last two are always empty strings). An empty list is
    returned when the expected container or heading is missing, instead of
    raising ``AttributeError`` on ``None``.
    """
    soup = self.index_to_soup(url)
    container = soup.find(attrs={'class':'col-300 categoryList'})
    heading = None
    if container is not None:
        heading = container.find(attrs={'class':'link-list-heading'})
    if heading is None:
        # Page layout differs from what is expected; report no articles so
        # the caller can skip this section.
        self.log('\t\tNo article list found at:', url)
        return []

    current_articles = []
    for node in heading.findAllNext(attrs={'class':['linkList', 'link-list-heading']}):
        # A second heading marks the end of the first group of link lists.
        if node.get('class') == 'link-list-heading':
            break
        for li in node.findAll('li'):
            a = li.find('a', href=True)
            if a is None:
                continue
            title = self.tag_to_string(a)
            # Kept separate from the ``url`` parameter, which the original
            # code overwrote inside this loop.
            href = a.get('href', False)
            if not href or not title:
                continue
            if href.startswith('/'):
                # Site-relative link: prefix the site root.
                href = 'http://www.nzherald.co.nz'+href
            self.log('\t\tFound article:', title)
            self.log('\t\t\t', href)
            current_articles.append({'title': title, 'url':href,
                'description':'', 'date':''})

    return current_articles
|
||||
|
||||
|
||||
# Get the sections
|
||||
def parse_index(self):
    """Build the list of ``(section title, articles)`` pairs.

    Each section's headlines page is passed to ``self.nz_parse_section``;
    sections for which it returns nothing are left out.
    """
    sections = (
        ('National',
         'http://www.nzherald.co.nz/nz/news/headlines.cfm?c_id=1'),
        ('World',
         'http://www.nzherald.co.nz/world/news/headlines.cfm?c_id=2'),
        ('Politics',
         'http://www.nzherald.co.nz/politics/news/headlines.cfm?c_id=280'),
        ('Crime',
         'http://www.nzherald.co.nz/crime/news/headlines.cfm?c_id=30'),
        ('Environment',
         'http://www.nzherald.co.nz/environment/news/headlines.cfm?c_id=39'),
    )
    collected = []
    for section_title, section_url in sections:
        found = self.nz_parse_section(section_url)
        if not found:
            # Skip sections that yielded no articles.
            continue
        collected.append((section_title, found))
    return collected
|
||||
def print_version(self, url):
    """Map an article URL to its print-page URL.

    The digits following ``objectid=`` in *url* are appended to the print
    endpoint. A URL without an ``objectid`` is returned unchanged.
    """
    found = re.search(r'objectid=(\d+)', url)
    if found is not None:
        object_id = found.group(1)
        return 'http://www.nzherald.co.nz/news/print.cfm?pnum=1&objectid=' + object_id
    return url
|
||||
|
||||
|
@ -957,6 +957,8 @@ class BasicNewsRecipe(Recipe):
|
||||
self.log.error(_('Could not download cover: %s')%str(err))
|
||||
self.log.debug(traceback.format_exc())
|
||||
else:
|
||||
if not cu:
|
||||
return
|
||||
cdata = None
|
||||
if os.access(cu, os.R_OK):
|
||||
cdata = open(cu, 'rb').read()
|
||||
@ -987,6 +989,7 @@ class BasicNewsRecipe(Recipe):
|
||||
self.cover_path = cpath
|
||||
|
||||
def download_cover(self):
|
||||
self.cover_path = None
|
||||
try:
|
||||
self._download_cover()
|
||||
except:
|
||||
|
Loading…
x
Reference in New Issue
Block a user