mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-08-11 09:13:57 -04:00
Update Metro UK
This commit is contained in:
parent
3690241ab1
commit
6c7ff4e4e6
@ -7,71 +7,75 @@ import time
|
|||||||
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
    """Recipe for Metro UK that builds its index by scraping section pages."""

    title = u'Metro UK'
    description = 'News from The Metro, UK'
    cover_url = 'https://twimg0-a.akamaihd.net/profile_images/1638332595/METRO_LETTERS-01.jpg'
    remove_empty_feeds = True
    remove_javascript = True
    no_stylesheets = True
    auto_cleanup = True
    max_articles_per_feed = 12
    ignore_duplicate_articles = {'title', 'url'}
    #encoding = 'UTF-8'

    language = 'en_GB'
    masthead_url = 'http://e-edition.metro.co.uk/images/metro_logo.gif'
    compress_news_images = True
    compress_news_images_max_size = 30
    remove_attributes = ['style', 'font']
    # Replace every "| Metro News" occurrence (case-insensitive) with nothing.
    preprocess_regexps = [
        (re.compile(r'\| Metro News', re.IGNORECASE | re.DOTALL), lambda match: ''),
    ]

    def parse_index(self):
        """Scrape each section page and collect its recent articles.

        Only ``<a>`` tags whose ``class`` attribute equals ``post`` are
        considered. An article is kept when the three path segments that
        follow ``uk/`` in its URL parse as a ``YYYY/MM/DD`` date that is not
        older than midnight (local time) at the start of yesterday. Links
        whose URL carries no parsable date are skipped.

        Returns:
            A list of ``(section_title, articles)`` tuples in the order of
            ``feeds`` below. Each article is a dict with the keys ``title``,
            ``url``, ``date``, ``description`` and ``content``.

        Raises:
            KeyError: if a matching anchor lacks an ``href`` or ``title``
                attribute.
        """
        feeds = [('UK', 'http://metro.co.uk/news/uk/'),
                 ('World', 'http://metro.co.uk/news/world/'),
                 ('Weird', 'http://metro.co.uk/news/weird/'),
                 ('Money', 'http://metro.co.uk/news/money/'),
                 ('Sport', 'http://metro.co.uk/sport/'),
                 ('Guilty Pleasures', 'http://metro.co.uk/guilty-pleasures/')
                 ]

        # Year, month and day are expected as the three path segments after
        # the last usable "uk/" in the article URL.
        url_date = re.compile('^.*uk/([^/]*)/([^/]*)/([^/]*)/')

        # Timestamp of local midnight today, minus 24 hours; computed once
        # because it does not depend on the feed being processed.
        today = datetime.date.today()
        cutoff = time.mktime(today.timetuple()) - 60 * 60 * 24

        ans = []
        for key, feed in feeds:
            soup = self.index_to_soup(feed)
            section_articles = []
            ans.append((key, section_articles))

            for a in soup.findAll('a'):
                # NOTE(review): unpacking a.attrs into (name, value) pairs
                # assumes the soup exposes attributes as a list of tuples -
                # confirm against the BeautifulSoup version in use.
                for name, value in a.attrs:
                    if name != "class" or value != "post":
                        continue

                    url = a['href']
                    title = a['title']
                    # Single-argument print() is valid on Python 2 and 3.
                    print(title)

                    m = url_date.search(url)
                    if m is None:
                        # No date-shaped path in the URL: cannot tell how
                        # old the article is, so leave it out.
                        continue

                    year, month, day = m.groups()
                    try:
                        dt = datetime.datetime.strptime(
                            year + '-' + month + '-' + day, '%Y-%m-%d')
                    except ValueError:
                        # The three segments were not a calendar date
                        # (e.g. a section path); skip instead of aborting
                        # the whole download.
                        continue

                    if time.mktime(dt.timetuple()) < cutoff:
                        continue

                    pubdate = time.strftime('%a, %d %b', dt.timetuple())
                    print(pubdate)

                    description = ''
                    summary = a.find(True, attrs={'class': 'excerpt'})
                    if summary:
                        description = self.tag_to_string(summary, use_alt=False)

                    section_articles.append(
                        dict(title=title, url=url, date=pubdate,
                             description=description,
                             content=''))

        return ans
|
Loading…
x
Reference in New Issue
Block a user