Fix #2823 (Metadata from Filename) and add subtitles to Economist recipe

This commit is contained in:
Kovid Goyal 2009-07-14 07:24:55 -06:00
parent ee2fea02b6
commit ece4adfab9
3 changed files with 13 additions and 8 deletions

View File

@ -127,7 +127,7 @@ def metadata_from_filename(name, pat=None):
except IndexError: except IndexError:
pass pass
try: try:
au = match.group('authors') au = match.group('author')
aus = string_to_authors(au) aus = string_to_authors(au)
mi.authors = aus mi.authors = aus
except IndexError: except IndexError:

View File

@ -113,7 +113,7 @@ p, li { white-space: pre-wrap; }
<item row="1" column="1" colspan="2" > <item row="1" column="1" colspan="2" >
<widget class="QLineEdit" name="authors" > <widget class="QLineEdit" name="authors" >
<property name="toolTip" > <property name="toolTip" >
<string>Regular expression (?P&lt;authors>)</string> <string>Regular expression (?P&lt;author>)</string>
</property> </property>
<property name="text" > <property name="text" >
<string>No match</string> <string>No match</string>

View File

@ -12,7 +12,7 @@ import mechanize, string
from urllib2 import quote from urllib2 import quote
class Economist(BasicNewsRecipe): class Economist(BasicNewsRecipe):
title = 'The Economist' title = 'The Economist'
language = _('English') language = _('English')
__author__ = "Kovid Goyal" __author__ = "Kovid Goyal"
@ -22,7 +22,7 @@ class Economist(BasicNewsRecipe):
INDEX = 'http://www.economist.com/printedition' INDEX = 'http://www.economist.com/printedition'
remove_tags = [dict(name=['script', 'noscript', 'title'])] remove_tags = [dict(name=['script', 'noscript', 'title'])]
remove_tags_before = dict(name=lambda tag: tag.name=='title' and tag.parent.name=='body') remove_tags_before = dict(name=lambda tag: tag.name=='title' and tag.parent.name=='body')
def get_browser(self): def get_browser(self):
br = BasicNewsRecipe.get_browser() br = BasicNewsRecipe.get_browser()
if self.username is not None and self.password is not None: if self.username is not None and self.password is not None:
@ -32,7 +32,7 @@ class Economist(BasicNewsRecipe):
req.add_data(data) req.add_data(data)
br.open(req).read() br.open(req).read()
return br return br
def parse_index(self): def parse_index(self):
soup = BeautifulSoup(self.browser.open(self.INDEX).read(), soup = BeautifulSoup(self.browser.open(self.INDEX).read(),
convertEntities=BeautifulSoup.HTML_ENTITIES) convertEntities=BeautifulSoup.HTML_ENTITIES)
@ -60,13 +60,18 @@ class Economist(BasicNewsRecipe):
continue continue
a = tag.find('a', href=True) a = tag.find('a', href=True)
if a is not None: if a is not None:
url=a['href'].replace('displaystory', 'PrinterFriendly') url=a['href'].replace('displaystory', 'PrinterFriendly')
if url.startswith('/'): if url.startswith('/'):
url = 'http://www.economist.com' + url url = 'http://www.economist.com' + url
article = dict(title=text, try:
subtitle = tag.previousSibling.contents[0].contents[0]
text = subtitle + ': ' + text
except:
pass
article = dict(title=text,
url = url, url = url,
description='', content='', date='') description='', content='', date='')
feeds[key].append(article) feeds[key].append(article)
ans = [(key, feeds[key]) for key in ans if feeds.has_key(key)] ans = [(key, feeds[key]) for key in ans if feeds.has_key(key)]
return ans return ans