mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Fix #2823 (Metadata from Filename) and add subtitles to Economist recipe
This commit is contained in:
parent
ee2fea02b6
commit
ece4adfab9
@ -127,7 +127,7 @@ def metadata_from_filename(name, pat=None):
|
|||||||
except IndexError:
|
except IndexError:
|
||||||
pass
|
pass
|
||||||
try:
|
try:
|
||||||
au = match.group('authors')
|
au = match.group('author')
|
||||||
aus = string_to_authors(au)
|
aus = string_to_authors(au)
|
||||||
mi.authors = aus
|
mi.authors = aus
|
||||||
except IndexError:
|
except IndexError:
|
||||||
|
@ -113,7 +113,7 @@ p, li { white-space: pre-wrap; }
|
|||||||
<item row="1" column="1" colspan="2" >
|
<item row="1" column="1" colspan="2" >
|
||||||
<widget class="QLineEdit" name="authors" >
|
<widget class="QLineEdit" name="authors" >
|
||||||
<property name="toolTip" >
|
<property name="toolTip" >
|
||||||
<string>Regular expression (?P<authors>)</string>
|
<string>Regular expression (?P<author>)</string>
|
||||||
</property>
|
</property>
|
||||||
<property name="text" >
|
<property name="text" >
|
||||||
<string>No match</string>
|
<string>No match</string>
|
||||||
|
@ -12,7 +12,7 @@ import mechanize, string
|
|||||||
from urllib2 import quote
|
from urllib2 import quote
|
||||||
|
|
||||||
class Economist(BasicNewsRecipe):
|
class Economist(BasicNewsRecipe):
|
||||||
|
|
||||||
title = 'The Economist'
|
title = 'The Economist'
|
||||||
language = _('English')
|
language = _('English')
|
||||||
__author__ = "Kovid Goyal"
|
__author__ = "Kovid Goyal"
|
||||||
@ -22,7 +22,7 @@ class Economist(BasicNewsRecipe):
|
|||||||
INDEX = 'http://www.economist.com/printedition'
|
INDEX = 'http://www.economist.com/printedition'
|
||||||
remove_tags = [dict(name=['script', 'noscript', 'title'])]
|
remove_tags = [dict(name=['script', 'noscript', 'title'])]
|
||||||
remove_tags_before = dict(name=lambda tag: tag.name=='title' and tag.parent.name=='body')
|
remove_tags_before = dict(name=lambda tag: tag.name=='title' and tag.parent.name=='body')
|
||||||
|
|
||||||
def get_browser(self):
|
def get_browser(self):
|
||||||
br = BasicNewsRecipe.get_browser()
|
br = BasicNewsRecipe.get_browser()
|
||||||
if self.username is not None and self.password is not None:
|
if self.username is not None and self.password is not None:
|
||||||
@ -32,7 +32,7 @@ class Economist(BasicNewsRecipe):
|
|||||||
req.add_data(data)
|
req.add_data(data)
|
||||||
br.open(req).read()
|
br.open(req).read()
|
||||||
return br
|
return br
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
soup = BeautifulSoup(self.browser.open(self.INDEX).read(),
|
soup = BeautifulSoup(self.browser.open(self.INDEX).read(),
|
||||||
convertEntities=BeautifulSoup.HTML_ENTITIES)
|
convertEntities=BeautifulSoup.HTML_ENTITIES)
|
||||||
@ -60,13 +60,18 @@ class Economist(BasicNewsRecipe):
|
|||||||
continue
|
continue
|
||||||
a = tag.find('a', href=True)
|
a = tag.find('a', href=True)
|
||||||
if a is not None:
|
if a is not None:
|
||||||
url=a['href'].replace('displaystory', 'PrinterFriendly')
|
url=a['href'].replace('displaystory', 'PrinterFriendly')
|
||||||
if url.startswith('/'):
|
if url.startswith('/'):
|
||||||
url = 'http://www.economist.com' + url
|
url = 'http://www.economist.com' + url
|
||||||
article = dict(title=text,
|
try:
|
||||||
|
subtitle = tag.previousSibling.contents[0].contents[0]
|
||||||
|
text = subtitle + ': ' + text
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
article = dict(title=text,
|
||||||
url = url,
|
url = url,
|
||||||
description='', content='', date='')
|
description='', content='', date='')
|
||||||
feeds[key].append(article)
|
feeds[key].append(article)
|
||||||
|
|
||||||
ans = [(key, feeds[key]) for key in ans if feeds.has_key(key)]
|
ans = [(key, feeds[key]) for key in ans if feeds.has_key(key)]
|
||||||
return ans
|
return ans
|
||||||
|
Loading…
x
Reference in New Issue
Block a user