mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update Boston Globe
This commit is contained in:
parent
e62a0769c5
commit
9d06256df1
@ -21,10 +21,10 @@ class BostonGlobeSubscription(BasicNewsRecipe):
|
|||||||
timefmt = ' [%a, %d %b, %Y]'
|
timefmt = ' [%a, %d %b, %Y]'
|
||||||
needs_subscription = 'optional'
|
needs_subscription = 'optional'
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
classes('main-hed lead-figure byline article-text comic'),
|
classes('comic article__title methode__story article-header__headline lead-media figure article-header__byline article-content'),
|
||||||
]
|
]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
classes('inline-newsletter ad skip-nav'),
|
classes('inline-newsletter ad skip-nav article-footer'),
|
||||||
dict(name=['meta', 'link'])
|
dict(name=['meta', 'link'])
|
||||||
]
|
]
|
||||||
remove_attributes = ['style']
|
remove_attributes = ['style']
|
||||||
@ -66,13 +66,13 @@ class BostonGlobeSubscription(BasicNewsRecipe):
|
|||||||
br = BasicNewsRecipe.get_browser(self)
|
br = BasicNewsRecipe.get_browser(self)
|
||||||
if self.username is not None and self.password is not None:
|
if self.username is not None and self.password is not None:
|
||||||
def is_login_form(form):
|
def is_login_form(form):
|
||||||
return form.action == "/eom/SysConfig/WebPortal/BostonGlobe/Framework/regi/login2.jsp"
|
return form.action == "https://www.bostonglobe.com/eom/SysConfig/WebPortal/BostonGlobe/Framework/regi/login2.jsp"
|
||||||
|
|
||||||
# br.set_debug_http(True)
|
# br.set_debug_http(True)
|
||||||
# br.set_debug_responses(True)
|
# br.set_debug_responses(True)
|
||||||
# br.set_debug_redirects(True)
|
# br.set_debug_redirects(True)
|
||||||
br.open(
|
br.open(
|
||||||
"https://www.bostonglobe.com/eom/SysConfig/WebPortal/BostonGlobe/Framework/regi/final-login.jsp")
|
"https://www.bostonglobe.com/login?p1=BGHeader_LogIn")
|
||||||
br.select_form(predicate=is_login_form)
|
br.select_form(predicate=is_login_form)
|
||||||
br["username"] = self.username
|
br["username"] = self.username
|
||||||
br["password"] = self.password
|
br["password"] = self.password
|
||||||
@ -103,10 +103,12 @@ class BostonGlobeSubscription(BasicNewsRecipe):
|
|||||||
self.log("Getting Top Stories")
|
self.log("Getting Top Stories")
|
||||||
articles = []
|
articles = []
|
||||||
topStoriesDiv = soup.find("div", {"class": "stories-top"})
|
topStoriesDiv = soup.find("div", {"class": "stories-top"})
|
||||||
stories = topStoriesDiv.findAll("div", {"class": "story"})
|
stories = topStoriesDiv.findAll("div", {"class": lambda x: x and 'story' in x.split()})
|
||||||
for story in stories:
|
for story in stories:
|
||||||
h2 = story.find("h2", {"class": 'story-title'})
|
h2 = story.find("h2", {"class": 'story-title'})
|
||||||
link = story.find("a")
|
link = story.find("a", {'class': 'story-perm'})
|
||||||
|
for img in h2.findAll('img'):
|
||||||
|
img.extract()
|
||||||
if h2 is not None and link is not None:
|
if h2 is not None and link is not None:
|
||||||
title = self.tag_to_string(h2)
|
title = self.tag_to_string(h2)
|
||||||
url = self.absolutize_url(link["href"])
|
url = self.absolutize_url(link["href"])
|
||||||
|
Loading…
x
Reference in New Issue
Block a user