mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update Baltimore Sun
This commit is contained in:
parent
b7e058518d
commit
1c153d4100
@ -11,10 +11,9 @@ class BaltimoreSun(BasicNewsRecipe):
|
|||||||
|
|
||||||
title = 'The Baltimore Sun'
|
title = 'The Baltimore Sun'
|
||||||
__author__ = 'Josh Hall'
|
__author__ = 'Josh Hall'
|
||||||
|
|
||||||
description = 'Complete local news and blogs from Baltimore'
|
description = 'Complete local news and blogs from Baltimore'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
version = 2
|
version = 2.1
|
||||||
oldest_article = 1
|
oldest_article = 1
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
@ -22,6 +21,7 @@ class BaltimoreSun(BasicNewsRecipe):
|
|||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
recursions = 1
|
recursions = 1
|
||||||
|
|
||||||
|
ignore_duplicate_articles = {'title'}
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':["story","entry-asset asset hentry"]}),
|
keep_only_tags = [dict(name='div', attrs={'class':["story","entry-asset asset hentry"]}),
|
||||||
dict(name='div', attrs={'id':["pagebody","story","maincontentcontainer"]}),
|
dict(name='div', attrs={'id':["pagebody","story","maincontentcontainer"]}),
|
||||||
]
|
]
|
||||||
@ -201,3 +201,5 @@ class BaltimoreSun(BasicNewsRecipe):
|
|||||||
tag.extract()
|
tag.extract()
|
||||||
for tag in soup.findAll('font', dict(attrs={'id':["cr-other-headlines"]})):
|
for tag in soup.findAll('font', dict(attrs={'id':["cr-other-headlines"]})):
|
||||||
tag.extract()
|
tag.extract()
|
||||||
|
|
||||||
|
return soup
|
||||||
|
Loading…
x
Reference in New Issue
Block a user