mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 02:34:06 -04:00
Quick fix for Fronda: just populate the article list and fetch only the first page of each article's content
This commit is contained in:
parent
3ab8d26ac2
commit
73ed9f1268
@ -62,34 +62,25 @@ class Fronda(BasicNewsRecipe):
|
|||||||
except:
|
except:
|
||||||
continue
|
continue
|
||||||
articles[genName] = []
|
articles[genName] = []
|
||||||
for item in soup.findAll('li'):
|
for item in soup.findAll('article',attrs={'class':'article article-wide'}):
|
||||||
article_h = item.find('h2')
|
article_a = item.find('a')
|
||||||
if not article_h:
|
|
||||||
continue
|
|
||||||
article_date = self.tag_to_string(item.find('b'))
|
|
||||||
if self.date_cut(article_date):
|
|
||||||
continue
|
|
||||||
article_a = article_h.find('a')
|
|
||||||
article_url = 'http://www.fronda.pl' + article_a['href']
|
article_url = 'http://www.fronda.pl' + article_a['href']
|
||||||
article_title = self.tag_to_string(article_a)
|
article_title = self.tag_to_string(article_a)
|
||||||
articles[genName].append( { 'title' : article_title, 'url' : article_url, 'date' : article_date })
|
articles[genName].append( { 'title' : article_title, 'url' : article_url })
|
||||||
if articles[genName]:
|
if articles[genName]:
|
||||||
feeds.append((genName, articles[genName]))
|
feeds.append((genName, articles[genName]))
|
||||||
return feeds
|
return feeds
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='div', attrs={'class':'yui-g'})
|
dict(name='div', attrs={'class':'content content-70 phone-100'})
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='div', attrs={'class':['related-articles','button right','pagination','related-articles content']}),
|
dict(name='div', attrs={'class':['clearfix','last-articles clearfix','comments clearfix','related-articles','social-buttons clearfix']}),
|
||||||
dict(name='h3', attrs={'class':'block-header article comments'}),
|
dict(name='span', attrs={'class':'small-info'}),
|
||||||
dict(name='ul', attrs={'class':['comment-list','category','tag-list']}),
|
dict(name='ul', attrs={'class':'nav nav-tags clearfix'}),
|
||||||
dict(name='p', attrs={'id':'comments-disclaimer'}),
|
dict(name='h3', attrs={'class':'section-header'}),
|
||||||
dict(name='div', attrs={'style':'text-align: left; margin-bottom: 15px;'}),
|
dict(name='article', attrs={'class':['slided-article hidden-phone', 'article article-wide hidden-phone']})
|
||||||
dict(name='div', attrs={'style':'text-align: left; margin-top: 15px; margin-bottom: 30px;'}),
|
|
||||||
dict(name='div', attrs={'id':'comment-form'}),
|
|
||||||
dict(name='span', attrs={'class':'separator'})
|
|
||||||
]
|
]
|
||||||
|
|
||||||
preprocess_regexps = [
|
preprocess_regexps = [
|
||||||
|
Loading…
x
Reference in New Issue
Block a user