mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
fix computerworld.pl recipe
This commit is contained in:
parent
8bfeac7440
commit
c6d33d0add
@@ -14,19 +14,13 @@ class Computerworld_pl(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
remove_attributes = ['style', ]
|
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
preprocess_regexps = [(re.compile(u'Zobacz również:', re.IGNORECASE), lambda m: ''),
|
preprocess_regexps = [(re.compile(u'Zobacz również:', re.IGNORECASE), lambda m: ''),
|
||||||
(re.compile(ur'[*]+reklama[*]+', re.IGNORECASE), lambda m: ''), ]
|
(re.compile(ur'[*]+reklama[*]+', re.IGNORECASE), lambda m: ''), ]
|
||||||
keep_only_tags = [dict(id=['article-default-body'])]
|
keep_only_tags = [dict(name='article')]
|
||||||
remove_tags = [dict(attrs={'class': ['share_tools nocontent', 'rec']}), dict(
|
remove_tags = [dict(attrs={'class': ['share_tools nocontent', 'rec']}),
|
||||||
id=['topComment', 'bottom_tools'])]
|
dict(name='ul',attrs={'class':'tags'}),
|
||||||
|
dict(name='ol'),
|
||||||
|
dict(id=['topComment', 'bottom_tools'])]
|
||||||
|
|
||||||
feeds = [(u'Wiadomo\u015bci', u'http://rssout.idg.pl/cw/news_iso.xml')]
|
feeds = [(u'Wiadomo\u015bci', u'https://www.computerworld.pl/news?rss')]
|
||||||
|
|
||||||
def skip_ad_pages(self, soup):
|
|
||||||
if soup.title.string.lower() == 'advertisement':
|
|
||||||
tag = soup.find(name='a')
|
|
||||||
if tag:
|
|
||||||
new_soup = self.index_to_soup(tag['href'], raw=True)
|
|
||||||
return new_soup
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user