mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
...
This commit is contained in:
parent
c987d56c8e
commit
66ea17fe86
@ -9,9 +9,10 @@ class CGM(BasicNewsRecipe):
|
|||||||
category = 'music'
|
category = 'music'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
|
remove_empty_feeds= True
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheers=True
|
no_stylesheers=True
|
||||||
extra_css = 'div {color:black;} strong {color:black;} span {color:black;} p {color:black;}'
|
extra_css = 'div {color:black;} strong {color:black;} span {color:black;} p {color:black;} h2 {color:black;}'
|
||||||
remove_tags_before=dict(id='mainContent')
|
remove_tags_before=dict(id='mainContent')
|
||||||
remove_tags_after=dict(name='div', attrs={'class':'fbContainer'})
|
remove_tags_after=dict(name='div', attrs={'class':'fbContainer'})
|
||||||
remove_tags=[dict(name='div', attrs={'class':'fbContainer'}),
|
remove_tags=[dict(name='div', attrs={'class':'fbContainer'}),
|
||||||
@ -22,10 +23,12 @@ class CGM(BasicNewsRecipe):
|
|||||||
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
ad=soup.findAll('img')
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
ad=soup.findAll('a')
|
||||||
for r in ad:
|
for r in ad:
|
||||||
if '/_vault/_article_photos/5841.jpg' in r['src'] or '_vault/_article_photos/5807.jpg' in r['src'] or 'article_photos/5841.jpg' in r['src'] or 'article_photos/5825.jpg' in r['src'] or '_article_photos/5920.jpg' in r['src'] or '_article_photos/5919.jpg' in r['src'] or '_article_photos/5918.jpg' in r['src'] or '_article_photos/5914.jpg' in r['src'] or '_article_photos/5911.jpg' in r['src'] or '_article_photos/5923.jpg' in r['src'] or '_article_photos/5921.jpg' in r['src']:
|
if 'http://www.hustla.pl' in r['href']:
|
||||||
ad[ad.index(r)].extract()
|
r.extract()
|
||||||
gallery=soup.find('div', attrs={'class':'galleryFlash'})
|
gallery=soup.find('div', attrs={'class':'galleryFlash'})
|
||||||
if gallery:
|
if gallery:
|
||||||
img=gallery.find('embed')
|
img=gallery.find('embed')
|
||||||
|
@ -9,8 +9,17 @@ class Gram_pl(BasicNewsRecipe):
|
|||||||
oldest_article = 8
|
oldest_article = 8
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets= True
|
no_stylesheets= True
|
||||||
|
extra_css = 'h2 {font-style: italic; font-size:20px;}'
|
||||||
cover_url=u'http://www.gram.pl/www/01/img/grampl_zima.png'
|
cover_url=u'http://www.gram.pl/www/01/img/grampl_zima.png'
|
||||||
remove_tags= [dict(name='p', attrs={'class':['extraText', 'must-log-in']}), dict(attrs={'class':['el', 'headline', 'post-info']}), dict(name='div', attrs={'class':['twojaOcena', 'comment-body', 'comment-author vcard', 'comment-meta commentmetadata', 'tw_button']}), dict(id=['igit_rpwt_css', 'comments', 'reply-title', 'igit_title'])]
|
remove_tags= [dict(name='p', attrs={'class':['extraText', 'must-log-in']}), dict(attrs={'class':['el', 'headline', 'post-info']}), dict(name='div', attrs={'class':['twojaOcena', 'comment-body', 'comment-author vcard', 'comment-meta commentmetadata', 'tw_button']}), dict(id=['igit_rpwt_css', 'comments', 'reply-title', 'igit_title'])]
|
||||||
keep_only_tags= [dict(name='div', attrs={'class':['main', 'arkh-postmetadataheader', 'arkh-postcontent', 'post', 'content', 'news_header', 'news_subheader', 'news_text']}), dict(attrs={'class':['contentheading', 'contentpaneopen']})]
|
keep_only_tags= [dict(name='div', attrs={'class':['main', 'arkh-postmetadataheader', 'arkh-postcontent', 'post', 'content', 'news_header', 'news_subheader', 'news_text']}), dict(attrs={'class':['contentheading', 'contentpaneopen']})]
|
||||||
feeds = [(u'gram.pl - informacje', u'http://www.gram.pl/feed_news.asp'),
|
feeds = [(u'gram.pl - informacje', u'http://www.gram.pl/feed_news.asp'),
|
||||||
(u'gram.pl - publikacje', u'http://www.gram.pl/feed_news.asp?type=articles')]
|
(u'gram.pl - publikacje', u'http://www.gram.pl/feed_news.asp?type=articles')]
|
||||||
|
|
||||||
|
def parse_feeds (self):
|
||||||
|
feeds = BasicNewsRecipe.parse_feeds(self)
|
||||||
|
for feed in feeds:
|
||||||
|
for article in feed.articles[:]:
|
||||||
|
if 'REKLAMA SKLEP' in article.title.upper() or u'ARTYKUŁ:' in article.title.upper():
|
||||||
|
feed.articles.remove(article)
|
||||||
|
return feeds
|
||||||
|
@ -9,4 +9,6 @@ class Tablety_pl(BasicNewsRecipe):
|
|||||||
language = 'pl'
|
language = 'pl'
|
||||||
oldest_article = 8
|
oldest_article = 8
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
|
keep_only_tags=[dict(name='header', attrs={'class':'entry-header'}), dict(name='div', attrs={'class':'entry-content clearfix'})]
|
||||||
|
remove_tags=[dict(name='div', attrs={'class':'snap_nopreview sharing robots-nocontent'}), dict(name='span', attrs={'class':'dsq-postid'})]
|
||||||
feeds = [(u'Najnowsze posty', u'http://www.tablety.pl/feed/')]
|
feeds = [(u'Najnowsze posty', u'http://www.tablety.pl/feed/')]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user