diff --git a/recipes/adventure_zone_pl.recipe b/recipes/adventure_zone_pl.recipe index dd47af946a..00b4a8753e 100644 --- a/recipes/adventure_zone_pl.recipe +++ b/recipes/adventure_zone_pl.recipe @@ -10,15 +10,15 @@ class Adventure_zone(BasicNewsRecipe): oldest_article = 20 max_articles_per_feed = 100 cover_url = 'http://www.adventure-zone.info/inne/logoaz_2012.png' - index='http://www.adventure-zone.info/fusion/' + index = 'http://www.adventure-zone.info/fusion/' use_embedded_content = False preprocess_regexps = [(re.compile(r"Komentarze", re.IGNORECASE), lambda m: ''), (re.compile(r''), lambda match: ''), (re.compile(r''), lambda match: '')] - remove_tags_before= dict(name='td', attrs={'class':'main-bg'}) - remove_tags= [dict(name='img', attrs={'alt':'Drukuj'})] - remove_tags_after= dict(id='comments') - extra_css = '.main-bg{text-align: left;} td.capmain{ font-size: 22px; }' + remove_tags_before = dict(name='td', attrs={'class':'main-bg'}) + remove_tags = [dict(name='img', attrs={'alt':'Drukuj'})] + remove_tags_after = dict(id='comments') + extra_css = '.main-bg{text-align: left;} td.capmain{ font-size: 22px; } img.news-category {float: left; margin-right: 5px;}' feeds = [(u'Nowinki', u'http://www.adventure-zone.info/fusion/feeds/news.php')] '''def get_cover_url(self): @@ -26,7 +26,7 @@ class Adventure_zone(BasicNewsRecipe): cover=soup.find(id='box_OstatninumerAZ') self.cover_url='http://www.adventure-zone.info/fusion/'+ cover.center.a.img['src'] return getattr(self, 'cover_url', self.cover_url)''' - + def populate_article_metadata(self, article, soup, first): result = re.search('(.+) - Adventure Zone', soup.title.string) if result: @@ -66,5 +66,4 @@ class Adventure_zone(BasicNewsRecipe): if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: a['href']=self.index + a['href'] return soup - - + diff --git a/recipes/astroflesz.recipe b/recipes/astroflesz.recipe index 745ade420c..11a56ec6b5 100644 --- a/recipes/astroflesz.recipe +++ b/recipes/astroflesz.recipe @@ -18,3 +18,10 @@ class Astroflesz(BasicNewsRecipe): remove_tags_after = dict(name='div', attrs={'class':'itemLinks'}) remove_tags = [dict(name='div', attrs={'class':['itemLinks', 'itemToolbar', 'itemRatingBlock']})] feeds = [(u'Wszystkie', u'http://astroflesz.pl/?format=feed')] + + def postprocess_html(self, soup, first_fetch): + t = soup.find(attrs={'class':'itemIntroText'}) + if t: + for i in t.findAll('img'): + i['style'] = 'float: left; margin-right: 5px;' + return soup diff --git a/recipes/ciekawostki_historyczne.recipe b/recipes/ciekawostki_historyczne.recipe index b45f28e4ba..42ea94fa1d 100644 --- a/recipes/ciekawostki_historyczne.recipe +++ b/recipes/ciekawostki_historyczne.recipe @@ -11,7 +11,8 @@ class Ciekawostki_Historyczne(BasicNewsRecipe): masthead_url = 'http://ciekawostkihistoryczne.pl/wp-content/themes/Wordpress_Magazine/images/logo-ciekawostki-historyczne-male.jpg' cover_url = 'http://ciekawostkihistoryczne.pl/wp-content/themes/Wordpress_Magazine/images/logo-ciekawostki-historyczne-male.jpg' max_articles_per_feed = 100 - oldest_article = 140000 + extra_css = 'img.alignleft {float:left; margin-right:5px;} .alignright {float:right; margin-left:5px;}' + oldest_article = 12 preprocess_regexps = [(re.compile(ur'Ten artykuł ma kilka stron.*?', re.DOTALL), lambda match: ''), (re.compile(ur'

Zobacz też:

.*?', re.DOTALL), lambda match: '')] no_stylesheets = True remove_empty_feeds = True diff --git a/recipes/conowego_pl.recipe b/recipes/conowego_pl.recipe index 9b2f6e8200..7e6549e713 100644 --- a/recipes/conowego_pl.recipe +++ b/recipes/conowego_pl.recipe @@ -11,6 +11,7 @@ class CoNowegoPl(BasicNewsRecipe): oldest_article = 7 max_articles_per_feed = 100 INDEX = 'http://www.conowego.pl/' + extra_css = '.news-single-img {float:left; margin-right:5px;}' no_stylesheets = True remove_empty_feeds = True use_embedded_content = False @@ -35,7 +36,7 @@ class CoNowegoPl(BasicNewsRecipe): pagetext = soup2.find(attrs={'class':'ni_content'}) pos = len(appendtag.contents) appendtag.insert(pos, pagetext) - + comments = appendtag.findAll(text=lambda text:isinstance(text, Comment)) for comment in comments: comment.extract() diff --git a/recipes/czas_gentlemanow.recipe b/recipes/czas_gentlemanow.recipe index 009cc7e9dd..d9b6ab78c7 100644 --- a/recipes/czas_gentlemanow.recipe +++ b/recipes/czas_gentlemanow.recipe @@ -12,11 +12,13 @@ class CzasGentlemanow(BasicNewsRecipe): ignore_duplicate_articles = {'title', 'url'} oldest_article = 7 max_articles_per_feed = 100 + extra_css = '.gallery-item {float:left; margin-right: 10px; max-width: 20%;} .alignright {text-align: right; float:right; margin-left:5px;}\ + .wp-caption-text {text-align: left;} img.aligncenter {display: block; margin-left: auto; margin-right: auto;} .alignleft {float: left; margin-right:5px;}' no_stylesheets = True remove_empty_feeds = True preprocess_regexps = [(re.compile(u'

Może Cię też zainteresować:

'), lambda m: '')] use_embedded_content = False keep_only_tags = [dict(name='div', attrs={'class':'content'})] - remove_tags = [dict(attrs={'class':'meta_comments'}), dict(id=['comments', 'related_posts_thumbnails'])] + remove_tags = [dict(attrs={'class':'meta_comments'}), dict(id=['comments', 'related_posts_thumbnails', 'respond'])] remove_tags_after = dict(id='comments') feeds = [(u'M\u0119ski \u015awiat', u'http://czasgentlemanow.pl/category/meski-swiat/feed/'), (u'Styl', u'http://czasgentlemanow.pl/category/styl/feed/'), (u'Vademecum Gentlemana', u'http://czasgentlemanow.pl/category/vademecum/feed/'), (u'Dom i rodzina', u'http://czasgentlemanow.pl/category/dom-i-rodzina/feed/'), (u'Honor', u'http://czasgentlemanow.pl/category/honor/feed/'), (u'Gad\u017cety Gentlemana', u'http://czasgentlemanow.pl/category/gadzety-gentlemana/feed/')] diff --git a/recipes/dobreprogamy.recipe b/recipes/dobreprogamy.recipe index 708bdbb017..f37059becf 100644 --- a/recipes/dobreprogamy.recipe +++ b/recipes/dobreprogamy.recipe @@ -16,6 +16,7 @@ class Dobreprogramy_pl(BasicNewsRecipe): extra_css = '.title {font-size:22px;}' oldest_article = 8 max_articles_per_feed = 100 + remove_attrs = ['style', 'width', 'height'] preprocess_regexps = [(re.compile(ur'
Twoja przeglądarka nie obsługuje Flasha i HTML5 lub wyłączono obsługę JavaScript...
'), lambda match: '') ] keep_only_tags=[dict(attrs={'class':['news', 'entry single']})] remove_tags = [dict(attrs={'class':['newsOptions', 'noPrint', 'komentarze', 'tags font-heading-master']}), dict(id='komentarze'), dict(name='iframe')] @@ -28,4 +29,11 @@ class Dobreprogramy_pl(BasicNewsRecipe): for a in soup('a'): if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: a['href']=self.index + a['href'] + for r in soup.findAll('iframe'): + r.parent.extract() return soup + def postprocess_html(self, soup, first_fetch): + for r in soup.findAll('span', text=''): + if not r.string: + r.extract() + return soup \ No newline at end of file diff --git a/recipes/dzieje_pl.recipe b/recipes/dzieje_pl.recipe index 50de40354c..cdd0630891 100644 --- a/recipes/dzieje_pl.recipe +++ b/recipes/dzieje_pl.recipe @@ -9,6 +9,7 @@ class Dzieje(BasicNewsRecipe): category = 'history' language = 'pl' ignore_duplicate_articles = {'title', 'url'} + extra_css = '.imagecache-default {float:left; margin-right:20px;}' index = 'http://dzieje.pl' oldest_article = 8 max_articles_per_feed = 100 diff --git a/recipes/ekologia_pl.recipe b/recipes/ekologia_pl.recipe index 21d3b607d2..e925ebad6f 100644 --- a/recipes/ekologia_pl.recipe +++ b/recipes/ekologia_pl.recipe @@ -9,7 +9,7 @@ class EkologiaPl(BasicNewsRecipe): language = 'pl' cover_url = 'http://www.ekologia.pl/assets/images/logo/ekologia_pl_223x69.png' ignore_duplicate_articles = {'title', 'url'} - extra_css = '.title {font-size: 200%;}' + extra_css = '.title {font-size: 200%;} .imagePowiazane, .imgCon {float:left; margin-right:5px;}' oldest_article = 7 max_articles_per_feed = 100 no_stylesheets = True diff --git a/recipes/film_org_pl.recipe b/recipes/film_org_pl.recipe index 442e273b1b..fa0a69912b 100644 --- a/recipes/film_org_pl.recipe +++ b/recipes/film_org_pl.recipe @@ -7,6 +7,7 @@ class FilmOrgPl(BasicNewsRecipe): description = u"Recenzje, analizy, artykuły, rankingi - wszystko o filmie dla miłośników kina. Opisy efektów specjalnych, wersji reżyserskich, remake'ów, sequeli. No i forum filmowe. Jedne z największych w Polsce." category = 'film' language = 'pl' + extra_css = '.alignright {float:right; margin-left:5px;} .alignleft {float:left; margin-right:5px;}' cover_url = 'http://film.org.pl/wp-content/themes/KMF/images/logo_kmf10.png' ignore_duplicate_articles = {'title', 'url'} oldest_article = 7 diff --git a/recipes/film_web.recipe b/recipes/film_web.recipe index 3a86438d1c..780dcbe9b2 100644 --- a/recipes/film_web.recipe +++ b/recipes/film_web.recipe @@ -10,7 +10,6 @@ class FilmWebPl(BasicNewsRecipe): category = 'movies' language = 'pl' index = 'http://www.filmweb.pl' - #extra_css = '.MarkupPhotoHTML-7 {float:left; margin-right: 10px;}' oldest_article = 8 max_articles_per_feed = 100 no_stylesheets = True @@ -19,9 +18,9 @@ class FilmWebPl(BasicNewsRecipe): remove_javascript = True preprocess_regexps = [(re.compile(u'\(kliknij\,\ aby powiększyć\)', re.IGNORECASE), lambda m: ''), (re.compile(ur'(
\s*?
\s*?)+', re.IGNORECASE), lambda m: '
')]#(re.compile(ur' | ', re.IGNORECASE), lambda m: '')] extra_css = '.hdrBig {font-size:22px;} ul {list-style-type:none; padding: 0; margin: 0;}' - remove_tags = [dict(name='div', attrs={'class':['recommendOthers']}), dict(name='ul', attrs={'class':'fontSizeSet'}), dict(attrs={'class':'userSurname anno'})] + #remove_tags = [dict()] remove_attributes = ['style',] - keep_only_tags = [dict(name='h1', attrs={'class':['hdrBig', 'hdrEntity']}), dict(name='div', attrs={'class':['newsInfo', 'newsInfoSmall', 'reviewContent description']})] + keep_only_tags = [dict(attrs={'class':['hdr hdr-super', 'newsContent']})] feeds = [(u'News / Filmy w produkcji', 'http://www.filmweb.pl/feed/news/category/filminproduction'), (u'News / Festiwale, nagrody i przeglądy', u'http://www.filmweb.pl/feed/news/category/festival'), (u'News / Seriale', u'http://www.filmweb.pl/feed/news/category/serials'), @@ -44,12 +43,12 @@ class FilmWebPl(BasicNewsRecipe): skip_tag = soup.find('a', attrs={'class':'welcomeScreenButton'}) if skip_tag is not None: return self.index_to_soup(skip_tag['href'], raw=True) - + def postprocess_html(self, soup, first_fetch): for r in soup.findAll(attrs={'class':'singlephoto'}): r['style'] = 'float:left; margin-right: 10px;' return soup - + def preprocess_html(self, soup): for a in soup('a'): if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: @@ -59,11 +58,6 @@ class FilmWebPl(BasicNewsRecipe): for i in soup.findAll('sup'): if not i.string or i.string.startswith('(kliknij'): i.extract() - tag = soup.find(name='ul', attrs={'class':'inline sep-line'}) - if tag: - tag.name = 'div' - for t in tag.findAll('li'): - t.name = 'div' for r in soup.findAll(id=re.compile('photo-\d+')): r.extract() for r in soup.findAll(style=re.compile('float: ?left')): diff --git a/recipes/niebezpiecznik.recipe b/recipes/niebezpiecznik.recipe index 2228ef5122..3b321772ec 100644 --- a/recipes/niebezpiecznik.recipe +++ b/recipes/niebezpiecznik.recipe @@ -9,8 +9,9 @@ class Niebezpiecznik_pl(BasicNewsRecipe): oldest_article = 8 max_articles_per_feed = 100 no_stylesheets = True + remove_empty_feeds = True cover_url = u'http://userlogos.org/files/logos/Karmody/niebezpiecznik_01.png' remove_tags = [dict(name='div', attrs={'class':['sociable']}), dict(name='h4'), dict(attrs={'class':'similar-posts'})] keep_only_tags = [dict(name='div', attrs={'class':['title', 'entry']})] feeds = [(u'Wiadomości', u'http://feeds.feedburner.com/niebezpiecznik/'), - ('Blog', 'http://feeds.feedburner.com/niebezpiecznik/linkblog/')] + ('Blog', 'http://feeds.feedburner.com/niebezpiecznik/linkblog/')] diff --git a/recipes/wirtualnemedia_pl.recipe b/recipes/wirtualnemedia_pl.recipe index 018891c243..155cafbec2 100644 --- a/recipes/wirtualnemedia_pl.recipe +++ b/recipes/wirtualnemedia_pl.recipe @@ -8,23 +8,24 @@ class WirtualneMedia(BasicNewsRecipe): use_embedded_content = False remove_empty_feeds = True __author__ = 'fenuks' + extra_css = '.thumbnail {float:left; max-width:150px; margin-right:5px;}' description = u'Portal o mediach, reklamie, internecie, PR, telekomunikacji - nr 1 w Polsce - WirtualneMedia.pl - wiadomości z pierwszej ręki.' category = 'internet' language = 'pl' masthead_url= 'http://i.wp.pl/a/f/jpeg/8654/wirtualnemedia.jpeg' cover_url= 'http://static.wirtualnemedia.pl/img/logo_wirtualnemedia_newsletter.gif' remove_tags=[dict(id=['header', 'footer'])] - feeds = [(u'Gospodarka', u'http://www.wirtualnemedia.pl/rss/wm_gospodarka.xml'), - (u'Internet', u'http://www.wirtualnemedia.pl/rss/wm_internet.xml'), - (u'Kultura', u'http://www.wirtualnemedia.pl/rss/wm_kulturarozrywka.xml'), - (u'Badania', u'http://www.wirtualnemedia.pl/rss/wm_marketing.xml'), - (u'Prasa', u'http://www.wirtualnemedia.pl/rss/wm_prasa.xml'), - (u'Radio', u'http://www.wirtualnemedia.pl/rss/wm_radio.xml'), - (u'Reklama', u'http://www.wirtualnemedia.pl/rss/wm_reklama.xml'), - (u'PR', u'http://www.wirtualnemedia.pl/rss/wm_relations.xml'), - (u'Technologie', u'http://www.wirtualnemedia.pl/rss/wm_telekomunikacja.xml'), - (u'Telewizja', u'http://www.wirtualnemedia.pl/rss/wm_telewizja_rss.xml') - ] + feeds = [(u'Gospodarka', u'http://www.wirtualnemedia.pl/rss/wm_gospodarka.xml'), + (u'Internet', u'http://www.wirtualnemedia.pl/rss/wm_internet.xml'), + (u'Kultura', u'http://www.wirtualnemedia.pl/rss/wm_kulturarozrywka.xml'), + (u'Badania', u'http://www.wirtualnemedia.pl/rss/wm_marketing.xml'), + (u'Prasa', u'http://www.wirtualnemedia.pl/rss/wm_prasa.xml'), + (u'Radio', u'http://www.wirtualnemedia.pl/rss/wm_radio.xml'), + (u'Reklama', u'http://www.wirtualnemedia.pl/rss/wm_reklama.xml'), + (u'PR', u'http://www.wirtualnemedia.pl/rss/wm_relations.xml'), + (u'Technologie', u'http://www.wirtualnemedia.pl/rss/wm_telekomunikacja.xml'), + (u'Telewizja', u'http://www.wirtualnemedia.pl/rss/wm_telewizja_rss.xml') + ] def print_version(self, url): - return url.replace('artykul', 'print') \ No newline at end of file + return url.replace('artykul', 'print') diff --git a/recipes/zaufana_trzecia_strona.recipe b/recipes/zaufana_trzecia_strona.recipe index 13e7d98cce..14f2092d18 100644 --- a/recipes/zaufana_trzecia_strona.recipe +++ b/recipes/zaufana_trzecia_strona.recipe @@ -1,5 +1,6 @@ # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai from calibre.web.feeds.news import BasicNewsRecipe + class ZTS(BasicNewsRecipe): title = u'Zaufana Trzecia Strona' __author__ = 'fenuks' @@ -7,6 +8,7 @@ class ZTS(BasicNewsRecipe): category = 'IT, security' language = 'pl' cover_url = 'http://www.zaufanatrzeciastrona.pl/wp-content/uploads/2012/08/z3s_h100.png' + extra_css = '.thumbnail {float: left; margin-right:5px;}' oldest_article = 7 max_articles_per_feed = 100 no_stylesheets = True