Update various Polish news sources

2025-07-09 03:04:10 -04:00 · 2013-03-22 08:18:54 +05:30 · 2013-03-22 08:18:54 +05:30 · 6ce4e61d4f
commit 6ce4e61d4f
parent f819a51ad8 d1454c079c
13 changed files with 53 additions and 35 deletions
--- a/recipes/adventure_zone_pl.recipe
+++ b/recipes/adventure_zone_pl.recipe
@ -10,15 +10,15 @@ class Adventure_zone(BasicNewsRecipe):
    oldest_article = 20
    max_articles_per_feed = 100
    cover_url = 'http://www.adventure-zone.info/inne/logoaz_2012.png'
-    index='http://www.adventure-zone.info/fusion/'
+    index = 'http://www.adventure-zone.info/fusion/'
    use_embedded_content = False
    preprocess_regexps     = [(re.compile(r"<td class='capmain'>Komentarze</td>", re.IGNORECASE), lambda m: ''),
    (re.compile(r'</?table.*?>'), lambda match: ''),
    (re.compile(r'</?tbody.*?>'), lambda match: '')]
-    remove_tags_before= dict(name='td', attrs={'class':'main-bg'})
+    remove_tags_before = dict(name='td', attrs={'class':'main-bg'})
-    remove_tags= [dict(name='img', attrs={'alt':'Drukuj'})]
+    remove_tags = [dict(name='img', attrs={'alt':'Drukuj'})]
-    remove_tags_after= dict(id='comments')
+    remove_tags_after = dict(id='comments')
-    extra_css              = '.main-bg{text-align: left;}  td.capmain{ font-size: 22px; }'
+    extra_css              = '.main-bg{text-align: left;}  td.capmain{ font-size: 22px; } img.news-category {float: left; margin-right: 5px;}'
    feeds          = [(u'Nowinki', u'http://www.adventure-zone.info/fusion/feeds/news.php')]
    '''def get_cover_url(self):
@ -67,4 +67,3 @@ class Adventure_zone(BasicNewsRecipe):
                a['href']=self.index + a['href']
        return soup
--- a/recipes/astroflesz.recipe
+++ b/recipes/astroflesz.recipe
@ -18,3 +18,10 @@ class Astroflesz(BasicNewsRecipe):
    remove_tags_after = dict(name='div', attrs={'class':'itemLinks'})
    remove_tags = [dict(name='div', attrs={'class':['itemLinks', 'itemToolbar', 'itemRatingBlock']})]
    feeds          = [(u'Wszystkie', u'http://astroflesz.pl/?format=feed')]
    def postprocess_html(self, soup, first_fetch):
        """Float the images found inside the article intro block to the left.

        Looks up the first element whose class is 'itemIntroText'. When one
        is found, an inline ``style`` is set on every ``img`` inside it.
        The soup is modified in place and returned; ``first_fetch`` is not
        used here.
        """
        intro = soup.find(attrs={'class':'itemIntroText'})
        if not intro:
            # No intro block on this page, so there is nothing to restyle.
            return soup
        for image in intro.findAll('img'):
            image['style'] = 'float: left; margin-right: 5px;'
        return soup
--- a/recipes/ciekawostki_historyczne.recipe
+++ b/recipes/ciekawostki_historyczne.recipe
@ -11,7 +11,8 @@ class Ciekawostki_Historyczne(BasicNewsRecipe):
    masthead_url = 'http://ciekawostkihistoryczne.pl/wp-content/themes/Wordpress_Magazine/images/logo-ciekawostki-historyczne-male.jpg'
    cover_url = 'http://ciekawostkihistoryczne.pl/wp-content/themes/Wordpress_Magazine/images/logo-ciekawostki-historyczne-male.jpg'
    max_articles_per_feed = 100
-    oldest_article = 140000
+    extra_css = 'img.alignleft {float:left; margin-right:5px;} .alignright {float:right; margin-left:5px;}'
    oldest_article = 12
    preprocess_regexps = [(re.compile(ur'Ten artykuł ma kilka stron.*?</fb:like>', re.DOTALL), lambda match: ''), (re.compile(ur'<h2>Zobacz też:</h2>.*?</ol>', re.DOTALL), lambda match: '')]
    no_stylesheets = True
    remove_empty_feeds = True
--- a/recipes/conowego_pl.recipe
+++ b/recipes/conowego_pl.recipe
@ -11,6 +11,7 @@ class CoNowegoPl(BasicNewsRecipe):
    oldest_article = 7
    max_articles_per_feed = 100
    INDEX = 'http://www.conowego.pl/'
    extra_css = '.news-single-img {float:left; margin-right:5px;}'
    no_stylesheets = True
    remove_empty_feeds = True
    use_embedded_content = False
--- a/recipes/czas_gentlemanow.recipe
+++ b/recipes/czas_gentlemanow.recipe
@ -12,11 +12,13 @@ class CzasGentlemanow(BasicNewsRecipe):
    ignore_duplicate_articles = {'title', 'url'}
    oldest_article = 7
    max_articles_per_feed = 100
    extra_css = '.gallery-item {float:left; margin-right: 10px; max-width: 20%;} .alignright {text-align: right; float:right; margin-left:5px;}\
    .wp-caption-text {text-align: left;} img.aligncenter {display: block; margin-left: auto; margin-right: auto;} .alignleft {float: left; margin-right:5px;}'
    no_stylesheets = True
    remove_empty_feeds = True
    preprocess_regexps     = [(re.compile(u'<h3>Może Cię też zainteresować:</h3>'), lambda m: '')]
    use_embedded_content = False
    keep_only_tags = [dict(name='div', attrs={'class':'content'})]
-    remove_tags = [dict(attrs={'class':'meta_comments'}), dict(id=['comments', 'related_posts_thumbnails'])]
+    remove_tags = [dict(attrs={'class':'meta_comments'}), dict(id=['comments', 'related_posts_thumbnails', 'respond'])]
    remove_tags_after = dict(id='comments')
    feeds          = [(u'M\u0119ski \u015awiat', u'http://czasgentlemanow.pl/category/meski-swiat/feed/'), (u'Styl', u'http://czasgentlemanow.pl/category/styl/feed/'), (u'Vademecum Gentlemana', u'http://czasgentlemanow.pl/category/vademecum/feed/'), (u'Dom i rodzina', u'http://czasgentlemanow.pl/category/dom-i-rodzina/feed/'), (u'Honor', u'http://czasgentlemanow.pl/category/honor/feed/'), (u'Gad\u017cety Gentlemana', u'http://czasgentlemanow.pl/category/gadzety-gentlemana/feed/')]
--- a/recipes/dobreprogamy.recipe
+++ b/recipes/dobreprogamy.recipe
@ -16,6 +16,7 @@ class Dobreprogramy_pl(BasicNewsRecipe):
    extra_css      = '.title {font-size:22px;}'
    oldest_article = 8
    max_articles_per_feed = 100
    remove_attrs = ['style', 'width', 'height']
    preprocess_regexps = [(re.compile(ur'<div id="\S+360pmp4">Twoja przeglądarka nie obsługuje Flasha i HTML5 lub wyłączono obsługę JavaScript...</div>'), lambda match: '') ]
    keep_only_tags=[dict(attrs={'class':['news', 'entry single']})]
    remove_tags = [dict(attrs={'class':['newsOptions', 'noPrint', 'komentarze', 'tags  font-heading-master']}), dict(id='komentarze'), dict(name='iframe')]
@ -28,4 +29,11 @@ class Dobreprogramy_pl(BasicNewsRecipe):
        for a in soup('a'):
            if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
                a['href']=self.index + a['href']
        for r in soup.findAll('iframe'):
            r.parent.extract()
        return soup
    def postprocess_html(self, soup, first_fetch):
        """Extract every ``findAll('span', text='')`` match with no string.

        Each result whose ``.string`` is falsy is removed from the tree via
        ``extract()``. The soup is modified in place and returned;
        ``first_fetch`` is not used here.
        """
        # NOTE(review): presumably meant to strip empty <span> elements;
        # confirm how the parser in use interprets text='' in findAll.
        for candidate in soup.findAll('span', text=''):
            if candidate.string:
                continue
            candidate.extract()
        return soup
--- a/recipes/dzieje_pl.recipe
+++ b/recipes/dzieje_pl.recipe
@ -9,6 +9,7 @@ class Dzieje(BasicNewsRecipe):
    category       = 'history'
    language       = 'pl'
    ignore_duplicate_articles = {'title', 'url'}
    extra_css = '.imagecache-default {float:left; margin-right:20px;}'
    index = 'http://dzieje.pl'
    oldest_article = 8
    max_articles_per_feed = 100
--- a/recipes/ekologia_pl.recipe
+++ b/recipes/ekologia_pl.recipe
@ -9,7 +9,7 @@ class EkologiaPl(BasicNewsRecipe):
    language       = 'pl'
    cover_url = 'http://www.ekologia.pl/assets/images/logo/ekologia_pl_223x69.png'
    ignore_duplicate_articles = {'title', 'url'}
-    extra_css = '.title {font-size: 200%;}'
+    extra_css = '.title {font-size: 200%;} .imagePowiazane, .imgCon {float:left; margin-right:5px;}'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
--- a/recipes/film_org_pl.recipe
+++ b/recipes/film_org_pl.recipe
@ -7,6 +7,7 @@ class FilmOrgPl(BasicNewsRecipe):
    description   = u"Recenzje, analizy, artykuły, rankingi - wszystko o filmie dla miłośników kina. Opisy efektów specjalnych, wersji reżyserskich, remake'ów, sequeli. No i forum filmowe. Jedne z największych w Polsce."
    category       = 'film'
    language       = 'pl'
    extra_css = '.alignright {float:right; margin-left:5px;} .alignleft {float:left; margin-right:5px;}'
    cover_url = 'http://film.org.pl/wp-content/themes/KMF/images/logo_kmf10.png'
    ignore_duplicate_articles = {'title', 'url'}
    oldest_article = 7
--- a/recipes/film_web.recipe
+++ b/recipes/film_web.recipe
@ -10,7 +10,6 @@ class FilmWebPl(BasicNewsRecipe):
    category       = 'movies'
    language       = 'pl'
    index = 'http://www.filmweb.pl'
    #extra_css = '.MarkupPhotoHTML-7 {float:left; margin-right: 10px;}'
    oldest_article = 8
    max_articles_per_feed = 100
    no_stylesheets = True
@ -19,9 +18,9 @@ class FilmWebPl(BasicNewsRecipe):
    remove_javascript = True
    preprocess_regexps = [(re.compile(u'\(kliknij\,\ aby powiększyć\)', re.IGNORECASE), lambda m: ''), (re.compile(ur'(<br ?/?>\s*?<br ?/?>\s*?)+', re.IGNORECASE), lambda m: '<br />')]#(re.compile(ur' | ', re.IGNORECASE), lambda m: '')]
    extra_css      = '.hdrBig {font-size:22px;} ul {list-style-type:none; padding: 0; margin: 0;}'
-    remove_tags = [dict(name='div', attrs={'class':['recommendOthers']}), dict(name='ul', attrs={'class':'fontSizeSet'}), dict(attrs={'class':'userSurname anno'})]
+    #remove_tags = [dict()]
    remove_attributes = ['style',]
-    keep_only_tags = [dict(name='h1', attrs={'class':['hdrBig', 'hdrEntity']}), dict(name='div', attrs={'class':['newsInfo', 'newsInfoSmall', 'reviewContent description']})]
+    keep_only_tags = [dict(attrs={'class':['hdr hdr-super', 'newsContent']})]
    feeds          = [(u'News / Filmy w produkcji', 'http://www.filmweb.pl/feed/news/category/filminproduction'),
                         (u'News / Festiwale, nagrody i przeglądy', u'http://www.filmweb.pl/feed/news/category/festival'),
                         (u'News / Seriale', u'http://www.filmweb.pl/feed/news/category/serials'),
@ -59,11 +58,6 @@ class FilmWebPl(BasicNewsRecipe):
        for i in soup.findAll('sup'):
            if not i.string or i.string.startswith('(kliknij'):
                i.extract()
        tag = soup.find(name='ul', attrs={'class':'inline sep-line'})
        if tag:
            tag.name = 'div'
            for t in tag.findAll('li'):
                t.name = 'div'
        for r in soup.findAll(id=re.compile('photo-\d+')):
            r.extract()
        for r in soup.findAll(style=re.compile('float: ?left')):
--- a/recipes/niebezpiecznik.recipe
+++ b/recipes/niebezpiecznik.recipe
@ -9,6 +9,7 @@ class Niebezpiecznik_pl(BasicNewsRecipe):
    oldest_article = 8
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
    cover_url = u'http://userlogos.org/files/logos/Karmody/niebezpiecznik_01.png'
    remove_tags = [dict(name='div', attrs={'class':['sociable']}), dict(name='h4'), dict(attrs={'class':'similar-posts'})]
    keep_only_tags = [dict(name='div', attrs={'class':['title', 'entry']})]
--- a/recipes/wirtualnemedia_pl.recipe
+++ b/recipes/wirtualnemedia_pl.recipe
@ -8,6 +8,7 @@ class WirtualneMedia(BasicNewsRecipe):
    use_embedded_content = False
    remove_empty_feeds = True
    __author__        = 'fenuks'
    extra_css = '.thumbnail {float:left; max-width:150px; margin-right:5px;}'
    description   = u'Portal o mediach, reklamie, internecie, PR, telekomunikacji - nr 1 w Polsce - WirtualneMedia.pl - wiadomości z pierwszej ręki.'
    category       = 'internet'
    language       = 'pl'
--- a/recipes/zaufana_trzecia_strona.recipe
+++ b/recipes/zaufana_trzecia_strona.recipe
@ -1,5 +1,6 @@
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
 from calibre.web.feeds.news import BasicNewsRecipe
 class ZTS(BasicNewsRecipe):
    title          = u'Zaufana Trzecia Strona'
    __author__        = 'fenuks'
@ -7,6 +8,7 @@ class ZTS(BasicNewsRecipe):
    category       = 'IT, security'
    language       = 'pl'
    cover_url = 'http://www.zaufanatrzeciastrona.pl/wp-content/uploads/2012/08/z3s_h100.png'
    extra_css = '.thumbnail {float: left; margin-right:5px;}'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True