mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-11-03 19:17:02 -05:00 
			
		
		
		
	Fix unicode string syntax errors in recipes
This commit is contained in:
		
							parent
							
								
									e44a10560e
								
							
						
					
					
						commit
						c011243859
					
				@ -15,5 +15,5 @@ class Android_com_pl(BasicNewsRecipe):
 | 
			
		||||
    remove_tags_after = [{'class': 'post-content'}]
 | 
			
		||||
    remove_tags = [dict(name='ul', attrs={'class': 'tags small-tags'}), dict(name='a', attrs={'onclick': 'return ss_plugin_loadpopup_js(this);'})]
 | 
			
		||||
    preprocess_regexps = [
 | 
			
		||||
        (re.compile(ur'<p>.{,1}</p>', re.DOTALL), lambda match: '')]
 | 
			
		||||
        (re.compile(u'<p>.{,1}</p>', re.DOTALL), lambda match: '')]
 | 
			
		||||
    feeds = [(u'Android', u'http://android.com.pl/feed/')]
 | 
			
		||||
 | 
			
		||||
@ -104,7 +104,7 @@ class AppledailyTW(BasicNewsRecipe):
 | 
			
		||||
    ]
 | 
			
		||||
 | 
			
		||||
    def preprocess_raw_html(self, raw_html, url):
        """Clean up the downloaded page markup before it is parsed.

        Two regex substitutions are applied, in order:

        1. Every span that starts at ``<a href="``, contains ``<br><br>`` and
           runs to the next ``</a>`` on the same line is removed.
        2. A ``<title>`` of the form ``text | anything`` is shortened to
           ``<title>text</title>`` (everything from the whitespace before
           the first such pipe onwards is dropped).

        :param raw_html: page markup as text.
        :param url: address the markup came from; not used here.
        :return: the markup with both substitutions applied.
        """
        # Plain raw-string patterns are used instead of ``ur'...'`` (a syntax
        # error on Python 3) or ``unicode(r'...')`` (``unicode`` is undefined
        # on Python 3). Both patterns are pure ASCII, so they behave the same
        # on either interpreter.
        raw_html = re.sub(r'<a href=".*?<br><br>.*?<\/a>', '', raw_html)
        # The replacement must be a raw string so ``\1`` is a back-reference
        # to the captured title text rather than the control character U+0001;
        # it is also its own argument to re.sub, not a second argument to a
        # string conversion call.
        raw_html = re.sub(
            r'<title>(.*?)[\s]+\|.*<\/title>', r'<title>\1</title>', raw_html)
        return raw_html
 | 
			
		||||
 | 
			
		||||
@ -16,8 +16,8 @@ class BenchmarkPl(BasicNewsRecipe):
 | 
			
		||||
    extra_css = 'ul {list-style-type: none;}'
 | 
			
		||||
    no_stylesheets = True
 | 
			
		||||
    use_embedded_content = False
 | 
			
		||||
    preprocess_regexps = [(re.compile(ur'<h3><span style="font-size: small;"> Zobacz poprzednie <a href="http://www.benchmark.pl/news/zestawienie/grupa_id/135">Opinie dnia:</a></span>.*</body>',  # noqa
 | 
			
		||||
        re.DOTALL | re.IGNORECASE), lambda match: '</body>'), (re.compile(ur'Więcej o .*?</ul>', re.DOTALL | re.IGNORECASE), lambda match: '')]  # noqa
 | 
			
		||||
    preprocess_regexps = [(re.compile(u'<h3><span style="font-size: small;"> Zobacz poprzednie <a href="http://www.benchmark.pl/news/zestawienie/grupa_id/135">Opinie dnia:</a></span>.*</body>',  # noqa
 | 
			
		||||
        re.DOTALL | re.IGNORECASE), lambda match: '</body>'), (re.compile(u'Więcej o .*?</ul>', re.DOTALL | re.IGNORECASE), lambda match: '')]  # noqa
 | 
			
		||||
 | 
			
		||||
    keep_only_tags = [dict(id=['articleHeader', 'articleGallery']), dict(
 | 
			
		||||
        name='div', attrs={'class': ['m_zwykly', 'gallery']}), dict(id='article')]
 | 
			
		||||
 | 
			
		||||
@ -14,8 +14,8 @@ class Ciekawostki_Historyczne(BasicNewsRecipe):
 | 
			
		||||
    max_articles_per_feed = 100
 | 
			
		||||
    extra_css = 'img.alignleft {float:left; margin-right:5px;} .alignright {float:right; margin-left:5px;}'
 | 
			
		||||
    oldest_article = 12
 | 
			
		||||
    preprocess_regexps = [(re.compile(ur'Ten artykuł ma kilka stron.*?</fb:like>', re.DOTALL),
 | 
			
		||||
                           lambda match: ''), (re.compile(ur'<h2>Zobacz też:</h2>.*?</ol>', re.DOTALL), lambda match: '')]
 | 
			
		||||
    preprocess_regexps = [(re.compile(u'Ten artykuł ma kilka stron.*?</fb:like>', re.DOTALL),
 | 
			
		||||
                           lambda match: ''), (re.compile(u'<h2>Zobacz też:</h2>.*?</ol>', re.DOTALL), lambda match: '')]
 | 
			
		||||
    no_stylesheets = True
 | 
			
		||||
    remove_empty_feeds = True
 | 
			
		||||
    keep_only_tags = [dict(name='div', attrs={'class': 'post'})]
 | 
			
		||||
 | 
			
		||||
@ -16,11 +16,11 @@ class CNetJapan(BasicNewsRecipe):
 | 
			
		||||
    remove_javascript = True
 | 
			
		||||
 | 
			
		||||
    preprocess_regexps = [
 | 
			
		||||
        (re.compile(ur'<!--\u25B2contents_left END\u25B2-->.*</body>', re.DOTALL | re.IGNORECASE | re.UNICODE),
 | 
			
		||||
        (re.compile(unicode(r'<!--\u25B2contents_left END\u25B2-->.*</body>'), re.DOTALL | re.IGNORECASE | re.UNICODE),
 | 
			
		||||
         lambda match: '</body>'),
 | 
			
		||||
        (re.compile(r'<!--AD_ELU_HEADER-->.*</body>', re.DOTALL | re.IGNORECASE),
 | 
			
		||||
            lambda match: '</body>'),
 | 
			
		||||
        (re.compile(ur'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->', re.UNICODE),
 | 
			
		||||
        (re.compile(unicode(r'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->'), re.UNICODE),
 | 
			
		||||
            lambda match: '<!-- removed -->'),
 | 
			
		||||
    ]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -14,11 +14,11 @@ class CNetJapanDigital(BasicNewsRecipe):
 | 
			
		||||
    remove_javascript = True
 | 
			
		||||
 | 
			
		||||
    preprocess_regexps = [
 | 
			
		||||
        (re.compile(ur'<!--\u25B2contents_left END\u25B2-->.*</body>', re.DOTALL | re.IGNORECASE | re.UNICODE),
 | 
			
		||||
        (re.compile(unicode(r'<!--\u25B2contents_left END\u25B2-->.*</body>'), re.DOTALL | re.IGNORECASE | re.UNICODE),
 | 
			
		||||
         lambda match: '</body>'),
 | 
			
		||||
        (re.compile(r'<!--AD_ELU_HEADER-->.*</body>', re.DOTALL | re.IGNORECASE),
 | 
			
		||||
            lambda match: '</body>'),
 | 
			
		||||
        (re.compile(ur'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->', re.UNICODE),
 | 
			
		||||
        (re.compile(unicode(r'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->'), re.UNICODE),
 | 
			
		||||
            lambda match: '<!-- removed -->'),
 | 
			
		||||
    ]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -14,11 +14,11 @@ class CNetJapanRelease(BasicNewsRecipe):
 | 
			
		||||
    remove_javascript = True
 | 
			
		||||
 | 
			
		||||
    preprocess_regexps = [
 | 
			
		||||
        (re.compile(ur'<!--\u25B2contents_left END\u25B2-->.*</body>', re.DOTALL | re.IGNORECASE | re.UNICODE),
 | 
			
		||||
        (re.compile(unicode(r'<!--\u25B2contents_left END\u25B2-->.*</body>'), re.DOTALL | re.IGNORECASE | re.UNICODE),
 | 
			
		||||
         lambda match: '</body>'),
 | 
			
		||||
        (re.compile(r'<!--AD_ELU_HEADER-->.*</body>', re.DOTALL | re.IGNORECASE),
 | 
			
		||||
            lambda match: '</body>'),
 | 
			
		||||
        (re.compile(ur'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->', re.UNICODE),
 | 
			
		||||
        (re.compile(unicode(r'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->'), re.UNICODE),
 | 
			
		||||
            lambda match: '<!-- removed -->'),
 | 
			
		||||
    ]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -16,7 +16,7 @@ class Computerworld_pl(BasicNewsRecipe):
 | 
			
		||||
    max_articles_per_feed = 100
 | 
			
		||||
    use_embedded_content = False
 | 
			
		||||
    preprocess_regexps = [(re.compile(u'Zobacz również:', re.IGNORECASE), lambda m: ''),
 | 
			
		||||
                          (re.compile(ur'[*]+reklama[*]+', re.IGNORECASE), lambda m: ''), ]
 | 
			
		||||
                          (re.compile(u'[*]+reklama[*]+', re.IGNORECASE), lambda m: ''), ]
 | 
			
		||||
    keep_only_tags = [dict(name='article')]
 | 
			
		||||
    remove_tags = [dict(attrs={'class': ['share_tools nocontent', 'rec']}),
 | 
			
		||||
         dict(name='ul',attrs={'class':'tags'}),
 | 
			
		||||
 | 
			
		||||
@ -19,7 +19,7 @@ class Dobreprogramy_pl(BasicNewsRecipe):
 | 
			
		||||
    max_articles_per_feed = 100
 | 
			
		||||
    remove_attrs = ['style', 'width', 'height']
 | 
			
		||||
    preprocess_regexps = [(re.compile(
 | 
			
		||||
        ur'<div id="\S+360pmp4">Twoja przeglądarka nie obsługuje Flasha i HTML5 lub wyłączono obsługę JavaScript...</div>'), lambda match: '')]
 | 
			
		||||
        unicode(r'<div id="\S+360pmp4">Twoja przeglądarka nie obsługuje Flasha i HTML5 lub wyłączono obsługę JavaScript...</div>')), lambda match: '')]
 | 
			
		||||
    keep_only_tags = [dict(name='h1'), dict(
 | 
			
		||||
        attrs={'class': ['entry single']}), dict(id='phContent_divArticle')]
 | 
			
		||||
    remove_tags = [dict(attrs={'class': ['newsOptions', 'noPrint', 'komentarze', 'tags  font-heading-master', 'social nested-grid  grid-margin-px15-top clearfix no-mobile', 'page-info text-h4 font-heading grid-margin-px15-top color-annotation clearfix', 'series grid-margin-px30-top']}), dict(id='komentarze'), dict(id='phContent_ctl02_sBreadcrumb'), dict(name='iframe')]  # noqa
 | 
			
		||||
 | 
			
		||||
@ -19,8 +19,8 @@ class DziennikWschodni(BasicNewsRecipe):
 | 
			
		||||
    no_stylesheets = True
 | 
			
		||||
    ignore_duplicate_articles = {'title', 'url'}
 | 
			
		||||
 | 
			
		||||
    preprocess_regexps = [(re.compile(ur'Czytaj:.*?</a>', re.DOTALL), lambda match: ''), (re.compile(ur'Przeczytaj także:.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: ''),  # noqa
 | 
			
		||||
                          (re.compile(ur'Przeczytaj również:.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: ''), (re.compile(ur'Zobacz też:.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: '')]  # noqa
 | 
			
		||||
    preprocess_regexps = [(re.compile(u'Czytaj:.*?</a>', re.DOTALL), lambda match: ''), (re.compile(u'Przeczytaj także:.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: ''),  # noqa
 | 
			
		||||
                          (re.compile(u'Przeczytaj również:.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: ''), (re.compile(u'Zobacz też:.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: '')]  # noqa
 | 
			
		||||
 | 
			
		||||
    keep_only_tags = [dict(id=['article', 'cover', 'photostory'])]
 | 
			
		||||
    remove_tags = [dict(id=['articleTags', 'articleMeta', 'boxReadIt', 'articleGalleries', 'articleConnections',
 | 
			
		||||
 | 
			
		||||
@ -20,8 +20,8 @@ class EchoDnia(BasicNewsRecipe):
 | 
			
		||||
    use_embedded_content = False
 | 
			
		||||
    ignore_duplicate_articles = {'title', 'url'}
 | 
			
		||||
 | 
			
		||||
    preprocess_regexps = [(re.compile(ur'Czytaj:.*?</a>', re.DOTALL), lambda match: ''), (re.compile(ur'Przeczytaj także:.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: ''),  # noqa
 | 
			
		||||
                          (re.compile(ur'Przeczytaj również:.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: ''), (re.compile(ur'Zobacz też:.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: '')]  # noqa
 | 
			
		||||
    preprocess_regexps = [(re.compile(u'Czytaj:.*?</a>', re.DOTALL), lambda match: ''), (re.compile(u'Przeczytaj także:.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: ''),  # noqa
 | 
			
		||||
                          (re.compile(u'Przeczytaj również:.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: ''), (re.compile(u'Zobacz też:.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: '')]  # noqa
 | 
			
		||||
 | 
			
		||||
    keep_only_tags = [dict(id=['article', 'cover', 'photostory'])]
 | 
			
		||||
    remove_tags = [dict(id=['articleTags', 'articleMeta', 'boxReadIt', 'articleGalleries', 'articleConnections',
 | 
			
		||||
 | 
			
		||||
@ -45,7 +45,7 @@ class Esensja(BasicNewsRecipe):
 | 
			
		||||
 | 
			
		||||
    preprocess_regexps = [(re.compile(r'alt="[^"]*"'), lambda match: ''),
 | 
			
		||||
                          (re.compile(
 | 
			
		||||
                              ur'(title|alt)="[^"]*?"', re.DOTALL), lambda match: ''),
 | 
			
		||||
                              u'(title|alt)="[^"]*?"', re.DOTALL), lambda match: ''),
 | 
			
		||||
                          ]
 | 
			
		||||
 | 
			
		||||
    def parse_index(self):
 | 
			
		||||
 | 
			
		||||
@ -23,7 +23,7 @@ class EsensjaRSS(BasicNewsRecipe):
 | 
			
		||||
    ignore_duplicate_articles = {'title', 'url'}
 | 
			
		||||
    preprocess_regexps = [(re.compile(r'alt="[^"]*"'), lambda match: ''),
 | 
			
		||||
                          (re.compile(
 | 
			
		||||
                              ur'(title|alt)="[^"]*?"', re.DOTALL), lambda match: ''),
 | 
			
		||||
                              u'(title|alt)="[^"]*?"', re.DOTALL), lambda match: ''),
 | 
			
		||||
                          ]
 | 
			
		||||
    remove_attributes = ['style', 'bgcolor', 'alt', 'color']
 | 
			
		||||
    keep_only_tags = [dict(attrs={'class': 'sekcja'}), ]
 | 
			
		||||
 | 
			
		||||
@ -23,7 +23,7 @@ class FilmWebPl(BasicNewsRecipe):
 | 
			
		||||
                      'ul.inline {padding:0px;} .vertical-align {display: inline-block;}')
 | 
			
		||||
    preprocess_regexps = [(re.compile(r'<body.+?</head>', re.DOTALL), lambda match: ''),  # fix malformed HTML with 2 body tags...
 | 
			
		||||
    (re.compile(u'(?:<sup>)?\(kliknij\,\ aby powiększyć\)(?:</sup>)?', re.IGNORECASE), lambda m: ''),
 | 
			
		||||
    (re.compile(ur'(<br ?/?>\s*?<br ?/?>\s*?)+', re.IGNORECASE), lambda m: '<br />')
 | 
			
		||||
    (re.compile(unicode(r'(<br ?/?>\s*?<br ?/?>\s*?)+'), re.IGNORECASE), lambda m: '<br />')
 | 
			
		||||
    ]
 | 
			
		||||
    remove_tags = [dict(attrs={'class':['infoParent', 'likeBar',
 | 
			
		||||
                    'droptions-box pull-right', 'photoDesc', 'imageLicense', 'play big', 'shadow embed__icon--svg']})]
 | 
			
		||||
 | 
			
		||||
@ -17,8 +17,8 @@ class forbes_pl(BasicNewsRecipe):
 | 
			
		||||
    cover_url = 'http://www.forbes.pl/resources/front/images/logo.png'
 | 
			
		||||
    max_articles_per_feed = 100
 | 
			
		||||
    extra_css = '.Block-Photo {float:left; max-width: 300px; margin-right: 5px;}'
 | 
			
		||||
    preprocess_regexps = [(re.compile(ur'<p>(<strong>)?(Czytaj|Zobacz) (też|także):.*?</p>', re.DOTALL),
 | 
			
		||||
                           lambda match: ''), (re.compile(ur'<strong>Zobacz:.*?</strong>', re.DOTALL), lambda match: '')]
 | 
			
		||||
    preprocess_regexps = [(re.compile(u'<p>(<strong>)?(Czytaj|Zobacz) (też|także):.*?</p>', re.DOTALL),
 | 
			
		||||
                           lambda match: ''), (re.compile(u'<strong>Zobacz:.*?</strong>', re.DOTALL), lambda match: '')]
 | 
			
		||||
    remove_javascript = True
 | 
			
		||||
    no_stylesheets = True
 | 
			
		||||
    now = datetime.datetime.now()
 | 
			
		||||
 | 
			
		||||
@ -43,7 +43,7 @@ class ForsalPL(BasicNewsRecipe):
 | 
			
		||||
    (u'Moja firma', u'http://forsal.pl/atom/tagi/moja_firma')]
 | 
			
		||||
 | 
			
		||||
    def print_version(self, url):
 | 
			
		||||
        url_id = re.search(ur'/[0-9]+,', url)
 | 
			
		||||
        url_id = re.search(u'/[0-9]+,', url)
 | 
			
		||||
        if url_id:
 | 
			
		||||
            return 'http://forsal.pl/drukowanie' + url_id.group(0)[:-1]
 | 
			
		||||
        else:
 | 
			
		||||
 | 
			
		||||
@ -16,7 +16,7 @@ class Gildia(BasicNewsRecipe):
 | 
			
		||||
    no_stylesheets = True
 | 
			
		||||
    use_embedded_content = False
 | 
			
		||||
    ignore_duplicate_articles = {'title', 'url'}
 | 
			
		||||
    preprocess_regexps = [(re.compile(ur'</?sup>'), lambda match: '')]
 | 
			
		||||
    preprocess_regexps = [(re.compile(u'</?sup>'), lambda match: '')]
 | 
			
		||||
    ignore_duplicate_articles = {'title', 'url'}
 | 
			
		||||
    remove_tags = [dict(name='div', attrs={'class': [
 | 
			
		||||
                        'backlink', 'im_img', 'addthis_toolbox addthis_default_style', 'banner-bottom']})]
 | 
			
		||||
 | 
			
		||||
@ -15,7 +15,7 @@ class in4(BasicNewsRecipe):
 | 
			
		||||
    no_stylesheets = True
 | 
			
		||||
    remove_empty_feeds = True
 | 
			
		||||
    preprocess_regexps = [
 | 
			
		||||
        (re.compile(ur'<a title="translate into.*?</a>', re.DOTALL), lambda match: '')]
 | 
			
		||||
        (re.compile(u'<a title="translate into.*?</a>', re.DOTALL), lambda match: '')]
 | 
			
		||||
    keep_only_tags = [dict(name='div', attrs={'class': 'left_alone'})]
 | 
			
		||||
    remove_tags_after = dict(name='img', attrs={'title': 'komentarze'})
 | 
			
		||||
    remove_tags = [dict(name='img', attrs={'title': 'komentarze'})]
 | 
			
		||||
 | 
			
		||||
@ -14,7 +14,7 @@ class Ksiazka_net_pl(BasicNewsRecipe):
 | 
			
		||||
    no_stylesheets = True
 | 
			
		||||
    remove_empty_feeds = True
 | 
			
		||||
    preprocess_regexps = [
 | 
			
		||||
        (re.compile(ur'Podoba mi się, kupuję:'), lambda match: '<br />')]
 | 
			
		||||
        (re.compile(u'Podoba mi się, kupuję:'), lambda match: '<br />')]
 | 
			
		||||
    remove_tags_before = dict(name='div', attrs={'class': 'm-body'})
 | 
			
		||||
    remove_tags_after = dict(name='div', attrs={'class': 'm-body-link'})
 | 
			
		||||
    remove_tags = [
 | 
			
		||||
 | 
			
		||||
@ -10,8 +10,8 @@ class NaTemat(BasicNewsRecipe):
 | 
			
		||||
    description = u'informacje, komentarze, opinie'
 | 
			
		||||
    category = 'news'
 | 
			
		||||
    language = 'pl'
 | 
			
		||||
    preprocess_regexps = [(re.compile(ur'Czytaj też\:.*?</a>', re.IGNORECASE), lambda m: ''), (re.compile(ur'Zobacz też\:.*?</a>', re.IGNORECASE), lambda m: ''),  # noqa
 | 
			
		||||
                          (re.compile(ur'Czytaj więcej\:.*?</a>', re.IGNORECASE), lambda m: ''), (re.compile(ur'Czytaj również\:.*?</a>', re.IGNORECASE), lambda m: '')]  # noqa
 | 
			
		||||
    preprocess_regexps = [(re.compile(u'Czytaj też\\:.*?</a>', re.IGNORECASE), lambda m: ''), (re.compile(u'Zobacz też\\:.*?</a>', re.IGNORECASE), lambda m: ''),  # noqa
 | 
			
		||||
                          (re.compile(u'Czytaj więcej\\:.*?</a>', re.IGNORECASE), lambda m: ''), (re.compile(u'Czytaj również\\:.*?</a>', re.IGNORECASE), lambda m: '')]  # noqa
 | 
			
		||||
    cover_url = 'http://blog.plona.pl/wp-content/uploads/2012/05/natemat.png'
 | 
			
		||||
    no_stylesheets = True
 | 
			
		||||
    keep_only_tags = [dict(id='main')]
 | 
			
		||||
 | 
			
		||||
@ -14,8 +14,8 @@ class Tablety_pl(BasicNewsRecipe):
 | 
			
		||||
    no_stylesheets = True
 | 
			
		||||
    oldest_article = 8
 | 
			
		||||
    max_articles_per_feed = 100
 | 
			
		||||
    preprocess_regexps = [(re.compile(ur'<p><strong>Przeczytaj także.*?</a></strong></p>', re.DOTALL), lambda match: ''),
 | 
			
		||||
                          (re.compile(ur'<p><strong>Przeczytaj koniecznie.*?</a></strong></p>', re.DOTALL), lambda match: '')]
 | 
			
		||||
    preprocess_regexps = [(re.compile(u'<p><strong>Przeczytaj także.*?</a></strong></p>', re.DOTALL), lambda match: ''),
 | 
			
		||||
                          (re.compile(u'<p><strong>Przeczytaj koniecznie.*?</a></strong></p>', re.DOTALL), lambda match: '')]
 | 
			
		||||
    keep_only_tags = [dict(id='news_block')]
 | 
			
		||||
    remove_tags = [dict(attrs={'class': ['comments_icon', 'wp-polls', 'entry-comments',
 | 
			
		||||
                                         'wp-polls-loading', 'ts-fab-wrapper', 'entry-footer', 'social-custom']})]
 | 
			
		||||
 | 
			
		||||
@ -12,8 +12,8 @@ class tanuki(BasicNewsRecipe):
 | 
			
		||||
    max_articles_per_feed = 100
 | 
			
		||||
    encoding = 'utf-8'
 | 
			
		||||
    extra_css = 'ul {list-style: none; padding: 0; margin: 0;} .kadr{float: left;} .dwazdania {float: right;}'
 | 
			
		||||
    preprocess_regexps = [(re.compile(ur'<h3><a class="screen".*?</h3>', re.DOTALL), lambda match: ''), (re.compile(
 | 
			
		||||
        ur'<div><a href="/strony/((manga)|(anime))/[0-9]+?/oceny(\-redakcji){0,1}">Zobacz jak ocenili</a></div>', re.DOTALL), lambda match: '')]
 | 
			
		||||
    preprocess_regexps = [(re.compile(u'<h3><a class="screen".*?</h3>', re.DOTALL), lambda match: ''), (re.compile(
 | 
			
		||||
        unicode(r'<div><a href="/strony/((manga)|(anime))/[0-9]+?/oceny(\-redakcji){0,1}">Zobacz jak ocenili</a></div>'), re.DOTALL), lambda match: '')]
 | 
			
		||||
    remove_empty_feeds = True
 | 
			
		||||
    no_stylesheets = True
 | 
			
		||||
    keep_only_tags = [dict(attrs={'class': ['animename', 'storyname', 'nextarrow', 'sideinfov', 'sidelinfov', 'sideinfo', 'sidelinfo']}), dict(name='table', attrs={ 'summary': 'Technikalia'}), dict(attrs={'class': ['chaptername', 'copycat']}), dict(id='rightcolumn'), dict(attrs={'class': ['headn_tt', 'subtable']})]  # noqa
 | 
			
		||||
 | 
			
		||||
@ -11,7 +11,7 @@ class TawernaRPG(BasicNewsRecipe):
 | 
			
		||||
    language = 'pl'
 | 
			
		||||
    extra_css = '.slajd {list-style-type: none; padding-left: 0px; margin-left: 0px;} .lewanc {float: left; margin-right: 5px;} .srodek {display: block; margin-left: auto; margin-right: auto;}'  # noqa
 | 
			
		||||
    cover_url = 'http://www.tawerna.rpg.pl/img/logo.png'
 | 
			
		||||
    preprocess_regexps = [(re.compile(ur'<h2>Dodaj komentarz</h2>.*</body>',
 | 
			
		||||
    preprocess_regexps = [(re.compile(u'<h2>Dodaj komentarz</h2>.*</body>',
 | 
			
		||||
                                      re.DOTALL | re.IGNORECASE), lambda match: '</body>')]
 | 
			
		||||
    use_embedded_content = False
 | 
			
		||||
    oldest_article = 7
 | 
			
		||||
 | 
			
		||||
@ -20,8 +20,8 @@ class Trojmiasto(BasicNewsRecipe):
 | 
			
		||||
    remove_attributes = ['style', 'font']
 | 
			
		||||
    ignore_duplicate_articles = {'title', 'url'}
 | 
			
		||||
 | 
			
		||||
    preprocess_regexps = [(re.compile(ur'<strong>Czytaj więcej.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: ''), (re.compile(ur'<strong>Zobacz też.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: ''),  # noqa
 | 
			
		||||
                          (re.compile(ur'<b>[A-ZĄĆĘŁŃÓŚŹŻ \-,.:]*?</b>', re.DOTALL), lambda match: ''), ]
 | 
			
		||||
    preprocess_regexps = [(re.compile(u'<strong>Czytaj więcej.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: ''), (re.compile(u'<strong>Zobacz też.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: ''),  # noqa
 | 
			
		||||
                          (re.compile(u'<b>[A-ZĄĆĘŁŃÓŚŹŻ ,.:-]*?</b>', re.DOTALL), lambda match: ''), ]
 | 
			
		||||
 | 
			
		||||
    remove_tags = [
 | 
			
		||||
        dict(id=['logo', 'font_small', 'font_big']),
 | 
			
		||||
 | 
			
		||||
@ -9,8 +9,8 @@ class WNP(BasicNewsRecipe):
 | 
			
		||||
    description = u'Wirtualny Nowy Przemysł'
 | 
			
		||||
    category = 'economy'
 | 
			
		||||
    language = 'pl'
 | 
			
		||||
    preprocess_regexps = [(re.compile(ur'Czytaj też:.*?</a>', re.DOTALL), lambda match: ''),
 | 
			
		||||
                          (re.compile(ur'Czytaj więcej:.*?</a>', re.DOTALL), lambda match: '')]
 | 
			
		||||
    preprocess_regexps = [(re.compile(u'Czytaj też:.*?</a>', re.DOTALL), lambda match: ''),
 | 
			
		||||
                          (re.compile(u'Czytaj więcej:.*?</a>', re.DOTALL), lambda match: '')]
 | 
			
		||||
    oldest_article = 8
 | 
			
		||||
    max_articles_per_feed = 100
 | 
			
		||||
    no_stylesheets = True
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user