Fix unicode string syntax errors in recipes

2025-11-03 19:17:02 -05:00 · 2018-09-04 18:14:34 -04:00 · 2018-09-04 18:14:34 -04:00 · c011243859
commit c011243859
parent e44a10560e
25 changed files with 39 additions and 39 deletions
--- a/recipes/android_com_pl.recipe
+++ b/recipes/android_com_pl.recipe
@ -15,5 +15,5 @@ class Android_com_pl(BasicNewsRecipe):
    remove_tags_after = [{'class': 'post-content'}]
    remove_tags = [dict(name='ul', attrs={'class': 'tags small-tags'}), dict(name='a', attrs={'onclick': 'return ss_plugin_loadpopup_js(this);'})]
    preprocess_regexps = [
-        (re.compile(ur'<p>.{,1}</p>', re.DOTALL), lambda match: '')]
+        (re.compile(u'<p>.{,1}</p>', re.DOTALL), lambda match: '')]
    feeds = [(u'Android', u'http://android.com.pl/feed/')]
--- a/recipes/appledaily_tw.recipe
+++ b/recipes/appledaily_tw.recipe
@ -104,7 +104,7 @@ class AppledailyTW(BasicNewsRecipe):
    ]

    def preprocess_raw_html(self, raw_html, url):
-        raw_html = re.sub(ur'<a href=".*?<br><br>.*?<\/a>', '', raw_html)
+        raw_html = re.sub(unicode(r'<a href=".*?<br><br>.*?<\/a>'), '', raw_html)
        raw_html = re.sub(
-            ur'<title>(.*?)[\s]+\|.*<\/title>', '<title>\1<\/title>', raw_html)
+            unicode(r'<title>(.*?)[\s]+\|.*<\/title>'), '<title>\1<\/title>', raw_html)
        return raw_html
--- a/recipes/benchmark_pl.recipe
+++ b/recipes/benchmark_pl.recipe
@ -16,8 +16,8 @@ class BenchmarkPl(BasicNewsRecipe):
    extra_css = 'ul {list-style-type: none;}'
    no_stylesheets = True
    use_embedded_content = False
-    preprocess_regexps = [(re.compile(ur'<h3><span style="font-size: small;">&nbsp;Zobacz poprzednie <a href="http://www.benchmark.pl/news/zestawienie/grupa_id/135">Opinie dnia:</a></span>.*</body>',  # noqa
-        re.DOTALL | re.IGNORECASE), lambda match: '</body>'), (re.compile(ur'Więcej o .*?</ul>', re.DOTALL | re.IGNORECASE), lambda match: '')]  # noqa
+    preprocess_regexps = [(re.compile(u'<h3><span style="font-size: small;">&nbsp;Zobacz poprzednie <a href="http://www.benchmark.pl/news/zestawienie/grupa_id/135">Opinie dnia:</a></span>.*</body>',  # noqa
+        re.DOTALL | re.IGNORECASE), lambda match: '</body>'), (re.compile(u'Więcej o .*?</ul>', re.DOTALL | re.IGNORECASE), lambda match: '')]  # noqa

    keep_only_tags = [dict(id=['articleHeader', 'articleGallery']), dict(
        name='div', attrs={'class': ['m_zwykly', 'gallery']}), dict(id='article')]
--- a/recipes/ciekawostki_historyczne.recipe
+++ b/recipes/ciekawostki_historyczne.recipe
@ -14,8 +14,8 @@ class Ciekawostki_Historyczne(BasicNewsRecipe):
    max_articles_per_feed = 100
    extra_css = 'img.alignleft {float:left; margin-right:5px;} .alignright {float:right; margin-left:5px;}'
    oldest_article = 12
-    preprocess_regexps = [(re.compile(ur'Ten artykuł ma kilka stron.*?</fb:like>', re.DOTALL),
-                           lambda match: ''), (re.compile(ur'<h2>Zobacz też:</h2>.*?</ol>', re.DOTALL), lambda match: '')]
+    preprocess_regexps = [(re.compile(u'Ten artykuł ma kilka stron.*?</fb:like>', re.DOTALL),
+                           lambda match: ''), (re.compile(u'<h2>Zobacz też:</h2>.*?</ol>', re.DOTALL), lambda match: '')]
    no_stylesheets = True
    remove_empty_feeds = True
    keep_only_tags = [dict(name='div', attrs={'class': 'post'})]
--- a/recipes/cnetjapan.recipe
+++ b/recipes/cnetjapan.recipe
@ -16,11 +16,11 @@ class CNetJapan(BasicNewsRecipe):
    remove_javascript = True

    preprocess_regexps = [
-        (re.compile(ur'<!--\u25B2contents_left END\u25B2-->.*</body>', re.DOTALL | re.IGNORECASE | re.UNICODE),
+        (re.compile(unicode(r'<!--\u25B2contents_left END\u25B2-->.*</body>'), re.DOTALL | re.IGNORECASE | re.UNICODE),
         lambda match: '</body>'),
        (re.compile(r'<!--AD_ELU_HEADER-->.*</body>', re.DOTALL | re.IGNORECASE),
            lambda match: '</body>'),
-        (re.compile(ur'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->', re.UNICODE),
+        (re.compile(unicode(r'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->'), re.UNICODE),
            lambda match: '<!-- removed -->'),
    ]

--- a/recipes/cnetjapan_digital.recipe
+++ b/recipes/cnetjapan_digital.recipe
@ -14,11 +14,11 @@ class CNetJapanDigital(BasicNewsRecipe):
    remove_javascript = True

    preprocess_regexps = [
-        (re.compile(ur'<!--\u25B2contents_left END\u25B2-->.*</body>', re.DOTALL | re.IGNORECASE | re.UNICODE),
+        (re.compile(unicode(r'<!--\u25B2contents_left END\u25B2-->.*</body>'), re.DOTALL | re.IGNORECASE | re.UNICODE),
         lambda match: '</body>'),
        (re.compile(r'<!--AD_ELU_HEADER-->.*</body>', re.DOTALL | re.IGNORECASE),
            lambda match: '</body>'),
-        (re.compile(ur'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->', re.UNICODE),
+        (re.compile(unicode(r'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->'), re.UNICODE),
            lambda match: '<!-- removed -->'),
    ]

--- a/recipes/cnetjapan_release.recipe
+++ b/recipes/cnetjapan_release.recipe
@ -14,11 +14,11 @@ class CNetJapanRelease(BasicNewsRecipe):
    remove_javascript = True

    preprocess_regexps = [
-        (re.compile(ur'<!--\u25B2contents_left END\u25B2-->.*</body>', re.DOTALL | re.IGNORECASE | re.UNICODE),
+        (re.compile(unicode(r'<!--\u25B2contents_left END\u25B2-->.*</body>'), re.DOTALL | re.IGNORECASE | re.UNICODE),
         lambda match: '</body>'),
        (re.compile(r'<!--AD_ELU_HEADER-->.*</body>', re.DOTALL | re.IGNORECASE),
            lambda match: '</body>'),
-        (re.compile(ur'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->', re.UNICODE),
+        (re.compile(unicode(r'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->'), re.UNICODE),
            lambda match: '<!-- removed -->'),
    ]

--- a/recipes/computerworld_pl.recipe
+++ b/recipes/computerworld_pl.recipe
@ -16,7 +16,7 @@ class Computerworld_pl(BasicNewsRecipe):
    max_articles_per_feed = 100
    use_embedded_content = False
    preprocess_regexps = [(re.compile(u'Zobacz również:', re.IGNORECASE), lambda m: ''),
-                          (re.compile(ur'[*]+reklama[*]+', re.IGNORECASE), lambda m: ''), ]
+                          (re.compile(u'[*]+reklama[*]+', re.IGNORECASE), lambda m: ''), ]
    keep_only_tags = [dict(name='article')]
    remove_tags = [dict(attrs={'class': ['share_tools nocontent', 'rec']}),
         dict(name='ul',attrs={'class':'tags'}),
--- a/recipes/dobreprogamy.recipe
+++ b/recipes/dobreprogamy.recipe
@ -19,7 +19,7 @@ class Dobreprogramy_pl(BasicNewsRecipe):
    max_articles_per_feed = 100
    remove_attrs = ['style', 'width', 'height']
    preprocess_regexps = [(re.compile(
-        ur'<div id="\S+360pmp4">Twoja przeglądarka nie obsługuje Flasha i HTML5 lub wyłączono obsługę JavaScript...</div>'), lambda match: '')]
+        unicode(r'<div id="\S+360pmp4">Twoja przeglądarka nie obsługuje Flasha i HTML5 lub wyłączono obsługę JavaScript...</div>')), lambda match: '')]
    keep_only_tags = [dict(name='h1'), dict(
        attrs={'class': ['entry single']}), dict(id='phContent_divArticle')]
    remove_tags = [dict(attrs={'class': ['newsOptions', 'noPrint', 'komentarze', 'tags  font-heading-master', 'social nested-grid  grid-margin-px15-top clearfix no-mobile', 'page-info text-h4 font-heading grid-margin-px15-top color-annotation clearfix', 'series grid-margin-px30-top']}), dict(id='komentarze'), dict(id='phContent_ctl02_sBreadcrumb'), dict(name='iframe')]  # noqa
--- a/recipes/dziennik_wschodni.recipe
+++ b/recipes/dziennik_wschodni.recipe
@ -19,8 +19,8 @@ class DziennikWschodni(BasicNewsRecipe):
    no_stylesheets = True
    ignore_duplicate_articles = {'title', 'url'}

-    preprocess_regexps = [(re.compile(ur'Czytaj:.*?</a>', re.DOTALL), lambda match: ''), (re.compile(ur'Przeczytaj także:.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: ''),  # noqa
-                          (re.compile(ur'Przeczytaj również:.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: ''), (re.compile(ur'Zobacz też:.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: '')]  # noqa
+    preprocess_regexps = [(re.compile(u'Czytaj:.*?</a>', re.DOTALL), lambda match: ''), (re.compile(u'Przeczytaj także:.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: ''),  # noqa
+                          (re.compile(u'Przeczytaj również:.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: ''), (re.compile(u'Zobacz też:.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: '')]  # noqa

    keep_only_tags = [dict(id=['article', 'cover', 'photostory'])]
    remove_tags = [dict(id=['articleTags', 'articleMeta', 'boxReadIt', 'articleGalleries', 'articleConnections',
--- a/recipes/echo_dnia.recipe
+++ b/recipes/echo_dnia.recipe
@ -20,8 +20,8 @@ class EchoDnia(BasicNewsRecipe):
    use_embedded_content = False
    ignore_duplicate_articles = {'title', 'url'}

-    preprocess_regexps = [(re.compile(ur'Czytaj:.*?</a>', re.DOTALL), lambda match: ''), (re.compile(ur'Przeczytaj także:.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: ''),  # noqa
-                          (re.compile(ur'Przeczytaj również:.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: ''), (re.compile(ur'Zobacz też:.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: '')]  # noqa
+    preprocess_regexps = [(re.compile(u'Czytaj:.*?</a>', re.DOTALL), lambda match: ''), (re.compile(u'Przeczytaj także:.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: ''),  # noqa
+                          (re.compile(u'Przeczytaj również:.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: ''), (re.compile(u'Zobacz też:.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: '')]  # noqa

    keep_only_tags = [dict(id=['article', 'cover', 'photostory'])]
    remove_tags = [dict(id=['articleTags', 'articleMeta', 'boxReadIt', 'articleGalleries', 'articleConnections',
--- a/recipes/esenja.recipe
+++ b/recipes/esenja.recipe
@ -45,7 +45,7 @@ class Esensja(BasicNewsRecipe):

    preprocess_regexps = [(re.compile(r'alt="[^"]*"'), lambda match: ''),
                          (re.compile(
-                              ur'(title|alt)="[^"]*?"', re.DOTALL), lambda match: ''),
+                              u'(title|alt)="[^"]*?"', re.DOTALL), lambda match: ''),
                          ]

    def parse_index(self):
--- a/recipes/esensja_(rss).recipe
+++ b/recipes/esensja_(rss).recipe
@ -23,7 +23,7 @@ class EsensjaRSS(BasicNewsRecipe):
    ignore_duplicate_articles = {'title', 'url'}
    preprocess_regexps = [(re.compile(r'alt="[^"]*"'), lambda match: ''),
                          (re.compile(
-                              ur'(title|alt)="[^"]*?"', re.DOTALL), lambda match: ''),
+                              u'(title|alt)="[^"]*?"', re.DOTALL), lambda match: ''),
                          ]
    remove_attributes = ['style', 'bgcolor', 'alt', 'color']
    keep_only_tags = [dict(attrs={'class': 'sekcja'}), ]
--- a/recipes/film_web.recipe
+++ b/recipes/film_web.recipe
@ -23,7 +23,7 @@ class FilmWebPl(BasicNewsRecipe):
                      'ul.inline {padding:0px;} .vertical-align {display: inline-block;}')
    preprocess_regexps = [(re.compile(r'<body.+?</head>', re.DOTALL), lambda match: ''),  # fix malformed HTML with 2 body tags...
    (re.compile(u'(?:<sup>)?\(kliknij\,\ aby powiększyć\)(?:</sup>)?', re.IGNORECASE), lambda m: ''),
-    (re.compile(ur'(<br ?/?>\s*?<br ?/?>\s*?)+', re.IGNORECASE), lambda m: '<br />')
+    (re.compile(unicode(r'(<br ?/?>\s*?<br ?/?>\s*?)+'), re.IGNORECASE), lambda m: '<br />')
    ]
    remove_tags = [dict(attrs={'class':['infoParent', 'likeBar',
                    'droptions-box pull-right', 'photoDesc', 'imageLicense', 'play big', 'shadow embed__icon--svg']})]
--- a/recipes/forbes_pl.recipe
+++ b/recipes/forbes_pl.recipe
@ -17,8 +17,8 @@ class forbes_pl(BasicNewsRecipe):
    cover_url = 'http://www.forbes.pl/resources/front/images/logo.png'
    max_articles_per_feed = 100
    extra_css = '.Block-Photo {float:left; max-width: 300px; margin-right: 5px;}'
-    preprocess_regexps = [(re.compile(ur'<p>(<strong>)?(Czytaj|Zobacz) (też|także):.*?</p>', re.DOTALL),
-                           lambda match: ''), (re.compile(ur'<strong>Zobacz:.*?</strong>', re.DOTALL), lambda match: '')]
+    preprocess_regexps = [(re.compile(u'<p>(<strong>)?(Czytaj|Zobacz) (też|także):.*?</p>', re.DOTALL),
+                           lambda match: ''), (re.compile(u'<strong>Zobacz:.*?</strong>', re.DOTALL), lambda match: '')]
    remove_javascript = True
    no_stylesheets = True
    now = datetime.datetime.now()
--- a/recipes/forsal.recipe
+++ b/recipes/forsal.recipe
@ -43,7 +43,7 @@ class ForsalPL(BasicNewsRecipe):
    (u'Moja firma', u'http://forsal.pl/atom/tagi/moja_firma')]

    def print_version(self, url):
-        url_id = re.search(ur'/[0-9]+,', url)
+        url_id = re.search(u'/[0-9]+,', url)
        if url_id:
            return 'http://forsal.pl/drukowanie' + url_id.group(0)[:-1]
        else:
--- a/recipes/gildia_pl.recipe
+++ b/recipes/gildia_pl.recipe
@ -16,7 +16,7 @@ class Gildia(BasicNewsRecipe):
    no_stylesheets = True
    use_embedded_content = False
    ignore_duplicate_articles = {'title', 'url'}
-    preprocess_regexps = [(re.compile(ur'</?sup>'), lambda match: '')]
+    preprocess_regexps = [(re.compile(u'</?sup>'), lambda match: '')]
    ignore_duplicate_articles = {'title', 'url'}
    remove_tags = [dict(name='div', attrs={'class': [
                        'backlink', 'im_img', 'addthis_toolbox addthis_default_style', 'banner-bottom']})]
--- a/recipes/in4_pl.recipe
+++ b/recipes/in4_pl.recipe
@ -15,7 +15,7 @@ class in4(BasicNewsRecipe):
    no_stylesheets = True
    remove_empty_feeds = True
    preprocess_regexps = [
-        (re.compile(ur'<a title="translate into.*?</a>', re.DOTALL), lambda match: '')]
+        (re.compile(u'<a title="translate into.*?</a>', re.DOTALL), lambda match: '')]
    keep_only_tags = [dict(name='div', attrs={'class': 'left_alone'})]
    remove_tags_after = dict(name='img', attrs={'title': 'komentarze'})
    remove_tags = [dict(name='img', attrs={'title': 'komentarze'})]
--- a/recipes/ksiazka_pl.recipe
+++ b/recipes/ksiazka_pl.recipe
@ -14,7 +14,7 @@ class Ksiazka_net_pl(BasicNewsRecipe):
    no_stylesheets = True
    remove_empty_feeds = True
    preprocess_regexps = [
-        (re.compile(ur'Podoba mi się, kupuję:'), lambda match: '<br />')]
+        (re.compile(u'Podoba mi się, kupuję:'), lambda match: '<br />')]
    remove_tags_before = dict(name='div', attrs={'class': 'm-body'})
    remove_tags_after = dict(name='div', attrs={'class': 'm-body-link'})
    remove_tags = [
--- a/recipes/natemat_pl.recipe
+++ b/recipes/natemat_pl.recipe
@ -10,8 +10,8 @@ class NaTemat(BasicNewsRecipe):
    description = u'informacje, komentarze, opinie'
    category = 'news'
    language = 'pl'
-    preprocess_regexps = [(re.compile(ur'Czytaj też\:.*?</a>', re.IGNORECASE), lambda m: ''), (re.compile(ur'Zobacz też\:.*?</a>', re.IGNORECASE), lambda m: ''),  # noqa
-                          (re.compile(ur'Czytaj więcej\:.*?</a>', re.IGNORECASE), lambda m: ''), (re.compile(ur'Czytaj również\:.*?</a>', re.IGNORECASE), lambda m: '')]  # noqa
+    preprocess_regexps = [(re.compile(u'Czytaj też\\:.*?</a>', re.IGNORECASE), lambda m: ''), (re.compile(u'Zobacz też\\:.*?</a>', re.IGNORECASE), lambda m: ''),  # noqa
+                          (re.compile(u'Czytaj więcej\\:.*?</a>', re.IGNORECASE), lambda m: ''), (re.compile(u'Czytaj również\\:.*?</a>', re.IGNORECASE), lambda m: '')]  # noqa
    cover_url = 'http://blog.plona.pl/wp-content/uploads/2012/05/natemat.png'
    no_stylesheets = True
    keep_only_tags = [dict(id='main')]
--- a/recipes/tablety_pl.recipe
+++ b/recipes/tablety_pl.recipe
@ -14,8 +14,8 @@ class Tablety_pl(BasicNewsRecipe):
    no_stylesheets = True
    oldest_article = 8
    max_articles_per_feed = 100
-    preprocess_regexps = [(re.compile(ur'<p><strong>Przeczytaj także.*?</a></strong></p>', re.DOTALL), lambda match: ''),
-                          (re.compile(ur'<p><strong>Przeczytaj koniecznie.*?</a></strong></p>', re.DOTALL), lambda match: '')]
+    preprocess_regexps = [(re.compile(u'<p><strong>Przeczytaj także.*?</a></strong></p>', re.DOTALL), lambda match: ''),
+                          (re.compile(u'<p><strong>Przeczytaj koniecznie.*?</a></strong></p>', re.DOTALL), lambda match: '')]
    keep_only_tags = [dict(id='news_block')]
    remove_tags = [dict(attrs={'class': ['comments_icon', 'wp-polls', 'entry-comments',
                                         'wp-polls-loading', 'ts-fab-wrapper', 'entry-footer', 'social-custom']})]
--- a/recipes/tanuki.recipe
+++ b/recipes/tanuki.recipe
@ -12,8 +12,8 @@ class tanuki(BasicNewsRecipe):
    max_articles_per_feed = 100
    encoding = 'utf-8'
    extra_css = 'ul {list-style: none; padding: 0; margin: 0;} .kadr{float: left;} .dwazdania {float: right;}'
-    preprocess_regexps = [(re.compile(ur'<h3><a class="screen".*?</h3>', re.DOTALL), lambda match: ''), (re.compile(
-        ur'<div><a href="/strony/((manga)|(anime))/[0-9]+?/oceny(\-redakcji){0,1}">Zobacz jak ocenili</a></div>', re.DOTALL), lambda match: '')]
+    preprocess_regexps = [(re.compile(u'<h3><a class="screen".*?</h3>', re.DOTALL), lambda match: ''), (re.compile(
+        unicode(r'<div><a href="/strony/((manga)|(anime))/[0-9]+?/oceny(\-redakcji){0,1}">Zobacz jak ocenili</a></div>'), re.DOTALL), lambda match: '')]
    remove_empty_feeds = True
    no_stylesheets = True
    keep_only_tags = [dict(attrs={'class': ['animename', 'storyname', 'nextarrow', 'sideinfov', 'sidelinfov', 'sideinfo', 'sidelinfo']}), dict(name='table', attrs={ 'summary': 'Technikalia'}), dict(attrs={'class': ['chaptername', 'copycat']}), dict(id='rightcolumn'), dict(attrs={'class': ['headn_tt', 'subtable']})]  # noqa
--- a/recipes/tawernarpg_pl.recipe
+++ b/recipes/tawernarpg_pl.recipe
@ -11,7 +11,7 @@ class TawernaRPG(BasicNewsRecipe):
    language = 'pl'
    extra_css = '.slajd {list-style-type: none; padding-left: 0px; margin-left: 0px;} .lewanc {float: left; margin-right: 5px;} .srodek {display: block; margin-left: auto; margin-right: auto;}'  # noqa
    cover_url = 'http://www.tawerna.rpg.pl/img/logo.png'
-    preprocess_regexps = [(re.compile(ur'<h2>Dodaj komentarz</h2>.*</body>',
+    preprocess_regexps = [(re.compile(u'<h2>Dodaj komentarz</h2>.*</body>',
                                      re.DOTALL | re.IGNORECASE), lambda match: '</body>')]
    use_embedded_content = False
    oldest_article = 7
--- a/recipes/trojmiasto_pl.recipe
+++ b/recipes/trojmiasto_pl.recipe
@ -20,8 +20,8 @@ class Trojmiasto(BasicNewsRecipe):
    remove_attributes = ['style', 'font']
    ignore_duplicate_articles = {'title', 'url'}

-    preprocess_regexps = [(re.compile(ur'<strong>Czytaj więcej.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: ''), (re.compile(ur'<strong>Zobacz też.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: ''),  # noqa
-                          (re.compile(ur'<b>[A-ZĄĆĘŁŃÓŚŹŻ \-,.:]*?</b>', re.DOTALL), lambda match: ''), ]
+    preprocess_regexps = [(re.compile(u'<strong>Czytaj więcej.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: ''), (re.compile(u'<strong>Zobacz też.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: ''),  # noqa
+                          (re.compile(u'<b>[A-ZĄĆĘŁŃÓŚŹŻ ,.:-]*?</b>', re.DOTALL), lambda match: ''), ]

    remove_tags = [
        dict(id=['logo', 'font_small', 'font_big']),
--- a/recipes/wnp.recipe
+++ b/recipes/wnp.recipe
@ -9,8 +9,8 @@ class WNP(BasicNewsRecipe):
    description = u'Wirtualny Nowy Przemysł'
    category = 'economy'
    language = 'pl'
-    preprocess_regexps = [(re.compile(ur'Czytaj też:.*?</a>', re.DOTALL), lambda match: ''),
-                          (re.compile(ur'Czytaj więcej:.*?</a>', re.DOTALL), lambda match: '')]
+    preprocess_regexps = [(re.compile(u'Czytaj też:.*?</a>', re.DOTALL), lambda match: ''),
+                          (re.compile(u'Czytaj więcej:.*?</a>', re.DOTALL), lambda match: '')]
    oldest_article = 8
    max_articles_per_feed = 100
    no_stylesheets = True