mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 18:24:30 -04:00
Fix unicode string syntax errors in recipes
This commit is contained in:
parent
e44a10560e
commit
c011243859
@ -15,5 +15,5 @@ class Android_com_pl(BasicNewsRecipe):
|
|||||||
remove_tags_after = [{'class': 'post-content'}]
|
remove_tags_after = [{'class': 'post-content'}]
|
||||||
remove_tags = [dict(name='ul', attrs={'class': 'tags small-tags'}), dict(name='a', attrs={'onclick': 'return ss_plugin_loadpopup_js(this);'})]
|
remove_tags = [dict(name='ul', attrs={'class': 'tags small-tags'}), dict(name='a', attrs={'onclick': 'return ss_plugin_loadpopup_js(this);'})]
|
||||||
preprocess_regexps = [
|
preprocess_regexps = [
|
||||||
(re.compile(ur'<p>.{,1}</p>', re.DOTALL), lambda match: '')]
|
(re.compile(u'<p>.{,1}</p>', re.DOTALL), lambda match: '')]
|
||||||
feeds = [(u'Android', u'http://android.com.pl/feed/')]
|
feeds = [(u'Android', u'http://android.com.pl/feed/')]
|
||||||
|
@ -104,7 +104,7 @@ class AppledailyTW(BasicNewsRecipe):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def preprocess_raw_html(self, raw_html, url):
|
def preprocess_raw_html(self, raw_html, url):
|
||||||
raw_html = re.sub(ur'<a href=".*?<br><br>.*?<\/a>', '', raw_html)
|
raw_html = re.sub(unicode(r'<a href=".*?<br><br>.*?<\/a>'), '', raw_html)
|
||||||
raw_html = re.sub(
|
raw_html = re.sub(
|
||||||
ur'<title>(.*?)[\s]+\|.*<\/title>', '<title>\1<\/title>', raw_html)
|
unicode(r'<title>(.*?)[\s]+\|.*<\/title>', '<title>\1<\/title>'), raw_html)
|
||||||
return raw_html
|
return raw_html
|
||||||
|
@ -16,8 +16,8 @@ class BenchmarkPl(BasicNewsRecipe):
|
|||||||
extra_css = 'ul {list-style-type: none;}'
|
extra_css = 'ul {list-style-type: none;}'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
preprocess_regexps = [(re.compile(ur'<h3><span style="font-size: small;"> Zobacz poprzednie <a href="http://www.benchmark.pl/news/zestawienie/grupa_id/135">Opinie dnia:</a></span>.*</body>', # noqa
|
preprocess_regexps = [(re.compile(u'<h3><span style="font-size: small;"> Zobacz poprzednie <a href="http://www.benchmark.pl/news/zestawienie/grupa_id/135">Opinie dnia:</a></span>.*</body>', # noqa
|
||||||
re.DOTALL | re.IGNORECASE), lambda match: '</body>'), (re.compile(ur'Więcej o .*?</ul>', re.DOTALL | re.IGNORECASE), lambda match: '')] # noqa
|
re.DOTALL | re.IGNORECASE), lambda match: '</body>'), (re.compile(u'Więcej o .*?</ul>', re.DOTALL | re.IGNORECASE), lambda match: '')] # noqa
|
||||||
|
|
||||||
keep_only_tags = [dict(id=['articleHeader', 'articleGallery']), dict(
|
keep_only_tags = [dict(id=['articleHeader', 'articleGallery']), dict(
|
||||||
name='div', attrs={'class': ['m_zwykly', 'gallery']}), dict(id='article')]
|
name='div', attrs={'class': ['m_zwykly', 'gallery']}), dict(id='article')]
|
||||||
|
@ -14,8 +14,8 @@ class Ciekawostki_Historyczne(BasicNewsRecipe):
|
|||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
extra_css = 'img.alignleft {float:left; margin-right:5px;} .alignright {float:right; margin-left:5px;}'
|
extra_css = 'img.alignleft {float:left; margin-right:5px;} .alignright {float:right; margin-left:5px;}'
|
||||||
oldest_article = 12
|
oldest_article = 12
|
||||||
preprocess_regexps = [(re.compile(ur'Ten artykuł ma kilka stron.*?</fb:like>', re.DOTALL),
|
preprocess_regexps = [(re.compile(u'Ten artykuł ma kilka stron.*?</fb:like>', re.DOTALL),
|
||||||
lambda match: ''), (re.compile(ur'<h2>Zobacz też:</h2>.*?</ol>', re.DOTALL), lambda match: '')]
|
lambda match: ''), (re.compile(u'<h2>Zobacz też:</h2>.*?</ol>', re.DOTALL), lambda match: '')]
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
keep_only_tags = [dict(name='div', attrs={'class': 'post'})]
|
keep_only_tags = [dict(name='div', attrs={'class': 'post'})]
|
||||||
|
@ -16,11 +16,11 @@ class CNetJapan(BasicNewsRecipe):
|
|||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
|
|
||||||
preprocess_regexps = [
|
preprocess_regexps = [
|
||||||
(re.compile(ur'<!--\u25B2contents_left END\u25B2-->.*</body>', re.DOTALL | re.IGNORECASE | re.UNICODE),
|
(re.compile(unicode(r'<!--\u25B2contents_left END\u25B2-->.*</body>'), re.DOTALL | re.IGNORECASE | re.UNICODE),
|
||||||
lambda match: '</body>'),
|
lambda match: '</body>'),
|
||||||
(re.compile(r'<!--AD_ELU_HEADER-->.*</body>', re.DOTALL | re.IGNORECASE),
|
(re.compile(r'<!--AD_ELU_HEADER-->.*</body>', re.DOTALL | re.IGNORECASE),
|
||||||
lambda match: '</body>'),
|
lambda match: '</body>'),
|
||||||
(re.compile(ur'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->', re.UNICODE),
|
(re.compile(unicode(r'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->'), re.UNICODE),
|
||||||
lambda match: '<!-- removed -->'),
|
lambda match: '<!-- removed -->'),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -14,11 +14,11 @@ class CNetJapanDigital(BasicNewsRecipe):
|
|||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
|
|
||||||
preprocess_regexps = [
|
preprocess_regexps = [
|
||||||
(re.compile(ur'<!--\u25B2contents_left END\u25B2-->.*</body>', re.DOTALL | re.IGNORECASE | re.UNICODE),
|
(re.compile(unicode(r'<!--\u25B2contents_left END\u25B2-->.*</body>'), re.DOTALL | re.IGNORECASE | re.UNICODE),
|
||||||
lambda match: '</body>'),
|
lambda match: '</body>'),
|
||||||
(re.compile(r'<!--AD_ELU_HEADER-->.*</body>', re.DOTALL | re.IGNORECASE),
|
(re.compile(r'<!--AD_ELU_HEADER-->.*</body>', re.DOTALL | re.IGNORECASE),
|
||||||
lambda match: '</body>'),
|
lambda match: '</body>'),
|
||||||
(re.compile(ur'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->', re.UNICODE),
|
(re.compile(unicode(r'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->'), re.UNICODE),
|
||||||
lambda match: '<!-- removed -->'),
|
lambda match: '<!-- removed -->'),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -14,11 +14,11 @@ class CNetJapanRelease(BasicNewsRecipe):
|
|||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
|
|
||||||
preprocess_regexps = [
|
preprocess_regexps = [
|
||||||
(re.compile(ur'<!--\u25B2contents_left END\u25B2-->.*</body>', re.DOTALL | re.IGNORECASE | re.UNICODE),
|
(re.compile(unicode(r'<!--\u25B2contents_left END\u25B2-->.*</body>'), re.DOTALL | re.IGNORECASE | re.UNICODE),
|
||||||
lambda match: '</body>'),
|
lambda match: '</body>'),
|
||||||
(re.compile(r'<!--AD_ELU_HEADER-->.*</body>', re.DOTALL | re.IGNORECASE),
|
(re.compile(r'<!--AD_ELU_HEADER-->.*</body>', re.DOTALL | re.IGNORECASE),
|
||||||
lambda match: '</body>'),
|
lambda match: '</body>'),
|
||||||
(re.compile(ur'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->', re.UNICODE),
|
(re.compile(unicode(r'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->'), re.UNICODE),
|
||||||
lambda match: '<!-- removed -->'),
|
lambda match: '<!-- removed -->'),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -16,7 +16,7 @@ class Computerworld_pl(BasicNewsRecipe):
|
|||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
preprocess_regexps = [(re.compile(u'Zobacz również:', re.IGNORECASE), lambda m: ''),
|
preprocess_regexps = [(re.compile(u'Zobacz również:', re.IGNORECASE), lambda m: ''),
|
||||||
(re.compile(ur'[*]+reklama[*]+', re.IGNORECASE), lambda m: ''), ]
|
(re.compile(u'[*]+reklama[*]+', re.IGNORECASE), lambda m: ''), ]
|
||||||
keep_only_tags = [dict(name='article')]
|
keep_only_tags = [dict(name='article')]
|
||||||
remove_tags = [dict(attrs={'class': ['share_tools nocontent', 'rec']}),
|
remove_tags = [dict(attrs={'class': ['share_tools nocontent', 'rec']}),
|
||||||
dict(name='ul',attrs={'class':'tags'}),
|
dict(name='ul',attrs={'class':'tags'}),
|
||||||
|
@ -19,7 +19,7 @@ class Dobreprogramy_pl(BasicNewsRecipe):
|
|||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
remove_attrs = ['style', 'width', 'height']
|
remove_attrs = ['style', 'width', 'height']
|
||||||
preprocess_regexps = [(re.compile(
|
preprocess_regexps = [(re.compile(
|
||||||
ur'<div id="\S+360pmp4">Twoja przeglądarka nie obsługuje Flasha i HTML5 lub wyłączono obsługę JavaScript...</div>'), lambda match: '')]
|
unicode(r'<div id="\S+360pmp4">Twoja przeglądarka nie obsługuje Flasha i HTML5 lub wyłączono obsługę JavaScript...</div>')), lambda match: '')]
|
||||||
keep_only_tags = [dict(name='h1'), dict(
|
keep_only_tags = [dict(name='h1'), dict(
|
||||||
attrs={'class': ['entry single']}), dict(id='phContent_divArticle')]
|
attrs={'class': ['entry single']}), dict(id='phContent_divArticle')]
|
||||||
remove_tags = [dict(attrs={'class': ['newsOptions', 'noPrint', 'komentarze', 'tags font-heading-master', 'social nested-grid grid-margin-px15-top clearfix no-mobile', 'page-info text-h4 font-heading grid-margin-px15-top color-annotation clearfix', 'series grid-margin-px30-top']}), dict(id='komentarze'), dict(id='phContent_ctl02_sBreadcrumb'), dict(name='iframe')] # noqa
|
remove_tags = [dict(attrs={'class': ['newsOptions', 'noPrint', 'komentarze', 'tags font-heading-master', 'social nested-grid grid-margin-px15-top clearfix no-mobile', 'page-info text-h4 font-heading grid-margin-px15-top color-annotation clearfix', 'series grid-margin-px30-top']}), dict(id='komentarze'), dict(id='phContent_ctl02_sBreadcrumb'), dict(name='iframe')] # noqa
|
||||||
|
@ -19,8 +19,8 @@ class DziennikWschodni(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
ignore_duplicate_articles = {'title', 'url'}
|
ignore_duplicate_articles = {'title', 'url'}
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(ur'Czytaj:.*?</a>', re.DOTALL), lambda match: ''), (re.compile(ur'Przeczytaj także:.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: ''), # noqa
|
preprocess_regexps = [(re.compile(u'Czytaj:.*?</a>', re.DOTALL), lambda match: ''), (re.compile(u'Przeczytaj także:.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: ''), # noqa
|
||||||
(re.compile(ur'Przeczytaj również:.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: ''), (re.compile(ur'Zobacz też:.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: '')] # noqa
|
(re.compile(u'Przeczytaj również:.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: ''), (re.compile(u'Zobacz też:.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: '')] # noqa
|
||||||
|
|
||||||
keep_only_tags = [dict(id=['article', 'cover', 'photostory'])]
|
keep_only_tags = [dict(id=['article', 'cover', 'photostory'])]
|
||||||
remove_tags = [dict(id=['articleTags', 'articleMeta', 'boxReadIt', 'articleGalleries', 'articleConnections',
|
remove_tags = [dict(id=['articleTags', 'articleMeta', 'boxReadIt', 'articleGalleries', 'articleConnections',
|
||||||
|
@ -20,8 +20,8 @@ class EchoDnia(BasicNewsRecipe):
|
|||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
ignore_duplicate_articles = {'title', 'url'}
|
ignore_duplicate_articles = {'title', 'url'}
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(ur'Czytaj:.*?</a>', re.DOTALL), lambda match: ''), (re.compile(ur'Przeczytaj także:.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: ''), # noqa
|
preprocess_regexps = [(re.compile(u'Czytaj:.*?</a>', re.DOTALL), lambda match: ''), (re.compile(u'Przeczytaj także:.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: ''), # noqa
|
||||||
(re.compile(ur'Przeczytaj również:.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: ''), (re.compile(ur'Zobacz też:.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: '')] # noqa
|
(re.compile(u'Przeczytaj również:.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: ''), (re.compile(u'Zobacz też:.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: '')] # noqa
|
||||||
|
|
||||||
keep_only_tags = [dict(id=['article', 'cover', 'photostory'])]
|
keep_only_tags = [dict(id=['article', 'cover', 'photostory'])]
|
||||||
remove_tags = [dict(id=['articleTags', 'articleMeta', 'boxReadIt', 'articleGalleries', 'articleConnections',
|
remove_tags = [dict(id=['articleTags', 'articleMeta', 'boxReadIt', 'articleGalleries', 'articleConnections',
|
||||||
|
@ -45,7 +45,7 @@ class Esensja(BasicNewsRecipe):
|
|||||||
|
|
||||||
preprocess_regexps = [(re.compile(r'alt="[^"]*"'), lambda match: ''),
|
preprocess_regexps = [(re.compile(r'alt="[^"]*"'), lambda match: ''),
|
||||||
(re.compile(
|
(re.compile(
|
||||||
ur'(title|alt)="[^"]*?"', re.DOTALL), lambda match: ''),
|
u'(title|alt)="[^"]*?"', re.DOTALL), lambda match: ''),
|
||||||
]
|
]
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
|
@ -23,7 +23,7 @@ class EsensjaRSS(BasicNewsRecipe):
|
|||||||
ignore_duplicate_articles = {'title', 'url'}
|
ignore_duplicate_articles = {'title', 'url'}
|
||||||
preprocess_regexps = [(re.compile(r'alt="[^"]*"'), lambda match: ''),
|
preprocess_regexps = [(re.compile(r'alt="[^"]*"'), lambda match: ''),
|
||||||
(re.compile(
|
(re.compile(
|
||||||
ur'(title|alt)="[^"]*?"', re.DOTALL), lambda match: ''),
|
u'(title|alt)="[^"]*?"', re.DOTALL), lambda match: ''),
|
||||||
]
|
]
|
||||||
remove_attributes = ['style', 'bgcolor', 'alt', 'color']
|
remove_attributes = ['style', 'bgcolor', 'alt', 'color']
|
||||||
keep_only_tags = [dict(attrs={'class': 'sekcja'}), ]
|
keep_only_tags = [dict(attrs={'class': 'sekcja'}), ]
|
||||||
|
@ -23,7 +23,7 @@ class FilmWebPl(BasicNewsRecipe):
|
|||||||
'ul.inline {padding:0px;} .vertical-align {display: inline-block;}')
|
'ul.inline {padding:0px;} .vertical-align {display: inline-block;}')
|
||||||
preprocess_regexps = [(re.compile(r'<body.+?</head>', re.DOTALL), lambda match: ''), # fix malformed HTML with 2 body tags...
|
preprocess_regexps = [(re.compile(r'<body.+?</head>', re.DOTALL), lambda match: ''), # fix malformed HTML with 2 body tags...
|
||||||
(re.compile(u'(?:<sup>)?\(kliknij\,\ aby powiększyć\)(?:</sup>)?', re.IGNORECASE), lambda m: ''),
|
(re.compile(u'(?:<sup>)?\(kliknij\,\ aby powiększyć\)(?:</sup>)?', re.IGNORECASE), lambda m: ''),
|
||||||
(re.compile(ur'(<br ?/?>\s*?<br ?/?>\s*?)+', re.IGNORECASE), lambda m: '<br />')
|
(re.compile(unicode(r'(<br ?/?>\s*?<br ?/?>\s*?)+'), re.IGNORECASE), lambda m: '<br />')
|
||||||
]
|
]
|
||||||
remove_tags = [dict(attrs={'class':['infoParent', 'likeBar',
|
remove_tags = [dict(attrs={'class':['infoParent', 'likeBar',
|
||||||
'droptions-box pull-right', 'photoDesc', 'imageLicense', 'play big', 'shadow embed__icon--svg']})]
|
'droptions-box pull-right', 'photoDesc', 'imageLicense', 'play big', 'shadow embed__icon--svg']})]
|
||||||
|
@ -17,8 +17,8 @@ class forbes_pl(BasicNewsRecipe):
|
|||||||
cover_url = 'http://www.forbes.pl/resources/front/images/logo.png'
|
cover_url = 'http://www.forbes.pl/resources/front/images/logo.png'
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
extra_css = '.Block-Photo {float:left; max-width: 300px; margin-right: 5px;}'
|
extra_css = '.Block-Photo {float:left; max-width: 300px; margin-right: 5px;}'
|
||||||
preprocess_regexps = [(re.compile(ur'<p>(<strong>)?(Czytaj|Zobacz) (też|także):.*?</p>', re.DOTALL),
|
preprocess_regexps = [(re.compile(u'<p>(<strong>)?(Czytaj|Zobacz) (też|także):.*?</p>', re.DOTALL),
|
||||||
lambda match: ''), (re.compile(ur'<strong>Zobacz:.*?</strong>', re.DOTALL), lambda match: '')]
|
lambda match: ''), (re.compile(u'<strong>Zobacz:.*?</strong>', re.DOTALL), lambda match: '')]
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
now = datetime.datetime.now()
|
now = datetime.datetime.now()
|
||||||
|
@ -43,7 +43,7 @@ class ForsalPL(BasicNewsRecipe):
|
|||||||
(u'Moja firma', u'http://forsal.pl/atom/tagi/moja_firma')]
|
(u'Moja firma', u'http://forsal.pl/atom/tagi/moja_firma')]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
url_id = re.search(ur'/[0-9]+,', url)
|
url_id = re.search(u'/[0-9]+,', url)
|
||||||
if url_id:
|
if url_id:
|
||||||
return 'http://forsal.pl/drukowanie' + url_id.group(0)[:-1]
|
return 'http://forsal.pl/drukowanie' + url_id.group(0)[:-1]
|
||||||
else:
|
else:
|
||||||
|
@ -16,7 +16,7 @@ class Gildia(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
ignore_duplicate_articles = {'title', 'url'}
|
ignore_duplicate_articles = {'title', 'url'}
|
||||||
preprocess_regexps = [(re.compile(ur'</?sup>'), lambda match: '')]
|
preprocess_regexps = [(re.compile(u'</?sup>'), lambda match: '')]
|
||||||
ignore_duplicate_articles = {'title', 'url'}
|
ignore_duplicate_articles = {'title', 'url'}
|
||||||
remove_tags = [dict(name='div', attrs={'class': [
|
remove_tags = [dict(name='div', attrs={'class': [
|
||||||
'backlink', 'im_img', 'addthis_toolbox addthis_default_style', 'banner-bottom']})]
|
'backlink', 'im_img', 'addthis_toolbox addthis_default_style', 'banner-bottom']})]
|
||||||
|
@ -15,7 +15,7 @@ class in4(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
preprocess_regexps = [
|
preprocess_regexps = [
|
||||||
(re.compile(ur'<a title="translate into.*?</a>', re.DOTALL), lambda match: '')]
|
(re.compile(u'<a title="translate into.*?</a>', re.DOTALL), lambda match: '')]
|
||||||
keep_only_tags = [dict(name='div', attrs={'class': 'left_alone'})]
|
keep_only_tags = [dict(name='div', attrs={'class': 'left_alone'})]
|
||||||
remove_tags_after = dict(name='img', attrs={'title': 'komentarze'})
|
remove_tags_after = dict(name='img', attrs={'title': 'komentarze'})
|
||||||
remove_tags = [dict(name='img', attrs={'title': 'komentarze'})]
|
remove_tags = [dict(name='img', attrs={'title': 'komentarze'})]
|
||||||
|
@ -14,7 +14,7 @@ class Ksiazka_net_pl(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
preprocess_regexps = [
|
preprocess_regexps = [
|
||||||
(re.compile(ur'Podoba mi się, kupuję:'), lambda match: '<br />')]
|
(re.compile(u'Podoba mi się, kupuję:'), lambda match: '<br />')]
|
||||||
remove_tags_before = dict(name='div', attrs={'class': 'm-body'})
|
remove_tags_before = dict(name='div', attrs={'class': 'm-body'})
|
||||||
remove_tags_after = dict(name='div', attrs={'class': 'm-body-link'})
|
remove_tags_after = dict(name='div', attrs={'class': 'm-body-link'})
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
|
@ -10,8 +10,8 @@ class NaTemat(BasicNewsRecipe):
|
|||||||
description = u'informacje, komentarze, opinie'
|
description = u'informacje, komentarze, opinie'
|
||||||
category = 'news'
|
category = 'news'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
preprocess_regexps = [(re.compile(ur'Czytaj też\:.*?</a>', re.IGNORECASE), lambda m: ''), (re.compile(ur'Zobacz też\:.*?</a>', re.IGNORECASE), lambda m: ''), # noqa
|
preprocess_regexps = [(re.compile(u'Czytaj też\\:.*?</a>', re.IGNORECASE), lambda m: ''), (re.compile(u'Zobacz też\\:.*?</a>', re.IGNORECASE), lambda m: ''), # noqa
|
||||||
(re.compile(ur'Czytaj więcej\:.*?</a>', re.IGNORECASE), lambda m: ''), (re.compile(ur'Czytaj również\:.*?</a>', re.IGNORECASE), lambda m: '')] # noqa
|
(re.compile(u'Czytaj więcej\\:.*?</a>', re.IGNORECASE), lambda m: ''), (re.compile(u'Czytaj również\\:.*?</a>', re.IGNORECASE), lambda m: '')] # noqa
|
||||||
cover_url = 'http://blog.plona.pl/wp-content/uploads/2012/05/natemat.png'
|
cover_url = 'http://blog.plona.pl/wp-content/uploads/2012/05/natemat.png'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
keep_only_tags = [dict(id='main')]
|
keep_only_tags = [dict(id='main')]
|
||||||
|
@ -14,8 +14,8 @@ class Tablety_pl(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
oldest_article = 8
|
oldest_article = 8
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
preprocess_regexps = [(re.compile(ur'<p><strong>Przeczytaj także.*?</a></strong></p>', re.DOTALL), lambda match: ''),
|
preprocess_regexps = [(re.compile(u'<p><strong>Przeczytaj także.*?</a></strong></p>', re.DOTALL), lambda match: ''),
|
||||||
(re.compile(ur'<p><strong>Przeczytaj koniecznie.*?</a></strong></p>', re.DOTALL), lambda match: '')]
|
(re.compile(u'<p><strong>Przeczytaj koniecznie.*?</a></strong></p>', re.DOTALL), lambda match: '')]
|
||||||
keep_only_tags = [dict(id='news_block')]
|
keep_only_tags = [dict(id='news_block')]
|
||||||
remove_tags = [dict(attrs={'class': ['comments_icon', 'wp-polls', 'entry-comments',
|
remove_tags = [dict(attrs={'class': ['comments_icon', 'wp-polls', 'entry-comments',
|
||||||
'wp-polls-loading', 'ts-fab-wrapper', 'entry-footer', 'social-custom']})]
|
'wp-polls-loading', 'ts-fab-wrapper', 'entry-footer', 'social-custom']})]
|
||||||
|
@ -12,8 +12,8 @@ class tanuki(BasicNewsRecipe):
|
|||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
extra_css = 'ul {list-style: none; padding: 0; margin: 0;} .kadr{float: left;} .dwazdania {float: right;}'
|
extra_css = 'ul {list-style: none; padding: 0; margin: 0;} .kadr{float: left;} .dwazdania {float: right;}'
|
||||||
preprocess_regexps = [(re.compile(ur'<h3><a class="screen".*?</h3>', re.DOTALL), lambda match: ''), (re.compile(
|
preprocess_regexps = [(re.compile(u'<h3><a class="screen".*?</h3>', re.DOTALL), lambda match: ''), (re.compile(
|
||||||
ur'<div><a href="/strony/((manga)|(anime))/[0-9]+?/oceny(\-redakcji){0,1}">Zobacz jak ocenili</a></div>', re.DOTALL), lambda match: '')]
|
unicode(r'<div><a href="/strony/((manga)|(anime))/[0-9]+?/oceny(\-redakcji){0,1}">Zobacz jak ocenili</a></div>'), re.DOTALL), lambda match: '')]
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
keep_only_tags = [dict(attrs={'class': ['animename', 'storyname', 'nextarrow', 'sideinfov', 'sidelinfov', 'sideinfo', 'sidelinfo']}), dict(name='table', attrs={ 'summary': 'Technikalia'}), dict(attrs={'class': ['chaptername', 'copycat']}), dict(id='rightcolumn'), dict(attrs={'class': ['headn_tt', 'subtable']})] # noqa
|
keep_only_tags = [dict(attrs={'class': ['animename', 'storyname', 'nextarrow', 'sideinfov', 'sidelinfov', 'sideinfo', 'sidelinfo']}), dict(name='table', attrs={ 'summary': 'Technikalia'}), dict(attrs={'class': ['chaptername', 'copycat']}), dict(id='rightcolumn'), dict(attrs={'class': ['headn_tt', 'subtable']})] # noqa
|
||||||
|
@ -11,7 +11,7 @@ class TawernaRPG(BasicNewsRecipe):
|
|||||||
language = 'pl'
|
language = 'pl'
|
||||||
extra_css = '.slajd {list-style-type: none; padding-left: 0px; margin-left: 0px;} .lewanc {float: left; margin-right: 5px;} .srodek {display: block; margin-left: auto; margin-right: auto;}' # noqa
|
extra_css = '.slajd {list-style-type: none; padding-left: 0px; margin-left: 0px;} .lewanc {float: left; margin-right: 5px;} .srodek {display: block; margin-left: auto; margin-right: auto;}' # noqa
|
||||||
cover_url = 'http://www.tawerna.rpg.pl/img/logo.png'
|
cover_url = 'http://www.tawerna.rpg.pl/img/logo.png'
|
||||||
preprocess_regexps = [(re.compile(ur'<h2>Dodaj komentarz</h2>.*</body>',
|
preprocess_regexps = [(re.compile(u'<h2>Dodaj komentarz</h2>.*</body>',
|
||||||
re.DOTALL | re.IGNORECASE), lambda match: '</body>')]
|
re.DOTALL | re.IGNORECASE), lambda match: '</body>')]
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
|
@ -20,8 +20,8 @@ class Trojmiasto(BasicNewsRecipe):
|
|||||||
remove_attributes = ['style', 'font']
|
remove_attributes = ['style', 'font']
|
||||||
ignore_duplicate_articles = {'title', 'url'}
|
ignore_duplicate_articles = {'title', 'url'}
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(ur'<strong>Czytaj więcej.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: ''), (re.compile(ur'<strong>Zobacz też.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: ''), # noqa
|
preprocess_regexps = [(re.compile(u'<strong>Czytaj więcej.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: ''), (re.compile(u'<strong>Zobacz też.*?</a>', re.DOTALL | re.IGNORECASE), lambda match: ''), # noqa
|
||||||
(re.compile(ur'<b>[A-ZĄĆĘŁŃÓŚŹŻ \-,.:]*?</b>', re.DOTALL), lambda match: ''), ]
|
(re.compile(u'<b>[A-ZĄĆĘŁŃÓŚŹŻ ,.:-]*?</b>', re.DOTALL), lambda match: ''), ]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(id=['logo', 'font_small', 'font_big']),
|
dict(id=['logo', 'font_small', 'font_big']),
|
||||||
|
@ -9,8 +9,8 @@ class WNP(BasicNewsRecipe):
|
|||||||
description = u'Wirtualny Nowy Przemysł'
|
description = u'Wirtualny Nowy Przemysł'
|
||||||
category = 'economy'
|
category = 'economy'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
preprocess_regexps = [(re.compile(ur'Czytaj też:.*?</a>', re.DOTALL), lambda match: ''),
|
preprocess_regexps = [(re.compile(u'Czytaj też:.*?</a>', re.DOTALL), lambda match: ''),
|
||||||
(re.compile(ur'Czytaj więcej:.*?</a>', re.DOTALL), lambda match: '')]
|
(re.compile(u'Czytaj więcej:.*?</a>', re.DOTALL), lambda match: '')]
|
||||||
oldest_article = 8
|
oldest_article = 8
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
Loading…
x
Reference in New Issue
Block a user