This commit is contained in:
Kovid Goyal 2017-08-15 09:42:23 +05:30
parent ff53f9e3ad
commit eaed84a0a6
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
4 changed files with 4 additions and 6 deletions

View File

@ -1,5 +1,4 @@
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup as bs
class Adventure_zone(BasicNewsRecipe):

View File

@ -5,7 +5,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class FilmOrgPl(BasicNewsRecipe):
title = u'Film.org.pl'
__author__ = 'fenuks'
description = u"Recenzje, analizy, artykuły, rankingi - wszystko o filmie dla miłośników kina. Opisy efektów specjalnych, wersji reżyserskich, remake'ów, sequeli. No i forum filmowe. Jedne z największych w Polsce."
description = u"Recenzje, analizy, artykuły, rankingi - wszystko o filmie dla miłośników kina. Opisy efektów specjalnych, wersji reżyserskich, remake'ów, sequeli. No i forum filmowe. Jedne z największych w Polsce." # noqa
category = 'film'
language = 'pl'
cover_url = 'http://film.org.pl/wp-content/uploads/2015/02/film.org.pl_film.org_.pl_kmfviolet4.png'

View File

@ -21,7 +21,7 @@ class FilmWebPl(BasicNewsRecipe):
'ul.inline > li {display: inline;} '
'ul.sep-line > li + li::before {content: " | "} '
'ul.inline {padding:0px;} .vertical-align {display: inline-block;}')
# NOTE(review): this is a rendered diff hunk, not runnable source. The next two
# lines are the removed and the added version of the same line; they look
# identical here, so the change is presumably whitespace-only -- confirm
# against the raw diff.
# Each entry pairs a compiled pattern with a callable that returns the replacement:
#   1. drop everything from "<body" up to the next "</head>" (non-greedy, DOTALL);
#   2. drop the "(kliknij, aby powiększyć)" caption, optionally wrapped in <sup>,
#      ignoring case;
#   3. replace one or more back-to-back pairs of <br> tags with a single "<br />".
# The ur'' prefix on the third pattern is Python 2-only syntax.
preprocess_regexps = [(re.compile(r'<body.+?</head>', re.DOTALL), lambda match: ''), # fix malformed HTML with 2 body tags...
preprocess_regexps = [(re.compile(r'<body.+?</head>', re.DOTALL), lambda match: ''), # fix malformed HTML with 2 body tags...
(re.compile(u'(?:<sup>)?\(kliknij\,\ aby powiększyć\)(?:</sup>)?', re.IGNORECASE), lambda m: ''),
(re.compile(ur'(<br ?/?>\s*?<br ?/?>\s*?)+', re.IGNORECASE), lambda m: '<br />')
]
@ -45,8 +45,8 @@ class FilmWebPl(BasicNewsRecipe):
]
def preprocess_html(self, soup):
# Prefix self.index to every anchor href that contains neither 'http://' nor
# 'https://', then return the same soup object.
# NOTE(review): rendered diff hunk with +/- markers and indentation stripped.
# Presumably the first for/if pair is the removed version and the second pair
# is its replacement (usual diff order) -- confirm against the raw diff.
# Removed version: visit every <a> and test for the attribute with has_key().
for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
# Added version: href=True is presumably BeautifulSoup's attribute filter, so
# only anchors that carry an href are visited and has_key() is not needed.
for a in soup('a', href=True):
if 'http://' not in a['href'] and 'https://' not in a['href']:
# Substring test, not a prefix test: an href that merely contains a scheme
# somewhere in it is left untouched.
a['href'] = self.index + a['href']
return soup

View File

@ -1,5 +1,4 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class ParisReview(BasicNewsRecipe):