Merge from trunk
@ -7,6 +7,7 @@ class Archeowiesci(BasicNewsRecipe):
|
||||
language = 'pl'
|
||||
cover_url='http://archeowiesci.pl/wp-content/uploads/2011/05/Archeowiesci2-115x115.jpg'
|
||||
oldest_article = 7
|
||||
needs_subscription='optional'
|
||||
max_articles_per_feed = 100
|
||||
auto_cleanup = True
|
||||
remove_tags=[dict(name='span', attrs={'class':['post-ratings', 'post-ratings-loading']})]
|
||||
@ -16,6 +17,16 @@ class Archeowiesci(BasicNewsRecipe):
|
||||
feeds = BasicNewsRecipe.parse_feeds(self)
|
||||
for feed in feeds:
|
||||
for article in feed.articles[:]:
|
||||
if 'subskrypcja' in article.title:
|
||||
if self.username is None and 'subskrypcja' in article.title:
|
||||
feed.articles.remove(article)
|
||||
return feeds
|
||||
|
||||
# Return the browser used for downloads. When both a username and a password
# are configured, the credentials are first submitted through the form named
# 'loginform' on the site's wp-login.php page.
def get_browser(self):
|
||||
# NOTE(review): called without `self` -- presumably get_browser is a
# classmethod in the calibre version this recipe targets; confirm.
br = BasicNewsRecipe.get_browser()
|
||||
# Log in only when both credentials were supplied.
if self.username is not None and self.password is not None:
|
||||
br.open('http://archeowiesci.pl/wp-login.php')
|
||||
br.select_form(name='loginform')
|
||||
# 'log' and 'pwd' are the form controls that receive the credentials.
br['log'] = self.username
|
||||
br['pwd'] = self.password
|
||||
br.submit()
|
||||
return br
|
@ -1,15 +1,18 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
import re
|
||||
class Astronomia_pl(BasicNewsRecipe):
|
||||
title = u'Astronomia.pl'
|
||||
__author__ = 'fenuks'
|
||||
description = 'Astronomia - polish astronomy site'
|
||||
masthead_url = 'http://www.astronomia.pl/grafika/logo.gif'
|
||||
cover_url = 'http://www.astronomia.pl/grafika/logo.gif'
|
||||
category = 'astronomy, science'
|
||||
language = 'pl'
|
||||
oldest_article = 8
|
||||
max_articles_per_feed = 100
|
||||
#no_stylesheets=True
|
||||
extra_css='#h2 {font-size: 18px;}'
|
||||
no_stylesheets=True
|
||||
preprocess_regexps = [(re.compile(ur'<b>Przeczytaj także:.*?</BODY>', re.DOTALL), lambda match: '</BODY>') ]
|
||||
remove_tags_before=dict(name='div', attrs={'id':'a1'})
|
||||
keep_only_tags=[dict(name='div', attrs={'id':['a1', 'h2']})]
|
||||
feeds = [(u'Wiadomości z astronomii i astronautyki', u'http://www.astronomia.pl/rss/')]
|
||||
|
@ -4,16 +4,17 @@ class Benchmark_pl(BasicNewsRecipe):
|
||||
title = u'Benchmark.pl'
|
||||
__author__ = 'fenuks'
|
||||
description = u'benchmark.pl -IT site'
|
||||
masthead_url = 'http://www.benchmark.pl/i/logo-footer.png'
|
||||
cover_url = 'http://www.ieaddons.pl/benchmark/logo_benchmark_new.gif'
|
||||
category = 'IT'
|
||||
language = 'pl'
|
||||
oldest_article = 8
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets=True
|
||||
preprocess_regexps = [(re.compile(ur'\bWięcej o .*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>')]
|
||||
preprocess_regexps = [(re.compile(ur'<h3><span style="font-size: small;"> Zobacz poprzednie <a href="http://www.benchmark.pl/news/zestawienie/grupa_id/135">Opinie dnia:</a></span>.*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>'), (re.compile(ur'Więcej o .*?</ul>', re.DOTALL|re.IGNORECASE), lambda match: '')]
|
||||
keep_only_tags=[dict(name='div', attrs={'class':['m_zwykly', 'gallery']})]
|
||||
remove_tags_after=dict(name='div', attrs={'class':'body'})
|
||||
remove_tags=[dict(name='div', attrs={'class':['kategoria', 'socialize', 'thumb', 'panelOcenaObserwowane', 'categoryNextToSocializeGallery']})]
|
||||
remove_tags=[dict(name='div', attrs={'class':['kategoria', 'socialize', 'thumb', 'panelOcenaObserwowane', 'categoryNextToSocializeGallery']}), dict(name='table', attrs={'background':'http://www.benchmark.pl/uploads/backend_img/a/fotki_newsy/opinie_dnia/bg.png'}), dict(name='table', attrs={'width':'210', 'cellspacing':'1', 'cellpadding':'4', 'border':'0', 'align':'right'})]
|
||||
INDEX= 'http://www.benchmark.pl'
|
||||
feeds = [(u'Aktualności', u'http://www.benchmark.pl/rss/aktualnosci-pliki.xml'),
|
||||
(u'Testy i recenzje', u'http://www.benchmark.pl/rss/testy-recenzje-minirecenzje.xml')]
|
||||
|
@ -10,10 +10,11 @@ class Biolog_pl(BasicNewsRecipe):
|
||||
description = u'Przyrodnicze aktualności ze świata nauki (codziennie aktualizowane), kurs biologii, testy i sprawdziany, forum dyskusyjne.'
|
||||
category = 'biology'
|
||||
language = 'pl'
|
||||
masthead_url= 'http://www.biolog.pl/naukowy,portal,biolog.png'
|
||||
cover_url='http://www.biolog.pl/naukowy,portal,biolog.png'
|
||||
no_stylesheets = True
|
||||
#keeps_only_tags=[dict(id='main')]
|
||||
remove_tags_before=dict(id='main')
|
||||
remove_tags_after=dict(name='a', attrs={'name':'komentarze'})
|
||||
remove_tags=[dict(name='img', attrs={'alt':'Komentarze'})]
|
||||
remove_tags=[dict(name='img', attrs={'alt':'Komentarze'}), dict(name='span', attrs={'class':'menu_odsylacze'})]
|
||||
feeds = [(u'Wszystkie', u'http://www.biolog.pl/backend.php'), (u'Medycyna', u'http://www.biolog.pl/medycyna-rss.php'), (u'Ekologia', u'http://www.biolog.pl/rss-ekologia.php'), (u'Genetyka i biotechnologia', u'http://www.biolog.pl/rss-biotechnologia.php'), (u'Botanika', u'http://www.biolog.pl/rss-botanika.php'), (u'Le\u015bnictwo', u'http://www.biolog.pl/rss-lesnictwo.php'), (u'Zoologia', u'http://www.biolog.pl/rss-zoologia.php')]
|
||||
|
@ -1,16 +1,20 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class CD_Action(BasicNewsRecipe):
|
||||
title = u'CD-Action'
|
||||
__author__ = 'fenuks'
|
||||
description = 'cdaction.pl - polish magazine about games site'
|
||||
description = 'cdaction.pl - polish games magazine site'
|
||||
category = 'games'
|
||||
language = 'pl'
|
||||
oldest_article = 8
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets= True
|
||||
cover_url =u'http://s.cdaction.pl/obrazki/logo-CD-Action_172k9.JPG'
|
||||
keep_only_tags= dict(id='news_content')
|
||||
remove_tags_after= dict(name='div', attrs={'class':'tresc'})
|
||||
feeds = [(u'Newsy', u'http://www.cdaction.pl/rss_newsy.xml')]
|
||||
|
||||
|
||||
def get_cover_url(self):
    """Return the cover URL scraped from the CD-Action magazine page.

    The magazine index page is fetched and the link inside the element with
    id ``wspolnik`` is used to build an absolute cover URL, which is stored
    on ``self.cover_url`` and returned.

    If the expected markup is missing (no ``wspolnik`` element, no inner
    ``div``/``a``, or no ``href``), ``self.cover_url`` is left untouched and
    its current value is returned instead of raising, so a layout change on
    the site does not abort the whole download.

    Returns:
        The cover URL, or whatever ``self.cover_url`` already held (``None``
        when the attribute does not exist).
    """
    soup = self.index_to_soup('http://www.cdaction.pl/magazyn/')

    # Walk the find -> div -> a chain one step at a time; every step can
    # legitimately come back as None when the page layout changes.
    box = soup.find(id='wspolnik')
    inner = box.div if box is not None else None
    link = inner.a if inner is not None else None
    href = link.get('href') if link is not None else None

    if href:
        self.cover_url = 'http://www.cdaction.pl' + href
    return getattr(self, 'cover_url', None)
|
@ -5,6 +5,7 @@ class CGM(BasicNewsRecipe):
|
||||
oldest_article = 7
|
||||
__author__ = 'fenuks'
|
||||
description = u'Codzienna Gazeta Muzyczna'
|
||||
masthead_url='http://www.cgm.pl/img/header/logo.gif'
|
||||
cover_url = 'http://www.krafcy.com/foto/tinymce/Image/cgm%281%29.jpg'
|
||||
category = 'music'
|
||||
language = 'pl'
|
||||
@ -23,21 +24,19 @@ class CGM(BasicNewsRecipe):
|
||||
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
gallery=soup.find('div', attrs={'class':'galleryFlash'})
|
||||
if gallery:
|
||||
img=gallery.div
|
||||
gallery.img.extract()
|
||||
if img:
|
||||
img=img['style']
|
||||
img='http://www.cgm.pl'+img[img.find('url(')+4:img.find(')')]
|
||||
gallery.contents[1].name='img'
|
||||
gallery.contents[1]['src']=img
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
ad=soup.findAll('a')
|
||||
for r in ad:
|
||||
if 'http://www.hustla.pl' in r['href'] or 'http://www.ebilet.pl' in r['href']:
|
||||
if 'www.hustla.pl' in r['href'] or 'www.ebilet.pl' in r['href']:
|
||||
r.extract()
|
||||
gallery=soup.find('div', attrs={'class':'galleryFlash'})
|
||||
if gallery:
|
||||
img=gallery.find('embed')
|
||||
if img:
|
||||
img=img['src'][35:]
|
||||
img='http://www.cgm.pl/_vault/_gallery/_photo/'+img
|
||||
param=gallery.findAll(name='param')
|
||||
for i in param:
|
||||
i.extract()
|
||||
gallery.contents[1].name='img'
|
||||
gallery.contents[1]['src']=img
|
||||
return soup
|
@ -33,6 +33,32 @@ class ChristianScienceMonitor(BasicNewsRecipe):
|
||||
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
requires_version = (0, 8, 39)
|
||||
|
||||
def preprocess_raw_html(self, raw, url):
    """Re-parse the raw page with html5lib and strip non-content nodes.

    The markup is parsed into an lxml tree, every ``script``, ``style``,
    ``noscript``, ``meta``, ``link`` and ``object`` element and every comment
    is removed, and the tree is serialised back to text. Anything that
    precedes the ``<html`` tag in the serialised output is dropped.

    Args:
        raw: The page markup as text.
        url: The page URL (not used by this implementation).

    Returns:
        The cleaned markup as text, starting at ``<html``.

    Raises:
        Any exception raised while parsing or serialising is re-raised after
        its traceback has been printed.
    """
    try:
        from html5lib import parse
        from lxml import etree

        def discard(node):
            # lxml's remove() also throws away the text that FOLLOWS the
            # node (its tail), which would silently eat article text placed
            # after a script or comment. Re-attach the tail to the previous
            # sibling, or to the parent's text, before removing the node.
            parent = node.getparent()
            if parent is None:
                return
            if node.tail:
                before = node.getprevious()
                if before is not None:
                    before.tail = (before.tail or '') + node.tail
                else:
                    parent.text = (parent.text or '') + node.tail
            parent.remove(node)

        root = parse(raw, namespaceHTMLElements=False,
                treebuilder='lxml').getroot()
        for tag in root.xpath(
                '//script|//style|//noscript|//meta|//link|//object'):
            discard(tag)
        # Materialise the iterator first: the tree is mutated while looping.
        for elem in list(root.iterdescendants(tag=etree.Comment)):
            discard(elem)
        ans = etree.tostring(root, encoding=unicode)
        # Drop everything the serialiser emitted before the <html tag.
        ans = re.sub('.*<html', '<html', ans, flags=re.DOTALL)
        return ans
    except Exception:
        # Print the traceback here, then let the caller see the failure.
        import traceback
        traceback.print_exc()
        raise
|
||||
|
||||
# Fetch `url`, run the result through preprocess_raw_html, and hand the
# cleaned markup to the base-class index_to_soup.
def index_to_soup(self, url):
|
||||
# Ask the base class for the raw download and decode it as UTF-8
# (strict decoding: undecodable bytes raise UnicodeDecodeError).
raw = BasicNewsRecipe.index_to_soup(self, url,
|
||||
raw=True).decode('utf-8')
|
||||
raw = self.preprocess_raw_html(raw, url)
|
||||
# NOTE(review): the cleaned markup (not a URL) is passed here -- presumably
# the base implementation also accepts markup text; confirm.
return BasicNewsRecipe.index_to_soup(self, raw)
|
||||
|
||||
def append_page(self, soup, appendtag, position):
|
||||
nav = soup.find('div',attrs={'class':'navigation'})
|
||||
@ -78,14 +104,6 @@ class ChristianScienceMonitor(BasicNewsRecipe):
|
||||
print_soup = soup
|
||||
return print_soup
|
||||
|
||||
preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
|
||||
[
|
||||
(r'<!--.*?-->', lambda match : ''),
|
||||
(r'<body.*?<div id="story"', lambda match : '<body><div id="story"'),
|
||||
(r'<div class="pubdate">.*?</div>', lambda m: ''),
|
||||
(r'Full HTML version of this story which may include photos, graphics, and related links.*</body>',
|
||||
lambda match : '</body>'),
|
||||
]]
|
||||
extra_css = '''
|
||||
h1{ color:#000000;font-family: Georgia,Times,"Times New Roman",serif; font-size: large}
|
||||
.sub{ color:#000000;font-family: Georgia,Times,"Times New Roman",serif; font-size: small;}
|
||||
|
48
recipes/ciekawostki_historyczne.recipe
Normal file
@ -0,0 +1,48 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
class Ciekawostki_Historyczne(BasicNewsRecipe):
|
||||
title = u'Ciekawostki Historyczne'
|
||||
oldest_article = 7
|
||||
__author__ = 'fenuks'
|
||||
description = u'Serwis popularnonaukowy - odkrycia, kontrowersje, historia, ciekawostki, badania, ciekawostki z przeszłości.'
|
||||
category = 'history'
|
||||
language = 'pl'
|
||||
masthead_url= 'http://ciekawostkihistoryczne.pl/wp-content/themes/Wordpress_Magazine/images/logo-ciekawostki-historyczne-male.jpg'
|
||||
cover_url='http://ciekawostkihistoryczne.pl/wp-content/themes/Wordpress_Magazine/images/logo-ciekawostki-historyczne-male.jpg'
|
||||
max_articles_per_feed = 100
|
||||
preprocess_regexps = [(re.compile(ur'Ten artykuł ma kilka stron.*?</fb:like>', re.DOTALL), lambda match: ''), (re.compile(ur'<h2>Zobacz też:</h2>.*?</ol>', re.DOTALL), lambda match: '')]
|
||||
no_stylesheets=True
|
||||
remove_empty_feeds=True
|
||||
keep_only_tags=[dict(name='div', attrs={'class':'post'})]
|
||||
remove_tags=[dict(id='singlepostinfo')]
|
||||
feeds = [(u'Staro\u017cytno\u015b\u0107', u'http://ciekawostkihistoryczne.pl/tag/starozytnosc/feed/'), (u'\u015aredniowiecze', u'http://ciekawostkihistoryczne.pl/tag/sredniowiecze/feed/'), (u'Nowo\u017cytno\u015b\u0107', u'http://ciekawostkihistoryczne.pl/tag/nowozytnosc/feed/'), (u'XIX wiek', u'http://ciekawostkihistoryczne.pl/tag/xix-wiek/feed/'), (u'1914-1939', u'http://ciekawostkihistoryczne.pl/tag/1914-1939/feed/'), (u'1939-1945', u'http://ciekawostkihistoryczne.pl/tag/1939-1945/feed/'), (u'Powojnie (od 1945)', u'http://ciekawostkihistoryczne.pl/tag/powojnie/feed/'), (u'Recenzje', u'http://ciekawostkihistoryczne.pl/category/recenzje/feed/')]
|
||||
|
||||
def append_page(self, soup, appendtag):
    """Append the follow-up pages of a multi-page article to ``appendtag``.

    The page links are looked up in the first ``<h7>`` element of ``soup``.
    When that element has no ``<br>`` child and is directly followed by a
    ``<p>``, the links are taken from that ``<p>`` instead. The element
    holding the links is removed from the document, every linked page is
    downloaded, and its ``div.post`` container is cleaned up and appended to
    the end of ``appendtag``.

    Missing markup is tolerated: no ``<h7>``, no links, a following sibling
    that is absent or a bare text node, a page without ``div.post``, or a
    page without ``<h6>``/``<h7>`` are all skipped instead of raising.

    Args:
        soup: Parsed first page of the article.
        appendtag: Element that receives the additional pages.
    """
    pager = soup.find(name='h7')
    if pager is None:
        return

    if pager.br is None:
        # The sibling may be missing or a text node without a `name`
        # attribute, so read the attribute defensively.
        sibling = pager.nextSibling
        if getattr(sibling, 'name', None) == 'p':
            pager = sibling

    links = pager.findAll('a')
    if not links:
        return

    # Collect the links first, then drop the pager from the article once.
    pager.extract()

    for link in links:
        page = self.index_to_soup(link['href'])
        body = page.find(name='div', attrs={'class': 'post'})
        if body is None:
            continue

        # Strip the per-page furniture that must not be repeated.
        for junk in body.findAll('div', attrs={'id': 'singlepostinfo'}):
            junk.extract()
        for junk in body.findAll('div', attrs={'class': 'wp-caption alignright'}):
            junk.extract()
        for junk in body.findAll('h1'):
            junk.extract()

        # Remove the node that follows the <h6> and the <h7> marker, when
        # the marker and its follower are present.
        for marker_name in ('h6', 'h7'):
            marker = body.find(marker_name)
            follower = getattr(marker, 'nextSibling', None)
            if follower is not None:
                follower.extract()

        appendtag.insert(len(appendtag.contents), body)
|
||||
|
||||
# Hook that receives the parsed article; it calls append_page with the
# document and its <body> element, then returns the same soup object.
def preprocess_html(self, soup):
|
||||
self.append_page(soup, soup.body)
|
||||
return soup
|
||||
|
||||
|
@ -7,10 +7,11 @@ class Computerworld_pl(BasicNewsRecipe):
|
||||
description = u'Serwis o IT w przemyśle, finansach, handlu, administracji oraz rynku IT i telekomunikacyjnym - wiadomości, opinie, analizy, porady prawne'
|
||||
category = 'IT'
|
||||
language = 'pl'
|
||||
masthead_url= 'http://g1.computerworld.pl/cw/beta_gfx/cw2.gif'
|
||||
no_stylesheets=True
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
keep_only_tags=[dict(name='div', attrs={'id':'s'})]
|
||||
keep_only_tags=[dict(attrs={'class':['tyt_news', 'prawo', 'autor', 'tresc']})]
|
||||
remove_tags_after=dict(name='div', attrs={'class':'rMobi'})
|
||||
remove_tags=[dict(name='div', attrs={'class':['nnav', 'rMobi']}), dict(name='table', attrs={'class':'ramka_slx'})]
|
||||
feeds = [(u'Wiadomo\u015bci', u'http://rssout.idg.pl/cw/news_iso.xml')]
|
||||
|
@ -7,6 +7,7 @@ class Dobreprogramy_pl(BasicNewsRecipe):
|
||||
__licence__ ='GPL v3'
|
||||
category = 'IT'
|
||||
language = 'pl'
|
||||
masthead_url='http://static.dpcdn.pl/css/Black/Images/header_logo_napis_fullVersion.png'
|
||||
cover_url = 'http://userlogos.org/files/logos/Karmody/dobreprogramy_01.png'
|
||||
description = u'Aktualności i blogi z dobreprogramy.pl'
|
||||
encoding = 'utf-8'
|
||||
@ -16,7 +17,8 @@ class Dobreprogramy_pl(BasicNewsRecipe):
|
||||
oldest_article = 8
|
||||
max_articles_per_feed = 100
|
||||
preprocess_regexps = [(re.compile(ur'<div id="\S+360pmp4">Twoja przeglądarka nie obsługuje Flasha i HTML5 lub wyłączono obsługę JavaScript...</div>'), lambda match: '') ]
|
||||
remove_tags = [dict(name='div', attrs={'class':['komentarze', 'block', 'portalInfo', 'menuBar', 'topBar']})]
|
||||
keep_only_tags = [dict(name='div', attrs={'class':['mainBar', 'newsContent', 'postTitle title', 'postInfo', 'contentText', 'content']})]
|
||||
keep_only_tags=[dict(attrs={'class':['news', 'entry single']})]
|
||||
remove_tags = [dict(name='div', attrs={'class':['newsOptions', 'noPrint', 'komentarze', 'tags font-heading-master']})]
|
||||
#remove_tags = [dict(name='div', attrs={'class':['komentarze', 'block', 'portalInfo', 'menuBar', 'topBar']})]
|
||||
feeds = [(u'Aktualności', 'http://feeds.feedburner.com/dobreprogramy/Aktualnosci'),
|
||||
('Blogi', 'http://feeds.feedburner.com/dobreprogramy/BlogCzytelnikow')]
|
||||
|
@ -8,15 +8,17 @@ class Dziennik_pl(BasicNewsRecipe):
|
||||
description = u'Wiadomości z kraju i ze świata. Wiadomości gospodarcze. Znajdziesz u nas informacje, wydarzenia, komentarze, opinie.'
|
||||
category = 'newspaper'
|
||||
language = 'pl'
|
||||
cover_url='http://6.s.dziennik.pl/images/og_dziennik.jpg'
|
||||
masthead_url= 'http://5.s.dziennik.pl/images/logos.png'
|
||||
cover_url= 'http://5.s.dziennik.pl/images/logos.png'
|
||||
no_stylesheets = True
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
remove_javascript=True
|
||||
remove_empty_feeds=True
|
||||
preprocess_regexps = [(re.compile("Komentarze:"), lambda m: '')]
|
||||
extra_css= 'ul {list-style: none; padding: 0; margin: 0;} li {float: left;margin: 0 0.15em;}'
|
||||
preprocess_regexps = [(re.compile("Komentarze:"), lambda m: ''), (re.compile('<p><strong><a href=".*?">>>> CZYTAJ TAKŻE: ".*?"</a></strong></p>'), lambda m: '')]
|
||||
keep_only_tags=[dict(id='article')]
|
||||
remove_tags=[dict(name='div', attrs={'class':['art_box_dodatki', 'new_facebook_icons2', 'leftArt', 'article_print', 'quiz-widget']}), dict(name='a', attrs={'class':'komentarz'})]
|
||||
remove_tags=[dict(name='div', attrs={'class':['art_box_dodatki', 'new_facebook_icons2', 'leftArt', 'article_print', 'quiz-widget', 'belka-spol', 'belka-spol belka-spol-bottom', 'art_data_tags', 'cl_right', 'boxRounded gal_inside']}), dict(name='a', attrs={'class':['komentarz', 'article_icon_addcommnent']})]
|
||||
feeds = [(u'Wszystko', u'http://rss.dziennik.pl/Dziennik-PL/'),
|
||||
(u'Wiadomości', u'http://rss.dziennik.pl/Dziennik-Wiadomosci'),
|
||||
(u'Gospodarka', u'http://rss.dziennik.pl/Dziennik-Gospodarka'),
|
||||
@ -30,6 +32,12 @@ class Dziennik_pl(BasicNewsRecipe):
|
||||
(u'Podróże', u'http://rss.dziennik.pl/Dziennik-Podroze/'),
|
||||
(u'Nieruchomości', u'http://rss.dziennik.pl/Dziennik-Nieruchomosci')]
|
||||
|
||||
# If the page contains a link titled 'CZYTAJ DALEJ', fetch that link's target
# and return what index_to_soup yields for it.
# NOTE(review): indentation is lost in this view -- presumably the return sits
# inside the `if`, so pages without the link yield None; confirm.
def skip_ad_pages(self, soup):
|
||||
# Look for the anchor whose title attribute is 'CZYTAJ DALEJ'.
tag=soup.find(name='a', attrs={'title':'CZYTAJ DALEJ'})
|
||||
if tag:
|
||||
# raw=True is forwarded to index_to_soup; its result is returned unchanged.
new_soup=self.index_to_soup(tag['href'], raw=True)
|
||||
return new_soup
|
||||
|
||||
def append_page(self, soup, appendtag):
|
||||
tag=soup.find('a', attrs={'class':'page_next'})
|
||||
if tag:
|
||||
@ -56,3 +64,4 @@ class Dziennik_pl(BasicNewsRecipe):
|
||||
# Hook that receives the parsed article; it calls append_page with the
# document and its <body> element, then returns the same soup object.
def preprocess_html(self, soup):
|
||||
self.append_page(soup, soup.body)
|
||||
return soup
|
||||
|
||||
|
@ -10,7 +10,8 @@ class Filmweb_pl(BasicNewsRecipe):
|
||||
oldest_article = 8
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets= True
|
||||
extra_css = '.hdrBig {font-size:22px;}'
|
||||
remove_empty_feeds=True
|
||||
extra_css = '.hdrBig {font-size:22px;} ul {list-style-type:none; padding: 0; margin: 0;}'
|
||||
remove_tags= [dict(name='div', attrs={'class':['recommendOthers']}), dict(name='ul', attrs={'class':'fontSizeSet'})]
|
||||
keep_only_tags= [dict(name='h1', attrs={'class':'hdrBig'}), dict(name='div', attrs={'class':['newsInfo', 'reviewContent fontSizeCont description']})]
|
||||
feeds = [(u'Wszystkie newsy', u'http://www.filmweb.pl/feed/news/latest'),
|
||||
|
21
recipes/gameplay_pl.recipe
Normal file
@ -0,0 +1,21 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class Gameplay_pl(BasicNewsRecipe):
|
||||
title = u'Gameplay.pl'
|
||||
oldest_article = 7
|
||||
__author__ = 'fenuks'
|
||||
description = u'gameplay.pl - serwis o naszych zainteresowaniach, grach, filmach, książkach, muzyce, fotografii i konsolach.'
|
||||
category = 'games, movies, books, music'
|
||||
language = 'pl'
|
||||
masthead_url= 'http://gameplay.pl/img/gpy_top_logo.png'
|
||||
cover_url= 'http://gameplay.pl/img/gpy_top_logo.png'
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets= True
|
||||
keep_only_tags=[dict(name='div', attrs={'class':['news_endpage_tit', 'news']})]
|
||||
remove_tags=[dict(name='div', attrs={'class':['galeria', 'noedit center im']})]
|
||||
feeds = [(u'Wiadomo\u015bci', u'http://gameplay.pl/rss/')]
|
||||
|
||||
def image_url_processor(self, baseurl, url):
    """Turn an image reference from gameplay.pl into an absolute URL.

    URLs that already start with ``http://`` or ``https://`` are returned
    unchanged. Everything else is resolved against ``http://gameplay.pl``:

    * a leading ``..`` is stripped (``../img/a.png`` becomes
      ``http://gameplay.pl/img/a.png``, as before);
    * protocol-relative URLs (``//host/path``) get an ``http:`` scheme;
    * other relative paths are joined to the site root with a single slash.

    The scheme is detected by prefix rather than by searching for ``http``
    anywhere in the string, so a relative path such as
    ``../img/http_logo.png`` is no longer mistaken for an absolute URL.

    Args:
        baseurl: URL of the page the image came from (not used).
        url: The image reference found in the page.

    Returns:
        An absolute image URL.
    """
    if url.lower().startswith(('http://', 'https://')):
        return url
    if url.startswith('//'):
        return 'http:' + url
    if url.startswith('..'):
        # Only drop the two leading characters when they really are '..'.
        url = url[2:]
    return 'http://gameplay.pl/' + url.lstrip('/')
|
@ -4,10 +4,11 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class Gazeta_Wyborcza(BasicNewsRecipe):
|
||||
title = u'Gazeta Wyborcza'
|
||||
__author__ = 'fenuks'
|
||||
cover_url = 'http://bi.gazeta.pl/im/5/10285/z10285445AA.jpg'
|
||||
language = 'pl'
|
||||
description ='news from gazeta.pl'
|
||||
category='newspaper'
|
||||
publication_type = 'newspaper'
|
||||
masthead_url='http://bi.gazeta.pl/im/5/10285/z10285445AA.jpg'
|
||||
INDEX='http://wyborcza.pl'
|
||||
remove_empty_feeds= True
|
||||
oldest_article = 3
|
||||
@ -81,3 +82,10 @@ class Gazeta_Wyborcza(BasicNewsRecipe):
|
||||
return url
|
||||
else:
|
||||
return url.replace('http://wyborcza.biz/biznes/1', 'http://wyborcza.biz/biznes/2029020')
|
||||
|
||||
def get_cover_url(self):
    """Return the cover URL scraped from wyborcza.pl.

    The archive index page is fetched, the link found in the fourth child of
    the element with id ``GWmini2`` is followed, and the first image of that
    page becomes the cover. The result is stored on ``self.cover_url`` and
    returned.

    If any step of the expected markup is missing (no ``GWmini2`` element,
    fewer than four children, no link, no image, no ``src``),
    ``self.cover_url`` is left untouched and its current value is returned
    instead of raising.

    Returns:
        The cover URL, or whatever ``self.cover_url`` already held (``None``
        when the attribute does not exist).
    """
    index = self.index_to_soup('http://wyborcza.pl/0,76762,3751429.html')
    teaser = index.find(id='GWmini2')

    # Every hop of teaser.contents[3].a['href'] can fail on a layout change:
    # None teaser, short contents, a text node, a missing <a> or href.
    try:
        href = teaser.contents[3].a['href']
    except (AttributeError, IndexError, KeyError, TypeError):
        href = None

    if href:
        issue = self.index_to_soup('http://wyborcza.pl/' + href)
        image = issue.img
        src = image.get('src') if image is not None else None
        if src:
            self.cover_url = 'http://wyborcza.pl' + src

    return getattr(self, 'cover_url', None)
|
||||
|
@ -8,29 +8,31 @@ class Gry_online_pl(BasicNewsRecipe):
|
||||
language = 'pl'
|
||||
oldest_article = 13
|
||||
INDEX= 'http://www.gry-online.pl/'
|
||||
cover_url='http://www.gry-online.pl/img/1st_10/1st-gol-logo.png'
|
||||
masthead_url='http://www.gry-online.pl/im/gry-online-logo.png'
|
||||
cover_url='http://www.gry-online.pl/im/gry-online-logo.png'
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets= True
|
||||
extra_css = 'p.wn1{font-size:22px;}'
|
||||
remove_tags_after= [dict(name='div', attrs={'class':['tresc-newsa']})]
|
||||
keep_only_tags = [dict(name='div', attrs={'class':['txthead']}), dict(name='p', attrs={'class':['wtx1', 'wn1', 'wob']}), dict(name='a', attrs={'class':['num_str_nex']})]
|
||||
#remove_tags= [dict(name='div', attrs={'class':['news_plat']})]
|
||||
keep_only_tags=[dict(name='div', attrs={'class':'gc660'})]
|
||||
remove_tags=[dict({'class':['nav-social', 'add-info', 'smlb', 'lista lista3 lista-gry', 'S013po', 'zm_gfx_cnt_bottom', 'ocen-txt', 'wiecej-txt', 'wiecej-txt2']})]
|
||||
feeds = [(u'Newsy', 'http://www.gry-online.pl/rss/news.xml'), ('Teksty', u'http://www.gry-online.pl/rss/teksty.xml')]
|
||||
|
||||
|
||||
def append_page(self, soup, appendtag):
|
||||
nexturl = soup.find('a', attrs={'class':'num_str_nex'})
|
||||
if appendtag.find('a', attrs={'class':'num_str_nex'}) is not None:
|
||||
appendtag.find('a', attrs={'class':'num_str_nex'}).replaceWith('\n')
|
||||
if nexturl is not None:
|
||||
if 'strona' in nexturl.div.string:
|
||||
nexturl= self.INDEX + nexturl['href']
|
||||
soup2 = self.index_to_soup(nexturl)
|
||||
pagetext = soup2.findAll(name='p', attrs={'class':['wtx1', 'wn1', 'wob']})
|
||||
for tag in pagetext:
|
||||
pos = len(appendtag.contents)
|
||||
appendtag.insert(pos, tag)
|
||||
self.append_page(soup2, appendtag)
|
||||
tag = appendtag.find('div', attrs={'class':'n5p'})
|
||||
if tag:
|
||||
nexturls=tag.findAll('a')
|
||||
for nexturl in nexturls[1:]:
|
||||
try:
|
||||
soup2 = self.index_to_soup('http://www.gry-online.pl/S020.asp'+ nexturl['href'])
|
||||
except:
|
||||
soup2 = self.index_to_soup('http://www.gry-online.pl/S022.asp'+ nexturl['href'])
|
||||
pagetext = soup2.find(attrs={'class':'gc660'})
|
||||
for r in pagetext.findAll(name='header'):
|
||||
r.extract()
|
||||
pos = len(appendtag.contents)
|
||||
appendtag.insert(pos, pagetext)
|
||||
for r in appendtag.findAll(attrs={'class':['n5p', 'add-info', 'twitter-share-button']}):
|
||||
r.extract()
|
||||
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
|
BIN
recipes/icons/ciekawostki_historyczne.png
Normal file
After Width: | Height: | Size: 994 B |
BIN
recipes/icons/gameplay_pl.png
Normal file
After Width: | Height: | Size: 991 B |
BIN
recipes/icons/in4_pl.png
Normal file
After Width: | Height: | Size: 357 B |
BIN
recipes/icons/informacje_usa.png
Normal file
After Width: | Height: | Size: 808 B |
BIN
recipes/icons/kresy_pl.png
Normal file
After Width: | Height: | Size: 4.0 KiB |
BIN
recipes/icons/oclab_pl.png
Normal file
After Width: | Height: | Size: 881 B |
BIN
recipes/icons/overclock_pl.png
Normal file
After Width: | Height: | Size: 817 B |
BIN
recipes/icons/palmtop_pl.png
Normal file
After Width: | Height: | Size: 366 B |
BIN
recipes/icons/pc_arena.png
Normal file
After Width: | Height: | Size: 1.1 KiB |
BIN
recipes/icons/pc_centre_pl.png
Normal file
After Width: | Height: | Size: 2.8 KiB |
BIN
recipes/icons/pc_foster.png
Normal file
After Width: | Height: | Size: 694 B |
BIN
recipes/icons/polska_times.png
Normal file
After Width: | Height: | Size: 322 B |
BIN
recipes/icons/pure_pc.png
Normal file
After Width: | Height: | Size: 386 B |
BIN
recipes/icons/tanuki.png
Normal file
After Width: | Height: | Size: 1017 B |
BIN
recipes/icons/tvn24.png
Normal file
After Width: | Height: | Size: 5.1 KiB |
BIN
recipes/icons/webhosting_pl.png
Normal file
After Width: | Height: | Size: 1.4 KiB |
44
recipes/in4_pl.recipe
Normal file
@ -0,0 +1,44 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
class in4(BasicNewsRecipe):
|
||||
title = u'IN4.pl'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
__author__ = 'fenuks'
|
||||
description = u'Serwis Informacyjny - Aktualnosci, recenzje'
|
||||
category = 'IT'
|
||||
language = 'pl'
|
||||
#cover_url= 'http://www.in4.pl/recenzje/337/in4pl.jpg'
|
||||
no_stylesheets = True
|
||||
remove_empty_feeds = True
|
||||
preprocess_regexps = [(re.compile(ur'<a title="translate into.*?</a>', re.DOTALL), lambda match: '') ]
|
||||
keep_only_tags=[dict(name='div', attrs={'class':'left_alone'})]
|
||||
remove_tags_after=dict(name='img', attrs={'title':'komentarze'})
|
||||
remove_tags=[dict(name='img', attrs={'title':'komentarze'})]
|
||||
feeds = [(u'Wiadomo\u015bci', u'http://www.in4.pl/rss.php'), (u'Recenzje', u'http://www.in4.pl/rss_recenzje.php'), (u'Mini recenzje', u'http://www.in4.pl/rss_mini.php')]
|
||||
|
||||
def append_page(self, soup, appendtag):
    """Append the follow-up pages of a multi-page article to ``appendtag``.

    Every link on the first page whose text contains ``następna str`` is
    removed from the document, and the last such link gives the next page.
    Each following page is downloaded, its ``id='news'`` element is appended
    to the end of ``appendtag``, and the first "next page" link still left on
    that page is followed.

    Both searches use the same unicode marker; the first loop previously
    compared a byte-string literal against the link text. Pages without an
    ``id='news'`` element are skipped instead of inserting ``None``, and a
    URL that was already fetched ends the loop so that a "next" link pointing
    back at an earlier page cannot recurse forever.

    Args:
        soup: Parsed first page of the article.
        appendtag: Element that receives the additional pages.
    """
    marker = u'następna str'
    site = 'http://www.in4.pl/'

    nexturl = None
    for anchor in soup.findAll('a'):
        if anchor.string and marker in anchor.string:
            nexturl = site + anchor['href']
            anchor.extract()

    visited = set()
    while nexturl and nexturl not in visited:
        visited.add(nexturl)
        page = self.index_to_soup(nexturl)

        # Move the content over BEFORE scanning for the next link, so links
        # inside the moved content are not considered (original ordering).
        content = page.find(id='news')
        if content is not None:
            appendtag.insert(len(appendtag.contents), content)

        nexturl = None
        for anchor in page.findAll('a'):
            if anchor.string and marker in anchor.string:
                nexturl = site + anchor['href']
                break
|
||||
|
||||
|
||||
# Hook that receives the parsed article; it calls append_page with the
# document and its <body> element, then returns the same soup object.
def preprocess_html(self, soup):
|
||||
self.append_page(soup, soup.body)
|
||||
return soup
|
||||
|
||||
|
18
recipes/informacje_usa.recipe
Normal file
@ -0,0 +1,18 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
class Informacje_USA(BasicNewsRecipe):
|
||||
title = u'Informacje USA'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
__author__ = 'fenuks'
|
||||
description = u'portal wiadomości amerykańskich'
|
||||
category = 'news'
|
||||
language = 'pl'
|
||||
masthead_url= 'http://www.informacjeusa.com/wp-content/add_images/top_logo_5_2010.jpg'
|
||||
cover_url='http://www.informacjeusa.com/wp-content/add_images/top_logo_5_2010.jpg'
|
||||
no_stylesheets = True
|
||||
preprocess_regexps = [(re.compile(ur'<p>Zobacz:.*?</p>', re.DOTALL), lambda match: ''), (re.compile(ur'<p><a href=".*?Zobacz także:.*?</a></p>', re.DOTALL), lambda match: ''), (re.compile(ur'<p><p>Zobacz też:.*?</a></p>', re.DOTALL), lambda match: '')]
|
||||
keep_only_tags=[dict(name='div', attrs={'class':'box box-single'})]
|
||||
remove_tags_after= dict(attrs={'class':'tags'})
|
||||
remove_tags= [dict(attrs={'class':['postmetadata', 'tags', 'banner']}), dict(name='a', attrs={'title':['Drukuj', u'Wyślij']})]
|
||||
feeds = [(u'Informacje', u'http://www.informacjeusa.com/feed/')]
|
14
recipes/kresy_pl.recipe
Normal file
@ -0,0 +1,14 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class Kresy(BasicNewsRecipe):
|
||||
title = u'Kresy'
|
||||
__author__ = 'fenuks'
|
||||
description = u'portal społeczności kresowej'
|
||||
language = 'pl'
|
||||
masthead_url= 'http://www.kresy.pl/public/img/logo.png'
|
||||
cover_url= 'http://www.kresy.pl/public/img/logo.png'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
keep_only_tags= [dict(id='artykul')]
|
||||
remove_tags= [dict(attrs={'class':['twitter-share-button', 'likefbborder', 'tagi']})]
|
||||
feeds = [(u'Wszystkie', u'http://www.kresy.pl/rss')]
|
17
recipes/la_pausa_caffe.recipe
Normal file
@ -0,0 +1,17 @@
|
||||
__version__ = 'v1.0'
|
||||
__date__ = '13, February 2011'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1329125921(BasicNewsRecipe):
|
||||
title = u'La pausa caff\xe8'
|
||||
__author__ = 'faber1971'
|
||||
description = 'An Italian satirical blog'
|
||||
language = 'it'
|
||||
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
auto_cleanup = True
|
||||
no_stylesheets = True
|
||||
feeds = [(u'La pausa caff\xe8', u'http://feeds.feedburner.com/LapausaCaffe')]
|
||||
|
@ -1,4 +1,5 @@
|
||||
__license__ = 'GPL v3'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1327062445(BasicNewsRecipe):
|
||||
@ -7,10 +8,13 @@ class AdvancedUserRecipe1327062445(BasicNewsRecipe):
|
||||
max_articles_per_feed = 100
|
||||
auto_cleanup = True
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
remove_tags = [
|
||||
dict(name='ul', attrs={'id':'ads0'})
|
||||
]
|
||||
masthead_url = 'http://www.simrendeogun.com/wp-content/uploads/2011/06/New-Marketing-Magazine-Logo.jpg'
|
||||
feeds = [(u'My Marketing', u'http://feed43.com/0537744466058428.xml'), (u'My Marketing_', u'http://feed43.com/8126723074604845.xml'), (u'Venturini', u'http://robertoventurini.blogspot.com/feeds/posts/default?alt=rss'), (u'Ninja Marketing', u'http://feeds.feedburner.com/NinjaMarketing'), (u'Comunitàzione', u'http://www.comunitazione.it/feed/novita.asp'), (u'Brandforum news', u'http://www.brandforum.it/rss/news'), (u'Brandforum papers', u'http://www.brandforum.it/rss/papers'), (u'Disambiguando', u'http://giovannacosenza.wordpress.com/feed/')]
|
||||
__author__ = 'faber1971'
|
||||
description = 'Collection of Italian marketing websites - v1.00 (28, January 2012)'
|
||||
description = 'Collection of Italian marketing websites - v1.03 (20, February 2012)'
|
||||
language = 'it'
|
||||
|
||||
|
||||
feeds = [(u'My Marketing', u'http://feed43.com/0537744466058428.xml'), (u'My Marketing_', u'http://feed43.com/8126723074604845.xml'), (u'Venturini', u'http://robertoventurini.blogspot.com/feeds/posts/default?alt=rss'), (u'Ninja Marketing', u'http://feeds.feedburner.com/NinjaMarketing'), (u'Comunitàzione', u'http://www.comunitazione.it/feed/novita.asp'), (u'Brandforum news', u'http://www.brandforum.it/rss/news'), (u'Brandforum papers', u'http://www.brandforum.it/rss/papers'), (u'MarketingArena', u'http://feeds.feedburner.com/marketingarena'), (u'minimarketing', u'http://feeds.feedburner.com/minimarketingit'), (u'Disambiguando', u'http://giovannacosenza.wordpress.com/feed/')]
|
||||
|
@ -1,16 +1,17 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Mathieu Godlewski <mathieu at godlewski.fr>; 2010, Louis Gesbert <meta at antislash dot info>'
|
||||
__copyright__ = '2009, Mathieu Godlewski <mathieu at godlewski.fr>; 2010, 2011, Louis Gesbert <meta at antislash dot info>'
|
||||
'''
|
||||
Mediapart
|
||||
'''
|
||||
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
import re
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Mediapart(BasicNewsRecipe):
|
||||
title = 'Mediapart'
|
||||
__author__ = 'Mathieu Godlewski'
|
||||
description = 'Global news in french from online newspapers'
|
||||
__author__ = 'Mathieu Godlewski, Louis Gesbert'
|
||||
description = 'Global news in french from news site Mediapart'
|
||||
oldest_article = 7
|
||||
language = 'fr'
|
||||
needs_subscription = True
|
||||
@ -18,52 +19,30 @@ class Mediapart(BasicNewsRecipe):
|
||||
max_articles_per_feed = 50
|
||||
no_stylesheets = True
|
||||
|
||||
cover_url = 'http://www.mediapart.fr/sites/all/themes/mediapart/mediapart/images/annonce.jpg'
|
||||
cover_url = 'http://static.mediapart.fr/files/pave_mediapart.jpg'
|
||||
|
||||
feeds = [
|
||||
('Les articles', 'http://www.mediapart.fr/articles/feed'),
|
||||
]
|
||||
|
||||
# -- print-version has poor quality on this website, better do the conversion ourselves
|
||||
#
|
||||
# preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE|re.DOTALL), i[1]) for i in
|
||||
# [
|
||||
# (r'<div class="print-title">([^>]+)</div>', lambda match : '<h2>'+match.group(1)+'</h2>'),
|
||||
# (r'<span class=\'auteur_staff\'>[^>]+<a title=\'[^\']*\'[^>]*>([^<]*)</a>[^<]*</span>',
|
||||
# lambda match : '<i>'+match.group(1)+'</i>'),
|
||||
# (r'\'', lambda match: '’'),
|
||||
# ]
|
||||
# ]
|
||||
#
|
||||
# remove_tags = [ dict(name='div', attrs={'class':'print-source_url'}),
|
||||
# dict(name='div', attrs={'class':'print-links'}),
|
||||
# dict(name='img', attrs={'src':'entete_article.png'}),
|
||||
# dict(name='br') ]
|
||||
#
|
||||
# def print_version(self, url):
|
||||
# raw = self.browser.open(url).read()
|
||||
# soup = BeautifulSoup(raw.decode('utf8', 'replace'))
|
||||
# div = soup.find('div', {'id':re.compile('node-\d+')})
|
||||
# if div is None:
|
||||
# return None
|
||||
# article_id = string.replace(div['id'], 'node-', '')
|
||||
# if article_id is None:
|
||||
# return None
|
||||
# return 'http://www.mediapart.fr/print/'+article_id
|
||||
# -- print-version
|
||||
|
||||
# -- Non-print version [dict(name='div', attrs={'class':'advert'})]
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='h1', attrs={'class':'title'}),
|
||||
dict(name='div', attrs={'class':'page_papier_detail'}),
|
||||
preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE|re.DOTALL), i[1]) for i in
|
||||
[
|
||||
(r'<div class="print-title">([^>]+)</div>', lambda match : '<h2>'+match.group(1)+'</h2>'),
|
||||
(r'\'', lambda match: '’')
|
||||
]
|
||||
]
|
||||
|
||||
def preprocess_html(self,soup):
|
||||
for title in soup.findAll('div', {'class':'titre'}):
|
||||
tag = Tag(soup, 'h3')
|
||||
title.replaceWith(tag)
|
||||
tag.insert(0,title)
|
||||
return soup
|
||||
remove_tags = [ dict(name='div', attrs={'class':'print-source_url'}) ]
|
||||
|
||||
def print_version(self, url):
|
||||
raw = self.browser.open(url).read()
|
||||
soup = BeautifulSoup(raw.decode('utf8', 'replace'))
|
||||
link = soup.find('a', {'title':'Imprimer'})
|
||||
if link is None:
|
||||
return None
|
||||
return link['href']
|
||||
|
||||
# -- Handle login
|
||||
|
||||
@ -76,4 +55,3 @@ class Mediapart(BasicNewsRecipe):
|
||||
br['pass'] = self.password
|
||||
br.submit()
|
||||
return br
|
||||
|
||||
|
@ -1,8 +1,9 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
import re
|
||||
class naczytniki(BasicNewsRecipe):
|
||||
title = u'naczytniki.pl'
|
||||
__author__ = 'fenuks'
|
||||
masthead_url= 'http://naczytniki.pl/wp-content/uploads/2010/08/logo_nc28.png'
|
||||
cover_url = 'http://naczytniki.pl/wp-content/uploads/2010/08/logo_nc28.png'
|
||||
language = 'pl'
|
||||
description ='everything about e-readers'
|
||||
@ -10,6 +11,7 @@ class naczytniki(BasicNewsRecipe):
|
||||
no_stylesheets=True
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
preprocess_regexps = [(re.compile(ur'<p><br><b>Zobacz także:</b></p>.*?</body>', re.DOTALL), lambda match: '</body>') ]
|
||||
remove_tags_after= dict(name='div', attrs={'class':'sociable'})
|
||||
keep_only_tags=[dict(name='div', attrs={'class':'post'})]
|
||||
remove_tags=[dict(name='span', attrs={'class':'comments'}), dict(name='div', attrs={'class':'sociable'})]
|
||||
|
@ -1,21 +1,33 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
class Nowa_Fantastyka(BasicNewsRecipe):
|
||||
title = u'Nowa Fantastyka'
|
||||
oldest_article = 7
|
||||
__author__ = 'fenuks'
|
||||
__modified_by__ = 'zaslav'
|
||||
language = 'pl'
|
||||
encoding='latin2'
|
||||
description ='site for fantasy readers'
|
||||
category='fantasy'
|
||||
masthead_url='http://farm5.static.flickr.com/4133/4956658792_7ba7fbf562.jpg'
|
||||
#extra_css='.tytul {font-size: 20px;}' #not working
|
||||
max_articles_per_feed = 100
|
||||
INDEX='http://www.fantastyka.pl/'
|
||||
no_stylesheets=True
|
||||
needs_subscription = 'optional'
|
||||
remove_tags_before=dict(attrs={'class':'belka1-tlo-md'})
|
||||
remove_tags_before=dict(attrs={'class':'naglowek2'})
|
||||
#remove_tags_after=dict(name='span', attrs={'class':'naglowek-oceny'})
|
||||
remove_tags_after=dict(name='td', attrs={'class':'belka1-bot'})
|
||||
remove_tags=[dict(attrs={'class':'avatar2'}), dict(name='span', attrs={'class':'alert-oceny'}), dict(name='img', attrs={'src':['obrazki/sledz1.png', 'obrazki/print.gif', 'obrazki/mlnf.gif']}), dict(name='b', text='Dodaj komentarz'),dict(name='a', attrs={'href':'http://www.fantastyka.pl/10,1727.html'})]
|
||||
remove_tags_after=dict(name='form', attrs={'name':'form1'})
|
||||
remove_tags=[dict(attrs={'class':['avatar2', 'belka-margin', 'naglowek2']}), dict(name='span', attrs={'class':'alert-oceny'}), dict(name='img', attrs={'src':['obrazki/sledz1.png', 'obrazki/print.gif', 'obrazki/mlnf.gif']}), dict(name='b', text='Dodaj komentarz'),dict(name='a', attrs={'href':'http://www.fantastyka.pl/10,1727.html'}), dict(name='form')]
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'\<table .*?\>'), lambda match: ''),
|
||||
(re.compile(r'\<td.*?\>'), lambda match: ''),
|
||||
(re.compile(r'\<center\>'), lambda match: '')]
|
||||
|
||||
|
||||
|
||||
|
||||
def find_articles(self, url):
|
||||
articles = []
|
||||
@ -41,10 +53,10 @@ class Nowa_Fantastyka(BasicNewsRecipe):
|
||||
|
||||
return feeds
|
||||
|
||||
|
||||
def get_cover_url(self):
|
||||
soup = self.index_to_soup('http://www.fantastyka.pl/1.html')
|
||||
cover=soup.find(name='img', attrs={'class':'okladka'})
|
||||
self.cover_url=self.INDEX+ cover['src']
|
||||
soup = self.index_to_soup('http://www.e-kiosk.pl/nowa_fantastyka')
|
||||
self.cover_url='http://www.e-kiosk.pl' + soup.find(name='a', attrs={'class':'img'})['href']
|
||||
return getattr(self, 'cover_url', self.cover_url)
|
||||
|
||||
def get_browser(self):
|
||||
@ -56,3 +68,18 @@ class Nowa_Fantastyka(BasicNewsRecipe):
|
||||
br['pass'] = self.password
|
||||
br.submit()
|
||||
return br
|
||||
|
||||
def preprocess_html(self, soup):
    """Strip presentational markup from a downloaded article.

    Removes the ``style``, ``font`` and ``align`` attributes from every
    tag that carries them, turns table rows into plain ``div`` blocks and
    gives the element with class ``tytul`` an explicit heading-like style.

    :param soup: parsed article document; modified in place.
    :return: the same ``soup`` object.
    """
    # Same three passes as before, expressed as one loop over attribute names.
    for attribute in ('style', 'font', 'align'):
        for item in soup.findAll(**{attribute: True}):
            del item[attribute]

    for row in soup.findAll(name='tr'):
        row.name = 'div'

    title = soup.find(attrs={'class': 'tytul'})
    if title:
        title['style'] = 'font-size: 20px; font-weight: bold;'

    # The former ``self.log.warn(soup)`` debugging call was dropped: it wrote
    # the complete HTML of every article to the log at warning level.
    return soup
|
||||
|
31
recipes/oclab_pl.recipe
Normal file
@ -0,0 +1,31 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class OCLab(BasicNewsRecipe):
    """Recipe for OCLab.pl that joins multi-page posts into one article."""

    title = u'OCLab.pl'
    oldest_article = 7
    max_articles_per_feed = 100
    __author__ = 'fenuks'
    description = u'Portal OCLab.pl jest miejscem przyjaznym pasjonatom sprzętu komputerowego, w szczególności overclockerom, które będzie służyć im za aktualną bazę wiedzy o podkręcaniu komputera, źródło aktualnych informacji z rynku oraz opinii na temat sprzętu komputerowego.'
    category = 'IT'
    language = 'pl'
    cover_url = 'http://www.idealforum.ru/attachment.php?attachmentid=7963&d=1316008118'
    no_stylesheets = True
    keep_only_tags = [dict(id='main')]
    remove_tags_after = dict(attrs={'class': 'single-postmetadata'})
    remove_tags = [dict(attrs={'class': ['single-postmetadata', 'pagebar']})]
    feeds = [(u'Wpisy', u'http://oclab.pl/feed/')]

    def append_page(self, soup, appendtag):
        """Append the follow-up pages of a multi-page post to ``appendtag``.

        The page list is read from the ``<option>`` elements of the element
        with class ``contentjumpddl``; the first and the last option are
        skipped, exactly as before.

        :param soup: parsed first page of the article.
        :param appendtag: tag that receives the extra page content.
        """
        selector = soup.find(attrs={'class': 'contentjumpddl'})
        if not selector:
            return

        for option in selector.findAll('option')[1:-1]:
            page = self.index_to_soup(option['value'])
            entry = page.find(attrs={'class': 'single-entry'})
            if entry is None:
                # Inserting None raises inside BeautifulSoup and would abort
                # the whole article; skip pages without the expected content.
                continue
            appendtag.insert(len(appendtag.contents), entry)

        # Each appended page brings its own bottom navigation list.
        for nav in appendtag.findAll(attrs={'class': 'post-nav-bottom-list'}):
            nav.extract()

    def preprocess_html(self, soup):
        """Merge all pages of the article into the body and return ``soup``."""
        self.append_page(soup, soup.body)
        return soup
|
37
recipes/overclock_pl.recipe
Normal file
@ -0,0 +1,37 @@
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class Overclock_pl(BasicNewsRecipe):
|
||||
title = u'Overclock.pl'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
__author__ = 'fenuks'
|
||||
description = u'Vortal poświęcony tematyce hardware, kładący największy nacisk na podkręcanie / overclocking (włącznie z extreme) i chłodzenie / cooling (air cooling, water cooling, freon cooling, dry ice, liquid nitrogen).'
|
||||
category = 'IT'
|
||||
language = 'pl'
|
||||
masthead_url='http://www.overclock.pl/gfx/logo_m.png'
|
||||
cover_url='http://www.overclock.pl/gfx/logo_m.png'
|
||||
no_stylesheets = True
|
||||
remove_empty_feeds = True
|
||||
preprocess_regexps = [(re.compile(ur'<b>Komentarze do aktualności:.*?</a>', re.DOTALL), lambda match: ''), (re.compile(ur'<h3>Nawigacja</h3>', re.DOTALL), lambda match: '') ]
|
||||
keep_only_tags=[dict(name='div', attrs={'class':'news'}), dict(id='articleContent')]
|
||||
remove_tags=[dict(name='span', attrs={'class':'info'}), dict(attrs={'class':'shareit'})]
|
||||
feeds = [(u'Aktualno\u015bci', u'http://www.overclock.pl/rss.news.xml'), (u'Testy i recenzje', u'http://www.overclock.pl/rss.articles.xml')]
|
||||
|
||||
|
||||
def append_page(self, soup, appendtag):
|
||||
tag=soup.find(id='navigation')
|
||||
if tag:
|
||||
nexturl=tag.findAll('option')
|
||||
tag.extract()
|
||||
for nextpage in nexturl[2:]:
|
||||
soup2 = self.index_to_soup(nextpage['value'])
|
||||
pagetext = soup2.find(id='content')
|
||||
pos = len(appendtag.contents)
|
||||
appendtag.insert(pos, pagetext)
|
||||
rem=appendtag.find(attrs={'alt':'Pierwsza'})
|
||||
if rem:
|
||||
rem.parent.extract()
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
self.append_page(soup, soup.body)
|
||||
return soup
|
14
recipes/palmtop_pl.recipe
Normal file
@ -0,0 +1,14 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class palmtop_pl(BasicNewsRecipe):
    """Feed-only recipe for Palmtop.pl; no custom page processing."""

    # Identification
    title = u'Palmtop.pl'
    __author__ = 'fenuks'
    description = 'wortal technologii mobilnych'
    category = 'mobile'
    language = 'pl'

    # Artwork: the site logo serves as both cover and masthead.
    cover_url = 'http://cdn.g-point.biz/wp-content/themes/palmtop-new/images/header_palmtop_logo.png'
    masthead_url = 'http://cdn.g-point.biz/wp-content/themes/palmtop-new/images/header_palmtop_logo.png'

    # Download limits and clean-up
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True

    feeds = [
        (u'Newsy', u'http://palmtop.pl/feed/atom/'),
    ]
|
31
recipes/pc_arena.recipe
Normal file
@ -0,0 +1,31 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class PC_Arena(BasicNewsRecipe):
    """Recipe for PCArena news and articles with multi-page support."""

    title = u'PCArena'
    oldest_article = 18300
    max_articles_per_feed = 100
    __author__ = 'fenuks'
    description = u'Najnowsze informacje z branży IT - testy, recenzje, aktualności, rankingi, wywiady. Twoje źródło informacji o sprzęcie komputerowym.'
    category = 'IT'
    language = 'pl'
    masthead_url = 'http://pcarena.pl/public/design/frontend/images/logo.gif'
    cover_url = 'http://pcarena.pl/public/design/frontend/images/logo.gif'
    no_stylesheets = True
    keep_only_tags = [dict(attrs={'class': ['artHeader', 'art']})]
    remove_tags = [dict(attrs={'class': 'pages'})]
    feeds = [
        (u'Newsy', u'http://pcarena.pl/misc/rss/news'),
        (u'Artyku\u0142y', u'http://pcarena.pl/misc/rss/articles'),
    ]

    def append_page(self, soup, appendtag):
        """Append the follow-up pages of a multi-page article.

        Links are taken from the ``div`` with class ``pagNum``; the first
        link is skipped, as in the original code.

        :param soup: parsed first page of the article.
        :param appendtag: tag that receives the extra page content.
        """
        pager = soup.find(name='div', attrs={'class': 'pagNum'})
        if not pager:
            return

        links = pager.findAll('a')
        pager.extract()
        for link in links[1:]:
            page = self.index_to_soup('http://pcarena.pl' + link['href'])
            body = page.find(attrs={'class': 'artBody'})
            if body is None:
                # Tag.insert(None) raises; skip pages lacking the article body.
                continue
            appendtag.insert(len(appendtag.contents), body)

    def preprocess_html(self, soup):
        """Merge all pages of the article into the body and return ``soup``."""
        self.append_page(soup, soup.body)
        return soup
|
41
recipes/pc_centre_pl.recipe
Normal file
@ -0,0 +1,41 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class PC_Centre(BasicNewsRecipe):
    """Recipe for PC Centre publications and news with multi-page support."""

    title = u'PC Centre'
    oldest_article = 7
    max_articles_per_feed = 100
    __author__ = 'fenuks'
    description = u'Portal komputerowy, a w nim: testy sprzętu komputerowego, recenzje gier i oprogramowania. a także opisy produktów związanych z komputerami.'
    category = 'IT'
    language = 'pl'
    masthead_url = 'http://pccentre.pl/views/images/logo.gif'
    cover_url = 'http://pccentre.pl/views/images/logo.gif'
    no_stylesheets = True
    keep_only_tags = [dict(id='content')]
    remove_tags = [
        dict(attrs={'class': ['ikony r', 'list_of_content', 'dot accordion']}),
        dict(id='comments'),
    ]
    # The per-section feeds use the "section" query parameter; the text had
    # been mangled to the section sign, which produced unusable URLs.
    feeds = [
        (u'Publikacje', u'http://pccentre.pl/backend.php?mode=a'),
        (u'Aktualno\u015bci', u'http://pccentre.pl/backend.php'),
        (u'Sprz\u0119t komputerowy', u'http://pccentre.pl/backend.php?mode=n&section=2'),
        (u'Oprogramowanie', u'http://pccentre.pl/backend.php?mode=n&section=3'),
        (u'Gry komputerowe i konsole', u'http://pccentre.pl/backend.php?mode=n&section=4'),
        (u'Internet', u'http://pccentre.pl/backend.php?mode=n&section=7'),
        (u'Bezpiecze\u0144stwo', u'http://pccentre.pl/backend.php?mode=n&section=5'),
        (u'Multimedia', u'http://pccentre.pl/backend.php?mode=n&section=6'),
        (u'Biznes', u'http://pccentre.pl/backend.php?mode=n&section=9'),
    ]

    def append_page(self, soup, appendtag):
        """Append the follow-up pages of a multi-page article.

        Links are taken from the ``div`` with class ``pages``; the last link
        is skipped, as in the original code. Before a page is appended its
        headings, comments and navigation widgets are removed.

        :param soup: parsed first page of the article.
        :param appendtag: tag that receives the extra page content.
        """
        pager = soup.find(name='div', attrs={'class': 'pages'})
        if not pager:
            return

        # Same three removal passes as before, in the same order.
        unwanted = (
            dict(attrs={'class': ['subtitle', 'content_info', 'list_of_content', 'pages', 'social2', 'pcc_acc', 'pcc_acc_na']}),
            dict(id='comments'),
            dict(name='h1'),
        )

        links = pager.findAll('a')
        pager.extract()
        for link in links[:-1]:
            page = self.index_to_soup('http://pccentre.pl' + link['href'])
            content = page.find(id='content')
            if content is None:
                # Without the content container there is nothing to clean
                # or append; calling findAll on None would raise.
                continue
            for criteria in unwanted:
                for node in content.findAll(**criteria):
                    node.extract()
            appendtag.insert(len(appendtag.contents), content)

    def preprocess_html(self, soup):
        """Merge all pages of the article into the body and return ``soup``."""
        self.append_page(soup, soup.body)
        return soup
|
35
recipes/pc_foster.recipe
Normal file
@ -0,0 +1,35 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class PC_Foster(BasicNewsRecipe):
    """Recipe for PC Foster that follows "next page" links of reviews."""

    title = u'PC Foster'
    oldest_article = 7
    max_articles_per_feed = 100
    __author__ = 'fenuks'
    description = u'Vortal technologiczny: testy, recenzje sprzętu komputerowego i telefonów, nowinki hardware, programy i gry dla Windows. Podkręcanie, modding i Overclocking.'
    category = 'IT'
    language = 'pl'
    masthead_url = 'http://pcfoster.pl/public/images/logo.png'
    cover_url = 'http://pcfoster.pl/public/images/logo.png'
    no_stylesheets = True
    remove_empty_feeds = True
    keep_only_tags = [
        dict(id=['news_details', 'review_details']),
        dict(attrs={'class': 'pager more_top'}),
    ]
    remove_tags = [dict(name='p', attrs={'class': 'right'})]
    feeds = [(u'G\u0142\xf3wny', u'http://pcfoster.pl/public/rss/main.xml')]

    def append_page(self, soup, appendtag):
        """Follow the "next page" image links and append each page.

        The link target is read from the parent of the element whose
        ``alt`` text is 'Następna strona'.

        :param soup: parsed first page (unused; the search runs on
            ``appendtag``, as before).
        :param appendtag: tag that holds the first page and receives the
            content of the following pages.
        """
        next_marker = appendtag.find(attrs={'alt': u'Następna strona'})
        if not next_marker:
            return

        top_pager = appendtag.find(attrs={'class': 'pager more_top'})
        if top_pager is not None:
            # Previously an unguarded .extract() raised when the pager
            # container was absent.
            top_pager.extract()

        while next_marker:
            page = self.index_to_soup('http://pcfoster.pl' + next_marker.parent['href'])
            next_marker = page.find(attrs={'alt': u'Następna strona'})
            content = page.find(attrs={'class': 'content'})
            if content is None:
                # Tag.insert(None) raises; skip pages without content.
                continue
            appendtag.insert(len(appendtag.contents), content)

        for block in appendtag.findAll(attrs={'class': 'review_content double'}):
            block.extract()

    def preprocess_html(self, soup):
        """Merge all pages of the article into the body and return ``soup``."""
        self.append_page(soup, soup.body)
        return soup
|
81
recipes/polska_times.recipe
Normal file
@ -0,0 +1,81 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
class Polska_times(BasicNewsRecipe):
|
||||
title = u'Polska Times'
|
||||
__author__ = 'fenuks'
|
||||
description = u'Internetowe wydanie dziennika ogólnopolskiego Polska The Times. Najświeższe informacje: wydarzenia w kraju i na świecie, reportaże, poradniki, opinie.'
|
||||
category = 'newspaper'
|
||||
language = 'pl'
|
||||
masthead_url = 'http://s.polskatimes.pl/g/logo_naglowek/polska.gif?17'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
remove_emty_feeds= True
|
||||
no_stylesheets = True
|
||||
preprocess_regexps = [(re.compile(ur'<b>Czytaj także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur',<b>Czytaj też:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>Zobacz także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<center><h4><a.*?</a></h4></center>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TEŻ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ WIĘCEJ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TAKŻE:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>\* CZYTAJ KONIECZNIE:.*', re.DOTALL), lambda match: '</body>'), (re.compile(ur'<b>Nasze serwisy:</b>.*', re.DOTALL), lambda match: '</body>') ]
|
||||
keep_only_tags= [dict(id=['tytul-artykulu', 'kontent'])]
|
||||
remove_tags_after= dict(id='material-tagi')
|
||||
remove_tags=[dict(attrs={'id':'reklama_srodtekst_0'}), dict(attrs={'id':'material-tagi'}), dict(name='div', attrs={'class':'zakladki'}), dict(attrs={'title':u'CZYTAJ TAKŻE'}), dict(attrs={'id':'podobne'}), dict(name='a', attrs={'href':'http://www.dzienniklodzki.pl/newsletter'})]
|
||||
feeds = [(u'Fakty', u'http://polskatimes.feedsportal.com/c/32980/f/533648/index.rss'), (u'Opinie', u'http://www.polskatimes.pl/rss/opinie.xml'), (u'Sport', u'http://polskatimes.feedsportal.com/c/32980/f/533649/index.rss'), (u'Pieni\u0105dze', u'http://polskatimes.feedsportal.com/c/32980/f/533657/index.rss'), (u'Twoje finanse', u'http://www.polskatimes.pl/rss/twojefinanse.xml'), (u'Kultura', u'http://polskatimes.feedsportal.com/c/32980/f/533650/index.rss'), (u'Dodatki', u'http://www.polskatimes.pl/rss/dodatki.xml')]
|
||||
|
||||
def skip_ad_pages(self, soup):
|
||||
if 'Advertisement' in soup.title:
|
||||
nexturl=soup.find('a')['href']
|
||||
return self.index_to_soup(nexturl, raw=True)
|
||||
|
||||
def append_page(self, soup, appendtag):
|
||||
nexturl=soup.find(id='nastepna_strona')
|
||||
while nexturl:
|
||||
soup2= self.index_to_soup(nexturl['href'])
|
||||
nexturl=soup2.find(id='nastepna_strona')
|
||||
pagetext = soup2.find(id='tresc')
|
||||
for dictionary in self.remove_tags:
|
||||
v=pagetext.findAll(attrs=dictionary['attrs'])
|
||||
for delete in v:
|
||||
delete.extract()
|
||||
for b in pagetext.findAll(name='b'):
|
||||
if b.string:
|
||||
if u'CZYTAJ TEŻ' in b.string or u'Czytaj także' in b.string or u'Czytaj też' in b.string or u'Zobacz także' in b.string:
|
||||
b.extract()
|
||||
for center in pagetext.findAll(name='center'):
|
||||
if center.h4:
|
||||
if center.h4.a:
|
||||
center.extract()
|
||||
pos = len(appendtag.contents)
|
||||
appendtag.insert(pos, pagetext)
|
||||
for paginator in appendtag.findAll(attrs={'class':'stronicowanie'}):
|
||||
paginator.extract()
|
||||
|
||||
def image_article(self, soup, appendtag):
|
||||
nexturl=soup.find('a', attrs={'class':'nastepna'})
|
||||
urls=[]
|
||||
while nexturl:
|
||||
if nexturl not in urls:
|
||||
urls.append(nexturl)
|
||||
else:
|
||||
break
|
||||
soup2= self.index_to_soup('http://www.polskatimes.pl/artykul/' + nexturl['href'])
|
||||
nexturl=soup2.find('a', attrs={'class':'nastepna'})
|
||||
if nexturl in urls:
|
||||
break;
|
||||
pagetext = soup2.find(id='galeria-material')
|
||||
pos = len(appendtag.contents)
|
||||
appendtag.insert(pos, '<br />')
|
||||
pos = len(appendtag.contents)
|
||||
appendtag.insert(pos, pagetext)
|
||||
for rem in appendtag.findAll(attrs={'class':['galeriaNawigator', 'miniaturyPojemnik']}):
|
||||
rem.extract()
|
||||
for paginator in appendtag.findAll(attrs={'class':'stronicowanie'}):
|
||||
paginator.extract()
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
if soup.find('a', attrs={'class':'nastepna'}):
|
||||
self.image_article(soup, soup.body)
|
||||
elif soup.find(id='nastepna_strona'):
|
||||
self.append_page(soup, soup.body)
|
||||
return soup
|
||||
|
||||
|
||||
def get_cover_url(self):
|
||||
soup = self.index_to_soup('http://www.prasa24.pl/gazeta/metropolia-warszawska/')
|
||||
self.cover_url=soup.find(id='pojemnik').img['src']
|
||||
return getattr(self, 'cover_url', self.cover_url)
|
33
recipes/pure_pc.recipe
Normal file
@ -0,0 +1,33 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class PurePC(BasicNewsRecipe):
    """Recipe for PurePC that follows the "pager-next" links of articles."""

    title = u'PurePC'
    oldest_article = 7
    max_articles_per_feed = 100
    __author__ = 'fenuks'
    description = u'Artykuły, aktualności, sprzęt, forum, chłodzenie, modding, urządzenia mobilne - wszystko w jednym miejscu.'
    category = 'IT'
    language = 'pl'
    masthead_url = 'http://www.purepc.pl/themes/new/images/purepc.jpg'
    cover_url = 'http://www.purepc.pl/themes/new/images/purepc.jpg'
    no_stylesheets = True
    keep_only_tags = [dict(id='content')]
    remove_tags_after = dict(attrs={'class': 'fivestar-widget'})
    remove_tags = [
        dict(id='navigator'),
        dict(attrs={'class': ['box-tools', 'fivestar-widget', 'PageMenuList']}),
    ]
    feeds = [(u'Wiadomo\u015bci', u'http://www.purepc.pl/node/feed')]

    def append_page(self, soup, appendtag):
        """Append the following pages of a paginated article.

        Pages are chained through the link inside the element with class
        ``pager-next``.

        :param soup: parsed first page (unused; the search runs on
            ``appendtag``, as before).
        :param appendtag: tag that holds the first page and receives the
            content of the following pages.
        """
        next_item = appendtag.find(attrs={'class': 'pager-next'})
        if not next_item:
            return

        while next_item:
            page = self.index_to_soup('http://www.purepc.pl' + next_item.a['href'])
            next_item = page.find(attrs={'class': 'pager-next'})
            article = page.find(attrs={'class': 'article'})
            if article is None:
                # Tag.insert(None) raises; skip pages without article text.
                continue
            appendtag.insert(len(appendtag.contents), article)

        # Pagers and widgets are only stripped when pagination was present,
        # matching the previous behaviour.
        for leftover in appendtag.findAll(attrs={'class': ['PageMenuList', 'pager', 'fivestar-widget']}):
            leftover.extract()

    def preprocess_html(self, soup):
        """Merge all pages of the article into the body and return ``soup``."""
        self.append_page(soup, soup.body)
        return soup
|
@ -1,14 +1,16 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
import re
|
||||
class Tablety_pl(BasicNewsRecipe):
|
||||
title = u'Tablety.pl'
|
||||
__author__ = 'fenuks'
|
||||
description = u'tablety.pl - latest tablet news'
|
||||
masthead_url= 'http://www.tablety.pl/wp-content/themes/kolektyw/img/logo.png'
|
||||
cover_url = 'http://www.tablety.pl/wp-content/themes/kolektyw/img/logo.png'
|
||||
category = 'IT'
|
||||
language = 'pl'
|
||||
oldest_article = 8
|
||||
max_articles_per_feed = 100
|
||||
preprocess_regexps = [(re.compile(ur'<p><strong>Przeczytaj także.*?</a></strong></p>', re.DOTALL), lambda match: ''), (re.compile(ur'<p><strong>Przeczytaj koniecznie.*?</a></strong></p>', re.DOTALL), lambda match: '')]
|
||||
remove_tags_before=dict(name="h1", attrs={'class':'entry-title'})
|
||||
remove_tags_after=dict(name="div", attrs={'class':'snap_nopreview sharing robots-nocontent'})
|
||||
remove_tags=[dict(name='div', attrs={'class':'snap_nopreview sharing robots-nocontent'})]
|
||||
|
37
recipes/tanuki.recipe
Normal file
@ -0,0 +1,37 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
class tanuki(BasicNewsRecipe):
|
||||
title = u'Tanuki'
|
||||
oldest_article = 7
|
||||
__author__ = 'fenuks'
|
||||
category = 'anime, manga'
|
||||
language = 'pl'
|
||||
max_articles_per_feed = 100
|
||||
encoding='utf-8'
|
||||
extra_css= 'ul {list-style: none; padding: 0; margin: 0;} .kadr{float: left;} .dwazdania {float: right;}'
|
||||
preprocess_regexps = [(re.compile(ur'<h3><a class="screen".*?</h3>', re.DOTALL), lambda match: ''), (re.compile(ur'<div><a href="/strony/((manga)|(anime))/[0-9]+?/oceny(\-redakcji){0,1}">Zobacz jak ocenili</a></div>', re.DOTALL), lambda match: '')]
|
||||
remove_empty_feeds= True
|
||||
no_stylesheets = True
|
||||
keep_only_tags=[dict(attrs={'class':['animename', 'storyname', 'nextarrow','sideinfov', 'sidelinfov', 'sideinfo', 'sidelinfo']}), dict(name='table', attrs={'summary':'Technikalia'}), dict(attrs={'class':['chaptername','copycat']}), dict(id='rightcolumn'), dict(attrs={'class':['headn_tt', 'subtable']})]
|
||||
remove_tags=[dict(name='div', attrs={'class':'screen'}), dict(id='randomtoplist'), dict(attrs={'class':'note'})]
|
||||
feeds = [(u'Anime', u'http://anime.tanuki.pl/rss_anime.xml'), (u'Manga', u'http://manga.tanuki.pl/rss_manga.xml'), (u'Tomiki', u'http://manga.tanuki.pl/rss_mangabooks.xml'), (u'Artyku\u0142y', u'http://czytelnia.tanuki.pl/rss_czytelnia_artykuly.xml'), (u'Opowiadania', u'http://czytelnia.tanuki.pl/rss_czytelnia.xml')]
|
||||
|
||||
|
||||
def append_page(self, soup, appendtag):
|
||||
nexturl= appendtag.find(attrs={'class':'nextarrow'})
|
||||
if nexturl:
|
||||
while nexturl:
|
||||
soup2 = self.index_to_soup('http://czytelnia.tanuki.pl'+ nexturl['href'])
|
||||
nexturl=soup2.find(attrs={'class':'nextarrow'})
|
||||
pagetext = soup2.find(attrs={'class':['chaptername', 'copycat']})
|
||||
pos = len(appendtag.contents)
|
||||
appendtag.insert(pos, pagetext)
|
||||
pagetext = soup2.find(attrs={'class':'copycat'})
|
||||
pos = len(appendtag.contents)
|
||||
appendtag.insert(pos, pagetext)
|
||||
for r in appendtag.findAll(attrs={'class':'nextarrow'}):
|
||||
r.extract()
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
self.append_page(soup, soup.body)
|
||||
return soup
|
@ -1,49 +1,57 @@
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1268409464(BasicNewsRecipe):
|
||||
title = u'The Sun'
|
||||
__author__ = 'Chaz Ralph'
|
||||
description = 'News from The Sun'
|
||||
class AdvancedUserRecipe1325006965(BasicNewsRecipe):
|
||||
|
||||
title = u'The Sun UK'
|
||||
cover_url = 'http://www.thesun.co.uk/img/global/new-masthead-logo.png'
|
||||
|
||||
description = 'A Recipe for The Sun tabloid UK - uses feed43'
|
||||
__author__ = 'Dave Asbury'
|
||||
# last updated 20/2/12
|
||||
language = 'en_GB'
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 100
|
||||
language = 'en'
|
||||
max_articles_per_feed = 15
|
||||
remove_empty_feeds = True
|
||||
no_stylesheets = True
|
||||
extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt }'
|
||||
encoding= 'iso-8859-1'
|
||||
remove_javascript = True
|
||||
|
||||
masthead_url = 'http://www.thesun.co.uk/sol/img/global/Sun-logo.gif'
|
||||
encoding = 'cp1251'
|
||||
|
||||
encoding = 'cp1252'
|
||||
remove_empty_feeds = True
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
|
||||
extra_css = '''
|
||||
body{ text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
|
||||
'''
|
||||
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'<div class="foot-copyright".*?</div>', re.IGNORECASE | re.DOTALL), lambda match: '')]
|
||||
|
||||
keep_only_tags = [
|
||||
dict(id='column-print')
|
||||
dict(name='h1'),dict(name='h2',attrs={'class' : 'medium centered'}),
|
||||
dict(name='div',attrs={'class' : 'text-center'}),
|
||||
dict(name='div',attrs={'id' : 'bodyText'})
|
||||
# dict(name='p')
|
||||
]
|
||||
|
||||
remove_tags=[
|
||||
#dict(name='head'),
|
||||
dict(attrs={'class' : ['mystery-meat-link','ltbx-container','ltbx-var ltbx-hbxpn','ltbx-var ltbx-nav-loop','ltbx-var ltbx-url']}),
|
||||
dict(name='div',attrs={'class' : 'cf'}),
|
||||
dict(attrs={'title' : 'download flash'}),
|
||||
dict(attrs={'style' : 'padding: 5px'})
|
||||
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u'News','http://feed43.com/2517447382644748.xml'),
|
||||
(u'Sport', u'http://feed43.com/4283846255668687.xml'),
|
||||
(u'Bizarre', u'http://feed43.com/0233840304242011.xml'),
|
||||
(u'Film',u'http://feed43.com/1307545221226200.xml'),
|
||||
(u'Music',u'http://feed43.com/1701513435064132.xml'),
|
||||
(u'Sun Woman',u'http://feed43.com/0022626854226453.xml'),
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':[
|
||||
'clear text-center small padding-left-right-5 text-999 padding-top-5 padding-bottom-10 grey-solid-line',
|
||||
'clear width-625 bg-fff padding-top-10'
|
||||
]}),
|
||||
dict(name='video'),
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
h1 = soup.find('h1')
|
||||
if h1 is not None:
|
||||
text = self.tag_to_string(h1)
|
||||
nh = Tag(soup, 'h1')
|
||||
nh.insert(0, text)
|
||||
h1.replaceWith(nh)
|
||||
|
||||
return soup
|
||||
|
||||
|
||||
feeds = [(u'News', u'http://www.thesun.co.uk/sol/homepage/feeds/rss/article312900.ece')
|
||||
,(u'Sport', u'http://www.thesun.co.uk/sol/homepage/feeds/rss/article247732.ece')
|
||||
,(u'Football', u'http://www.thesun.co.uk/sol/homepage/feeds/rss/article247739.ece')
|
||||
,(u'Gizmo', u'http://www.thesun.co.uk/sol/homepage/feeds/rss/article247829.ece')
|
||||
,(u'Bizarre', u'http://www.thesun.co.uk/sol/homepage/feeds/rss/article247767.ece')]
|
||||
|
||||
def print_version(self, url):
|
||||
return re.sub(r'\?OTC-RSS&ATTR=[-a-zA-Z]+', '?print=yes', url)
|
||||
|
||||
|
||||
|
24
recipes/tvn24.recipe
Normal file
@ -0,0 +1,24 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class tvn24(BasicNewsRecipe):
    """Recipe for the TVN24 news portal."""

    title = u'TVN24'
    oldest_article = 7
    max_articles_per_feed = 100
    __author__ = 'fenuks'
    description = u'Sport, Biznes, Gospodarka, Informacje, Wiadomości Zawsze aktualne wiadomości z Polski i ze świata'
    category = 'news'
    language = 'pl'
    masthead_url = 'http://www.tvn24.pl/_d/topmenu/logo2.gif'
    cover_url = 'http://www.tvn24.pl/_d/topmenu/logo2.gif'
    extra_css = 'ul {list-style: none; padding: 0; margin: 0;} li {float: left;margin: 0 0.15em;}'
    remove_empty_feeds = True
    remove_javascript = True
    no_stylesheets = True
    keep_only_tags = [
        dict(id='tvn24_wiadomosci_detal'),
        dict(name='h1', attrs={'class': 'standardHeader1'}),
        dict(attrs={'class': ['date60m rd5', 'imageBackground fl rd7', 'contentFromCMS']}),
    ]
    remove_tags_after = dict(name='div', attrs={'class': 'socialBoxesBottom'})
    remove_tags = [
        dict(attrs={'class': ['tagi_detal', 'socialBoxesBottom', 'twitterBox', 'commentsInfo', 'textSize', 'obj_ukrytydruk obj_ramka1_r', 'related newsNews align-right', 'box', 'newsUserList', 'watchMaterial text']}),
    ]
    # The "Polska" feed previously lacked the "http://" scheme, unlike every
    # other entry, so it was not a valid absolute URL.
    feeds = [
        (u'Najnowsze', u'http://www.tvn24.pl/najnowsze.xml'),
        (u'Polska', u'http://www.tvn24.pl/polska.xml'),
        (u'\u015awiat', u'http://www.tvn24.pl/swiat.xml'),
        (u'Sport', u'http://www.tvn24.pl/sport.xml'),
        (u'Biznes', u'http://www.tvn24.pl/biznes.xml'),
        (u'Meteo', u'http://www.tvn24.pl/meteo.xml'),
        (u'Micha\u0142ki', u'http://www.tvn24.pl/michalki.xml'),
        (u'Kultura', u'http://www.tvn24.pl/kultura.xml'),
    ]

    def preprocess_html(self, soup):
        """Remove inline ``style`` attributes from every tag and return ``soup``."""
        for styled in soup.findAll(style=True):
            del styled['style']
        return soup
|
@ -4,10 +4,12 @@ class Ubuntu_pl(BasicNewsRecipe):
|
||||
title = u'UBUNTU.pl'
|
||||
__author__ = 'fenuks'
|
||||
description = 'UBUNTU.pl - polish ubuntu community site'
|
||||
masthead_url= 'http://ubuntu.pl/img/logo.jpg'
|
||||
cover_url = 'http://ubuntu.pl/img/logo.jpg'
|
||||
category = 'linux, IT'
|
||||
language = 'pl'
|
||||
no_stylesheets = True
|
||||
remove_empty_feeds = True
|
||||
oldest_article = 8
|
||||
max_articles_per_feed = 100
|
||||
extra_css = '#main {text-align:left;}'
|
||||
|
39
recipes/webhosting_pl.recipe
Normal file
@ -0,0 +1,39 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class webhosting_pl(BasicNewsRecipe):
    """Calibre news recipe for the Polish web-hosting portal Webhosting.pl."""

    title = u'Webhosting.pl'
    __author__ = 'fenuks'
    description = 'Webhosting.pl to pierwszy na polskim rynku serwis poruszający w szerokim aspekcie tematy związane z hostingiem, globalną Siecią i usługami internetowymi. Głównym celem przedsięwzięcia jest dostarczanie przydatnej i bogatej merytorycznie wiedzy osobom, które chcą tworzyć i efektywnie wykorzystywać współczesny Internet.'
    category = 'web'
    language = 'pl'
    cover_url = 'http://webhosting.pl/images/logo.png'
    masthead_url = 'http://webhosting.pl/images/logo.png'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
    # Tag filters left disabled by the recipe author; articles are fetched
    # through print_version() below instead.
    #keep_only_tags= [dict(name='div', attrs={'class':'content_article'}), dict(attrs={'class':'paging'})]
    #remove_tags=[dict(attrs={'class':['tags', 'wykop', 'facebook_button_count', 'article_bottom']})]
    feeds = [
        (u'Newsy', u'http://webhosting.pl/feed/rss/an'),
        (u'Artyku\u0142y', u'http://webhosting.pl/feed/rss/aa'),
        (u'Software', u'http://webhosting.pl/feed/rss/n/12'),
        (u'Internet', u'http://webhosting.pl/feed/rss/n/9'),
        (u'Biznes', u'http://webhosting.pl/feed/rss/n/13'),
        (u'Bezpiecze\u0144stwo', u'http://webhosting.pl/feed/rss/n/10'),
        (u'Blogi', u'http://webhosting.pl/feed/rss/ab'),
        (u'Programowanie', u'http://webhosting.pl/feed/rss/n/8'),
        (u'Kursy', u'http://webhosting.pl/feed/rss/n/11'),
        (u'Tips&Tricks', u'http://webhosting.pl/feed/rss/n/15'),
        (u'Imprezy', u'http://webhosting.pl/feed/rss/n/22'),
        (u'Wywiady', u'http://webhosting.pl/feed/rss/n/24'),
        (u'Porady', u'http://webhosting.pl/feed/rss/n/3027'),
        (u'Znalezione w sieci', u'http://webhosting.pl/feed/rss/n/6804'),
        (u'Dev area', u'http://webhosting.pl/feed/rss/n/24504'),
        (u"Webmaster's blog", u'http://webhosting.pl/feed/rss/n/29195'),
        (u'Domeny', u'http://webhosting.pl/feed/rss/n/11513'),
        (u'Praktyka', u'http://webhosting.pl/feed/rss/n/2'),
        (u'Serwery', u'http://webhosting.pl/feed/rss/n/11514'),
        (u'Inne', u'http://webhosting.pl/feed/rss/n/24811'),
        (u'Marketing', u'http://webhosting.pl/feed/rss/n/11535'),
    ]

    def print_version(self, url):
        """Return the print-view address for an article URL.

        ``/print`` is inserted directly after the first ``webhosting.pl``
        in ``url``. Only the first occurrence is rewritten, so a later
        ``webhosting.pl`` in the path or query string is left intact.

        :param url: article URL taken from the feed
        :return: URL with ``webhosting.pl`` replaced by ``webhosting.pl/print``
        """
        return url.replace('webhosting.pl', 'webhosting.pl/print', 1)
|
@ -190,3 +190,16 @@ def get_windows_username():
|
||||
return buf.value
|
||||
|
||||
return get_unicode_windows_env_var(u'USERNAME')
|
||||
|
||||
def get_windows_temp_path():
    """Return the Windows temporary directory as a unicode string.

    Calls ``GetTempPathW`` through ctypes twice: once with a zero-length
    buffer to obtain the size to allocate, then again to fill the buffer.
    A single trailing backslash is removed from the result.

    :return: the temp path, or ``None`` if the first call returns 0 or the
             resulting path is empty
    """
    import ctypes
    get_temp_path = ctypes.windll.kernel32.GetTempPathW
    n = get_temp_path(0, None)
    if n == 0:
        return None
    buf = ctypes.create_unicode_buffer(u'\0'*n)
    get_temp_path(n, buf)
    ans = buf.value
    # endswith() rather than ans[-1]: if the second call left the buffer
    # empty, indexing would raise IndexError instead of returning None.
    if ans.endswith(u'\\'):
        ans = ans[:-1]
    return ans if ans else None
|
||||
|
||||
|
@ -192,9 +192,13 @@ class InputFormatPlugin(Plugin):
|
||||
|
||||
def __call__(self, stream, options, file_ext, log,
|
||||
accelerators, output_dir):
|
||||
log('InputFormatPlugin: %s running'%self.name)
|
||||
if hasattr(stream, 'name'):
|
||||
log('on', stream.name)
|
||||
try:
|
||||
log('InputFormatPlugin: %s running'%self.name)
|
||||
if hasattr(stream, 'name'):
|
||||
log('on', stream.name)
|
||||
except:
|
||||
# In case stdout is broken
|
||||
pass
|
||||
|
||||
with CurrentDir(output_dir):
|
||||
for x in os.listdir('.'):
|
||||
|
@ -184,14 +184,14 @@ class ANDROID(USBMS):
|
||||
'ALPANDIGITAL', 'ANDROID_MID', 'VTAB1008', 'EMX51_BBG_ANDROI',
|
||||
'UMS', '.K080', 'P990', 'LTE', 'MB853', 'GT-S5660_CARD', 'A107',
|
||||
'GT-I9003_CARD', 'XT912', 'FILE-CD_GADGET', 'RK29_SDK', 'MB855',
|
||||
'XT910', 'BOOK_A10', 'USB_2.0_DRIVER']
|
||||
'XT910', 'BOOK_A10', 'USB_2.0_DRIVER', 'I9100T']
|
||||
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
|
||||
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
|
||||
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
|
||||
'__UMS_COMPOSITE', 'SGH-I997_CARD', 'MB870', 'ALPANDIGITAL',
|
||||
'ANDROID_MID', 'P990_SD_CARD', '.K080', 'LTE_CARD', 'MB853',
|
||||
'A1-07___C0541A4F', 'XT912', 'MB855', 'XT910', 'BOOK_A10_CARD',
|
||||
'USB_2.0_DRIVER']
|
||||
'USB_2.0_DRIVER', 'I9100T']
|
||||
|
||||
OSX_MAIN_MEM = 'Android Device Main Memory'
|
||||
|
||||
|
@ -153,7 +153,9 @@ def get_metadata(stream):
|
||||
mi = MetaInformation(None, [])
|
||||
if data.has_key('title'):
|
||||
mi.title = data['title']
|
||||
if data.has_key('creator'):
|
||||
if data.get('initial-creator', '').strip():
|
||||
mi.authors = string_to_authors(data['initial-creator'])
|
||||
elif data.has_key('creator'):
|
||||
mi.authors = string_to_authors(data['creator'])
|
||||
if data.has_key('description'):
|
||||
mi.comments = data['description']
|
||||
|
@ -6,7 +6,6 @@ __license__ = 'GPL 3'
|
||||
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import re
|
||||
import urllib
|
||||
from contextlib import closing
|
||||
|
||||
|
@ -10,8 +10,6 @@ __docformat__ = 'restructuredtext en'
|
||||
|
||||
import traceback, cPickle, copy
|
||||
from itertools import repeat
|
||||
from collections import defaultdict
|
||||
from functools import partial
|
||||
|
||||
from PyQt4.Qt import (QAbstractItemModel, QIcon, QVariant, QFont, Qt,
|
||||
QMimeData, QModelIndex, pyqtSignal, QObject)
|
||||
|
@ -16,7 +16,7 @@ from PyQt4.Qt import (QWizard, QWizardPage, QPixmap, Qt, QAbstractListModel,
|
||||
from calibre import __appname__, patheq
|
||||
from calibre.library.database2 import LibraryDatabase2
|
||||
from calibre.library.move import MoveLibrary
|
||||
from calibre.constants import filesystem_encoding, iswindows
|
||||
from calibre.constants import filesystem_encoding, iswindows, plugins
|
||||
from calibre.gui2.wizard.send_email import smtp_prefs
|
||||
from calibre.gui2.wizard.device_ui import Ui_WizardPage as DeviceUI
|
||||
from calibre.gui2.wizard.library_ui import Ui_WizardPage as LibraryUI
|
||||
@ -30,6 +30,9 @@ from calibre.gui2 import NONE, choose_dir, error_dialog
|
||||
from calibre.gui2.dialogs.progress import ProgressDialog
|
||||
from calibre.customize.ui import device_plugins
|
||||
|
||||
if iswindows:
|
||||
winutil = plugins['winutil'][0]
|
||||
|
||||
# Devices {{{
|
||||
|
||||
class Device(object):
|
||||
@ -302,13 +305,13 @@ class HanlinV5(HanlinV3):
|
||||
class BeBook(HanlinV3):
|
||||
|
||||
name = 'BeBook'
|
||||
manufacturer = 'Endless Ideas'
|
||||
manufacturer = 'BeBook'
|
||||
id = 'bebook'
|
||||
|
||||
class BeBookMini(HanlinV5):
|
||||
|
||||
name = 'BeBook Mini'
|
||||
manufacturer = 'Endless Ideas'
|
||||
manufacturer = 'BeBook'
|
||||
id = 'bebook_mini'
|
||||
|
||||
class EZReader(HanlinV3):
|
||||
@ -420,9 +423,9 @@ class KindlePage(QWizardPage, KindleUI):
|
||||
def commit(self):
|
||||
x = unicode(self.to_address.text()).strip()
|
||||
parts = x.split('@')
|
||||
if len(parts) < 2 or not parts[0]: return
|
||||
|
||||
if self.send_email_widget.set_email_settings(True):
|
||||
if (self.send_email_widget.set_email_settings(True) and len(parts) >= 2
|
||||
and parts[0]):
|
||||
conf = smtp_prefs()
|
||||
accounts = conf.parse().accounts
|
||||
if not accounts: accounts = {}
|
||||
@ -751,19 +754,20 @@ class LibraryPage(QWizardPage, LibraryUI):
|
||||
self.default_library_name = None
|
||||
if not lp:
|
||||
fname = _('Calibre Library')
|
||||
if isinstance(fname, unicode):
|
||||
try:
|
||||
fname = fname.encode(filesystem_encoding)
|
||||
except:
|
||||
fname = 'Calibre Library'
|
||||
lp = os.path.expanduser('~'+os.sep+fname)
|
||||
base = os.path.expanduser(u'~')
|
||||
if iswindows:
|
||||
x = winutil.special_folder_path(winutil.CSIDL_PERSONAL)
|
||||
if x and os.access(x, os.W_OK):
|
||||
base = x
|
||||
|
||||
lp = os.path.join(base, fname)
|
||||
self.default_library_name = lp
|
||||
if not os.path.exists(lp):
|
||||
try:
|
||||
os.makedirs(lp)
|
||||
except:
|
||||
traceback.print_exc()
|
||||
lp = os.path.expanduser('~')
|
||||
lp = os.path.expanduser(u'~')
|
||||
self.location.setText(lp)
|
||||
|
||||
def isComplete(self):
|
||||
@ -779,12 +783,10 @@ class LibraryPage(QWizardPage, LibraryUI):
|
||||
oldloc = prefs['library_path']
|
||||
newloc = unicode(self.location.text())
|
||||
try:
|
||||
newloce = newloc.encode(filesystem_encoding)
|
||||
if self.default_library_name is not None and \
|
||||
os.path.exists(self.default_library_name) and \
|
||||
not os.listdir(self.default_library_name) and \
|
||||
newloce != self.default_library_name:
|
||||
os.rmdir(self.default_library_name)
|
||||
dln = self.default_library_name
|
||||
if (dln and os.path.exists(dln) and not os.listdir(dln) and newloc
|
||||
!= dln):
|
||||
os.rmdir(dln)
|
||||
except:
|
||||
pass
|
||||
if not os.path.exists(newloc):
|
||||
|
@ -7,7 +7,8 @@ being closed.
|
||||
"""
|
||||
import tempfile, os, atexit, binascii, cPickle
|
||||
|
||||
from calibre.constants import __version__, __appname__
|
||||
from calibre.constants import (__version__, __appname__,
|
||||
get_unicode_windows_env_var, iswindows, get_windows_temp_path)
|
||||
|
||||
def cleanup(path):
|
||||
try:
|
||||
@ -47,7 +48,18 @@ def base_dir():
|
||||
_base_dir = td
|
||||
else:
|
||||
base = os.environ.get('CALIBRE_TEMP_DIR', None)
|
||||
if base is not None and iswindows:
|
||||
base = get_unicode_windows_env_var('CALIBRE_TEMP_DIR')
|
||||
prefix = app_prefix(u'tmp_')
|
||||
if base is None and iswindows:
|
||||
# On windows always use a unicode temp path, as for some
|
||||
# localized (east asian) windows builds, there's no reliable
|
||||
# way to escalate to unicode only when needed. See
|
||||
# https://bugs.launchpad.net/bugs/937389 Hopefully, by now, the
|
||||
# rest of calibre can deal with unicode temp paths. We'll leave
|
||||
# temp paths as bytestring on Unix, as the temp dir on unix is
|
||||
# very rarely non ascii anyway.
|
||||
base = get_windows_temp_path()
|
||||
try:
|
||||
# First try an ascii path as that is what was done historically
|
||||
# and we don't want to break working code
|
||||
@ -66,7 +78,9 @@ def base_dir():
|
||||
def _make_file(suffix, prefix, base):
|
||||
try:
|
||||
fd, name = tempfile.mkstemp(suffix, prefix, dir=base)
|
||||
except UnicodeDecodeError:
|
||||
except (UnicodeDecodeError, OSError):
|
||||
# On some windows systems, we get an OSError because base is not
|
||||
# unicode and windows cannot find the path pointed to by base
|
||||
global _base_dir
|
||||
from calibre.constants import filesystem_encoding
|
||||
base_dir()
|
||||
@ -79,7 +93,9 @@ def _make_file(suffix, prefix, base):
|
||||
def _make_dir(suffix, prefix, base):
|
||||
try:
|
||||
tdir = tempfile.mkdtemp(suffix, prefix, base)
|
||||
except ValueError:
|
||||
except (ValueError, OSError):
|
||||
# On some windows systems, we get an OSError because base is not
|
||||
# unicode and windows cannot find the path pointed to by base
|
||||
global _base_dir
|
||||
from calibre.constants import filesystem_encoding
|
||||
base_dir()
|
||||
|
@ -4,20 +4,14 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
'''
|
||||
Builtin recipes.
|
||||
'''
|
||||
import re, imp, inspect, time, os
|
||||
from calibre.web.feeds.news import BasicNewsRecipe, CustomIndexRecipe, \
|
||||
AutomaticNewsRecipe, CalibrePeriodical
|
||||
import re, time, io
|
||||
from calibre.web.feeds.news import (BasicNewsRecipe, CustomIndexRecipe,
|
||||
AutomaticNewsRecipe, CalibrePeriodical)
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
from calibre.ptempfile import PersistentTemporaryDirectory
|
||||
from calibre import __appname__, english_sort
|
||||
from calibre.utils.config import JSONConfig
|
||||
|
||||
BeautifulSoup, time, english_sort
|
||||
|
||||
basic_recipes = (BasicNewsRecipe, AutomaticNewsRecipe, CustomIndexRecipe,
|
||||
CalibrePeriodical)
|
||||
_tdir = None
|
||||
_crep = 0
|
||||
|
||||
custom_recipes = JSONConfig('custom_recipes/index.json')
|
||||
|
||||
@ -28,39 +22,33 @@ def custom_recipe_filename(id_, title):
|
||||
|
||||
def compile_recipe(src):
|
||||
'''
|
||||
Compile the code in src and return the first object that is a recipe or profile.
|
||||
@param src: Python source code
|
||||
@type src: string
|
||||
@return: Recipe class or None, if no such class was found in C{src}
|
||||
Compile the code in src and return a recipe object, if found.
|
||||
|
||||
:param src: Python source code as bytestring or unicode object
|
||||
|
||||
:return: Recipe class or None, if no such class was found in src
|
||||
'''
|
||||
global _tdir, _crep
|
||||
if _tdir is None or not os.path.exists(_tdir):
|
||||
_tdir = PersistentTemporaryDirectory('_recipes')
|
||||
temp = os.path.join(_tdir, 'recipe%d.py'%_crep)
|
||||
_crep += 1
|
||||
if not isinstance(src, unicode):
|
||||
match = re.search(r'coding[:=]\s*([-\w.]+)', src[:200])
|
||||
enc = match.group(1) if match else 'utf-8'
|
||||
src = src.decode(enc)
|
||||
src = re.sub(r'from __future__.*', '', src)
|
||||
f = open(temp, 'wb')
|
||||
src = 'from %s.web.feeds.news import BasicNewsRecipe, AutomaticNewsRecipe\n'%__appname__ + src
|
||||
src = '# coding: utf-8\n' + src
|
||||
src = 'from __future__ import with_statement\n' + src
|
||||
# Python complains if there is a coding declaration in a unicode string
|
||||
src = re.sub(r'^#.*coding\s*[:=]\s*([-\w.]+)', '#', src, flags=re.MULTILINE)
|
||||
# Translate newlines to \n
|
||||
src = io.StringIO(src, newline=None).getvalue()
|
||||
|
||||
src = src.replace('from libprs500', 'from calibre').encode('utf-8')
|
||||
f.write(src)
|
||||
f.close()
|
||||
module = imp.find_module(os.path.splitext(os.path.basename(temp))[0],
|
||||
[os.path.dirname(temp)])
|
||||
module = imp.load_module(os.path.splitext(os.path.basename(temp))[0], *module)
|
||||
classes = inspect.getmembers(module,
|
||||
lambda x : inspect.isclass(x) and \
|
||||
issubclass(x, (BasicNewsRecipe,)) and \
|
||||
x not in basic_recipes)
|
||||
if not classes:
|
||||
return None
|
||||
namespace = {
|
||||
'BasicNewsRecipe':BasicNewsRecipe,
|
||||
'AutomaticNewsRecipe':AutomaticNewsRecipe,
|
||||
'time':time, 're':re,
|
||||
'BeautifulSoup':BeautifulSoup
|
||||
}
|
||||
exec src in namespace
|
||||
|
||||
return classes[0][1]
|
||||
for x in namespace.itervalues():
|
||||
if (isinstance(x, type) and issubclass(x, BasicNewsRecipe) and x not
|
||||
in basic_recipes):
|
||||
return x
|
||||
|
||||
return None
|
||||
|
||||
|