mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge from trunk
This commit is contained in:
commit
568ac2a2d1
BIN
resources/images/news/hitro.png
Normal file
BIN
resources/images/news/hitro.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 521 B |
BIN
resources/images/news/kamikaze.png
Normal file
BIN
resources/images/news/kamikaze.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 262 B |
BIN
resources/images/news/trombon.png
Normal file
BIN
resources/images/news/trombon.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 375 B |
BIN
resources/images/news/wallstreetro.png
Normal file
BIN
resources/images/news/wallstreetro.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 768 B |
49
resources/recipes/el_pais_babelia.recipe
Normal file
49
resources/recipes/el_pais_babelia.recipe
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class ElPaisBabelia(BasicNewsRecipe):
    '''
    Recipe for the Babelia supplement of El Pais.

    The article list is built by scraping the page at ``INDEX`` (see
    ``parse_index``) rather than by reading a feed.
    '''

    title = 'El Pais Babelia'
    __author__ = 'oneillpt'
    description = 'El Pais Babelia'
    INDEX = 'http://www.elpais.com/suple/babelia/'
    language = 'es'

    remove_tags_before = dict(name='div', attrs={'class':'estructura_2col'})
    # NOTE(review): the other recipes use ``keep_only_tags``; ``keep_tags`` is
    # presumably not read by the framework, so this filter looks inert.
    # Left unchanged because enabling it would alter the output - confirm.
    keep_tags = [dict(name='div', attrs={'class':'estructura_2col'})]
    remove_tags = [
        dict(name='div', attrs={'class':'votos estirar'}),
        dict(name='div', attrs={'id':'utilidades'}),
        dict(name='div', attrs={'class':'info_relacionada'}),
        dict(name='div', attrs={'class':'mod_apoyo'}),
        dict(name='div', attrs={'class':'contorno_f'}),
        dict(name='div', attrs={'class':'pestanias'}),
        dict(name='div', attrs={'class':'otros_webs'}),
        dict(name='div', attrs={'id':'pie'}),
    ]
    #no_stylesheets = True
    remove_javascript = True

    def parse_index(self):
        '''
        Scrape the supplement index page into a list of feeds.

        Every ``div`` with class ``contenedor_nuevo`` is one section, titled
        by the text of its first ``h1``. Sections that yield no articles are
        omitted.

        :return: list of ``(section_title, articles)`` tuples, where
                 ``articles`` is a list of ``{'title': ..., 'url': ...}``
                 dicts.
        '''
        # NOTE(review): the nesting below was reconstructed from a view in
        # which indentation was lost. It assumes only site-relative links
        # whose anchor carries a class are articles - confirm.
        feeds = []
        soup = self.index_to_soup(self.INDEX)
        for section in soup.findAll('div', attrs={'class':'contenedor_nuevo'}):
            section_title = self.tag_to_string(section.find('h1'))
            articles = []
            for post in section.findAll('a', href=True):
                url = post['href']
                if not url.startswith('/'):
                    continue
                # NOTE(review): host differs from INDEX (elpais.es vs
                # elpais.com); kept as in the original - confirm.
                url = 'http://www.elpais.es'+url
                title = self.tag_to_string(post)
                # Read the anchor's own class attribute. Searching the
                # serialized tag for 'class=' also matched nested tags, and
                # post['class'] then raised KeyError on a class-less anchor.
                klass = post.get('class', '')
                if not klass:
                    continue
                self.log()
                self.log('--> post: ', post)
                self.log('--> url: ', url)
                self.log('--> title: ', title)
                self.log('--> class: ', klass)
                articles.append({'title':title, 'url':url})
            if articles:
                feeds.append((section_title, articles))
        return feeds
|
||||||
|
|
@ -1,52 +1,54 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||||
'''
|
'''
|
||||||
evz.ro
|
evz.ro
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import re
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class EVZ_Ro(BasicNewsRecipe):
|
class EvenimentulZilei(BasicNewsRecipe):
|
||||||
title = 'evz.ro'
|
title = u'Evenimentul Zilei'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = u'Silviu Cotoar\u0103'
|
||||||
description = 'News from Romania'
|
description = ''
|
||||||
publisher = 'evz.ro'
|
publisher = u'Evenimentul Zilei'
|
||||||
category = 'news, politics, Romania'
|
oldest_article = 5
|
||||||
oldest_article = 2
|
|
||||||
max_articles_per_feed = 200
|
|
||||||
no_stylesheets = True
|
|
||||||
encoding = 'utf8'
|
|
||||||
use_embedded_content = False
|
|
||||||
language = 'ro'
|
language = 'ro'
|
||||||
masthead_url = 'http://www.evz.ro/fileadmin/images/logo.gif'
|
max_articles_per_feed = 100
|
||||||
extra_css = ' body{font-family: Georgia,Arial,Helvetica,sans-serif } .firstP{font-size: 1.125em} .author,.articleInfo{font-size: small} '
|
no_stylesheets = True
|
||||||
|
use_embedded_content = False
|
||||||
|
category = 'Ziare,Stiri'
|
||||||
|
encoding = 'utf-8'
|
||||||
|
cover_url = 'http://www.evz.ro/fileadmin/images/evzLogo.png'
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comments' : description
|
||||||
, 'tags' : category
|
,'tags' : category
|
||||||
, 'publisher' : publisher
|
,'language' : language
|
||||||
, 'language' : language
|
,'publisher' : publisher
|
||||||
}
|
}
|
||||||
|
|
||||||
preprocess_regexps = [
|
keep_only_tags = [
|
||||||
(re.compile(r'<head>.*?<title>', re.DOTALL|re.IGNORECASE),lambda match: '<head><title>')
|
dict(name='div', attrs={'class':'single'})
|
||||||
,(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')
|
, dict(name='img', attrs={'id':'placeholder'})
|
||||||
]
|
, dict(name='a', attrs={'id':'holderlink'})
|
||||||
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['form','embed','iframe','object','base','link','script','noscript'])
|
dict(name='p', attrs={'class':['articleInfo']})
|
||||||
,dict(attrs={'class':['section','statsInfo','email il']})
|
, dict(name='div', attrs={'id':['bannerAddoceansArticleJos']})
|
||||||
,dict(attrs={'id' :'gallery'})
|
, dict(name='div', attrs={'id':['bannerAddoceansArticle']})
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags_after = dict(attrs={'class':'section'})
|
remove_tags_after = [
|
||||||
keep_only_tags = [dict(attrs={'class':'single'})]
|
dict(name='div', attrs={'id':['bannerAddoceansArticleJos']})
|
||||||
remove_attributes = ['height','width']
|
]
|
||||||
|
|
||||||
feeds = [(u'Articles', u'http://www.evz.ro/rss.xml')]
|
feeds = [
|
||||||
|
(u'Feeds', u'http://www.evz.ro/rss.xml')
|
||||||
|
]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
for item in soup.findAll(style=True):
|
return self.adeify_images(soup)
|
||||||
del item['style']
|
|
||||||
return soup
|
|
||||||
|
43
resources/recipes/hitro.recipe
Normal file
43
resources/recipes/hitro.recipe
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||||
|
'''
|
||||||
|
hit.ro
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Hit(BasicNewsRecipe):
    '''Recipe for hit.ro, built from the RSS feed listed in ``feeds``.'''

    # Identity and catalogue metadata.
    title = u'HIT'
    __author__ = u'Silviu Cotoar\u0103'
    publisher = 'HIT'
    description = 'IT'
    category = 'Ziare,Reviste,IT'
    language = 'ro'
    cover_url = 'http://www.hit.ro/lib/images/frontend/hit_logo.png'

    # Fetch settings.
    encoding = 'utf-8'
    oldest_article = 5
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True

    # Metadata handed to the conversion step; reuses the values above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Only the headline and the article body container are kept.
    keep_only_tags = [
        dict(name='h1', attrs={'class':'art_titl'}),
        dict(name='div', attrs={'id':'continut_articol'}),
    ]

    feeds = [(u'Feeds', u'http://www.hit.ro/rss')]

    def preprocess_html(self, soup):
        '''Return ``soup`` after passing it through ``adeify_images``.'''
        processed = self.adeify_images(soup)
        return processed
|
53
resources/recipes/kamikaze.recipe
Normal file
53
resources/recipes/kamikaze.recipe
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||||
|
'''
|
||||||
|
kamikazeonline.ro
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Kamikaze(BasicNewsRecipe):
    '''Recipe for kamikazeonline.ro, built from the RSS feed in ``feeds``.'''

    # Identity and catalogue metadata.
    title = u'Kamikaze'
    __author__ = u'Silviu Cotoar\u0103'
    publisher = 'Kamikaze'
    description = u'S\u0103pt\u0103m\u00e2nal sc\u0103pat de sub control'
    category = 'Ziare,Reviste'
    language = 'ro'
    cover_url = 'http://www.kamikazeonline.ro/wp-content/themes/kamikaze/images/kamikazeonline_header.gif'

    # Fetch settings.
    encoding = 'utf-8'
    oldest_article = 5
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True

    # Metadata handed to the conversion step; reuses the values above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Only the main content container is kept.
    keep_only_tags = [dict(name='div', attrs={'id':'content'})]

    # Page furniture stripped from the kept content.
    remove_tags = [
        dict(name='div', attrs={'class':['connect_confirmation_cell connect_confirmation_cell_no_like']}),
        dict(name='h3', attrs={'id':['comments']}),
        dict(name='ul', attrs={'class':['addtoany_list']}),
        dict(name='p', attrs={'class':['postmetadata']}),
    ]

    remove_tags_after = [dict(name='p', attrs={'class':['postmetadata']})]

    feeds = [(u'Feeds', u'http://www.kamikazeonline.ro/feed/')]

    def preprocess_html(self, soup):
        '''Return ``soup`` after passing it through ``adeify_images``.'''
        processed = self.adeify_images(soup)
        return processed
|
@ -1,36 +1,37 @@
|
|||||||
#!/usr/bin/python
|
#!/usr/bin/python
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2010, Vadim Dyadkin, dyadkin@gmail.com'
|
__copyright__ = '2010, Vadim Dyadkin, dyadkin@gmail.com'
|
||||||
__author__ = 'Vadim Dyadkin'
|
__author__ = 'Vadim Dyadkin'
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class Computerra(BasicNewsRecipe):
|
class Computerra(BasicNewsRecipe):
|
||||||
title = u'\u041a\u043e\u043c\u043f\u044c\u044e\u0442\u0435\u0440\u0440\u0430'
|
title = u'\u041a\u043e\u043c\u043f\u044c\u044e\u0442\u0435\u0440\u0440\u0430'
|
||||||
recursion = 50
|
oldest_article = 100
|
||||||
oldest_article = 100
|
__author__ = 'Vadim Dyadkin (edited by A. Chewi)'
|
||||||
__author__ = 'Vadim Dyadkin'
|
max_articles_per_feed = 50
|
||||||
max_articles_per_feed = 100
|
use_embedded_content = False
|
||||||
use_embedded_content = False
|
remove_javascript = True
|
||||||
simultaneous_downloads = 5
|
no_stylesheets = True
|
||||||
language = 'ru'
|
conversion_options = {'linearize_tables' : True}
|
||||||
description = u'\u041a\u043e\u043c\u043f\u044c\u044e\u0442\u0435\u0440\u044b, \u043e\u043a\u043e\u043b\u043e\u043d\u0430\u0443\u0447\u043d\u044b\u0435 \u0438 \u043e\u043a\u043e\u043b\u043e\u0444\u0438\u043b\u043e\u0441\u043e\u0444\u0441\u043a\u0438\u0435 \u0441\u0442\u0430\u0442\u044c\u0438, \u0433\u0430\u0434\u0436\u0435\u0442\u044b.'
|
simultaneous_downloads = 5
|
||||||
|
language = 'ru'
|
||||||
keep_only_tags = [dict(name='div', attrs={'id': 'content'}),]
|
description = u'Компьютерра: все новости про компьютеры, железо, новые технологии, информационные технологии'
|
||||||
|
|
||||||
|
keep_only_tags = [dict(name='div', attrs={'id': 'content'}),]
|
||||||
feeds = [(u'\u041a\u043e\u043c\u043f\u044c\u044e\u0442\u0435\u0440\u0440\u0430', 'http://feeds.feedburner.com/ct_news/'),]
|
|
||||||
|
feeds = [(u'Компьютерра-Онлайн', 'http://feeds.feedburner.com/ct_news/'),]
|
||||||
remove_tags = [dict(name='div', attrs={'id': ['fin', 'idc-container', 'idc-noscript',]}),
|
|
||||||
dict(name='ul', attrs={'class': "related_post"}),
|
remove_tags = [
|
||||||
dict(name='p', attrs={'class': 'info'}),
|
dict(name='div', attrs={'id': ['fin', 'idc-container', 'idc-noscript',]}),
|
||||||
dict(name='a', attrs={'rel': 'tag', 'class': 'twitter-share-button', 'type': 'button_count'}),
|
dict(name='ul', attrs={'class': "related_post"}),
|
||||||
dict(name='h2', attrs={}),]
|
dict(name='p', attrs={'class': 'info'}),
|
||||||
|
dict(name='a', attrs={'class': 'twitter-share-button'}),
|
||||||
extra_css = 'body { text-align: justify; }'
|
dict(name='a', attrs={'type': 'button_count'}),
|
||||||
|
dict(name='h2', attrs={})
|
||||||
def get_article_url(self, article):
|
]
|
||||||
return article.get('feedburner:origLink', article.get('guid'))
|
|
||||||
|
def print_version(self, url):
|
||||||
|
return url + '?print=true'
|
||||||
|
@ -14,7 +14,7 @@ class NationalGeoRo(BasicNewsRecipe):
|
|||||||
__author__ = u'Silviu Cotoar\u0103'
|
__author__ = u'Silviu Cotoar\u0103'
|
||||||
description = u'S\u0103 avem grij\u0103 de planet\u0103'
|
description = u'S\u0103 avem grij\u0103 de planet\u0103'
|
||||||
publisher = 'National Geographic'
|
publisher = 'National Geographic'
|
||||||
oldest_article = 5
|
oldest_article = 35
|
||||||
language = 'ro'
|
language = 'ro'
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
@ -1,14 +1,14 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python2
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
#Based on Lars Jacob's Taz Digiabo recipe
|
#Based on veezh's original recipe and Kovid Goyal's New York Times recipe
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2010, veezh'
|
__copyright__ = '2011, Snaab'
|
||||||
|
|
||||||
'''
|
'''
|
||||||
www.nrc.nl
|
www.nrc.nl
|
||||||
'''
|
'''
|
||||||
import os, urllib2, zipfile
|
import os, zipfile
|
||||||
import time
|
import time
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ptempfile import PersistentTemporaryFile
|
from calibre.ptempfile import PersistentTemporaryFile
|
||||||
@ -17,41 +17,59 @@ from calibre.ptempfile import PersistentTemporaryFile
|
|||||||
class NRCHandelsblad(BasicNewsRecipe):
|
class NRCHandelsblad(BasicNewsRecipe):
|
||||||
|
|
||||||
title = u'NRC Handelsblad'
|
title = u'NRC Handelsblad'
|
||||||
description = u'De EPUB-versie van NRC'
|
description = u'De ePaper-versie van NRC'
|
||||||
language = 'nl'
|
language = 'nl'
|
||||||
lang = 'nl-NL'
|
lang = 'nl-NL'
|
||||||
|
needs_subscription = True
|
||||||
|
|
||||||
__author__ = 'veezh'
|
__author__ = 'Snaab'
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'no_default_epub_cover' : True
|
'no_default_epub_cover' : True
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def get_browser(self):
|
||||||
|
br = BasicNewsRecipe.get_browser()
|
||||||
|
if self.username is not None and self.password is not None:
|
||||||
|
br.open('http://login.nrc.nl/login')
|
||||||
|
br.select_form(nr=0)
|
||||||
|
br['username'] = self.username
|
||||||
|
br['password'] = self.password
|
||||||
|
br.submit()
|
||||||
|
return br
|
||||||
|
|
||||||
def build_index(self):
|
def build_index(self):
|
||||||
|
|
||||||
today = time.strftime("%Y%m%d")
|
today = time.strftime("%Y%m%d")
|
||||||
|
|
||||||
domain = "http://digitaleeditie.nrc.nl"
|
domain = "http://digitaleeditie.nrc.nl"
|
||||||
|
|
||||||
url = domain + "/digitaleeditie/helekrant/epub/nrc_" + today + ".epub"
|
url = domain + "/digitaleeditie/helekrant/epub/nrc_" + today + ".epub"
|
||||||
# print url
|
#print url
|
||||||
|
|
||||||
try:
|
try:
|
||||||
f = urllib2.urlopen(url)
|
br = self.get_browser()
|
||||||
except urllib2.HTTPError:
|
f = br.open(url)
|
||||||
|
except:
|
||||||
self.report_progress(0,_('Kan niet inloggen om editie te downloaden'))
|
self.report_progress(0,_('Kan niet inloggen om editie te downloaden'))
|
||||||
raise ValueError('Krant van vandaag nog niet beschikbaar')
|
raise ValueError('Krant van vandaag nog niet beschikbaar')
|
||||||
|
|
||||||
|
|
||||||
tmp = PersistentTemporaryFile(suffix='.epub')
|
tmp = PersistentTemporaryFile(suffix='.epub')
|
||||||
self.report_progress(0,_('downloading epub'))
|
self.report_progress(0,_('downloading epub'))
|
||||||
tmp.write(f.read())
|
tmp.write(f.read())
|
||||||
tmp.close()
|
f.close()
|
||||||
|
br.close()
|
||||||
zfile = zipfile.ZipFile(tmp.name, 'r')
|
if zipfile.is_zipfile(tmp):
|
||||||
self.report_progress(0,_('extracting epub'))
|
try:
|
||||||
|
zfile = zipfile.ZipFile(tmp.name, 'r')
|
||||||
zfile.extractall(self.output_dir)
|
zfile.extractall(self.output_dir)
|
||||||
|
self.report_progress(0,_('extracting epub'))
|
||||||
|
except zipfile.BadZipfile:
|
||||||
|
self.report_progress(0,_('BadZip error, continuing'))
|
||||||
|
|
||||||
tmp.close()
|
tmp.close()
|
||||||
index = os.path.join(self.output_dir, 'content.opf')
|
index = os.path.join(self.output_dir, 'metadata.opf')
|
||||||
|
|
||||||
self.report_progress(1,_('epub downloaded and extracted'))
|
self.report_progress(1,_('epub downloaded and extracted'))
|
||||||
|
|
||||||
|
51
resources/recipes/trombon.recipe
Normal file
51
resources/recipes/trombon.recipe
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||||
|
'''
|
||||||
|
trombon.ro
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Trombon(BasicNewsRecipe):
    '''Recipe for trombon.ro, built from the feedburner feed in ``feeds``.'''

    # Identity and catalogue metadata.
    title = u'Trombon'
    __author__ = u'Silviu Cotoar\u0103'
    publisher = u'Trombon'
    description = u'Parodii si Pamflete'
    category = 'Ziare,Reviste,Fun'
    language = 'ro'
    cover_url = 'http://www.trombon.ro/i/trombon.gif'

    # Fetch settings.
    encoding = 'utf-8'
    oldest_article = 5
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True

    # Metadata handed to the conversion step; reuses the values above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Only the article container is kept.
    keep_only_tags = [dict(name='div', attrs={'class':'articol'})]

    # Elements stripped from the kept content.
    remove_tags = [
        dict(name='div', attrs={'class':['info_2']}),
        dict(name='iframe', attrs={'scrolling':['no']}),
    ]

    remove_tags_after = [dict(name='div', attrs={'id':'article_vote'})]

    feeds = [(u'Feeds', u'http://feeds.feedburner.com/trombon/ABWb?format=xml')]

    def preprocess_html(self, soup):
        '''Return ``soup`` after passing it through ``adeify_images``.'''
        processed = self.adeify_images(soup)
        return processed
|
54
resources/recipes/wallstreetro.recipe
Normal file
54
resources/recipes/wallstreetro.recipe
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||||
|
'''
|
||||||
|
wall-street.ro
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class WallStreetRo(BasicNewsRecipe):
    '''Recipe for wall-street.ro, built from the RSS feed in ``feeds``.'''

    # Identity and catalogue metadata.
    title = u'Wall Street'
    __author__ = u'Silviu Cotoar\u0103'
    publisher = 'Wall Street'
    description = ''
    category = 'Ziare'
    language = 'ro'
    cover_url = 'http://img.wall-street.ro/images/WS_new_logo.jpg'

    # Fetch settings.
    encoding = 'utf-8'
    oldest_article = 5
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True

    # Metadata handed to the conversion step; reuses the values above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Only the article header and text containers are kept.
    keep_only_tags = [
        dict(name='div', attrs={'class':'article_header'}),
        dict(name='div', attrs={'class':'article_text'}),
    ]

    # Page furniture stripped from the kept content.
    remove_tags = [
        dict(name='p', attrs={'class':['page_breadcrumbs']}),
        dict(name='div', attrs={'id':['article_user_toolbox']}),
        dict(name='p', attrs={'class':['comments_count_container']}),
        dict(name='div', attrs={'class':['article_left_column']}),
    ]

    remove_tags_after = [dict(name='div', attrs={'class':'clearfloat'})]

    feeds = [(u'Feeds', u'http://img.wall-street.ro/rssfeeds/wall-street.xml')]

    def preprocess_html(self, soup):
        '''Return ``soup`` after passing it through ``adeify_images``.'''
        processed = self.adeify_images(soup)
        return processed
|
@ -131,9 +131,12 @@ class PageProcessor(list): # {{{
|
|||||||
newsizey = int(newsizex / aspect)
|
newsizey = int(newsizex / aspect)
|
||||||
deltax = 0
|
deltax = 0
|
||||||
deltay = (SCRHEIGHT - newsizey) / 2
|
deltay = (SCRHEIGHT - newsizey) / 2
|
||||||
wand.size = (newsizex, newsizey)
|
if newsizex < 20000 and newsizey < 20000:
|
||||||
wand.set_border_color(pw)
|
# Too large and resizing fails, so better
|
||||||
wand.add_border(pw, deltax, deltay)
|
# to leave it as original size
|
||||||
|
wand.size = (newsizex, newsizey)
|
||||||
|
wand.set_border_color(pw)
|
||||||
|
wand.add_border(pw, deltax, deltay)
|
||||||
elif self.opts.wide:
|
elif self.opts.wide:
|
||||||
# Keep aspect and Use device height as scaled image width so landscape mode is clean
|
# Keep aspect and Use device height as scaled image width so landscape mode is clean
|
||||||
aspect = float(sizex) / float(sizey)
|
aspect = float(sizex) / float(sizey)
|
||||||
@ -152,11 +155,15 @@ class PageProcessor(list): # {{{
|
|||||||
newsizey = int(newsizex / aspect)
|
newsizey = int(newsizex / aspect)
|
||||||
deltax = 0
|
deltax = 0
|
||||||
deltay = (wscreeny - newsizey) / 2
|
deltay = (wscreeny - newsizey) / 2
|
||||||
wand.size = (newsizex, newsizey)
|
if newsizex < 20000 and newsizey < 20000:
|
||||||
wand.set_border_color(pw)
|
# Too large and resizing fails, so better
|
||||||
wand.add_border(pw, deltax, deltay)
|
# to leave it as original size
|
||||||
|
wand.size = (newsizex, newsizey)
|
||||||
|
wand.set_border_color(pw)
|
||||||
|
wand.add_border(pw, deltax, deltay)
|
||||||
else:
|
else:
|
||||||
wand.size = (SCRWIDTH, SCRHEIGHT)
|
if SCRWIDTH < 20000 and SCRHEIGHT < 20000:
|
||||||
|
wand.size = (SCRWIDTH, SCRHEIGHT)
|
||||||
|
|
||||||
if not self.opts.dont_sharpen:
|
if not self.opts.dont_sharpen:
|
||||||
wand.sharpen(0.0, 1.0)
|
wand.sharpen(0.0, 1.0)
|
||||||
|
@ -75,15 +75,20 @@ class SNBFile:
|
|||||||
for i in range(self.plainBlock):
|
for i in range(self.plainBlock):
|
||||||
bzdc = bz2.BZ2Decompressor()
|
bzdc = bz2.BZ2Decompressor()
|
||||||
if (i < self.plainBlock - 1):
|
if (i < self.plainBlock - 1):
|
||||||
bSize = self.blocks[self.binBlock + i + 1].Offset - self.blocks[self.binBlock + i].Offset;
|
bSize = self.blocks[self.binBlock + i + 1].Offset - self.blocks[self.binBlock + i].Offset
|
||||||
else:
|
else:
|
||||||
bSize = self.tailOffset - self.blocks[self.binBlock + i].Offset;
|
bSize = self.tailOffset - self.blocks[self.binBlock + i].Offset
|
||||||
snbFile.seek(self.blocks[self.binBlock + i].Offset);
|
snbFile.seek(self.blocks[self.binBlock + i].Offset)
|
||||||
try:
|
try:
|
||||||
data = snbFile.read(bSize)
|
data = snbFile.read(bSize)
|
||||||
uncompressedData += bzdc.decompress(data)
|
if len(data) < 32768:
|
||||||
|
uncompressedData += bzdc.decompress(data)
|
||||||
|
else:
|
||||||
|
uncompressedData += data
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
print e
|
print e
|
||||||
|
if len(uncompressedData) != self.plainStreamSizeUncompressed:
|
||||||
|
raise Exception()
|
||||||
f.fileBody = uncompressedData[plainPos:plainPos+f.fileSize]
|
f.fileBody = uncompressedData[plainPos:plainPos+f.fileSize]
|
||||||
plainPos += f.fileSize
|
plainPos += f.fileSize
|
||||||
elif f.attr & 0x01000000 == 0x01000000:
|
elif f.attr & 0x01000000 == 0x01000000:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user