mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge from trunk
This commit is contained in:
commit
3700d81d4a
@ -18,6 +18,8 @@ class TheAmericanSpectator(BasicNewsRecipe):
|
|||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'en'
|
language = 'en'
|
||||||
INDEX = 'http://spectator.org'
|
INDEX = 'http://spectator.org'
|
||||||
|
auto_cleanup = True
|
||||||
|
encoding = 'utf-8'
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comments' : description
|
'comments' : description
|
||||||
@ -26,17 +28,6 @@ class TheAmericanSpectator(BasicNewsRecipe):
|
|||||||
,'publisher' : publisher
|
,'publisher' : publisher
|
||||||
}
|
}
|
||||||
|
|
||||||
keep_only_tags = [
|
|
||||||
dict(name='div', attrs={'class':'post inner'})
|
|
||||||
,dict(name='div', attrs={'class':'author-bio'})
|
|
||||||
]
|
|
||||||
|
|
||||||
remove_tags = [
|
|
||||||
dict(name='object')
|
|
||||||
,dict(name='div', attrs={'class':['col3','post-options','social']})
|
|
||||||
,dict(name='p' , attrs={'class':['letter-editor','meta']})
|
|
||||||
]
|
|
||||||
|
|
||||||
feeds = [ (u'Articles', u'http://feeds.feedburner.com/amspecarticles')]
|
feeds = [ (u'Articles', u'http://feeds.feedburner.com/amspecarticles')]
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
|
11
recipes/diario_la_republica.recipe
Normal file
11
recipes/diario_la_republica.recipe
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class AdvancedUserRecipe1317341449(BasicNewsRecipe):
    """Feed-based recipe for the Colombian daily Diario La Republica."""

    # Identification
    __author__ = 'CAVALENCIA'
    title = u'Diario La Republica'
    language = 'es_CO'

    # Download limits and clean-up (semantics are defined by BasicNewsRecipe)
    max_articles_per_feed = 100
    oldest_article = 7
    auto_cleanup = True

    # (section title, RSS URL) pairs
    feeds = [
        (
            u'Diario La Republica',
            u'http://www.larepublica.com.co/rss/larepublica.xml',
        ),
    ]
|
@ -2,12 +2,10 @@
|
|||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe1311790237(BasicNewsRecipe):
|
class AdvancedUserRecipe1311790237(BasicNewsRecipe):
|
||||||
title = u'Periódico El Colombiano'
|
title = u'Periódico El Colombiano'
|
||||||
language = 'es_CO'
|
|
||||||
__author__ = 'BIGO-CAVA'
|
__author__ = 'BIGO-CAVA'
|
||||||
|
language = 'es_CO'
|
||||||
cover_url = 'http://www.elcolombiano.com/images/logoElColombiano348x46.gif'
|
cover_url = 'http://www.elcolombiano.com/images/logoElColombiano348x46.gif'
|
||||||
remove_tags_before = dict(id='contenidoArt')
|
remove_tags_before = dict(id='contenidoArt')
|
||||||
remove_tags_after = dict(id='enviaTips')
|
remove_tags_after = dict(id='enviaTips')
|
||||||
|
54
recipes/el_espectador.recipe
Normal file
54
recipes/el_espectador.recipe
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
# coding=utf-8
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class ColombiaElEspectador(BasicNewsRecipe):
    """Feed-based recipe for the Colombian newspaper El Espectador."""

    # Identification
    title = u'Periódico el Espectador'
    __author__ = 'BIGO-CAVA'
    language = 'es_CO'
    publication_type = 'newspaper'

    # The same logo is used for the cover and the masthead.
    cover_url = 'http://www.elespectador.com/sites/elespectador.com/themes/elespectador/images/logo.gif'
    masthead_url = cover_url

    # Article clean-up: keep what lies between the 'content' element and the
    # pagination block, then drop the listed toolbars and related-content boxes.
    remove_tags_before = dict(id='content')
    remove_tags_after = [dict(name='div', attrs={'class':'paginacion'})]
    remove_tags = [dict(name='div', attrs={'class':'herramientas_nota'}),
                   dict(name='div', attrs={'class':'relpauta'}),
                   dict(name='div', attrs={'class':'recursosrelacionados'}),
                   dict(name='div', attrs={'class':'nav_negocios'})]

    # Download behaviour (semantics are defined by BasicNewsRecipe)
    oldest_article = 2
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True

    extra_css = """
        p{text-align: justify; font-size: 100%}
        body{ text-align: left; font-size:100% }
        h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
        h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; }
        """

    # (section title, RSS URL) pairs. The first URL used to start with a stray
    # space, which made it an invalid URL; it has been removed.
    feeds = [(u'Política ', u'http://www.elespectador.com/noticias/politica/feed'),
             (u'Judicial', u'http://www.elespectador.com/noticias/judicial/feed'),
             (u'Paz', u'http://www.elespectador.com/noticias/paz/feed'),
             (u'Economía', u'http://www.elespectador.com/economia/feed'),
             (u'Soy Periodista', u'http://www.elespectador.com/noticias/soyperiodista/feed'),
             (u'Investigación', u'http://www.elespectador.com/noticias/investigacion/feed'),
             (u'Educación', u'http://www.elespectador.com/noticias/educacion/feed'),
             (u'Salud', u'http://www.elespectador.com/noticias/salud/feed'),
             (u'El Mundo', u'http://www.elespectador.com/noticias/elmundo/feed'),
             (u'Nacional', u'http://www.elespectador.com/noticias/nacional/feed'),
             (u'Bogotá', u'http://www.elespectador.com/noticias/bogota/feed'),
             (u'Deportes', u'http://www.elespectador.com/deportes/feed'),
             (u'Tecnología', u'http://www.elespectador.com/tecnologia/feed'),
             (u'Actualidad', u'http://www.elespectador.com/noticias/actualidad/feed'),
             (u'Opinión', u'http://www.elespectador.com/opinion/feed'),
             (u'Editorial', u'http://www.elespectador.com/opinion/editorial/feed')]
|
50
recipes/el_mundo_co.recipe
Normal file
50
recipes/el_mundo_co.recipe
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class ColombiaElMundo02(BasicNewsRecipe):
    """Feed-based recipe for the Colombian newspaper El Mundo."""

    # Identification
    title = u'Periódico El Mundo'
    __author__ = 'BIGO-CAVA'
    language = 'es_CO'
    publication_type = 'newspaper'

    # The same logo is used for the cover and the masthead.
    cover_url = 'http://www.elmundo.com/portal/img/logo_mundo2.png'
    masthead_url = cover_url

    # Article clean-up: keep what lies between the breadcrumb ('miga_pan') and
    # the options box, then drop navigation, icon bars and related-news blocks.
    remove_tags_before = dict(id='miga_pan')
    remove_tags_after = [dict(name='div', attrs={'class':'cuadro_opciones_new1'})]
    remove_tags = [dict(name='div', attrs={'class':'ruta'}),
                   dict(name='div', attrs={'class':'buscador'}),
                   dict(name='div', attrs={'class':'iconos'}),
                   dict(name='div', attrs={'class':'otros_iconos'}),
                   dict(name='div', attrs={'class':'cuadro_opciones_new1'}),
                   dict(name='div', attrs={'class':'otras_noticias'}),
                   dict(name='div', attrs={'class':'notas_relacionadas'}),
                   dict(name='div', attrs={'id':'lateral_2'})]

    # Download behaviour (semantics are defined by BasicNewsRecipe)
    oldest_article = 2
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True

    extra_css = """
        p{text-align: justify; font-size: 100%}
        body{ text-align: left; font-size:100% }
        h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
        h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; }
        """

    # (section title, RSS URL) pairs. 'Movilidad' was previously misspelled
    # 'Mobilidad'; the feed URL itself uses the correct spelling.
    feeds = [(u'Opinión', u'http://www.elmundo.com/images/rss/opinion.xml'),
             (u'Economía', u'http://www.elmundo.com/images/rss/noticias_economia.xml'),
             (u'Deportes', u'http://www.elmundo.com/images/rss/deportes.xml'),
             (u'Política ', u'http://www.elmundo.com/images/rss/noticias_politica.xml'),
             (u'Antioquia', u'http://www.elmundo.com/images/rss/noticias_antioquia.xml'),
             (u'Nacional ', u'http://www.elmundo.com/images/rss/noticias_nacional.xml'),
             (u'Internacional', u'http://www.elmundo.com/images/rss/noticias_internacional.xml'),
             (u'Servicios Públicos', u'http://www.elmundo.com/images/rss/noticias_servicios_publicos.xml'),
             (u'Infraestructura', u'http://www.elmundo.com/images/rss/noticias_infraestructura.xml'),
             (u'Movilidad', u'http://www.elmundo.com/images/rss/noticias_movilidad.xml'),
             (u'Derechos Humanos', u'http://www.elmundo.com/images/rss/noticias_derechos_humanos.xml'),
             (u'Vida', u'http://www.elmundo.com/images/rss/vida.xml'),
             (u'Cultura', u'http://www.elmundo.com/images/rss/cultura.xml')]
|
@ -2,18 +2,17 @@
|
|||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class ColombiaElTiempo02(BasicNewsRecipe):
|
class ColombiaElTiempo02(BasicNewsRecipe):
|
||||||
title = u'Periódico el Tiempo'
|
title = u'Periódico el Tiempo'
|
||||||
language = 'es_CO'
|
|
||||||
__author__ = 'BIGO-CAVA'
|
__author__ = 'BIGO-CAVA'
|
||||||
|
language = 'es_CO'
|
||||||
cover_url = 'http://www.eltiempo.com/media/css/images/logo_footer.png'
|
cover_url = 'http://www.eltiempo.com/media/css/images/logo_footer.png'
|
||||||
remove_tags_before = dict(id='fb-root')
|
#remove_tags_before = dict(id='fb-root')
|
||||||
|
remove_tags_before = dict(id='contenidoArt')
|
||||||
remove_tags_after = [dict(name='div', attrs={'class':'modulo reporte'})]
|
remove_tags_after = [dict(name='div', attrs={'class':'modulo reporte'})]
|
||||||
keep_only_tags = [dict(name='div', id='contenidoArt')]
|
keep_only_tags = [dict(name='div', id='contenidoArt')]
|
||||||
remove_tags = [dict(name='div', attrs={'class':'social-media'}),
|
remove_tags = [dict(name='div', attrs={'class':'social-media'}),
|
||||||
|
dict(name='div', attrs={'class':'recomend-art'}),
|
||||||
dict(name='div', attrs={'class':'caja-facebook'}),
|
dict(name='div', attrs={'class':'caja-facebook'}),
|
||||||
dict(name='div', attrs={'class':'caja-twitter'}),
|
dict(name='div', attrs={'class':'caja-twitter'}),
|
||||||
dict(name='div', attrs={'class':'caja-buzz'}),
|
dict(name='div', attrs={'class':'caja-buzz'}),
|
||||||
|
112
recipes/gosc_niedzielny.recipe
Normal file
112
recipes/gosc_niedzielny.recipe
Normal file
@ -0,0 +1,112 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2011, Piotr Kontek, piotr.kontek@gmail.com'
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ptempfile import PersistentTemporaryFile
|
||||||
|
import re
|
||||||
|
|
||||||
|
class GN(BasicNewsRecipe):
    """Recipe for the Polish weekly Gosc Niedzielny (gosc.pl).

    The issue index is assembled by hand in parse_index(); each article is
    downloaded through get_obfuscated_article(), which rebuilds a reduced
    HTML page and hands calibre the path of a temporary file.
    """

    # Placeholder; find_last_issue() stores the selected issue's relative URL here.
    EDITION = 0

    __author__ = 'Piotr Kontek'
    title = u'Gość niedzielny'
    description = 'Weekly magazine'
    encoding = 'utf-8'
    no_stylesheets = True
    language = 'pl'
    remove_javascript = True
    # Temporary files created by get_obfuscated_article().
    temp_files = []

    articles_are_obfuscated = True

    _SITE = 'http://www.gosc.pl'

    def _absolute_images(self, markup):
        """Return *markup* with site-relative image sources made absolute."""
        return markup.replace('src="/files/', 'src="http://www.gosc.pl/files/')

    def _article_entry(self, link):
        """Build the article dict for an <a> tag pointing at a document page."""
        return {
            'title': self.tag_to_string(link),
            # NOTE(review): '/doc_pr/' is presumably the print view of the
            # article -- confirm against the site.
            'url': self._SITE + link['href'].replace('/doc/', '/doc_pr/'),
            'date': '',
            'description': '',
        }

    def get_obfuscated_article(self, url):
        """Download *url*, rebuild a reduced article page and return its path.

        The page is reduced to the title, the author line, the lead paragraph
        and the class-less top-level paragraphs of the main section. The
        result is written to a temporary file whose name is returned.
        """
        br = self.get_browser()
        br.open(url)
        source = br.response().read()
        page = self.index_to_soup(source)

        main_section = page.find('div', attrs={'class': 'txt doc_prnt_prv'})

        title = main_section.find('h2')
        info = main_section.find('div', attrs={'class': 'cf doc_info'})
        authors = info.find(attrs={'class': 'l'})
        article = str(main_section.find('p', attrs={'class': 'doc_lead'}))

        first = True
        for p in main_section.findAll('p', attrs={'class': None}, recursive=False):
            # Only the first paragraph gets the image + caption treatment.
            img = p.find('img') if first else None
            if img is not None:
                article += '<p>'
                article += self._absolute_images(str(img))
                article += '<font size="-2">'
                for s in p.findAll('span'):
                    article += self.tag_to_string(s)
                article += '</font></p>'
            else:
                article += self._absolute_images(str(p))
            first = False

        html = unicode(title) + unicode(authors) + unicode(article)

        tmp = PersistentTemporaryFile('_temparse.html')
        self.temp_files.append(tmp)
        tmp.write(html)
        tmp.close()
        return tmp.name

    def find_last_issue(self):
        """Set EDITION, title and cover_url from the issue list page.

        The loop stops at the second entry that has a cover image; if only
        one entry has an image, that one is used.
        """
        soup = self.index_to_soup('http://gosc.pl/wyszukaj/wydania/3.Gosc-Niedzielny')
        # Look for the cover image and the link of the previous full issue.
        first = True
        for d in soup.findAll('div', attrs={'class': 'l release_preview_l'}):
            img = d.find('img')
            if img is not None:
                a = img.parent
                self.EDITION = a['href']
                self.title = img['alt']
                self.cover_url = self._SITE + img['src']
                if not first:
                    break
                first = False

    def parse_index(self):
        """Return the list of (section title, articles) tuples for the issue."""
        self.find_last_issue()
        soup = self.index_to_soup(self._SITE + self.EDITION)
        feeds = []

        # Editorial
        a = soup.find('div', attrs={'class': 'release-wp-b'}).find('a')
        feeds.append((u'Wstępniak', [self._article_entry(a)]))

        # Categories
        for addr in soup.findAll('a', attrs={'href': re.compile('kategoria')}):
            # Skip the "all articles from this category" links.
            if addr.string != u'wszystkie artyku\u0142y z tej kategorii \xbb':
                main_block = self.index_to_soup(self._SITE + addr['href'])
                articles = list(self.find_articles(main_block))
                if articles:
                    feeds.append((addr.string, articles))
        return feeds

    def find_articles(self, main_block):
        """Yield an article dict for every linked document in *main_block*.

        Blocks of class 'prev_doc2' are yielded first, then 'sr-document'.
        Blocks without a link are skipped.
        """
        for css_class in ('prev_doc2', 'sr-document'):
            for block in main_block.findAll('div', attrs={'class': css_class}):
                art = block.find('a')
                if art is None:
                    continue
                yield self._article_entry(art)
|
||||||
|
|
@ -4,13 +4,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
|
|
||||||
class AdvancedUserRecipe1311799898(BasicNewsRecipe):
|
class AdvancedUserRecipe1311799898(BasicNewsRecipe):
|
||||||
title = u'Periódico Portafolio Colombia'
|
title = u'Periódico Portafolio Colombia'
|
||||||
language = 'es_CO'
|
|
||||||
__author__ = 'BIGO-CAVA'
|
__author__ = 'BIGO-CAVA'
|
||||||
|
language = 'es_CO'
|
||||||
cover_url = 'http://www.portafolio.co/sites/portafolio.co/themes/portafolio_2011/logo.png'
|
cover_url = 'http://www.portafolio.co/sites/portafolio.co/themes/portafolio_2011/logo.png'
|
||||||
remove_tags_before = dict(id='contenidoArt')
|
remove_tags_before = dict(id='contenidoArt')
|
||||||
remove_tags_after = [dict(name='div', attrs={'class':'articulo-mas'})]
|
remove_tags_after = [dict(name='div', attrs={'class':'articulo-mas'})]
|
||||||
keep_only_tags = [dict(name='div', id='contenidoArt')]
|
keep_only_tags = [dict(name='div', id='contenidoArt')]
|
||||||
oldest_article = 1
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
@ -1,5 +1,8 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '''
|
||||||
|
2010, Darko Miletic <darko.miletic at gmail.com>
|
||||||
|
2011, Przemyslaw Kryger <pkryger at gmail.com>
|
||||||
|
'''
|
||||||
'''
|
'''
|
||||||
readitlaterlist.com
|
readitlaterlist.com
|
||||||
'''
|
'''
|
||||||
@ -9,7 +12,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
|
|
||||||
class Readitlater(BasicNewsRecipe):
|
class Readitlater(BasicNewsRecipe):
|
||||||
title = 'Read It Later'
|
title = 'Read It Later'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic, Przemyslaw Kryger'
|
||||||
description = '''Personalized news feeds. Go to readitlaterlist.com to
|
description = '''Personalized news feeds. Go to readitlaterlist.com to
|
||||||
setup up your news. Fill in your account
|
setup up your news. Fill in your account
|
||||||
username, and optionally you can add password.'''
|
username, and optionally you can add password.'''
|
||||||
@ -23,9 +26,6 @@ class Readitlater(BasicNewsRecipe):
|
|||||||
INDEX = u'http://readitlaterlist.com'
|
INDEX = u'http://readitlaterlist.com'
|
||||||
LOGIN = INDEX + u'/l'
|
LOGIN = INDEX + u'/l'
|
||||||
|
|
||||||
|
|
||||||
feeds = [(u'Unread articles' , INDEX + u'/unread')]
|
|
||||||
|
|
||||||
def get_browser(self):
|
def get_browser(self):
|
||||||
br = BasicNewsRecipe.get_browser()
|
br = BasicNewsRecipe.get_browser()
|
||||||
if self.username is not None:
|
if self.username is not None:
|
||||||
@ -37,12 +37,31 @@ class Readitlater(BasicNewsRecipe):
|
|||||||
br.submit()
|
br.submit()
|
||||||
return br
|
return br
|
||||||
|
|
||||||
|
def get_feeds(self):
    """Return (title, url) pairs, one per page of the unread list.

    Starts at INDEX + '/unread/1' and follows the active "next" link of each
    page. In test mode at most two pages are returned, and crawling stops as
    soon as two are known. A "next" link that leads back to a page already
    visited ends the crawl instead of looping forever.
    """
    self.report_progress(0, ('Fetching list of feeds...'))
    lfeeds = []
    seen = set()
    feedurl = self.INDEX + u'/unread/1'
    while feedurl not in seen:
        seen.add(feedurl)
        page_no = len(lfeeds) + 1
        lfeeds.append((u'Unread articles, page ' + str(page_no), feedurl))
        self.report_progress(0, ('Got ') + str(page_no) + (' feeds'))
        if self.test and len(lfeeds) >= 2:
            # Test runs only ever use two feeds; skip the remaining fetches.
            break
        soup = self.index_to_soup(feedurl)
        ritem = soup.find('a', attrs={'id': 'next', 'class': 'active'})
        if ritem is None:
            break
        feedurl = self.INDEX + ritem['href']
    return lfeeds
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
totalfeeds = []
|
totalfeeds = []
|
||||||
lfeeds = self.get_feeds()
|
lfeeds = self.get_feeds()
|
||||||
for feedobj in lfeeds:
|
for feedobj in lfeeds:
|
||||||
feedtitle, feedurl = feedobj
|
feedtitle, feedurl = feedobj
|
||||||
self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
|
self.report_progress(0, ('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
|
||||||
articles = []
|
articles = []
|
||||||
soup = self.index_to_soup(feedurl)
|
soup = self.index_to_soup(feedurl)
|
||||||
ritem = soup.find('ul',attrs={'id':'list'})
|
ritem = soup.find('ul',attrs={'id':'list'})
|
||||||
|
11
recipes/revista_semana.recipe
Normal file
11
recipes/revista_semana.recipe
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
|
class AdvancedUserRecipe1317341570(BasicNewsRecipe):
    """Feed-based recipe for the Colombian magazine Revista Semana."""

    # Identification
    __author__ = 'BIGO-CAVA'
    title = u'Revista Semana'
    language = 'es_CO'

    # Download limits (semantics are defined by BasicNewsRecipe)
    max_articles_per_feed = 100
    oldest_article = 7

    # (section title, RSS URL) pairs
    feeds = [
        (
            u'Revista Semana',
            u'http://www.semana.com/rss/Semana_OnLine.xml',
        ),
    ]
|
@ -35,14 +35,14 @@ class Smh_au(BasicNewsRecipe):
|
|||||||
, 'language' : language
|
, 'language' : language
|
||||||
}
|
}
|
||||||
|
|
||||||
remove_tags = [
|
|
||||||
dict(name='div', attrs={'id':['googleAds','moreGoogleAds','comments']})
|
|
||||||
,dict(name='div', attrs={'class':'cT-imageMultimedia'})
|
|
||||||
,dict(name=['object','embed','iframe'])
|
|
||||||
]
|
|
||||||
remove_tags_after = [dict(name='div',attrs={'class':'articleBody'})]
|
remove_tags_after = [dict(name='div',attrs={'class':'articleBody'})]
|
||||||
keep_only_tags = [dict(name='div',attrs={'id':'content'})]
|
keep_only_tags = [dict(name='div',attrs={'id':'content'})]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
|
dict(name='div',
|
||||||
|
attrs={'id':['googleAds','moreGoogleAds','comments',
|
||||||
|
'video-player-content']}),
|
||||||
|
dict(name='div', attrs={'class':'cT-imageMultimedia'}),
|
||||||
|
dict(name=['object','embed','iframe']),
|
||||||
dict(attrs={'class':'hidden'}),
|
dict(attrs={'class':'hidden'}),
|
||||||
dict(name=['link','meta','base','embed','object','iframe'])
|
dict(name=['link','meta','base','embed','object','iframe'])
|
||||||
]
|
]
|
||||||
|
@ -77,8 +77,12 @@ class ANDROID(USBMS):
|
|||||||
0xdeed : [0x0222],
|
0xdeed : [0x0222],
|
||||||
},
|
},
|
||||||
|
|
||||||
# Viewsonic
|
# Viewsonic/Vizio
|
||||||
0x0489 : { 0xc001 : [0x0226], 0xc004 : [0x0226], },
|
0x0489 : {
|
||||||
|
0xc001 : [0x0226],
|
||||||
|
0xc004 : [0x0226],
|
||||||
|
0x8801 : [0x0226, 0x0227],
|
||||||
|
},
|
||||||
|
|
||||||
# Acer
|
# Acer
|
||||||
0x502 : { 0x3203 : [0x0100, 0x224]},
|
0x502 : { 0x3203 : [0x0100, 0x224]},
|
||||||
@ -134,7 +138,7 @@ class ANDROID(USBMS):
|
|||||||
VENDOR_NAME = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER',
|
VENDOR_NAME = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER',
|
||||||
'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS',
|
'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS',
|
||||||
'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC', 'LGE', 'NVIDIA',
|
'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC', 'LGE', 'NVIDIA',
|
||||||
'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT']
|
'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT', 'HYSTON', 'VIZIO']
|
||||||
WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
|
WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
|
||||||
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
|
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
|
||||||
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
|
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
|
||||||
@ -144,11 +148,12 @@ class ANDROID(USBMS):
|
|||||||
'7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM', 'TEGRA_2',
|
'7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM', 'TEGRA_2',
|
||||||
'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB', 'STREAK',
|
'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB', 'STREAK',
|
||||||
'MB525', 'ANDROID2.3', 'SGH-I997', 'GT-I5800_CARD', 'MB612',
|
'MB525', 'ANDROID2.3', 'SGH-I997', 'GT-I5800_CARD', 'MB612',
|
||||||
'GT-S5830_CARD', 'GT-S5570_CARD', 'MB870', 'MID7015A', 'ALPANDIGITAL']
|
'GT-S5830_CARD', 'GT-S5570_CARD', 'MB870', 'MID7015A',
|
||||||
|
'ALPANDIGITAL', 'ANDROID_MID', 'VTAB1008']
|
||||||
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
|
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
|
||||||
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
|
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
|
||||||
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
|
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
|
||||||
'__UMS_COMPOSITE', 'SGH-I997_CARD', 'MB870', 'ALPANDIGITAL']
|
'__UMS_COMPOSITE', 'SGH-I997_CARD', 'MB870', 'ALPANDIGITAL', 'ANDROID_MID']
|
||||||
|
|
||||||
OSX_MAIN_MEM = 'Android Device Main Memory'
|
OSX_MAIN_MEM = 'Android Device Main Memory'
|
||||||
|
|
||||||
|
@ -452,24 +452,26 @@ class BlockAttr(StyleObject, LRFObject):
|
|||||||
@classmethod
def to_css(cls, obj, inline=False):
    """Render the block attributes present on *obj* as CSS declarations.

    Only attributes that exist on *obj* contribute. With ``inline=True``
    the declarations are joined on one line, each followed by a space;
    otherwise each one is emitted on its own tab-indented line.

    Returns the CSS text, or '' when no attribute is present.
    """
    prefix = '' if inline else '\t'
    suffix = ' ' if inline else '\n'
    # Declarations are collected in a list: a nested helper cannot append to
    # an enclosing local with ``+=`` (UnboundLocalError without nonlocal).
    decls = []

    if hasattr(obj, 'sidemargin'):
        margin = str(obj.sidemargin) + 'px'
        decls.append('margin-left: %(m)s; margin-right: %(m)s;' % dict(m=margin))
    if hasattr(obj, 'topskip'):
        decls.append('margin-top: %dpx;' % obj.topskip)
    if hasattr(obj, 'footskip'):
        decls.append('margin-bottom: %dpx;' % obj.footskip)
    if hasattr(obj, 'framewidth'):
        # The terminating ';' was missing, which fused this declaration with
        # the next one in inline mode.
        decls.append('border: solid %dpx;' % obj.framewidth)
    # NOTE(review): colours are skipped when alpha == 255, presumably because
    # that means fully transparent -- confirm against the colour type.
    if hasattr(obj, 'framecolor') and obj.framecolor.a < 255:
        decls.append('border-color: %s;' % obj.framecolor.to_html())
    if hasattr(obj, 'bgcolor') and obj.bgcolor.a < 255:
        decls.append('background-color: %s;' % obj.bgcolor.to_html())

    return ''.join(prefix + decl + suffix for decl in decls)
|
||||||
|
|
||||||
@ -480,39 +482,41 @@ class TextCSS(object):
|
|||||||
@classmethod
def to_css(cls, obj, inline=False):
    """Render the text attributes present on *obj* as CSS declarations.

    Attributes that are missing or None are skipped. With ``inline=True``
    the declarations are joined on one line, each followed by a space;
    otherwise each one is emitted on its own tab-indented line.

    Returns the CSS text, or '' when nothing applies. Raises KeyError when
    ``obj.fontfacename`` is not a key of ``cls.FONT_MAP``.
    """
    prefix = '' if inline else '\t'
    suffix = ' ' if inline else '\n'
    # Declarations are collected in a list: a nested helper cannot append to
    # an enclosing local with ``+=`` (UnboundLocalError without nonlocal).
    decls = []

    fs = getattr(obj, 'fontsize', None)
    if fs is not None:
        # Sizes are divided by ten to obtain points.
        decls.append('font-size: %fpt;' % (int(fs) / 10.))
    fw = getattr(obj, 'fontweight', None)
    if fw is not None:
        decls.append('font-weight: %s;' % ('bold' if int(fw) >= 700 else 'normal'))
    fn = getattr(obj, 'fontfacename', None)
    if fn is not None:
        decls.append('font-family: %s;' % cls.FONT_MAP[fn])
    fg = getattr(obj, 'textcolor', None)
    if fg is not None:
        decls.append('color: %s;' % fg.to_html())
    bg = getattr(obj, 'textbgcolor', None)
    if bg is not None:
        decls.append('background-color: %s;' % bg.to_html())
    al = getattr(obj, 'align', None)
    if al is not None:
        # Translate the head/center/foot names to CSS keywords. The mapping
        # dict itself used to be formatted into the output. Values not in
        # the mapping are passed through unchanged.
        al = dict(head='left', center='center', foot='right').get(al, al)
        decls.append('text-align: %s;' % al)
    lh = getattr(obj, 'linespace', None)
    if lh is not None:
        # Line spacing is a length, so it belongs in line-height; it was
        # previously emitted as a second, invalid text-align declaration.
        decls.append('line-height: %fpt;' % (int(lh) / 10.))
    pi = getattr(obj, 'parindent', None)
    if pi is not None:
        decls.append('text-indent: %fpt;' % (int(pi) / 10.))

    return ''.join(prefix + decl + suffix for decl in decls)
|
||||||
|
|
||||||
|
@ -85,6 +85,8 @@ def render_data(mi, use_roman_numbers=True, all_fields=False):
|
|||||||
|
|
||||||
for field, display in get_field_list(fm):
|
for field, display in get_field_list(fm):
|
||||||
metadata = fm.get(field, None)
|
metadata = fm.get(field, None)
|
||||||
|
if field == 'sort':
|
||||||
|
field = 'title_sort'
|
||||||
if all_fields:
|
if all_fields:
|
||||||
display = True
|
display = True
|
||||||
if (not display or not metadata or mi.is_null(field) or
|
if (not display or not metadata or mi.is_null(field) or
|
||||||
|
Loading…
x
Reference in New Issue
Block a user