Merge branch 'master' of https://github.com/unkn0w7n/calibre
Before Width: | Height: | Size: 190 B |
Before Width: | Height: | Size: 1.4 KiB |
Before Width: | Height: | Size: 236 B |
Before Width: | Height: | Size: 801 B |
Before Width: | Height: | Size: 3.5 KiB |
Before Width: | Height: | Size: 736 B |
Before Width: | Height: | Size: 366 B |
Before Width: | Height: | Size: 523 B |
Before Width: | Height: | Size: 1.3 KiB |
Before Width: | Height: | Size: 430 B |
Before Width: | Height: | Size: 134 B |
Before Width: | Height: | Size: 1.0 KiB |
Before Width: | Height: | Size: 2.1 KiB |
Before Width: | Height: | Size: 554 B |
Before Width: | Height: | Size: 1.3 KiB |
Before Width: | Height: | Size: 916 B |
Before Width: | Height: | Size: 677 B |
Before Width: | Height: | Size: 2.4 KiB |
Before Width: | Height: | Size: 2.4 KiB |
Before Width: | Height: | Size: 579 B |
Before Width: | Height: | Size: 579 B |
Before Width: | Height: | Size: 590 B |
Before Width: | Height: | Size: 195 B |
Before Width: | Height: | Size: 801 B |
Before Width: | Height: | Size: 309 B |
Before Width: | Height: | Size: 801 B |
Before Width: | Height: | Size: 639 B |
Before Width: | Height: | Size: 190 B |
Before Width: | Height: | Size: 723 B |
Before Width: | Height: | Size: 240 B |
Before Width: | Height: | Size: 1.0 KiB |
Before Width: | Height: | Size: 689 B |
Before Width: | Height: | Size: 2.6 KiB |
Before Width: | Height: | Size: 2.3 KiB |
Before Width: | Height: | Size: 456 B |
Before Width: | Height: | Size: 3.0 KiB |
Before Width: | Height: | Size: 227 B |
Before Width: | Height: | Size: 1.1 KiB |
Before Width: | Height: | Size: 703 B |
Before Width: | Height: | Size: 2.9 KiB |
Before Width: | Height: | Size: 223 B |
Before Width: | Height: | Size: 1.3 KiB |
Before Width: | Height: | Size: 600 B |
BIN
recipes/icons/science_x.png
Normal file
After Width: | Height: | Size: 784 B |
Before Width: | Height: | Size: 306 B |
Before Width: | Height: | Size: 1.1 KiB |
Before Width: | Height: | Size: 4.0 KiB |
Before Width: | Height: | Size: 1.0 KiB |
Before Width: | Height: | Size: 514 B |
Before Width: | Height: | Size: 1.1 KiB |
Before Width: | Height: | Size: 498 B |
Before Width: | Height: | Size: 1.7 KiB |
Before Width: | Height: | Size: 562 B |
Before Width: | Height: | Size: 510 B |
Before Width: | Height: | Size: 2.3 KiB |
Before Width: | Height: | Size: 165 B |
Before Width: | Height: | Size: 1.3 KiB |
Before Width: | Height: | Size: 1.8 KiB |
Before Width: | Height: | Size: 801 B |
Before Width: | Height: | Size: 1.8 KiB |
Before Width: | Height: | Size: 2.0 KiB |
Before Width: | Height: | Size: 1.5 KiB |
Before Width: | Height: | Size: 990 B |
Before Width: | Height: | Size: 1.0 KiB |
Before Width: | Height: | Size: 1.4 KiB |
Before Width: | Height: | Size: 921 B |
Before Width: | Height: | Size: 801 B |
Before Width: | Height: | Size: 245 B |
Before Width: | Height: | Size: 159 B |
Before Width: | Height: | Size: 495 B |
Before Width: | Height: | Size: 1.6 KiB |
Before Width: | Height: | Size: 317 B |
Before Width: | Height: | Size: 350 B |
Before Width: | Height: | Size: 802 B |
BIN
recipes/icons/sputnik.png
Normal file
After Width: | Height: | Size: 316 B |
Before Width: | Height: | Size: 545 B |
Before Width: | Height: | Size: 364 B |
Before Width: | Height: | Size: 1.2 KiB |
Before Width: | Height: | Size: 652 B |
Before Width: | Height: | Size: 2.2 KiB |
Before Width: | Height: | Size: 626 B |
Before Width: | Height: | Size: 120 B |
Before Width: | Height: | Size: 1.1 KiB |
Before Width: | Height: | Size: 267 B |
Before Width: | Height: | Size: 300 B |
Before Width: | Height: | Size: 1.6 KiB |
Before Width: | Height: | Size: 147 B |
Before Width: | Height: | Size: 801 B |
@ -1,56 +0,0 @@
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class RabbleCa(BasicNewsRecipe):
    """Recipe for the Rabble.ca feed; articles are fetched through the site's print pages."""

    # --- identification ---
    __author__ = 'timtoo'
    title = u'Rabble.ca'
    language = 'en_CA'

    # --- artwork ---
    masthead_url = 'http://rabble.ca/sites/rabble/files/dreamyrabble_logo.jpg'
    cover_url = 'https://upload.wikimedia.org/wikipedia/en/4/44/Rabble.png'

    # --- feed selection ---
    max_articles_per_feed = 100
    oldest_article = 7
    feeds = [
        (u'Rabble.ca', u'http://feeds.feedburner.com/rabble-news'),
    ]

    # Replace the span running from the user link through the
    # "to post comments" prompt with a plain "Tags:" label.
    preprocess_regexps = [
        (
            re.compile(
                r'<a href="http://rabble.ca/user">.*?to post comments',
                re.IGNORECASE | re.DOTALL,
            ),
            lambda _found: 'Tags:',
        ),
    ]

    extra_css = """
    .print-taxonomy { display: inline }
    .print-taxonomy ul { display: inline; margin: 0px }
    .print-taxonomy ul li { display: inline; list-style: none }
    .field-type-date div { display: inline }
    .field-type-link div { display: inline }
    .field-type-text div { display: inline }
    .field-label { font-style: italic }
    """

    remove_tags = [
        # print version of the web page
        {'name': 'div', 'attrs': {'class': ['print-logo']}},
        {'name': 'div', 'attrs': {'class': ['print-site_name']}},
        {'name': 'hr', 'attrs': {'class': ['print-hr']}},
        {'name': 'div', 'attrs': {'class': ['print-links']}},

        # regular web page, in case those get downloaded instead
        {'name': 'div', 'attrs': {'id': ['header']}},
        {'name': 'div', 'attrs': {'class': ['container-submenu']}},
        {'name': 'div', 'attrs': {'id': ['sidebar']}},
        {'name': 'div', 'attrs': {'id': ['footer']}},
        {'name': 'div', 'attrs': {'class': ['rabble-nodelinks rabble-nodelinks-top']}},
        {'name': 'div', 'attrs': {'class': ['rabble-nodelinks rabble-nodelinks-bottom']}},
        {'name': 'div', 'attrs': {'class': ['tags-issues']}},
        {'name': 'div', 'attrs': {'class': ['field field-type-text field-field-summary']}},
        {'name': 'span', 'attrs': {'class': ['print-footnote']}},
    ]

    def print_version(self, url):
        """Return ``url`` with the site root replaced by its ``/print/`` path."""
        site_root = 'http://rabble.ca/'
        return url.replace(site_root, site_root + 'print/')
|
@ -1,59 +0,0 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010-2014, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
radikal.com.tr
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Radikal_tr(BasicNewsRecipe):
    """Recipe for radikal.com.tr, built from the site's per-section RSS feeds."""

    title = 'Radikal - Turkey'
    __author__ = 'Darko Miletic'
    description = 'News from Turkey'
    publisher = 'radikal'
    category = 'news, politics, Turkey'
    oldest_article = 2
    max_articles_per_feed = 150
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True
    auto_cleanup = False
    masthead_url = 'http://www.radikal.com.tr/D/i/1/V2/radikal_logo.jpg'
    language = 'tr'
    publication_type = 'newspaper'
    extra_css = """ @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
    body{font-family: 'PT Sans',Arial,Helvetica,sans1,sans-serif}
    """

    # The metadata key is 'comments' (plural), matching the other recipes
    # that pass their description through conversion_options; the former
    # 'comment' key did not correspond to that option.
    conversion_options = {
        'comments': description, 'tags': category, 'publisher': publisher, 'language': language
    }

    # Elements stripped from every article page.
    remove_tags = [
        dict(name=['meta', 'iframe', 'embed', 'object', 'link', 'base']),
        dict(name='div', attrs={
            'class': ['options', 'news_related', 'browserWidth_shareBox']}),
        dict(attrs={'class': ['breadcrumb clearfix', 'box_title']})
    ]

    # Only elements carrying one of these class values are kept.
    keep_only_tags = [
        dict(attrs={'class': ['news-content-header',
                              'news-content-text clearfix',
                              'author-content-text',
                              'news_detail_top',
                              'news_article']})
    ]

    # (section title, RSS url) pairs.
    feeds = [

        (u'Yazarlar', u'http://www.radikal.com.tr/d/rss/RssYazarlar.xml'),
        (u'Türkiye', u'http://www.radikal.com.tr/d/rss/Rss_77.xml'),
        (u'Politika', u'http://www.radikal.com.tr/d/rss/Rss_78.xml'),
        (u'Dünya', u'http://www.radikal.com.tr/d/rss/Rss_81.xml'),
        (u'Ekonomi', u'http://www.radikal.com.tr/d/rss/Rss_80.xml'),
        (u'Radikal 2', u'http://www.radikal.com.tr/d/rss/Rss_42.xml'),
        (u'Radikal Hayat', u'http://www.radikal.com.tr/d/rss/Rss_41.xml'),
        (u'Radikal Kitap', u'http://www.radikal.com.tr/d/rss/Rss_40.xml'),
        (u'Spor', u'http://www.radikal.com.tr/d/rss/Rss_84.xml')
    ]
|
@ -1,45 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AdvancedUserRecipe1291540961(BasicNewsRecipe):
    """Recipe for Radio Praha (radio.cz), Czech-language topic feeds."""

    # --- identification ---
    __author__ = 'Francois Pellicaan'
    title = u'Radio Praha'
    publisher = u'Český rozhlas'
    description = u'Česká oficiální mezinárodní vysílací stanice.'
    category = 'News'
    publication_type = 'newsportal'
    language = 'cs'
    encoding = 'utf8'

    # --- fetch behaviour ---
    max_articles_per_feed = 100
    oldest_article = 7
    remove_empty_feeds = True
    use_embedded_content = False
    no_stylesheets = True

    # One rule per literal; the pieces concatenate to the same single string.
    extra_css = (
        u'h1 .section { display: block; text-transform: uppercase; font-size: 10px; margin-top: 4em; } \n'
        u' .title { font-size: 14px; margin-top: 4em; } \n'
        u' a.photo { display: block; clear:both; } \n'
        u' .caption { font-size: 9px; display: block; clear:both; padding:0px 0px 20px 0px; } \n'
        u' a { font-type: normal; }'
    )  # noqa

    # --- page clean-up ---
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': ['main']}},
    ]
    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['cleaner', 'options', 'toolsXXL']}},
        {'name': 'ul', 'attrs': {'class': ['tools']}},
    ]

    # --- (section title, RSS url) pairs ---
    feeds = [
        (
            u'Domácí politika',
            'http://www.radio.cz/feeds/rss/cs/oblast/dompol.xml',
        ),
        (
            u'Společnost',
            'http://www.radio.cz/feeds/rss/cs/oblast/spolecnost.xml',
        ),
        (
            u'Evropská unie',
            'http://www.radio.cz/feeds/rss/cs/oblast/eu.xml',
        ),
        (
            u'Zahraniční politika',
            'http://www.radio.cz/feeds/rss/cs/oblast/zahrpol.xml',
        ),
        (
            u'Ekonomika',
            'http://www.radio.cz/feeds/rss/cs/oblast/ekonomika.xml',
        ),
        (
            u'Kultura',
            'http://www.radio.cz/feeds/rss/cs/oblast/kultura.xml',
        ),
        (
            u'Krajané',
            'http://www.radio.cz/feeds/rss/cs/oblast/krajane.xml',
        ),
        (
            u'Historie',
            'http://www.radio.cz/feeds/rss/cs/oblast/historie.xml',
        ),
        (
            u'Příroda',
            'http://www.radio.cz/feeds/rss/cs/oblast/priroda.xml',
        ),
        (
            u'Věda',
            'http://www.radio.cz/feeds/rss/cs/oblast/veda.xml',
        ),
        (
            u'Sport',
            'http://www.radio.cz/feeds/rss/cs/oblast/sport.xml',
        ),
        (
            u'Cestování',
            'http://www.radio.cz/feeds/rss/cs/oblast/cestovani.xml',
        ),
    ]
|
@ -1,32 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
# https://manual.calibre-ebook.com/news_recipe.html
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
'''
|
||||
Din avis Randers
|
||||
'''
|
||||
|
||||
|
||||
class RandersLokalavisen_dk(BasicNewsRecipe):
    """Recipe for "Din avis Randers" (dinavis.lokalavisen.dk) RSS sections."""

    # --- identification ---
    title = 'Din avis Randers'
    __author__ = 'CoderAllan.github.com'
    language = 'da'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    description = 'Lokale og regionale nyheder, sport, kultur fra Randers og omegn på dinavis.lokalavisen.dk'

    # --- fetch behaviour ---
    auto_cleanup = True
    max_articles_per_feed = 50
    oldest_article = 7

    # --- (section title, RSS url) pairs ---
    feeds = [
        (
            'Seneste nyt fra Din avis Randers',
            'http://dinavis.lokalavisen.dk/section/senestenytrss',
        ),
        (
            'Seneste lokale nyheder fra Din avis Randers',
            'http://dinavis.lokalavisen.dk/section/senestelokalenyhederrss',
        ),
        (
            'Seneste sport fra Din avis Randers',
            'http://dinavis.lokalavisen.dk/section/senestesportrss',
        ),
        (
            'Seneste 112 nyheder fra Din avis Randers',
            'http://dinavis.lokalavisen.dk/section/seneste112rss',
        ),
        (
            'Seneste kultur nyheder fra Din avis Randers',
            'http://dinavis.lokalavisen.dk/section/senestekulturrss',
        ),
        (
            'Seneste læserbreve fra Din avis Randers',
            'http://dinavis.lokalavisen.dk/section/senestelaeserbreverss',
        ),
    ]
|
||||
|
@ -1,40 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
realitatea.net
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Realitatea(BasicNewsRecipe):
    """Recipe for realitatea.net, fed from a single news RSS channel."""

    # --- identification ---
    __author__ = u'Silviu Cotoar\u0103'
    title = 'Realitatea'
    publisher = 'Realitatea'
    description = u'\u0218tiri din Rom\u00e2nia'
    category = 'Ziare,Stiri,Romania'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://assets.realitatea.ro/images/logo.jpg'

    # --- fetch behaviour ---
    oldest_article = 5
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True

    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # --- page clean-up ---
    keep_only_tags = [
        # NOTE: the trailing space in 'articleTitle ' is part of the original value.
        {'name': 'div', 'attrs': {'class': 'articleTitle '}},
        {'name': 'div', 'attrs': {'class': 'articleBody'}},
    ]
    remove_tags = [
        {'name': 'div', 'attrs': {'id': 'aus'}},
    ]

    feeds = [
        (
            u'\u0218tiri',
            u'http://realitatea.feedsportal.com/c/32533/fe.ed/rss.realitatea.net/stiri.xml',
        ),
    ]

    def preprocess_html(self, soup):
        """Pass the parsed page through ``adeify_images`` and return the result."""
        processed = self.adeify_images(soup)
        return processed
|
@ -1,37 +0,0 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class RebelionRecipe (BasicNewsRecipe):
    """Recipe for Rebelion.org, fed from the front-page headlines RSS."""

    # Thanks to atlantique http://www.mobileread.com/forums/member.php?u=67876
    __author__ = u'Marc Busqué <marc@lamarciana.com>'
    __url__ = 'http://www.lamarciana.com'
    __version__ = '1.0'
    __license__ = 'GPL v3'
    __copyright__ = '2012, Marc Busqué <marc@lamarciana.com>'
    title = u'Rebelion.org'
    description = u'Rebelión pretende ser un medio de información alternativa que publique las noticias que no son consideradas importantes por los medios de comunicación tradicionales. También, dar a las noticias un tratamiento diferente en la línea de mostrar los intereses que los poderes económicos y políticos del mundo capitalista ocultan para mantener sus privilegios y el status actual. Queremos servir y ayudarnos de todos los grupos, colectivos y personas que trabajan por cambiar este mundo en una perspectiva radicalmente diferente, más justa, igualitaria y equilibrada social y ecológicamente. Es nuestro objetivo contar con la participación y colaboración de todos vosotros para que Rebelión sea un espacio serio, riguroso y actualizado en la difusión de noticias.'  # noqa
    url = 'http://www.rebelion.org'
    language = 'es'
    tags = 'contrainformación, información alternativa'
    oldest_article = 1
    remove_empty_feeds = True
    encoding = 'latin1'
    # Only the element with id "CuerpoNoticia" is kept from each page.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': 'CuerpoNoticia'}}
    ]
    no_stylesheets = True
    extra_css = '.autor {font-style: italic;} .titulo {font-size: 150%;} .titulo, .pretitulo {text-align: center;} #TextoNoticia {text-align:justify;} .autor, .fuente, .entradilla {font-size: 90%; text-align: left;}'  # noqa

    feeds = [
        (u'Titulares del día', u'http://www.rebelion.org/rss_portada.php'),
    ]

    # See http://www.mobileread.com/forums/showthread.php?t=174501
    def print_version(self, url):
        """Map a feed URL to the matching ``noticia.php?id=<n>`` page.

        The numeric id is taken from the run of digits at the very end of
        ``url``. When ``url`` does not end in digits there is no id to
        extract, so ``url`` is returned unchanged instead of building a
        link with an empty ``id=`` parameter.
        """
        # '\d+' (not '\d*') so that an empty match at end-of-string is not
        # mistaken for an article id.
        trailing_digits = re.search(r'\d+$', url)
        if trailing_digits is None:
            return url
        article_id = trailing_digits.group()
        return u'http://www.rebelion.org/noticia.php?id=%s' % article_id
|
@ -1,51 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '11 December 2010, desUBIKado'
|
||||
__author__ = 'desUBIKado'
|
||||
__description__ = 'Entertainment guide from Aragon'
|
||||
__version__ = 'v0.01'
|
||||
__date__ = '11, December 2010'
|
||||
'''
|
||||
http://www.redaragon.es/
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class heraldo(BasicNewsRecipe):
    """Recipe for the RedAragon events agenda (one RSS feed per event type)."""

    # --- identification ---
    title = u'RedAragon'
    __author__ = 'desUBIKado'
    publisher = 'Grupo Z'
    description = u'Guia de ocio desde Aragon'
    category = 'Concerts, Movies, Entertainment news'
    language = 'es'
    encoding = 'iso-8859-1'
    cover_url = 'http://www.redaragon.com/2008_img/logotipo.gif'
    timefmt = '[%a, %d %b, %Y]'

    # --- fetch behaviour ---
    max_articles_per_feed = 100
    oldest_article = 15
    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = False

    # --- (event type, RSS url) pairs ---
    feeds = [
        (u'Conciertos', u'http://redaragon.com/rss/agenda.asp?tid=1'),
        (u'Exposiciones', u'http://redaragon.com/rss/agenda.asp?tid=5'),
        (u'Teatro', u'http://redaragon.com/rss/agenda.asp?tid=10'),
        (u'Conferencias', u'http://redaragon.com/rss/agenda.asp?tid=2'),
        (u'Ferias', u'http://redaragon.com/rss/agenda.asp?tid=6'),
        (u'Filmotecas/Cineclubs', u'http://redaragon.com/rss/agenda.asp?tid=7'),
        (u'Presentaciones', u'http://redaragon.com/rss/agenda.asp?tid=9'),
        (u'Fiestas', u'http://redaragon.com/rss/agenda.asp?tid=11'),
        (u'Infantil', u'http://redaragon.com/rss/agenda.asp?tid=13'),
        (u'Otros', u'http://redaragon.com/rss/agenda.asp?tid=8'),
    ]

    # --- page clean-up ---
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': 'FichaEventoAgenda'}},
    ]
    remove_tags = [
        {
            'name': 'div',
            'attrs': {'class': ['Comparte', 'CajaAgenda', 'Caja', 'Cintillo']},
        },
    ]
    remove_tags_before = {'name': 'div', 'attrs': {'id': 'FichaEventoAgenda'}}
    remove_tags_after = {'name': 'div', 'attrs': {'class': 'Cintillo'}}
|
@ -1,50 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, '
|
||||
'''
|
||||
replicavedetelor.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class ReplicaVedetelor(BasicNewsRecipe):
    """Recipe for replicavedetelor.ro, fed from the site's single RSS feed."""

    # --- identification ---
    __author__ = u'Silviu Cotoara'
    title = u'Replica Vedetelor'
    publisher = 'Replica Vedetelor'
    description = u'Ofer\u0103 vedetelor dreptul la replic\u0103'
    category = 'Ziare,Reviste,Vedete'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://www.webart-software.eu/_pics/lucrari_referinta/medium/84/1-Replica-Vedetelor.jpg'

    # --- fetch behaviour ---
    oldest_article = 5
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True

    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # --- page clean-up ---
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': 'zona-continut'}},
    ]
    remove_tags = [
        {'name': 'ul', 'attrs': {'id': ['lista-imagini']}},
        {'name': 'form', 'attrs': {'id': ['f-trimite-unui-prieten']}},
    ]
    remove_tags_after = [
        {'name': 'form', 'attrs': {'id': ['f-trimite-unui-prieten']}},
    ]

    feeds = [
        (u'Feeds', u'http://www.replicavedetelor.ro/feed'),
    ]

    def preprocess_html(self, soup):
        """Pass the parsed page through ``adeify_images`` and return the result."""
        processed = self.adeify_images(soup)
        return processed
|
@ -1,21 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AdvancedUserRecipe1316862613(BasicNewsRecipe):
    """Recipe for Republica (myrepublica.com), one RSS feed per news category."""

    # --- identification ---
    __author__ = 'Manish Bhattarai'
    title = u'Republica'
    description = 'News from the Republica'
    language = 'en_NP'

    # --- artwork ---
    cover_url = 'http://www.myrepublica.com/repub_front.jpg'
    masthead_url = 'http://blog.nyayahealth.org/wp-content/uploads/2011/03/myrepublica1.gif'

    # --- fetch behaviour ---
    auto_cleanup = True
    max_articles_per_feed = 100
    oldest_article = 1

    # --- (category title, RSS url) pairs ---
    feeds = [
        (
            u'Political Affairs',
            u'http://www.myrepublica.com/portal/news_rss.php?news_category_id=14',
        ),
        (
            u'Business & Economy',
            u'http://www.myrepublica.com/portal/news_rss.php?news_category_id=15',
        ),
        (
            u'International',
            u'http://www.myrepublica.com/portal/news_rss.php?news_category_id=21',
        ),
        (
            u'Social Issues',
            u'http://www.myrepublica.com/portal/news_rss.php?news_category_id=16',
        ),
        (
            u'Sports',
            u'http://www.myrepublica.com/portal/news_rss.php?news_category_id=18',
        ),
        (
            u'Lifestyle',
            u'http://www.myrepublica.com/portal/news_rss.php?news_category_id=17',
        ),
    ]
|
@ -1,70 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
|
||||
'''
|
||||
republika.co.yu
|
||||
'''
|
||||
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Republika(BasicNewsRecipe):
    """Recipe for republika.co.yu; the article list is scraped from the index page."""

    title = 'Republika'
    __author__ = 'Darko Miletic'
    description = 'Glasilo gradjanskog samooslobadjanja. Protiv stihije straha, mrznje i nasilja'
    publisher = ' Zadruga Res Publica'
    category = 'news, politics, Serbia'
    language = 'sr'

    # Language tag handed to the conversion options (distinct from `language`).
    lang = 'sr-Latn-RS'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'cp1250'
    use_embedded_content = False
    INDEX = u'http://www.republika.co.yu/'
    extra_css = ' @font-face {font-family: "serif1"; src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} .naslov{font-size: x-large; font-weight: bold} .autor{font-size: small; font-weight: bold} '  # noqa

    # The metadata key is 'comments' (plural), matching the other recipes
    # that pass their description through conversion_options; the former
    # 'comment' key did not correspond to that option.
    conversion_options = {
        'comments': description, 'tags': category, 'publisher': publisher, 'language': lang, 'pretty_print': True
    }

    # Swap U+0110 for U+00D0 in the raw page text.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    keep_only_tags = [dict(attrs={'class': 'naslov'}), dict(attrs={'class': 'text1'})
                      ]

    remove_tags = [dict(name=['object', 'link', 'iframe', 'base', 'img'])]

    # The "feed" is the site index itself; parse_index() scrapes it.
    feeds = [(u'Svi clanci', INDEX)]

    def preprocess_html(self, soup):
        """Turn every table-related element into a bare ``div`` and return ``soup``.

        Each matching tag is renamed to ``div`` and has its presentational
        attributes removed.
        """
        attribs = ['style', 'font', 'valign', 'colspan', 'width', 'height', 'rowspan', 'summary', 'align', 'cellspacing', 'cellpadding', 'frames', 'rules', 'border']  # noqa
        for item in soup.body.findAll(name=['table', 'td', 'tr', 'th', 'caption', 'thead', 'tfoot', 'tbody', 'colgroup', 'col']):
            item.name = 'div'
            for attrib in attribs:
                # Assign before deleting -- presumably so the delete cannot
                # fail on tags that never had the attribute.
                item[attrib] = ''
                del item[attrib]
        return soup

    def parse_index(self):
        """Build the article list by scraping each feed URL for ``a.naslovLink`` links.

        Returns a list of ``(feed title, articles)`` tuples, where each
        article is a dict with ``title``, ``date``, ``url`` and
        ``description`` keys (``date`` and ``description`` are left empty).
        """
        totalfeeds = []
        lfeeds = self.get_feeds()
        for feedobj in lfeeds:
            feedtitle, feedurl = feedobj
            # Fall back to the URL in the progress message when the title is empty.
            self.report_progress(0, _('Fetching feed') + ' %s...' %
                                 (feedtitle if feedtitle else feedurl))
            articles = []
            soup = self.index_to_soup(feedurl)
            for item in soup.findAll('a', attrs={'class': 'naslovLink'}):
                url = item['href']
                title = self.tag_to_string(item)
                articles.append({
                    'title': title, 'date': '', 'url': url, 'description': ''
                })
            totalfeeds.append((feedtitle, articles))
        return totalfeeds
|
@ -1,22 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class ReutersJa(BasicNewsRecipe):
    """Recipe for the Japanese-language Reuters RSS feeds."""

    # --- identification ---
    __author__ = 'Hiroshi Miura'
    title = 'Reuters(Japan)'
    description = 'Global news in Japanese'
    language = 'ja'

    # --- fetch behaviour ---
    max_articles_per_feed = 10
    use_embedded_content = False
    auto_cleanup = True
    remove_javascript = True

    # --- (section title, RSS url) pairs ---
    feeds = [
        (
            'Top Stories',
            'http://feeds.reuters.com/reuters/JPTopNews?format=xml',
        ),
        (
            'World News',
            'http://feeds.reuters.com/reuters/JPWorldNews?format=xml',
        ),
        (
            'Business News',
            'http://feeds.reuters.com/reuters/JPBusinessNews?format=xml',
        ),
        (
            'Technology News',
            'http://feeds.reuters.com/reuters/JPTechnologyNews?format=xml',
        ),
        (
            'Oddly Enough News',
            'http://feeds.reuters.com/reuters/JPOddlyEnoughNews?format=xml',
        ),
    ]
|
@ -1,57 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
|
||||
'''
|
||||
http://www.revistabla.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Noticias(BasicNewsRecipe):
    """Recipe for Revista Bla (revistabla.com); the cover is scraped from the home page."""

    title = 'Revista Bla'
    __author__ = 'Gustavo Azambuja'
    description = 'Moda | Uruguay'
    language = 'es_UY'
    timefmt = '[%a, %d %b, %Y]'
    use_embedded_content = False
    recursion = 5
    encoding = 'utf8'
    remove_javascript = True
    no_stylesheets = True

    oldest_article = 20
    max_articles_per_feed = 100
    # Only the element with id "body_container" is kept from each page.
    keep_only_tags = [dict(id=['body_container'])]
    remove_tags = [
        dict(name='div', attrs={
            'class': ['date_text', 'comments', 'form_section', 'share_it']}),
        dict(name='div', attrs={
            'id': ['relatedPosts', 'spacer', 'banner_izquierda', 'right_container']}),
        dict(name='p', attrs={'class': 'FacebookLikeButton'}),
        dict(name=['object', 'link'])]

    extra_css = '''
    h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
    h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
    h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
    p {font-family:Arial,Helvetica,sans-serif;}
    '''
    feeds = [
        (u'Articulos', u'http://www.revistabla.com/feed/')
    ]

    def get_cover_url(self):
        """Return the ``src`` of the image inside the home page's ``div.header_right``.

        Returns ``None`` when the container, its ``<img>`` child, or the
        image's ``src`` attribute is missing, rather than raising.
        """
        index = 'http://www.revistabla.com'
        soup = self.index_to_soup(index)
        link_item = soup.find('div', attrs={'class': 'header_right'})
        if link_item is None:
            return None
        image = link_item.img
        if image is None:
            # The container exists but holds no image: no cover available.
            return None
        # .get() yields None for an <img> without a src instead of a KeyError.
        return image.get('src')

    def preprocess_html(self, soup):
        """Drop the inline ``style`` attribute from every tag that has one and return ``soup``."""
        for item in soup.findAll(style=True):
            del item['style']
        return soup
|