Merge from trunk

This commit is contained in:
Charles Haley
2011-10-02 08:09:01 +02:00
199 changed files with 147109 additions and 143638 deletions
+91
View File
@@ -19,6 +19,97 @@
# new recipes:
# - title:
- version: 0.8.21
date: 2011-09-30
new features:
- title: "A Tips and Tricks blog at http://blog.calibre-ebook.com to introduce less well known calibre features in a simple way"
- title: "News download: Add list of articles in the downloaded issue to the comments metadata of the generated ebook. Makes it possible to search for a particular article in the calibre library."
ticket: [851717]
- title: "Toolbar buttons: You can now also right click the buttons to bring the popup of extra actions, in addition to clicking the small arrow next to the button."
- title: "Amazon metadata download plugin: Add option to download metadata from amazon.es"
- title: Driver for Vizio and iRobot A9 Android tablets
tickets: [854408,862175]
- title: "When switching to/starting with a library with a corrupted database, offer the user the option of rebuilding the database instead of erroring out."
- title: "Template language: Add list_equals function"
- title: "Add a special output profile for the PocketBook 900 as it does not resize images correctly by itself"
bug fixes:
- title: "Fix regression that cause PDF Output to generate very large files"
- title: Fix Title Sort field not being displayed in Book details panel
- title: Prevent renaming of languages in the Tag browser
tickets: [860943]
- title: "Get books: Fix getting price from Foyles"
- title: "Content server: When a search matches no queries, do not show an error message"
- title: "ODT Input: Add workaround for ADE to fix centering of block level images when converting to EPUB"
tickets: [859343]
- title: "Content server: When WSGI embedding fix handling of empty URL"
- title: "RTF Input: Fix spurious spaces inserted after some unicode characters"
tickets: [851215]
- title: "Fix regression that broke clicking on the first letter of author names in the Tag Browser when grouped"
tickets: [860615]
- title: "Fix reading metadata from filenames when the author regexp does not match anything"
- title: "Fix incorrect display of the month September in Finnish calibre"
tickets: [858737]
- title: "Do not delete the file when the user tries to add a format to a book from a file already in the books directory"
tickets: [856158]
- title: "Fix regression that broke customization of Kobo device plugin"
- title: "Allow user defined templates to be used in save to disk"
improved recipes:
- Read It Later
- American Spectator
- Sydney Morning Herald
- Chicago Tribune
- American Prospect
- DNA India
- Times of India
- Kurier
- xkcd
- Cnet
new recipes:
- title: Various Colombian news sources
author: BIGO-CAVA
- title: Gosc Niedzielny
author: Piotr Kontek
- title: Leipzer Volkszeitung
author: a.peter
- title: Folha de Sao Paulo (full edition)
author: fluzao
- title: Den of Geek
author: Jaded
- title: Republica
author: Manish Bhattarai
- title: Sign on San Diego
author: Jay Kindle
- version: 0.8.20
date: 2011-09-23
+7 -16
View File
@@ -18,25 +18,16 @@ class TheAmericanSpectator(BasicNewsRecipe):
use_embedded_content = False
language = 'en'
INDEX = 'http://spectator.org'
conversion_options = {
auto_cleanup = True
encoding = 'utf-8'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='div', attrs={'class':'post inner'})
,dict(name='div', attrs={'class':'author-bio'})
]
remove_tags = [
dict(name='object')
,dict(name='div', attrs={'class':['col3','post-options','social']})
,dict(name='p' , attrs={'class':['letter-editor','meta']})
]
feeds = [ (u'Articles', u'http://feeds.feedburner.com/amspecarticles')]
def get_cover_url(self):
@@ -48,10 +39,10 @@ class TheAmericanSpectator(BasicNewsRecipe):
link_item2 = soup2.find('div',attrs={'class':'post inner issues'})
cover_url = self.INDEX + link_item2.img['src']
return cover_url
def print_version(self, url):
return url + '/print'
def get_article_url(self, article):
return article.get('guid', None)
+11 -19
View File
@@ -1,26 +1,18 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class AmericanProspect(BasicNewsRecipe):
title = u'American Prospect'
__author__ = u'Michael Heinz'
oldest_article = 30
language = 'en'
max_articles_per_feed = 100
recursions = 0
no_stylesheets = True
remove_javascript = True
__author__ = u'Michael Heinz, a.peter'
version = 2
preprocess_regexps = [
(re.compile(r'<body.*?<div class="pad_10L10R">', re.DOTALL|re.IGNORECASE), lambda match: '<body><div>'),
(re.compile(r'</div>.*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</div></body>'),
(re.compile('\r'),lambda match: ''),
(re.compile(r'<!-- .+? -->', re.DOTALL|re.IGNORECASE), lambda match: ''),
(re.compile(r'<link .+?>', re.DOTALL|re.IGNORECASE), lambda match: ''),
(re.compile(r'<script.*?</script>', re.DOTALL|re.IGNORECASE), lambda match: ''),
(re.compile(r'<noscript.*?</noscript>', re.DOTALL|re.IGNORECASE), lambda match: ''),
(re.compile(r'<meta .*?/>', re.DOTALL|re.IGNORECASE), lambda match: ''),
]
oldest_article = 30
language = 'en'
max_articles_per_feed = 100
recursions = 0
no_stylesheets = True
remove_javascript = True
keep_only_tags = [dict(name='div', attrs={'class':'pad_10L10R'})]
remove_tags = [dict(name='form'), dict(name='div', attrs={'class':['bkt_caption','sharebox noprint','badgebox']})]
feeds = [(u'Articles', u'feed://www.prospect.org/articles_rss.jsp')]
+18 -11
View File
@@ -8,21 +8,25 @@ from calibre.web.feeds.news import BasicNewsRecipe
class ChicagoTribune(BasicNewsRecipe):
title = 'Chicago Tribune'
__author__ = 'Kovid Goyal and Sujata Raman'
__author__ = 'Kovid Goyal and Sujata Raman, a.peter'
description = 'Politics, local and business news from Chicago'
language = 'en'
language = 'en'
version = 2
use_embedded_content = False
no_stylesheets = True
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
remove_javascript = True
recursions = 1
keep_only_tags = [dict(name='div', attrs={'class':["story","entry-asset asset hentry"]}),
dict(name='div', attrs={'id':["pagebody","story","maincontentcontainer"]}),
]
remove_tags_after = [ {'class':['photo_article',]} ]
remove_tags_after = [{'class':['photo_article',]}]
remove_tags = [{'id':["moduleArticleTools","content-bottom","rail","articleRelates module","toolSet","relatedrailcontent","div-wrapper","beta","atp-comments","footer"]},
{'class':["clearfix","relatedTitle","articleRelates module","asset-footer","tools","comments","featurePromo","featurePromo fp-topjobs brownBackground","clearfix fullSpan brownBackground","curvedContent"]},
match_regexps = [r'page=[0-9]+']
remove_tags = [{'id':["moduleArticleTools","content-bottom","rail","articleRelates module","toolSet","relatedrailcontent","div-wrapper","beta","atp-comments","footer",'gallery-subcontent','subFooter']},
{'class':["clearfix","relatedTitle","articleRelates module","asset-footer","tools","comments","featurePromo","featurePromo fp-topjobs brownBackground","clearfix fullSpan brownBackground","curvedContent",'nextgen-share-tools','outbrainTools', 'google-ad-story-bottom']},
dict(name='font',attrs={'id':["cr-other-headlines"]})]
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
@@ -37,7 +41,7 @@ class ChicagoTribune(BasicNewsRecipe):
.maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
'''
feeds = [
('Latest news', 'http://feeds.chicagotribune.com/chicagotribune/news/'),
('Local news', 'http://feeds.chicagotribune.com/chicagotribune/news/local/'),
@@ -76,8 +80,12 @@ class ChicagoTribune(BasicNewsRecipe):
print article.get('feedburner_origlink', article.get('guid', article.get('link')))
return article.get('feedburner_origlink', article.get('guid', article.get('link')))
def postprocess_html(self, soup, first_fetch):
# Remove the navigation bar. It was kept until now to be able to follow
# the links to further pages. But now we don't need them anymore.
for nav in soup.findAll(attrs={'class':['toppaginate','article-nav clearfix']}):
nav.extract()
for t in soup.findAll(['table', 'tr', 'td']):
t.name = 'div'
@@ -88,4 +96,3 @@ class ChicagoTribune(BasicNewsRecipe):
return soup
+11
View File
@@ -0,0 +1,11 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1317341449(BasicNewsRecipe):
title = u'Diario La Republica'
__author__ = 'CAVALENCIA'
oldest_article = 7
max_articles_per_feed = 100
auto_cleanup = True
language = 'es_CO'
feeds = [(u'Diario La Republica', u'http://www.larepublica.com.co/rss/larepublica.xml')]
+1 -3
View File
@@ -2,12 +2,10 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1311790237(BasicNewsRecipe):
title = u'Periódico El Colombiano'
language = 'es_CO'
__author__ = 'BIGO-CAVA'
language = 'es_CO'
cover_url = 'http://www.elcolombiano.com/images/logoElColombiano348x46.gif'
remove_tags_before = dict(id='contenidoArt')
remove_tags_after = dict(id='enviaTips')
+54
View File
@@ -0,0 +1,54 @@
# coding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe
class ColombiaElEspectador(BasicNewsRecipe):
title = u'Periódico el Espectador'
__author__ = 'BIGO-CAVA'
cover_url = 'http://www.elespectador.com/sites/elespectador.com/themes/elespectador/images/logo.gif'
#remove_tags_before = dict(id='fb-root')
remove_tags_before = dict(id='content')
remove_tags_after = [dict(name='div', attrs={'class':'paginacion'})]
language = 'es_CO'
#keep_only_tags = [dict(name='div', id='content')]
remove_tags = [dict(name='div', attrs={'class':'herramientas_nota'}),
dict(name='div', attrs={'class':'relpauta'}),
dict(name='div', attrs={'class':'recursosrelacionados'}),
dict(name='div', attrs={'class':'nav_negocios'})]
# dict(name='div', attrs={'class':'tags_playerrecurso'}),
# dict(name='div', attrs={'class':'ico-mail2'}),
# dict(name='div', attrs={'id':'caja-instapaper'}),
# dict(name='div', attrs={'class':'modulo herramientas'})]
oldest_article = 2
max_articles_per_feed = 100
remove_javascript = True
no_stylesheets = True
use_embedded_content = False
remove_empty_feeds = True
masthead_url = 'http://www.elespectador.com/sites/elespectador.com/themes/elespectador/images/logo.gif'
publication_type = 'newspaper'
extra_css = """
p{text-align: justify; font-size: 100%}
body{ text-align: left; font-size:100% }
h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; }
"""
feeds = [(u'Política ', u' http://www.elespectador.com/noticias/politica/feed'),
(u'Judicial', u'http://www.elespectador.com/noticias/judicial/feed'),
(u'Paz', u'http://www.elespectador.com/noticias/paz/feed'),
(u'Economía', u'http://www.elespectador.com/economia/feed'),
(u'Soy Periodista', u'http://www.elespectador.com/noticias/soyperiodista/feed'),
(u'Investigación', u'http://www.elespectador.com/noticias/investigacion/feed'),
(u'Educación', u'http://www.elespectador.com/noticias/educacion/feed'),
(u'Salud', u'http://www.elespectador.com/noticias/salud/feed'),
(u'El Mundo', u'http://www.elespectador.com/noticias/elmundo/feed'),
(u'Nacional', u'http://www.elespectador.com/noticias/nacional/feed'),
(u'Bogotá', u'http://www.elespectador.com/noticias/bogota/feed'),
(u'Deportes', u'http://www.elespectador.com/deportes/feed'),
(u'Tecnología', u'http://www.elespectador.com/tecnologia/feed'),
(u'Actualidad', u'http://www.elespectador.com/noticias/actualidad/feed'),
(u'Opinión', u'http://www.elespectador.com/opinion/feed'),
(u'Editorial', u'http://www.elespectador.com/opinion/editorial/feed')]
+50
View File
@@ -0,0 +1,50 @@
from calibre.web.feeds.news import BasicNewsRecipe
class ColombiaElMundo02(BasicNewsRecipe):
title = u'Periódico El Mundo'
__author__ = 'BIGO-CAVA'
language = 'es_CO'
cover_url = 'http://www.elmundo.com/portal/img/logo_mundo2.png'
remove_tags_before = dict(id='miga_pan')
#remove_tags_before = [dict(name='div', attrs={'class':'contenido'})]
remove_tags_after = [dict(name='div', attrs={'class':'cuadro_opciones_new1'})]
#keep_only_tags = [dict(name='div', id='miga_pan')]
remove_tags = [dict(name='div', attrs={'class':'ruta'}),
dict(name='div', attrs={'class':'buscador'}),
dict(name='div', attrs={'class':'iconos'}),
dict(name='div', attrs={'class':'otros_iconos'}),
dict(name='div', attrs={'class':'cuadro_opciones_new1'}),
dict(name='div', attrs={'class':'otras_noticias'}),
dict(name='div', attrs={'class':'notas_relacionadas'}),
dict(name='div', attrs={'id':'lateral_2'})]
oldest_article = 2
max_articles_per_feed = 100
remove_javascript = True
no_stylesheets = True
use_embedded_content = False
remove_empty_feeds = True
masthead_url = 'http://www.elmundo.com/portal/img/logo_mundo2.png'
publication_type = 'newspaper'
extra_css = """
p{text-align: justify; font-size: 100%}
body{ text-align: left; font-size:100% }
h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; }
"""
feeds = [(u'Opinión', u'http://www.elmundo.com/images/rss/opinion.xml'),
(u'Economía', u'http://www.elmundo.com/images/rss/noticias_economia.xml'),
(u'Deportes', u'http://www.elmundo.com/images/rss/deportes.xml'),
(u'Política ', u'http://www.elmundo.com/images/rss/noticias_politica.xml'),
(u'Antioquia', u'http://www.elmundo.com/images/rss/noticias_antioquia.xml'),
(u'Nacional ', u'http://www.elmundo.com/images/rss/noticias_nacional.xml'),
(u'Internacional', u'http://www.elmundo.com/images/rss/noticias_internacional.xml'),
(u'Servicios Públicos', u'http://www.elmundo.com/images/rss/noticias_servicios_publicos.xml'),
(u'Infraestructura', u'http://www.elmundo.com/images/rss/noticias_infraestructura.xml'),
(u'Mobilidad', u'http://www.elmundo.com/images/rss/noticias_movilidad.xml'),
(u'Derechos Humanos', u'http://www.elmundo.com/images/rss/noticias_derechos_humanos.xml'),
(u'Vida', u'http://www.elmundo.com/images/rss/vida.xml'),
(u'Cultura', u'http://www.elmundo.com/images/rss/cultura.xml')]
+4 -5
View File
@@ -2,18 +2,17 @@
from calibre.web.feeds.news import BasicNewsRecipe
class ColombiaElTiempo02(BasicNewsRecipe):
title = u'Periódico el Tiempo'
language = 'es_CO'
__author__ = 'BIGO-CAVA'
language = 'es_CO'
cover_url = 'http://www.eltiempo.com/media/css/images/logo_footer.png'
remove_tags_before = dict(id='fb-root')
#remove_tags_before = dict(id='fb-root')
remove_tags_before = dict(id='contenidoArt')
remove_tags_after = [dict(name='div', attrs={'class':'modulo reporte'})]
keep_only_tags = [dict(name='div', id='contenidoArt')]
remove_tags = [dict(name='div', attrs={'class':'social-media'}),
dict(name='div', attrs={'class':'recomend-art'}),
dict(name='div', attrs={'class':'caja-facebook'}),
dict(name='div', attrs={'class':'caja-twitter'}),
dict(name='div', attrs={'class':'caja-buzz'}),
+96
View File
@@ -0,0 +1,96 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class FSP(BasicNewsRecipe):
title = u'Folha de S\xE3o Paulo'
__author__ = 'fluzao'
description = u'Printed edition contents. UOL subscription required (Folha subscription currently not supported).' + \
u' [Conte\xfado completo da edi\xe7\xe3o impressa. Somente para assinantes UOL.]'
INDEX = 'http://www1.folha.uol.com.br/fsp/indices/'
language = 'pt'
no_stylesheets = True
max_articles_per_feed = 40
remove_javascript = True
needs_subscription = True
remove_tags_before = dict(name='b')
remove_tags = [dict(name='td', attrs={'align':'center'})]
remove_attributes = ['height','width']
masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif'
# fixes the problem with the section names
section_dict = {'cotidian' : 'cotidiano', 'ilustrad': 'ilustrada', \
'quadrin': 'quadrinhos' , 'opiniao' : u'opini\xE3o', \
'ciencia' : u'ci\xeancia' , 'saude' : u'sa\xfade', \
'ribeirao' : u'ribeir\xE3o' , 'equilibrio' : u'equil\xedbrio'}
# this solves the problem with truncated content in Kindle
conversion_options = {'linearize_tables' : True}
# this bit removes the footer where there are links for Proximo Texto, Texto Anterior,
# Indice e Comunicar Erros
preprocess_regexps = [(re.compile(r'<BR><BR>Texto Anterior:.*<!--/NOTICIA-->',
re.DOTALL|re.IGNORECASE), lambda match: r''),
(re.compile(r'<BR><BR>Pr&oacute;ximo Texto:.*<!--/NOTICIA-->',
re.DOTALL|re.IGNORECASE), lambda match: r'')]
def get_browser(self):
br = BasicNewsRecipe.get_browser()
if self.username is not None and self.password is not None:
br.open('https://acesso.uol.com.br/login.html')
br.form = br.forms().next()
br['user'] = self.username
br['pass'] = self.password
br.submit().read()
## if 'Please try again' in raw:
## raise Exception('Your username and password are incorrect')
return br
def parse_index(self):
soup = self.index_to_soup(self.INDEX)
feeds = []
articles = []
section_title = "Preambulo"
for post in soup.findAll('a'):
# if name=True => new section
strpost = str(post)
if strpost.startswith('<a name'):
if articles:
feeds.append((section_title, articles))
self.log()
self.log('--> new section found, creating old section feed: ', section_title)
section_title = post['name']
if section_title in self.section_dict:
section_title = self.section_dict[section_title]
articles = []
self.log('--> new section title: ', section_title)
if strpost.startswith('<a href'):
url = post['href']
if url.startswith('/fsp'):
url = 'http://www1.folha.uol.com.br'+url
title = self.tag_to_string(post)
self.log()
self.log('--> post: ', post)
self.log('--> url: ', url)
self.log('--> title: ', title)
articles.append({'title':title, 'url':url})
if articles:
feeds.append((section_title, articles))
# keeping the front page url
minha_capa = feeds[0][1][1]['url']
# removing the 'Preambulo' section
del feeds[0]
# creating the url for the cover image
coverurl = feeds[0][1][0]['url']
coverurl = coverurl.replace('/opiniao/fz', '/images/cp')
coverurl = coverurl.replace('01.htm', '.jpg')
self.cover_url = coverurl
# inserting the cover page as the first article (nicer for kindle users)
feeds.insert(0,(u'primeira p\xe1gina', [{'title':u'Primeira p\xe1gina' , 'url':minha_capa}]))
return feeds
+112
View File
@@ -0,0 +1,112 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2011, Piotr Kontek, piotr.kontek@gmail.com'
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile
import re
class GN(BasicNewsRecipe):
EDITION = 0
__author__ = 'Piotr Kontek'
title = u'Gość niedzielny'
description = 'Weekly magazine'
encoding = 'utf-8'
no_stylesheets = True
language = 'pl'
remove_javascript = True
temp_files = []
articles_are_obfuscated = True
def get_obfuscated_article(self, url):
br = self.get_browser()
br.open(url)
source = br.response().read()
page = self.index_to_soup(source)
main_section = page.find('div',attrs={'class':'txt doc_prnt_prv'})
title = main_section.find('h2')
info = main_section.find('div', attrs={'class' : 'cf doc_info'})
authors = info.find(attrs={'class':'l'})
article = str(main_section.find('p', attrs={'class' : 'doc_lead'}))
first = True
for p in main_section.findAll('p', attrs={'class':None}, recursive=False):
if first and p.find('img') != None:
article = article + '<p>'
article = article + str(p.find('img')).replace('src="/files/','src="http://www.gosc.pl/files/')
article = article + '<font size="-2">'
for s in p.findAll('span'):
article = article + self.tag_to_string(s)
article = article + '</font></p>'
else:
article = article + str(p).replace('src="/files/','src="http://www.gosc.pl/files/')
first = False
html = unicode(title) + unicode(authors) + unicode(article)
self.temp_files.append(PersistentTemporaryFile('_temparse.html'))
self.temp_files[-1].write(html)
self.temp_files[-1].close()
return self.temp_files[-1].name
def find_last_issue(self):
soup = self.index_to_soup('http://gosc.pl/wyszukaj/wydania/3.Gosc-Niedzielny')
#szukam zdjęcia i linka do porzedniego pełnego numeru
first = True
for d in soup.findAll('div', attrs={'class':'l release_preview_l'}):
img = d.find('img')
if img != None:
a = img.parent
self.EDITION = a['href']
self.title = img['alt']
self.cover_url = 'http://www.gosc.pl' + img['src']
if not first:
break
first = False
def parse_index(self):
self.find_last_issue()
soup = self.index_to_soup('http://www.gosc.pl' + self.EDITION)
feeds = []
#wstepniak
a = soup.find('div',attrs={'class':'release-wp-b'}).find('a')
articles = [
{'title' : self.tag_to_string(a),
'url' : 'http://www.gosc.pl' + a['href'].replace('/doc/','/doc_pr/'),
'date' : '',
'description' : ''}
]
feeds.append((u'Wstępniak',articles))
#kategorie
for addr in soup.findAll('a',attrs={'href':re.compile('kategoria')}):
if addr.string != u'wszystkie artyku\u0142y z tej kategorii \xbb':
main_block = self.index_to_soup('http://www.gosc.pl' + addr['href'])
articles = list(self.find_articles(main_block))
if len(articles) > 0:
section = addr.string
feeds.append((section, articles))
return feeds
def find_articles(self, main_block):
for a in main_block.findAll('div', attrs={'class':'prev_doc2'}):
art = a.find('a')
yield {
'title' : self.tag_to_string(art),
'url' : 'http://www.gosc.pl' + art['href'].replace('/doc/','/doc_pr/'),
'date' : '',
'description' : ''
}
for a in main_block.findAll('div', attrs={'class':'sr-document'}):
art = a.find('a')
yield {
'title' : self.tag_to_string(art),
'url' : 'http://www.gosc.pl' + art['href'].replace('/doc/','/doc_pr/'),
'date' : '',
'description' : ''
}
+34
View File
@@ -0,0 +1,34 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
'''Calibre recipe to convert the RSS feeds of the Leipziger Volkszeitung to an ebook.'''
class SportsIllustratedRecipe(BasicNewsRecipe) :
__author__ = 'a.peter'
__copyright__ = 'a.peter'
__license__ = 'GPL v3'
language = 'de'
description = 'Leipziger Volkszeitung Online RSS'
version = 1
title = u'Leipziger Volkszeitung Online RSS'
timefmt = ' [%d.%m.%Y]'
no_stylesheets = True
remove_javascript = True
use_embedded_content = False
publication_type = 'newspaper'
keep_only_tags = [dict(name='div', attrs={'class':'article'})]
remove_tags = [dict(name='div', attrs={'class':['ARTICLE_MORE', 'clearfloat']})]
feeds = [(u'Leipzig', u'http://nachrichten.lvz-online.de/rss/leipzig-rss.xml'),
(u'Mitteldeutschland', u'http://nachrichten.lvz-online.de/rss/mitteldeutschland-rss.xml'),
(u'Brennpunkte', u'http://nachrichten.lvz-online.de/rss/brennpunkte-rss.xml'),
(u'Polizeiticker', u'http://nachrichten.lvz-online.de/rss/polizeiticker-rss.xml'),
(u'Boulevard', u'http://nachrichten.lvz-online.de/rss/boulevard-rss.xml'),
(u'Kultur', u'http://nachrichten.lvz-online.de/rss/kultur-rss.xml'),
(u'Sport', u'http://nachrichten.lvz-online.de/rss/sport-rss.xml'),
(u'Regionalsport', u'http://nachrichten.lvz-online.de/rss/regionalsport-rss.xml'),
(u'Knipser', u'http://nachrichten.lvz-online.de/rss/knipser-rss.xml')]
def get_masthead_url(self):
return 'http://www.lvz-online.de/resources/themes/standard/images/global/logo.gif'
+2 -2
View File
@@ -4,13 +4,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1311799898(BasicNewsRecipe):
title = u'Periódico Portafolio Colombia'
language = 'es_CO'
__author__ = 'BIGO-CAVA'
language = 'es_CO'
cover_url = 'http://www.portafolio.co/sites/portafolio.co/themes/portafolio_2011/logo.png'
remove_tags_before = dict(id='contenidoArt')
remove_tags_after = [dict(name='div', attrs={'class':'articulo-mas'})]
keep_only_tags = [dict(name='div', id='contenidoArt')]
oldest_article = 1
oldest_article = 2
max_articles_per_feed = 100
remove_javascript = True
no_stylesheets = True
+25 -6
View File
@@ -1,5 +1,8 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '''
2010, Darko Miletic <darko.miletic at gmail.com>
2011, Przemyslaw Kryger <pkryger at gmail.com>
'''
'''
readitlaterlist.com
'''
@@ -9,7 +12,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Readitlater(BasicNewsRecipe):
title = 'Read It Later'
__author__ = 'Darko Miletic'
__author__ = 'Darko Miletic, Przemyslaw Kryger'
description = '''Personalized news feeds. Go to readitlaterlist.com to
setup up your news. Fill in your account
username, and optionally you can add password.'''
@@ -23,9 +26,6 @@ class Readitlater(BasicNewsRecipe):
INDEX = u'http://readitlaterlist.com'
LOGIN = INDEX + u'/l'
feeds = [(u'Unread articles' , INDEX + u'/unread')]
def get_browser(self):
br = BasicNewsRecipe.get_browser()
if self.username is not None:
@@ -37,12 +37,31 @@ class Readitlater(BasicNewsRecipe):
br.submit()
return br
def get_feeds(self):
self.report_progress(0, ('Fetching list of feeds...'))
lfeeds = []
i = 1
feedurl = self.INDEX + u'/unread/1'
while True:
title = u'Unread articles, page ' + str(i)
lfeeds.append((title, feedurl))
self.report_progress(0, ('Got ') + str(i) + (' feeds'))
i += 1
soup = self.index_to_soup(feedurl)
ritem = soup.find('a',attrs={'id':'next', 'class':'active'})
if ritem is None:
break
feedurl = self.INDEX + ritem['href']
if self.test:
return lfeeds[:2]
return lfeeds
def parse_index(self):
totalfeeds = []
lfeeds = self.get_feeds()
for feedobj in lfeeds:
feedtitle, feedurl = feedobj
self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
self.report_progress(0, ('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
articles = []
soup = self.index_to_soup(feedurl)
ritem = soup.find('ul',attrs={'id':'list'})
+11
View File
@@ -0,0 +1,11 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1317341570(BasicNewsRecipe):
title = u'Revista Semana'
__author__ = 'BIGO-CAVA'
language = 'es_CO'
oldest_article = 7
max_articles_per_feed = 100
feeds = [(u'Revista Semana', u'http://www.semana.com/rss/Semana_OnLine.xml')]
+13 -13
View File
@@ -22,10 +22,10 @@ class Smh_au(BasicNewsRecipe):
remove_empty_feeds = True
masthead_url = 'http://images.smh.com.au/2010/02/02/1087188/smh-620.jpg'
publication_type = 'newspaper'
extra_css = """
h1{font-family: Georgia,"Times New Roman",Times,serif }
body{font-family: Arial,Helvetica,sans-serif}
.cT-imageLandscape,.cT-imagePortrait{font-size: x-small}
extra_css = """
h1{font-family: Georgia,"Times New Roman",Times,serif }
body{font-family: Arial,Helvetica,sans-serif}
.cT-imageLandscape,.cT-imagePortrait{font-size: x-small}
"""
conversion_options = {
@@ -35,16 +35,16 @@ class Smh_au(BasicNewsRecipe):
, 'language' : language
}
remove_tags = [
dict(name='div', attrs={'id':['googleAds','moreGoogleAds','comments']})
,dict(name='div', attrs={'class':'cT-imageMultimedia'})
,dict(name=['object','embed','iframe'])
]
remove_tags_after = [dict(name='div',attrs={'class':'articleBody'})]
keep_only_tags = [dict(name='div',attrs={'id':'content'})]
remove_tags = [
dict(attrs={'class':'hidden'}),
dict(name=['link','meta','base','embed','object','iframe'])
remove_tags = [
dict(name='div',
attrs={'id':['googleAds','moreGoogleAds','comments',
'video-player-content']}),
dict(name='div', attrs={'class':'cT-imageMultimedia'}),
dict(name=['object','embed','iframe']),
dict(attrs={'class':'hidden'}),
dict(name=['link','meta','base','embed','object','iframe'])
]
remove_attributes = ['width','height','lang']
@@ -84,4 +84,4 @@ class Smh_au(BasicNewsRecipe):
if not item.has_key('alt'):
item['alt'] = 'image'
return soup
@@ -285,6 +285,15 @@ function booklist(hide_sort) {
first_page();
}
function search_result() {
var test = $("#booklist #page0").html();
if (!test) {
$("#booklist").html("No books found matching this query");
return;
}
booklist();
}
function show_details(a_dom) {
var book = $(a_dom).closest('div.summary');
var bd = $('#book_details_dialog');
+9 -4
View File
@@ -2,7 +2,7 @@
let g:pyflakes_builtins = ["_", "dynamic_property", "__", "P", "I", "lopen", "icu_lower", "icu_upper", "icu_title", "ngettext"]
python << EOFPY
import os
import os, sys
import vipy
@@ -11,15 +11,20 @@ project_dir = os.path.dirname(source_file)
src_dir = os.path.abspath(os.path.join(project_dir, 'src'))
base_dir = os.path.join(src_dir, 'calibre')
sys.path.insert(0, src_dir)
sys.resources_location = os.path.join(project_dir, 'resources')
sys.extensions_location = os.path.join(base_dir, 'plugins')
sys.executables_location = os.environ.get('CALIBRE_EXECUTABLES_PATH', '/usr/bin')
vipy.session.initialize(project_name='calibre', src_dir=src_dir,
project_dir=project_dir, base_dir=base_dir)
project_dir=project_dir, base_dir=project_dir)
def recipe_title_callback(raw):
return eval(raw.decode('utf-8')).replace(' ', '_')
vipy.session.add_content_browser('.r', ',r', 'Recipe',
vipy.session.add_content_browser('<leader>r', 'Recipe',
vipy.session.glob_based_iterator(os.path.join(project_dir, 'recipes', '*.recipe')),
vipy.session.regexp_based_matcher(r'title\s*=\s*(?P<title>.+)', 'title', recipe_title_callback))
EOFPY
nmap \log :enew<CR>:read ! bzr log -l 500 ../.. <CR>:e ../../Changelog.yaml<CR>:e constants.py<CR>
nmap \log :enew<CR>:read ! bzr log -l 500 <CR>:e Changelog.yaml<CR>:e src/calibre/constants.py<CR>
+5
View File
@@ -177,6 +177,7 @@ fc_error = None if os.path.exists(os.path.join(fc_inc, 'fontconfig.h')) else \
poppler_error = None
poppler_cflags = ['-DPNG_SKIP_SETJMP_CHECK'] if islinux else []
if not poppler_inc_dirs or not os.path.exists(
os.path.join(poppler_inc_dirs[0], 'OutputDev.h')):
poppler_error = \
@@ -186,6 +187,10 @@ if not poppler_inc_dirs or not os.path.exists(
' the poppler XPDF headers. If your distro does not '
' include them you will have to re-compile poppler '
' by hand with --enable-xpdf-headers')
else:
lh = os.path.join(poppler_inc_dirs[0], 'Link.h')
if 'class AnnotLink' not in open(lh, 'rb').read():
poppler_cflags.append('-DPOPPLER_OLD_LINK_TYPE')
magick_error = None
if not magick_inc_dirs or not os.path.exists(os.path.join(magick_inc_dirs[0],
+10 -10
View File
@@ -11,15 +11,15 @@ from distutils import sysconfig
from PyQt4.pyqtconfig import QtGuiModuleMakefile
from setup import Command, islinux, isfreebsd, isbsd, isosx, SRC, iswindows
from setup.build_environment import fc_inc, fc_lib, chmlib_inc_dirs, \
fc_error, poppler_libs, poppler_lib_dirs, poppler_inc_dirs, podofo_inc, \
podofo_lib, podofo_error, poppler_error, pyqt, OSX_SDK, NMAKE, \
QMAKE, msvc, MT, win_inc, win_lib, png_inc_dirs, win_ddk, \
magick_inc_dirs, magick_lib_dirs, png_lib_dirs, png_libs, \
magick_error, magick_libs, ft_lib_dirs, ft_libs, jpg_libs, \
jpg_lib_dirs, chmlib_lib_dirs, sqlite_inc_dirs, icu_inc_dirs, \
icu_lib_dirs
from setup import Command, islinux, isbsd, isosx, SRC, iswindows
from setup.build_environment import (fc_inc, fc_lib, chmlib_inc_dirs,
fc_error, poppler_libs, poppler_lib_dirs, poppler_inc_dirs, podofo_inc,
podofo_lib, podofo_error, poppler_error, pyqt, OSX_SDK, NMAKE,
QMAKE, msvc, MT, win_inc, win_lib, png_inc_dirs, win_ddk,
magick_inc_dirs, magick_lib_dirs, png_lib_dirs, png_libs,
magick_error, magick_libs, ft_lib_dirs, ft_libs, jpg_libs,
jpg_lib_dirs, chmlib_lib_dirs, sqlite_inc_dirs, icu_inc_dirs,
icu_lib_dirs, poppler_cflags)
MT
isunix = islinux or isosx or isbsd
@@ -114,7 +114,7 @@ extensions = [
lib_dirs=poppler_lib_dirs+magick_lib_dirs+png_lib_dirs+ft_lib_dirs+jpg_lib_dirs,
inc_dirs=poppler_inc_dirs+magick_inc_dirs+png_inc_dirs,
error=reflow_error,
cflags=['-DPNG_SKIP_SETJMP_CHECK'] if islinux else []
cflags=poppler_cflags
),
Extension('lzx',
+1397 -1397
View File
File diff suppressed because it is too large Load Diff
+1399 -1399
View File
File diff suppressed because it is too large Load Diff
+1397 -1397
View File
File diff suppressed because it is too large Load Diff
+1397 -1397
View File
File diff suppressed because it is too large Load Diff
+1399 -1399
View File
File diff suppressed because it is too large Load Diff
+1644 -1644
View File
File diff suppressed because it is too large Load Diff
+1492 -1492
View File
File diff suppressed because it is too large Load Diff
+1397 -1397
View File
File diff suppressed because it is too large Load Diff
+1399 -1399
View File
File diff suppressed because it is too large Load Diff
+1613 -1613
View File
File diff suppressed because it is too large Load Diff
+2136 -2136
View File
File diff suppressed because it is too large Load Diff
+1406 -1406
View File
File diff suppressed because it is too large Load Diff
+1397 -1397
View File
File diff suppressed because it is too large Load Diff
+1411 -1411
View File
File diff suppressed because it is too large Load Diff
+1407 -1407
View File
File diff suppressed because it is too large Load Diff
+1406 -1406
View File
File diff suppressed because it is too large Load Diff
+1407 -1407
View File
File diff suppressed because it is too large Load Diff
+1395 -1395
View File
File diff suppressed because it is too large Load Diff
+1397 -1397
View File
File diff suppressed because it is too large Load Diff
+1400 -1400
View File
File diff suppressed because it is too large Load Diff
+1399 -1399
View File
File diff suppressed because it is too large Load Diff
+1401 -1401
View File
File diff suppressed because it is too large Load Diff
+2243 -2243
View File
File diff suppressed because it is too large Load Diff
+1399 -1399
View File
File diff suppressed because it is too large Load Diff
+1399 -1399
View File
File diff suppressed because it is too large Load Diff
+2575 -2575
View File
File diff suppressed because it is too large Load Diff
+1455 -1455
View File
File diff suppressed because it is too large Load Diff
+1397 -1397
View File
File diff suppressed because it is too large Load Diff
+1399 -1399
View File
File diff suppressed because it is too large Load Diff
+1397 -1397
View File
File diff suppressed because it is too large Load Diff
+1445 -1445
View File
File diff suppressed because it is too large Load Diff
+1400 -1400
View File
File diff suppressed because it is too large Load Diff
+1397 -1397
View File
File diff suppressed because it is too large Load Diff
+1394 -1394
View File
File diff suppressed because it is too large Load Diff
+1394 -1394
View File
File diff suppressed because it is too large Load Diff
+1926 -1926
View File
File diff suppressed because it is too large Load Diff
+1409 -1409
View File
File diff suppressed because it is too large Load Diff
+2693 -2693
View File
File diff suppressed because it is too large Load Diff
+1397 -1397
View File
File diff suppressed because it is too large Load Diff
+1399 -1399
View File
File diff suppressed because it is too large Load Diff
+1522 -1522
View File
File diff suppressed because it is too large Load Diff
+1397 -1397
View File
File diff suppressed because it is too large Load Diff
+1397 -1397
View File
File diff suppressed because it is too large Load Diff
+1397 -1397
View File
File diff suppressed because it is too large Load Diff
+1401 -1401
View File
File diff suppressed because it is too large Load Diff
+2786 -2786
View File
File diff suppressed because it is too large Load Diff
+1397 -1397
View File
File diff suppressed because it is too large Load Diff
+1408 -1408
View File
File diff suppressed because it is too large Load Diff
+1400 -1400
View File
File diff suppressed because it is too large Load Diff
+1753 -1753
View File
File diff suppressed because it is too large Load Diff
+1398 -1398
View File
File diff suppressed because it is too large Load Diff
+1397 -1397
View File
File diff suppressed because it is too large Load Diff
+1401 -1401
View File
File diff suppressed because it is too large Load Diff
+2289 -2289
View File
File diff suppressed because it is too large Load Diff
+2737 -2737
View File
File diff suppressed because it is too large Load Diff
+1411 -1411
View File
File diff suppressed because it is too large Load Diff
+1397 -1397
View File
File diff suppressed because it is too large Load Diff
+1409 -1409
View File
File diff suppressed because it is too large Load Diff
+1407 -1407
View File
File diff suppressed because it is too large Load Diff
+1397 -1397
View File
File diff suppressed because it is too large Load Diff
+1400 -1400
View File
File diff suppressed because it is too large Load Diff
+1411 -1411
View File
File diff suppressed because it is too large Load Diff
+1397 -1397
View File
File diff suppressed because it is too large Load Diff
+1402 -1402
View File
File diff suppressed because it is too large Load Diff
+1411 -1411
View File
File diff suppressed because it is too large Load Diff
+1411 -1411
View File
File diff suppressed because it is too large Load Diff
+1402 -1402
View File
File diff suppressed because it is too large Load Diff
+2724 -2724
View File
File diff suppressed because it is too large Load Diff
+1433 -1433
View File
File diff suppressed because it is too large Load Diff
+1399 -1399
View File
File diff suppressed because it is too large Load Diff
+1399 -1399
View File
File diff suppressed because it is too large Load Diff
+1448 -1448
View File
File diff suppressed because it is too large Load Diff
+1402 -1402
View File
File diff suppressed because it is too large Load Diff
+1433 -1433
View File
File diff suppressed because it is too large Load Diff
+1397 -1397
View File
File diff suppressed because it is too large Load Diff
+1408 -1408
View File
File diff suppressed because it is too large Load Diff
+1404 -1404
View File
File diff suppressed because it is too large Load Diff
+1397 -1397
View File
File diff suppressed because it is too large Load Diff
+1410 -1410
View File
File diff suppressed because it is too large Load Diff
+1397 -1397
View File
File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More