Merge from trunk

This commit is contained in:
Charles Haley 2011-10-02 08:09:01 +02:00
commit 2e4533811a
199 changed files with 147109 additions and 143638 deletions

View File

@ -19,6 +19,97 @@
# new recipes:
# - title:
- version: 0.8.21
date: 2011-09-30
new features:
- title: "A Tips and Tricks blog at http://blog.calibre-ebook.com to introduce less well known calibre features in a simple way"
- title: "News download: Add list of articles in the downloaded issue to the comments metadata of the generated ebook. Makes it possible to search for a particular article in the calibre library."
ticket: [851717]
- title: "Toolbar buttons: You can now also right click the buttons to bring up the popup of extra actions, in addition to clicking the small arrow next to the button."
- title: "Amazon metadata download plugin: Add option to download metadata from amazon.es"
- title: Driver for Vizio and iRobot A9 Android tablets
tickets: [854408,862175]
- title: "When switching to/starting with a library with a corrupted database, offer the user the option of rebuilding the database instead of erroring out."
- title: "Template language: Add list_equals function"
- title: "Add a special output profile for the PocketBook 900 as it does not resize images correctly by itself"
bug fixes:
- title: "Fix regression that caused PDF Output to generate very large files"
- title: Fix Title Sort field not being displayed in Book details panel
- title: Prevent renaming of languages in the Tag browser
tickets: [860943]
- title: "Get books: Fix getting price from Foyles"
- title: "Content server: When a search matches no queries, do not show an error message"
- title: "ODT Input: Add workaround for ADE to fix centering of block level images when converting to EPUB"
tickets: [859343]
- title: "Content server: When WSGI embedding fix handling of empty URL"
- title: "RTF Input: Fix spurious spaces inserted after some unicode characters"
tickets: [851215]
- title: "Fix regression that broke clicking on the first letter of author names in the Tag Browser when grouped"
tickets: [860615]
- title: "Fix reading metadata from filenames when the author regexp does not match anything"
- title: "Fix incorrect display of the month September in Finnish calibre"
tickets: [858737]
- title: "Do not delete the file when the user tries to add a format to a book from a file already in the books directory"
tickets: [856158]
- title: "Fix regression that broke customization of Kobo device plugin"
- title: "Allow user defined templates to be used in save to disk"
improved recipes:
- Read It Later
- American Spectator
- Sydney Morning Herald
- Chicago Tribune
- American Prospect
- DNA India
- Times of India
- Kurier
- xkcd
- Cnet
new recipes:
- title: Various Colombian news sources
author: BIGO-CAVA
- title: Gosc Niedzielny
author: Piotr Kontek
- title: Leipziger Volkszeitung
author: a.peter
- title: Folha de Sao Paulo (full edition)
author: fluzao
- title: Den of Geek
author: Jaded
- title: Republica
author: Manish Bhattarai
- title: Sign on San Diego
author: Jay Kindle
- version: 0.8.20
date: 2011-09-23

View File

@ -18,25 +18,16 @@ class TheAmericanSpectator(BasicNewsRecipe):
use_embedded_content = False
language = 'en'
INDEX = 'http://spectator.org'
conversion_options = {
auto_cleanup = True
encoding = 'utf-8'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='div', attrs={'class':'post inner'})
,dict(name='div', attrs={'class':'author-bio'})
]
remove_tags = [
dict(name='object')
,dict(name='div', attrs={'class':['col3','post-options','social']})
,dict(name='p' , attrs={'class':['letter-editor','meta']})
]
feeds = [ (u'Articles', u'http://feeds.feedburner.com/amspecarticles')]
def get_cover_url(self):
@ -48,10 +39,10 @@ class TheAmericanSpectator(BasicNewsRecipe):
link_item2 = soup2.find('div',attrs={'class':'post inner issues'})
cover_url = self.INDEX + link_item2.img['src']
return cover_url
def print_version(self, url):
return url + '/print'
def get_article_url(self, article):
return article.get('guid', None)

View File

@ -1,26 +1,18 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class AmericanProspect(BasicNewsRecipe):
title = u'American Prospect'
__author__ = u'Michael Heinz'
oldest_article = 30
language = 'en'
max_articles_per_feed = 100
recursions = 0
no_stylesheets = True
remove_javascript = True
__author__ = u'Michael Heinz, a.peter'
version = 2
preprocess_regexps = [
(re.compile(r'<body.*?<div class="pad_10L10R">', re.DOTALL|re.IGNORECASE), lambda match: '<body><div>'),
(re.compile(r'</div>.*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</div></body>'),
(re.compile('\r'),lambda match: ''),
(re.compile(r'<!-- .+? -->', re.DOTALL|re.IGNORECASE), lambda match: ''),
(re.compile(r'<link .+?>', re.DOTALL|re.IGNORECASE), lambda match: ''),
(re.compile(r'<script.*?</script>', re.DOTALL|re.IGNORECASE), lambda match: ''),
(re.compile(r'<noscript.*?</noscript>', re.DOTALL|re.IGNORECASE), lambda match: ''),
(re.compile(r'<meta .*?/>', re.DOTALL|re.IGNORECASE), lambda match: ''),
]
oldest_article = 30
language = 'en'
max_articles_per_feed = 100
recursions = 0
no_stylesheets = True
remove_javascript = True
keep_only_tags = [dict(name='div', attrs={'class':'pad_10L10R'})]
remove_tags = [dict(name='form'), dict(name='div', attrs={'class':['bkt_caption','sharebox noprint','badgebox']})]
feeds = [(u'Articles', u'feed://www.prospect.org/articles_rss.jsp')]

View File

@ -8,21 +8,25 @@ from calibre.web.feeds.news import BasicNewsRecipe
class ChicagoTribune(BasicNewsRecipe):
title = 'Chicago Tribune'
__author__ = 'Kovid Goyal and Sujata Raman'
__author__ = 'Kovid Goyal and Sujata Raman, a.peter'
description = 'Politics, local and business news from Chicago'
language = 'en'
language = 'en'
version = 2
use_embedded_content = False
no_stylesheets = True
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
remove_javascript = True
recursions = 1
keep_only_tags = [dict(name='div', attrs={'class':["story","entry-asset asset hentry"]}),
dict(name='div', attrs={'id':["pagebody","story","maincontentcontainer"]}),
]
remove_tags_after = [ {'class':['photo_article',]} ]
remove_tags_after = [{'class':['photo_article',]}]
remove_tags = [{'id':["moduleArticleTools","content-bottom","rail","articleRelates module","toolSet","relatedrailcontent","div-wrapper","beta","atp-comments","footer"]},
{'class':["clearfix","relatedTitle","articleRelates module","asset-footer","tools","comments","featurePromo","featurePromo fp-topjobs brownBackground","clearfix fullSpan brownBackground","curvedContent"]},
match_regexps = [r'page=[0-9]+']
remove_tags = [{'id':["moduleArticleTools","content-bottom","rail","articleRelates module","toolSet","relatedrailcontent","div-wrapper","beta","atp-comments","footer",'gallery-subcontent','subFooter']},
{'class':["clearfix","relatedTitle","articleRelates module","asset-footer","tools","comments","featurePromo","featurePromo fp-topjobs brownBackground","clearfix fullSpan brownBackground","curvedContent",'nextgen-share-tools','outbrainTools', 'google-ad-story-bottom']},
dict(name='font',attrs={'id':["cr-other-headlines"]})]
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
@ -37,7 +41,7 @@ class ChicagoTribune(BasicNewsRecipe):
.maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
'''
feeds = [
('Latest news', 'http://feeds.chicagotribune.com/chicagotribune/news/'),
('Local news', 'http://feeds.chicagotribune.com/chicagotribune/news/local/'),
@ -76,8 +80,12 @@ class ChicagoTribune(BasicNewsRecipe):
print article.get('feedburner_origlink', article.get('guid', article.get('link')))
return article.get('feedburner_origlink', article.get('guid', article.get('link')))
def postprocess_html(self, soup, first_fetch):
# Remove the navigation bar. It was kept until now to be able to follow
# the links to further pages. But now we don't need them anymore.
for nav in soup.findAll(attrs={'class':['toppaginate','article-nav clearfix']}):
nav.extract()
for t in soup.findAll(['table', 'tr', 'td']):
t.name = 'div'
@ -88,4 +96,3 @@ class ChicagoTribune(BasicNewsRecipe):
return soup

View File

@ -0,0 +1,11 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1317341449(BasicNewsRecipe):
    """Fetch the Colombian daily 'Diario La Republica' from its RSS feed."""

    title = u'Diario La Republica'
    __author__ = 'CAVALENCIA'
    language = 'es_CO'

    # Keep up to a week of articles, at most 100 per feed.
    oldest_article = 7
    max_articles_per_feed = 100
    # Let calibre strip page boilerplate from articles automatically.
    auto_cleanup = True

    feeds = [(u'Diario La Republica', u'http://www.larepublica.com.co/rss/larepublica.xml')]

View File

@ -2,12 +2,10 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1311790237(BasicNewsRecipe):
title = u'Periódico El Colombiano'
language = 'es_CO'
__author__ = 'BIGO-CAVA'
language = 'es_CO'
cover_url = 'http://www.elcolombiano.com/images/logoElColombiano348x46.gif'
remove_tags_before = dict(id='contenidoArt')
remove_tags_after = dict(id='enviaTips')

View File

@ -0,0 +1,54 @@
# coding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe
class ColombiaElEspectador(BasicNewsRecipe):
    """Recipe for the Colombian newspaper 'El Espectador' (elespectador.com)."""

    title = u'Periódico el Espectador'
    __author__ = 'BIGO-CAVA'
    language = 'es_CO'
    publication_type = 'newspaper'

    cover_url = 'http://www.elespectador.com/sites/elespectador.com/themes/elespectador/images/logo.gif'
    masthead_url = 'http://www.elespectador.com/sites/elespectador.com/themes/elespectador/images/logo.gif'

    # Keep only the main content area; cut everything after the pagination box.
    remove_tags_before = dict(id='content')
    remove_tags_after = [dict(name='div', attrs={'class':'paginacion'})]
    # Drop the sharing toolbar, ad slots and related-content widgets.
    remove_tags = [
        dict(name='div', attrs={'class':'herramientas_nota'}),
        dict(name='div', attrs={'class':'relpauta'}),
        dict(name='div', attrs={'class':'recursosrelacionados'}),
        dict(name='div', attrs={'class':'nav_negocios'}),
    ]

    oldest_article = 2
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True

    extra_css = """
    p{text-align: justify; font-size: 100%}
    body{ text-align: left; font-size:100% }
    h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
    h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; }
    """

    feeds = [
        (u'Política ', u' http://www.elespectador.com/noticias/politica/feed'),
        (u'Judicial', u'http://www.elespectador.com/noticias/judicial/feed'),
        (u'Paz', u'http://www.elespectador.com/noticias/paz/feed'),
        (u'Economía', u'http://www.elespectador.com/economia/feed'),
        (u'Soy Periodista', u'http://www.elespectador.com/noticias/soyperiodista/feed'),
        (u'Investigación', u'http://www.elespectador.com/noticias/investigacion/feed'),
        (u'Educación', u'http://www.elespectador.com/noticias/educacion/feed'),
        (u'Salud', u'http://www.elespectador.com/noticias/salud/feed'),
        (u'El Mundo', u'http://www.elespectador.com/noticias/elmundo/feed'),
        (u'Nacional', u'http://www.elespectador.com/noticias/nacional/feed'),
        (u'Bogotá', u'http://www.elespectador.com/noticias/bogota/feed'),
        (u'Deportes', u'http://www.elespectador.com/deportes/feed'),
        (u'Tecnología', u'http://www.elespectador.com/tecnologia/feed'),
        (u'Actualidad', u'http://www.elespectador.com/noticias/actualidad/feed'),
        (u'Opinión', u'http://www.elespectador.com/opinion/feed'),
        (u'Editorial', u'http://www.elespectador.com/opinion/editorial/feed'),
    ]

View File

@ -0,0 +1,50 @@
from calibre.web.feeds.news import BasicNewsRecipe
class ColombiaElMundo02(BasicNewsRecipe):
    """Recipe for the Colombian newspaper 'El Mundo' (elmundo.com)."""

    title = u'Periódico El Mundo'
    __author__ = 'BIGO-CAVA'
    language = 'es_CO'
    publication_type = 'newspaper'

    cover_url = 'http://www.elmundo.com/portal/img/logo_mundo2.png'
    masthead_url = 'http://www.elmundo.com/portal/img/logo_mundo2.png'

    # The article proper starts at the breadcrumb and ends at the options box.
    remove_tags_before = dict(id='miga_pan')
    remove_tags_after = [dict(name='div', attrs={'class':'cuadro_opciones_new1'})]
    # Strip navigation, search, icon bars, related-story boxes and the sidebar.
    remove_tags = [
        dict(name='div', attrs={'class':'ruta'}),
        dict(name='div', attrs={'class':'buscador'}),
        dict(name='div', attrs={'class':'iconos'}),
        dict(name='div', attrs={'class':'otros_iconos'}),
        dict(name='div', attrs={'class':'cuadro_opciones_new1'}),
        dict(name='div', attrs={'class':'otras_noticias'}),
        dict(name='div', attrs={'class':'notas_relacionadas'}),
        dict(name='div', attrs={'id':'lateral_2'}),
    ]

    oldest_article = 2
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True

    extra_css = """
    p{text-align: justify; font-size: 100%}
    body{ text-align: left; font-size:100% }
    h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
    h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; }
    """

    feeds = [
        (u'Opinión', u'http://www.elmundo.com/images/rss/opinion.xml'),
        (u'Economía', u'http://www.elmundo.com/images/rss/noticias_economia.xml'),
        (u'Deportes', u'http://www.elmundo.com/images/rss/deportes.xml'),
        (u'Política ', u'http://www.elmundo.com/images/rss/noticias_politica.xml'),
        (u'Antioquia', u'http://www.elmundo.com/images/rss/noticias_antioquia.xml'),
        (u'Nacional ', u'http://www.elmundo.com/images/rss/noticias_nacional.xml'),
        (u'Internacional', u'http://www.elmundo.com/images/rss/noticias_internacional.xml'),
        (u'Servicios Públicos', u'http://www.elmundo.com/images/rss/noticias_servicios_publicos.xml'),
        (u'Infraestructura', u'http://www.elmundo.com/images/rss/noticias_infraestructura.xml'),
        (u'Mobilidad', u'http://www.elmundo.com/images/rss/noticias_movilidad.xml'),
        (u'Derechos Humanos', u'http://www.elmundo.com/images/rss/noticias_derechos_humanos.xml'),
        (u'Vida', u'http://www.elmundo.com/images/rss/vida.xml'),
        (u'Cultura', u'http://www.elmundo.com/images/rss/cultura.xml'),
    ]

View File

@ -2,18 +2,17 @@
from calibre.web.feeds.news import BasicNewsRecipe
class ColombiaElTiempo02(BasicNewsRecipe):
title = u'Periódico el Tiempo'
language = 'es_CO'
__author__ = 'BIGO-CAVA'
language = 'es_CO'
cover_url = 'http://www.eltiempo.com/media/css/images/logo_footer.png'
remove_tags_before = dict(id='fb-root')
#remove_tags_before = dict(id='fb-root')
remove_tags_before = dict(id='contenidoArt')
remove_tags_after = [dict(name='div', attrs={'class':'modulo reporte'})]
keep_only_tags = [dict(name='div', id='contenidoArt')]
remove_tags = [dict(name='div', attrs={'class':'social-media'}),
dict(name='div', attrs={'class':'recomend-art'}),
dict(name='div', attrs={'class':'caja-facebook'}),
dict(name='div', attrs={'class':'caja-twitter'}),
dict(name='div', attrs={'class':'caja-buzz'}),

View File

@ -0,0 +1,96 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class FSP(BasicNewsRecipe):
    # Recipe for the printed edition of Folha de S.Paulo (Brazil).
    # Logs in to UOL (subscription required) and builds the issue from the
    # print-edition index page rather than from RSS feeds.

    title = u'Folha de S\xE3o Paulo'
    __author__ = 'fluzao'
    description = u'Printed edition contents. UOL subscription required (Folha subscription currently not supported).' + \
                  u' [Conte\xfado completo da edi\xe7\xe3o impressa. Somente para assinantes UOL.]'
    # Index of the print edition; parse_index scrapes this single page.
    INDEX = 'http://www1.folha.uol.com.br/fsp/indices/'
    language = 'pt'
    no_stylesheets = True
    max_articles_per_feed = 40
    remove_javascript = True
    needs_subscription = True
    remove_tags_before = dict(name='b')
    remove_tags = [dict(name='td', attrs={'align':'center'})]
    remove_attributes = ['height','width']
    masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif'

    # fixes the problem with the section names: maps the truncated anchor
    # names used on the index page to proper (accented) section titles
    section_dict = {'cotidian' : 'cotidiano', 'ilustrad': 'ilustrada', \
                    'quadrin': 'quadrinhos' , 'opiniao' : u'opini\xE3o', \
                    'ciencia' : u'ci\xeancia' , 'saude' : u'sa\xfade', \
                    'ribeirao' : u'ribeir\xE3o' , 'equilibrio' : u'equil\xedbrio'}

    # this solves the problem with truncated content in Kindle
    conversion_options = {'linearize_tables' : True}

    # this bit removes the footer where there are links for Proximo Texto, Texto Anterior,
    # Indice e Comunicar Erros
    preprocess_regexps = [(re.compile(r'<BR><BR>Texto Anterior:.*<!--/NOTICIA-->',
                                      re.DOTALL|re.IGNORECASE), lambda match: r''),
                          (re.compile(r'<BR><BR>Pr&oacute;ximo Texto:.*<!--/NOTICIA-->',
                                      re.DOTALL|re.IGNORECASE), lambda match: r'')]

    def get_browser(self):
        """Return a browser logged in to UOL with the user's credentials."""
        br = BasicNewsRecipe.get_browser()
        if self.username is not None and self.password is not None:
            br.open('https://acesso.uol.com.br/login.html')
            # Python 2 mechanize idiom: take the first form on the login page.
            br.form = br.forms().next()
            br['user'] = self.username
            br['pass'] = self.password
            br.submit().read()
##            if 'Please try again' in raw:
##                raise Exception('Your username and password are incorrect')
        return br

    def parse_index(self):
        """Scrape the print-edition index into a list of (section, articles)."""
        soup = self.index_to_soup(self.INDEX)
        feeds = []
        articles = []
        # Links before the first named anchor accumulate under this dummy
        # section, which is dropped again further down.
        section_title = "Preambulo"
        for post in soup.findAll('a'):
            # if name=True => new section
            strpost = str(post)
            if strpost.startswith('<a name'):
                # A named anchor starts a new section: flush the previous one.
                if articles:
                    feeds.append((section_title, articles))
                    self.log()
                    self.log('--> new section found, creating old section feed: ', section_title)
                section_title = post['name']
                if section_title in self.section_dict:
                    section_title = self.section_dict[section_title]
                articles = []
                self.log('--> new section title: ', section_title)
            if strpost.startswith('<a href'):
                url = post['href']
                # Only print-edition links (relative /fsp paths) are articles.
                if url.startswith('/fsp'):
                    url = 'http://www1.folha.uol.com.br'+url
                    title = self.tag_to_string(post)
                    self.log()
                    self.log('--> post: ', post)
                    self.log('--> url: ', url)
                    self.log('--> title: ', title)
                    articles.append({'title':title, 'url':url})
        if articles:
            feeds.append((section_title, articles))

        # keeping the front page url
        # NOTE(review): assumes the 'Preambulo' feed has at least two entries
        # and the next feed at least one — confirm against the live index.
        minha_capa = feeds[0][1][1]['url']

        # removing the 'Preambulo' section
        del feeds[0]

        # creating the url for the cover image from the first article url
        coverurl = feeds[0][1][0]['url']
        coverurl = coverurl.replace('/opiniao/fz', '/images/cp')
        coverurl = coverurl.replace('01.htm', '.jpg')
        self.cover_url = coverurl

        # inserting the cover page as the first article (nicer for kindle users)
        feeds.insert(0,(u'primeira p\xe1gina', [{'title':u'Primeira p\xe1gina' , 'url':minha_capa}]))
        return feeds

View File

@ -0,0 +1,112 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2011, Piotr Kontek, piotr.kontek@gmail.com'
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile
import re
class GN(BasicNewsRecipe):
    # Recipe for the Polish weekly magazine 'Gość Niedzielny' (gosc.pl).
    # Downloads the most recent complete issue; article pages are fetched
    # through their printer-friendly variant and rebuilt locally.

    # Set by find_last_issue() to the href of the chosen issue page.
    EDITION = 0

    __author__ = 'Piotr Kontek'
    title = u'Gość niedzielny'
    description = 'Weekly magazine'
    encoding = 'utf-8'
    no_stylesheets = True
    language = 'pl'
    remove_javascript = True
    # Holds the temp files created per article so they outlive this method.
    temp_files = []

    # Articles are rebuilt into local HTML files via get_obfuscated_article.
    articles_are_obfuscated = True

    def get_obfuscated_article(self, url):
        """Fetch an article page, rebuild its HTML locally, return the temp file path."""
        br = self.get_browser()
        br.open(url)
        source = br.response().read()
        page = self.index_to_soup(source)

        main_section = page.find('div',attrs={'class':'txt doc_prnt_prv'})
        title = main_section.find('h2')
        info = main_section.find('div', attrs={'class' : 'cf doc_info'})
        authors = info.find(attrs={'class':'l'})
        article = str(main_section.find('p', attrs={'class' : 'doc_lead'}))
        first = True
        for p in main_section.findAll('p', attrs={'class':None}, recursive=False):
            # First paragraph with an image: keep the image (with absolute
            # src) and render its caption spans in a smaller font.
            if first and p.find('img') != None:
                article = article + '<p>'
                article = article + str(p.find('img')).replace('src="/files/','src="http://www.gosc.pl/files/')
                article = article + '<font size="-2">'
                for s in p.findAll('span'):
                    article = article + self.tag_to_string(s)
                article = article + '</font></p>'
            else:
                article = article + str(p).replace('src="/files/','src="http://www.gosc.pl/files/')
            first = False
        html = unicode(title) + unicode(authors) + unicode(article)

        self.temp_files.append(PersistentTemporaryFile('_temparse.html'))
        self.temp_files[-1].write(html)
        self.temp_files[-1].close()
        return self.temp_files[-1].name

    def find_last_issue(self):
        """Locate the previous full issue and set EDITION, title and cover_url."""
        soup = self.index_to_soup('http://gosc.pl/wyszukaj/wydania/3.Gosc-Niedzielny')
        # look for the image and the link to the previous full issue
        first = True
        for d in soup.findAll('div', attrs={'class':'l release_preview_l'}):
            img = d.find('img')
            if img != None:
                a = img.parent
                self.EDITION = a['href']
                self.title = img['alt']
                self.cover_url = 'http://www.gosc.pl' + img['src']
                # Stop after the second issue found (the previous full one).
                if not first:
                    break
                first = False

    def parse_index(self):
        """Build the feed list for the selected issue."""
        self.find_last_issue()
        soup = self.index_to_soup('http://www.gosc.pl' + self.EDITION)
        feeds = []
        # the editorial ("wstępniak")
        a = soup.find('div',attrs={'class':'release-wp-b'}).find('a')
        articles = [
            {'title' : self.tag_to_string(a),
             'url' : 'http://www.gosc.pl' + a['href'].replace('/doc/','/doc_pr/'),
             'date' : '',
             'description' : ''}
        ]
        feeds.append((u'Wstępniak',articles))
        # the categories; skip the "all articles in this category »" links
        for addr in soup.findAll('a',attrs={'href':re.compile('kategoria')}):
            if addr.string != u'wszystkie artyku\u0142y z tej kategorii \xbb':
                main_block = self.index_to_soup('http://www.gosc.pl' + addr['href'])
                articles = list(self.find_articles(main_block))
                if len(articles) > 0:
                    section = addr.string
                    feeds.append((section, articles))
        return feeds

    def find_articles(self, main_block):
        """Yield article dicts (printer-friendly URLs) found in a category page."""
        for a in main_block.findAll('div', attrs={'class':'prev_doc2'}):
            art = a.find('a')
            yield {
                'title' : self.tag_to_string(art),
                'url' : 'http://www.gosc.pl' + art['href'].replace('/doc/','/doc_pr/'),
                'date' : '',
                'description' : ''
            }
        for a in main_block.findAll('div', attrs={'class':'sr-document'}):
            art = a.find('a')
            yield {
                'title' : self.tag_to_string(art),
                'url' : 'http://www.gosc.pl' + art['href'].replace('/doc/','/doc_pr/'),
                'date' : '',
                'description' : ''
            }

View File

@ -0,0 +1,34 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
'''Calibre recipe to convert the RSS feeds of the Leipziger Volkszeitung to an ebook.'''
class SportsIllustratedRecipe(BasicNewsRecipe):
    """Convert the RSS feeds of the Leipziger Volkszeitung into an ebook.

    NOTE(review): the class name looks copy-pasted from another recipe —
    the content is the Leipziger Volkszeitung. Renaming would change the
    public interface, so the name is kept as-is.
    """

    __author__ = 'a.peter'
    __copyright__ = 'a.peter'
    __license__ = 'GPL v3'

    title = u'Leipziger Volkszeitung Online RSS'
    description = 'Leipziger Volkszeitung Online RSS'
    language = 'de'
    version = 1
    timefmt = ' [%d.%m.%Y]'
    publication_type = 'newspaper'

    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = False

    # Keep the article container; drop "read more" teasers and layout floats.
    keep_only_tags = [dict(name='div', attrs={'class':'article'})]
    remove_tags = [dict(name='div', attrs={'class':['ARTICLE_MORE', 'clearfloat']})]

    feeds = [
        (u'Leipzig', u'http://nachrichten.lvz-online.de/rss/leipzig-rss.xml'),
        (u'Mitteldeutschland', u'http://nachrichten.lvz-online.de/rss/mitteldeutschland-rss.xml'),
        (u'Brennpunkte', u'http://nachrichten.lvz-online.de/rss/brennpunkte-rss.xml'),
        (u'Polizeiticker', u'http://nachrichten.lvz-online.de/rss/polizeiticker-rss.xml'),
        (u'Boulevard', u'http://nachrichten.lvz-online.de/rss/boulevard-rss.xml'),
        (u'Kultur', u'http://nachrichten.lvz-online.de/rss/kultur-rss.xml'),
        (u'Sport', u'http://nachrichten.lvz-online.de/rss/sport-rss.xml'),
        (u'Regionalsport', u'http://nachrichten.lvz-online.de/rss/regionalsport-rss.xml'),
        (u'Knipser', u'http://nachrichten.lvz-online.de/rss/knipser-rss.xml'),
    ]

    def get_masthead_url(self):
        """Return the fixed URL of the LVZ logo used as the masthead."""
        return 'http://www.lvz-online.de/resources/themes/standard/images/global/logo.gif'

View File

@ -4,13 +4,13 @@ from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1311799898(BasicNewsRecipe):
title = u'Periódico Portafolio Colombia'
language = 'es_CO'
__author__ = 'BIGO-CAVA'
language = 'es_CO'
cover_url = 'http://www.portafolio.co/sites/portafolio.co/themes/portafolio_2011/logo.png'
remove_tags_before = dict(id='contenidoArt')
remove_tags_after = [dict(name='div', attrs={'class':'articulo-mas'})]
keep_only_tags = [dict(name='div', id='contenidoArt')]
oldest_article = 1
oldest_article = 2
max_articles_per_feed = 100
remove_javascript = True
no_stylesheets = True

View File

@ -1,5 +1,8 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '''
2010, Darko Miletic <darko.miletic at gmail.com>
2011, Przemyslaw Kryger <pkryger at gmail.com>
'''
'''
readitlaterlist.com
'''
@ -9,7 +12,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Readitlater(BasicNewsRecipe):
title = 'Read It Later'
__author__ = 'Darko Miletic'
__author__ = 'Darko Miletic, Przemyslaw Kryger'
description = '''Personalized news feeds. Go to readitlaterlist.com to
setup up your news. Fill in your account
username, and optionally you can add password.'''
@ -23,9 +26,6 @@ class Readitlater(BasicNewsRecipe):
INDEX = u'http://readitlaterlist.com'
LOGIN = INDEX + u'/l'
feeds = [(u'Unread articles' , INDEX + u'/unread')]
def get_browser(self):
br = BasicNewsRecipe.get_browser()
if self.username is not None:
@ -37,12 +37,31 @@ class Readitlater(BasicNewsRecipe):
br.submit()
return br
def get_feeds(self):
self.report_progress(0, ('Fetching list of feeds...'))
lfeeds = []
i = 1
feedurl = self.INDEX + u'/unread/1'
while True:
title = u'Unread articles, page ' + str(i)
lfeeds.append((title, feedurl))
self.report_progress(0, ('Got ') + str(i) + (' feeds'))
i += 1
soup = self.index_to_soup(feedurl)
ritem = soup.find('a',attrs={'id':'next', 'class':'active'})
if ritem is None:
break
feedurl = self.INDEX + ritem['href']
if self.test:
return lfeeds[:2]
return lfeeds
def parse_index(self):
totalfeeds = []
lfeeds = self.get_feeds()
for feedobj in lfeeds:
feedtitle, feedurl = feedobj
self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
self.report_progress(0, ('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
articles = []
soup = self.index_to_soup(feedurl)
ritem = soup.find('ul',attrs={'id':'list'})

View File

@ -0,0 +1,11 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1317341570(BasicNewsRecipe):
    """Fetch the Colombian magazine 'Revista Semana' from its RSS feed."""

    title = u'Revista Semana'
    __author__ = 'BIGO-CAVA'
    language = 'es_CO'

    # Up to a week of articles, capped at 100 per feed.
    oldest_article = 7
    max_articles_per_feed = 100

    feeds = [(u'Revista Semana', u'http://www.semana.com/rss/Semana_OnLine.xml')]

View File

@ -22,10 +22,10 @@ class Smh_au(BasicNewsRecipe):
remove_empty_feeds = True
masthead_url = 'http://images.smh.com.au/2010/02/02/1087188/smh-620.jpg'
publication_type = 'newspaper'
extra_css = """
h1{font-family: Georgia,"Times New Roman",Times,serif }
body{font-family: Arial,Helvetica,sans-serif}
.cT-imageLandscape,.cT-imagePortrait{font-size: x-small}
extra_css = """
h1{font-family: Georgia,"Times New Roman",Times,serif }
body{font-family: Arial,Helvetica,sans-serif}
.cT-imageLandscape,.cT-imagePortrait{font-size: x-small}
"""
conversion_options = {
@ -35,16 +35,16 @@ class Smh_au(BasicNewsRecipe):
, 'language' : language
}
remove_tags = [
dict(name='div', attrs={'id':['googleAds','moreGoogleAds','comments']})
,dict(name='div', attrs={'class':'cT-imageMultimedia'})
,dict(name=['object','embed','iframe'])
]
remove_tags_after = [dict(name='div',attrs={'class':'articleBody'})]
keep_only_tags = [dict(name='div',attrs={'id':'content'})]
remove_tags = [
dict(attrs={'class':'hidden'}),
dict(name=['link','meta','base','embed','object','iframe'])
remove_tags = [
dict(name='div',
attrs={'id':['googleAds','moreGoogleAds','comments',
'video-player-content']}),
dict(name='div', attrs={'class':'cT-imageMultimedia'}),
dict(name=['object','embed','iframe']),
dict(attrs={'class':'hidden'}),
dict(name=['link','meta','base','embed','object','iframe'])
]
remove_attributes = ['width','height','lang']
@ -84,4 +84,4 @@ class Smh_au(BasicNewsRecipe):
if not item.has_key('alt'):
item['alt'] = 'image'
return soup

View File

@ -285,6 +285,15 @@ function booklist(hide_sort) {
first_page();
}
function search_result() {
    // If the first results page is missing, the query matched no books:
    // show a message instead of rendering an empty book list.
    var first_page_markup = $("#booklist #page0").html();
    if (!first_page_markup) {
        $("#booklist").html("No books found matching this query");
        return;
    }
    booklist();
}
function show_details(a_dom) {
var book = $(a_dom).closest('div.summary');
var bd = $('#book_details_dialog');

View File

@ -2,7 +2,7 @@
let g:pyflakes_builtins = ["_", "dynamic_property", "__", "P", "I", "lopen", "icu_lower", "icu_upper", "icu_title", "ngettext"]
python << EOFPY
import os
import os, sys
import vipy
@ -11,15 +11,20 @@ project_dir = os.path.dirname(source_file)
src_dir = os.path.abspath(os.path.join(project_dir, 'src'))
base_dir = os.path.join(src_dir, 'calibre')
sys.path.insert(0, src_dir)
sys.resources_location = os.path.join(project_dir, 'resources')
sys.extensions_location = os.path.join(base_dir, 'plugins')
sys.executables_location = os.environ.get('CALIBRE_EXECUTABLES_PATH', '/usr/bin')
vipy.session.initialize(project_name='calibre', src_dir=src_dir,
project_dir=project_dir, base_dir=base_dir)
project_dir=project_dir, base_dir=project_dir)
def recipe_title_callback(raw):
return eval(raw.decode('utf-8')).replace(' ', '_')
vipy.session.add_content_browser('.r', ',r', 'Recipe',
vipy.session.add_content_browser('<leader>r', 'Recipe',
vipy.session.glob_based_iterator(os.path.join(project_dir, 'recipes', '*.recipe')),
vipy.session.regexp_based_matcher(r'title\s*=\s*(?P<title>.+)', 'title', recipe_title_callback))
EOFPY
nmap \log :enew<CR>:read ! bzr log -l 500 ../.. <CR>:e ../../Changelog.yaml<CR>:e constants.py<CR>
nmap \log :enew<CR>:read ! bzr log -l 500 <CR>:e Changelog.yaml<CR>:e src/calibre/constants.py<CR>

View File

@ -177,6 +177,7 @@ fc_error = None if os.path.exists(os.path.join(fc_inc, 'fontconfig.h')) else \
poppler_error = None
poppler_cflags = ['-DPNG_SKIP_SETJMP_CHECK'] if islinux else []
if not poppler_inc_dirs or not os.path.exists(
os.path.join(poppler_inc_dirs[0], 'OutputDev.h')):
poppler_error = \
@ -186,6 +187,10 @@ if not poppler_inc_dirs or not os.path.exists(
' the poppler XPDF headers. If your distro does not '
' include them you will have to re-compile poppler '
' by hand with --enable-xpdf-headers')
else:
lh = os.path.join(poppler_inc_dirs[0], 'Link.h')
if 'class AnnotLink' not in open(lh, 'rb').read():
poppler_cflags.append('-DPOPPLER_OLD_LINK_TYPE')
magick_error = None
if not magick_inc_dirs or not os.path.exists(os.path.join(magick_inc_dirs[0],

View File

@ -11,15 +11,15 @@ from distutils import sysconfig
from PyQt4.pyqtconfig import QtGuiModuleMakefile
from setup import Command, islinux, isfreebsd, isbsd, isosx, SRC, iswindows
from setup.build_environment import fc_inc, fc_lib, chmlib_inc_dirs, \
fc_error, poppler_libs, poppler_lib_dirs, poppler_inc_dirs, podofo_inc, \
podofo_lib, podofo_error, poppler_error, pyqt, OSX_SDK, NMAKE, \
QMAKE, msvc, MT, win_inc, win_lib, png_inc_dirs, win_ddk, \
magick_inc_dirs, magick_lib_dirs, png_lib_dirs, png_libs, \
magick_error, magick_libs, ft_lib_dirs, ft_libs, jpg_libs, \
jpg_lib_dirs, chmlib_lib_dirs, sqlite_inc_dirs, icu_inc_dirs, \
icu_lib_dirs
from setup import Command, islinux, isbsd, isosx, SRC, iswindows
from setup.build_environment import (fc_inc, fc_lib, chmlib_inc_dirs,
fc_error, poppler_libs, poppler_lib_dirs, poppler_inc_dirs, podofo_inc,
podofo_lib, podofo_error, poppler_error, pyqt, OSX_SDK, NMAKE,
QMAKE, msvc, MT, win_inc, win_lib, png_inc_dirs, win_ddk,
magick_inc_dirs, magick_lib_dirs, png_lib_dirs, png_libs,
magick_error, magick_libs, ft_lib_dirs, ft_libs, jpg_libs,
jpg_lib_dirs, chmlib_lib_dirs, sqlite_inc_dirs, icu_inc_dirs,
icu_lib_dirs, poppler_cflags)
MT
isunix = islinux or isosx or isbsd
@ -114,7 +114,7 @@ extensions = [
lib_dirs=poppler_lib_dirs+magick_lib_dirs+png_lib_dirs+ft_lib_dirs+jpg_lib_dirs,
inc_dirs=poppler_inc_dirs+magick_inc_dirs+png_inc_dirs,
error=reflow_error,
cflags=['-DPNG_SKIP_SETJMP_CHECK'] if islinux else []
cflags=poppler_cflags
),
Extension('lzx',

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More