Sync to trunk.

This commit is contained in:
John Schember 2010-01-11 06:09:02 -05:00
commit 5f89c14c19
37 changed files with 7359 additions and 6265 deletions

View File

@ -4,6 +4,98 @@
# for important features/bug fixes.
# Also, each release can have new and improved recipes.
- version: 0.6.33
date: 2010-01-10
new features:
- title: "The e-book viewer now has built-in dictionary lookup"
type: major
description: >
"You can now right click on a word to look up its meaning in an online dictionary.
calibre uses the public domain dictionaries available at dict.org"
- title: "RTF Output: Add support for unicode characters"
- title: "Allow the metadata that is used to create collections when sending books to SONY readers to be customized"
description: >
"By default collections are created on the SONY reader corresponding to series and tags. Now you can add other
metadata fields, like author, or remove ones you dislike, by going to Preferences->Plugins and customizing
the device interface plugin corresponding to your device."
- title: "TXT Input: Add option to disable insertion of Table of Contents into output text."
tickets: [4506]
- title: "Remember state of cover and tag browsing views on restart"
bug fixes:
- title: "EPUB Output: Add id attributes to anchors that have only name, as Adobe Digital Editions apparently can't handle only name attributes"
tickets: [4474]
- title: "Conversion pipeline: Handle the list-style shortcut CSS property correctly"
tickets: [4418]
- title: "EPUB Output: Fix generation of comics with PNG images for the Nook"
tickets: [4492]
- title: "Fix bug that could prevent loading of some custom plugins"
tickets: [4414]
- title: "News download: Handle URLs with both commas and non-ASCII characters correctly"
- title: "Ignore invalid metadata when adding books from command line instead of erroring out"
tickets: [4496]
- title: Fix remove header/footer assistant when converting HTML files
tickets: [4484]
- title: "Workaround for browsers like iPhone Safari that send extra arguments when downloading books from the content server"
- title: "Content server: Recognize the HTC HD2 as a mobile browser and add series information to the mobile version of the web page."
tickets: [4488]
- title: "FB2 Output: Properly escape metadata before inserting it into the file"
- title: "Don't accept rich text in the comments fields of the edit meta information dialog"
- title: "Fix device detection for Cybook gen 3 with firmware 2.0"
- title: "Send to device: Use default save template when driver specific one is empty or unspecified"
- title: "Fix framework for running post-process and preprocess file type plugins"
- title: "Linux develop/install commands: Use bindir instead of staging bindir in the launchers"
tickets: [4437]
- title: "E-book viewer: Sanitize file names when unzipping EPUB files"
tickets: [4426]
new recipes:
- title: The Escapist
author: Lorenzo Vigentini
- title: Washington Post cartoons
author: kwetal
- title: The Dallas Morning News
author: Krittika Goyal
- title: sg.hu
author: davotibarna
- title: The New Zealand Herald
author: Krittika Goyal
- title: Nature News
author: Krittika Goyal
improved recipes:
- El Pais
- The Economist
- The New York Times
- Entrepreneur Magazine
- CNN
- version: 0.6.32
date: 2010-01-03

Binary file not shown.

After

Width:  |  Height:  |  Size: 311 B

View File

@ -1,27 +1,35 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
__author__ = 'Lorenzo Vigentini, based on Darko Miletic'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>'
__version__ = 'v1.01'
__date__ = '10, January 2010'
__description__ = 'Italian daily newspaper (english version)'
'''
www.corriere.it/english
http://www.corriere.it/
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Corriere_en(BasicNewsRecipe):
title = 'Corriere della Sera in English'
__author__ = 'Darko Miletic'
description = 'News from Milan and Italy'
oldest_article = 15
publisher = 'Corriere della Sera'
category = 'news, politics, Italy'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
remove_javascript = True
language = 'en'
class ilCorriere(BasicNewsRecipe):
__author__ = 'Lorenzo Vigentini, based on Darko Miletic'
description = 'Italian daily newspaper (english version)'
cover_url = 'http://images.corriereobjects.it/images/static/common/logo_home.gif?v=200709121520'
title = u'Il Corriere della sera (english) '
publisher = 'RCS Digital'
category = 'News, politics, culture, economy, general interest'
language = 'en'
timefmt = '[%a, %d %b, %Y]'
oldest_article = 1
max_articles_per_feed = 100
use_embedded_content = False
recursion = 10
remove_javascript = True
no_stylesheets = True
html2lrf_options = [
'--comment', description
@ -35,12 +43,13 @@ class Corriere_en(BasicNewsRecipe):
keep_only_tags = [dict(name='div', attrs={'class':['news-dettaglio article','article']})]
remove_tags = [
dict(name=['base','object','link','embed','img'])
,dict(name='div', attrs={'class':'news-goback'})
,dict(name='ul', attrs={'class':'toolbar'})
dict(name=['base','object','link','embed']),
dict(name='div', attrs={'class':'news-goback'}),
dict(name='ul', attrs={'class':'toolbar'})
]
remove_tags_after = dict(name='p', attrs={'class':'footnotes'})
feeds = [(u'Italian Life', u'http://www.corriere.it/rss/english.xml')]
feeds = [
(u'News' , u'http://www.corriere.it/rss/english.xml' )
]

View File

@ -1,26 +1,36 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini, based on Darko Miletic'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>'
__version__ = 'v1.01'
__date__ = '10, January 2010'
__description__ = 'Italian daily newspaper'
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
www.corriere.it
http://www.corriere.it/
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Corriere_it(BasicNewsRecipe):
title = 'Corriere della Sera'
__author__ = 'Darko Miletic'
description = 'News from Milan and Italy'
oldest_article = 7
publisher = 'Corriere della Sera'
category = 'news, politics, Italy'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
remove_javascript = True
language = 'it'
class ilCorriere(BasicNewsRecipe):
__author__ = 'Lorenzo Vigentini, based on Darko Miletic'
description = 'Italian daily newspaper'
cover_url = 'http://images.corriereobjects.it/images/static/common/logo_home.gif?v=200709121520'
title = u'Il Corriere della sera '
publisher = 'RCS Digital'
category = 'News, politics, culture, economy, general interest'
language = 'it'
timefmt = '[%a, %d %b, %Y]'
oldest_article = 1
max_articles_per_feed = 100
use_embedded_content = False
recursion = 10
remove_javascript = True
no_stylesheets = True
html2lrf_options = [
'--comment', description
@ -34,23 +44,24 @@ class Corriere_it(BasicNewsRecipe):
keep_only_tags = [dict(name='div', attrs={'class':['news-dettaglio article','article']})]
remove_tags = [
dict(name=['base','object','link','embed','img'])
,dict(name='div', attrs={'class':'news-goback'})
,dict(name='ul', attrs={'class':'toolbar'})
dict(name=['base','object','link','embed']),
dict(name='div', attrs={'class':'news-goback'}),
dict(name='ul', attrs={'class':'toolbar'})
]
remove_tags_after = dict(name='p', attrs={'class':'footnotes'})
feeds = [
(u'Ultimora' , u'http://www.corriere.it/rss/ultimora.xml' )
,(u'Cronache' , u'http://www.corriere.it/rss/cronache.xml' )
,(u'Economia' , u'http://www.corriere.it/rss/economia.xml' )
,(u'Editoriali', u'http://www.corriere.it/rss/editoriali.xml')
,(u'Esteri' , u'http://www.corriere.it/rss/esteri.xml' )
,(u'Politica' , u'http://www.corriere.it/rss/politica.xml' )
,(u'Salute' , u'http://www.corriere.it/rss/salute.xml' )
,(u'Scienze' , u'http://www.corriere.it/rss/scienze.xml' )
,(u'Spettacolo', u'http://www.corriere.it/rss/spettacoli.xml')
,(u'Sport' , u'http://www.corriere.it/rss/sport.xml' )
(u'Ultimora' , u'http://www.corriere.it/rss/ultimora.xml' ),
(u'Editoriali', u'http://www.corriere.it/rss/editoriali.xml'),
(u'Cronache' , u'http://www.corriere.it/rss/cronache.xml' ),
(u'Politica' , u'http://www.corriere.it/rss/politica.xml' ),
(u'Esteri' , u'http://www.corriere.it/rss/esteri.xml' ),
(u'Economia' , u'http://www.corriere.it/rss/economia.xml' ),
(u'Cultura' , u'http://www.corriere.it/rss/cultura.xml' ),
(u'Scienze' , u'http://www.corriere.it/rss/scienze.xml' ),
(u'Salute' , u'http://www.corriere.it/rss/salute.xml' ),
(u'Spettacolo', u'http://www.corriere.it/rss/spettacoli.xml'),
(u'Cinema e TV', u'http://www.corriere.it/rss/cinema.xml' ),
(u'Sport' , u'http://www.corriere.it/rss/sport.xml' )
]

View File

@ -106,7 +106,7 @@ class Economist(BasicNewsRecipe):
return ans
def eco_find_image_tables(self, soup):
for x in soup.findAll('table', align='right'):
for x in soup.findAll('table', align=['right', 'center']):
if len(x.findAll('font')) in (1,2) and len(x.findAll('img')) == 1:
yield x

View File

@ -107,7 +107,7 @@ class Economist(BasicNewsRecipe):
self.log.debug(tb)
def eco_find_image_tables(self, soup):
for x in soup.findAll('table', align='right'):
for x in soup.findAll('table', align=['right', 'center']):
if len(x.findAll('font')) in (1,2) and len(x.findAll('img')) == 1:
yield x

View File

@ -1,6 +1,8 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini, based on earlier version by Kovid Goyal'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
description = 'Main daily newspaper from Spain - v1.02 (10, January 2010)'
__docformat__ = 'restructuredtext en'
'''
@ -10,18 +12,54 @@ elpais.es
from calibre.web.feeds.news import BasicNewsRecipe
class ElPais(BasicNewsRecipe):
title = u'EL PAIS'
language = 'es'
__author__ = 'Kovid Goyal & Lorenzo Vigentini'
description = 'Main daily newspaper from Spain'
oldest_article = 7
max_articles_per_feed = 100
cover_url = 'http://www.elpais.com/im/tit_logo_global.gif'
title = u'El Pais'
publisher = 'Ediciones El Pais SL'
category = 'News, politics, culture, economy, general interest'
language = 'es'
timefmt = '[%a, %d %b, %Y]'
oldest_article = 2
max_articles_per_feed = 15
use_embedded_content = False
recursion = 5
remove_javascript = True
no_stylesheets = True
remove_tags = [dict(name='div', attrs={'class':'zona_superior'}), dict(name='div', attrs={'class':'limpiar'}), dict(name='div', attrs={'id':'pie'})]
extra_css = 'h1 {font: sans-serif large;} \n h2 {font: sans-serif medium;} \n h3 {font: sans-serif small;} \n h4 {font: sans-serif bold small;} \n p{ font:10pt serif}'
keep_only_tags = [ dict(name='div', attrs={'class':['cabecera_noticia','cabecera_noticia_reportaje','contenido_noticia','caja_despiece','presentacion']})]
extra_css = '''
p{style:normal size:12 serif}
feeds = [(u'Internacional', u'http://www.elpais.es/rss/rss_section.html?anchor=elpporint'), (u'Espana', u'http://www.elpais.es/rss/rss_section.html?anchor=elppornac'), (u'Deportes', u'http://www.elpais.es/rss/rss_section.html?anchor=elppordep'), (u'Economia', u'http://www.elpais.es/rss/rss_section.html?anchor=elpporeco'), (u'Tecnologia', u'http://www.elpais.es/rss/rss_section.html?anchor=elpportec'), (u'Cultura', u'http://www.elpais.es/rss/rss_section.html?anchor=elpporcul'), (u'Gente', u'http://www.elpais.es/rss/rss_section.html?anchor=elpporgen'), (u'Sociedad', u'http://www.elpais.es/rss/rss_section.html?anchor=elpporsoc'), (u'Opinion', u'http://www.elpais.es/rss/rss_section.html?anchor=elpporopi')]
'''
def print_version(self, url):
url = url+'?print=1'
return url
remove_tags = [
dict(name='div', attrs={'class':['zona_superior','pie_enlaces_inferiores','contorno_f','ampliar']}),
dict(name='div', attrs={'class':['limpiar','mod_apoyo','borde_sup','votos','info_complementa','info_relacionada']}),
dict(name='div', attrs={'id':['suscribirse suscrito','google_noticia','utilidades','coment','foros_not','pie','lomas']})
]
feeds = [
(u'Titulares de portada', u'http://www.elpais.com/rss/feed.html?feedId=1022'),
(u'Internacional', u'http://www.elpais.com/rss/feed.html?feedId=1001'),
(u'Espana', u'http://www.elpais.com/rss/feed.html?feedId=1002'),
(u'Deportes', u'http://www.elpais.com/rss/feed.html?feedId=1007'),
(u'Economia', u'http://www.elpais.com/rss/feed.html?feedId=1006'),
(u'Politica', u'http://www.elpais.com/rss/feed.html?feedId=17073'),
(u'Tecnologia', u'http://www.elpais.com/rss/feed.html?feedId=1005'),
(u'Cultura', u'http://www.elpais.com/rss/feed.html?feedId=1008'),
(u'Gente', u'http://www.elpais.com/rss/feed.html?feedId=1009'),
(u'Sociedad', u'http://www.elpais.com/rss/feed.html?feedId=1004'),
(u'Opinion', u'http://www.elpais.com/rss/feed.html?feedId=1003'),
(u'Ciencia', u'http://www.elpais.com/rss/feed.html?feedId=17068'),
(u'Justicia y leyes', u'http://www.elpais.com/rss/feed.html?feedId=17069'),
]
def print_version(self, url):
url = url+'?print=1'
return url

View File

@ -0,0 +1,67 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini'
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
__version__ = 'v1.02'
__date__ = '10, January 2010'
__description__ = 'Italian weekly magazine'
'''espresso.repubblica.it'''
from calibre.web.feeds.news import BasicNewsRecipe
# Recipe for the Italian weekly magazine l'Espresso (espresso.repubblica.it).
# NOTE(review): the class name is the same as the one used by the Gazzetta
# dello Sport recipe; it is kept unchanged here.
class laGazzetta(BasicNewsRecipe):
    # --- Publication metadata ---
    title = 'l Espresso '
    __author__ = 'Lorenzo Vigentini'
    description = 'Italian weekly magazine'
    publisher = 'Gruppo editoriale lEspresso'
    category = 'News, politics, culture, economy, general interest'
    language = 'it'
    cover_url = 'http://espresso.repubblica.it/images/logo_espresso.gif'
    timefmt = '[%a, %d %b, %Y]'

    # --- Download options ---
    encoding = 'cp1252'
    oldest_article = 16
    max_articles_per_feed = 100
    recursion = 10
    use_embedded_content = False
    remove_javascript = True
    no_stylesheets = True

    # --- Sections: (title, feed URL) ---
    feeds = [
        (u'Espresso Homepage', u'http://kpm.data.kataweb.it/kpm3eolx/rss/home'),
        (u'Espresso Local', u'http://kpm.data.kataweb.it/kpm3eolx/rss/local'),
        (u'Espresso Style & Design', u'http://kpm.data.kataweb.it/kpm3eolx/rss/style_design'),
        (u'Espresso Opinioni', u'http://kpm.data.kataweb.it/kpm3eolx/rss/opinioni'),
        (u'Espresso Rubriche', u'http://kpm.data.kataweb.it/kpm3eolx/rss/rubriche'),
        (u'Espresso Limes', u'http://temi.repubblica.it/limes/feed/')
    ]

    # --- Tag selectors: what to keep and what to strip ---
    keep_only_tags = [
        dict(name='div', attrs={'class':['testo','copertina','occhiello','firma','didascalia','content-second-right','detail-articles','titolo-local','generic-articles']}),
        dict(name='div', attrs={'class':['generic-articles','summary','detail-articles']}),
        dict(name='div', attrs={'id':'content-second-right'})
    ]
    remove_tags = [
        dict(name='div',attrs={'class':['servizi','aggiungi','label-web','bottom-mobile','box-abbonamenti','box-cerca','big','little','stampaweb']}),
        dict(name='div',attrs={'id':['topheader','header','navigation-new','navigation','content-second-left']}),
        dict(name=['script','noscript','iframe'])
    ]

    # --- Styling applied to the generated book ---
    extra_css = '''
h1 {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:18px;}
h2 {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:18px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; }
h3 {color:#333333;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;}
h4 {color:#333333; font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; }
h5 {color:#333333; font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;}
.firma {color:#333333;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:italic; font-variant:normal; font-weight:bold; line-height:15px; text-decoration:none;}
.testo {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:10px;}
'''

    def print_version(self, url):
        # The print page is addressed by appending this suffix to the
        # article URL unchanged.
        suffix = '/&print=true'
        return url + suffix

View File

@ -0,0 +1,79 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini'
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
__version__ = 'v1.02'
__date__ = '10, January 2010'
__description__ = 'Sport news from the most read sport newspaper in Italy'
'''www.gazzetta.it'''
from calibre.web.feeds.news import BasicNewsRecipe
class laGazzetta(BasicNewsRecipe):
__author__ = 'Lorenzo Vigentini'
description = 'Sport news from the most read sport newspaper in Italy'
cover_url = 'http://www.gazzetta.it/primapagina/images/prima_pagina_grande.png'
title = 'La Gazzetta dello Sport '
publisher = 'RCS Digital'
category = 'Sport News'
language = 'it'
encoding = 'cp1252'
timefmt = '[%a, %d %b, %Y]'
oldest_article = 2
max_articles_per_feed = 20
use_embedded_content = False
recursion = 10
remove_javascript = True
no_stylesheets = True
keep_only_tags = [ dict(name='div', attrs={'id':'articolo'})]
remove_tags = [
dict(name='ul',attrs={'id':['service-toolbar','sections-menu']}),
dict(name='div',attrs={'id':['header','rightcol','sponsored','vxFlashPlayer','footer','print-box']}),
dict(name='iframe',attrs={'id':'mirago-feed'}),
dict(name='a',attrs={'id':'commenta-up'}),
dict(name='cite',attrs={'class':['signature','parag-title']}),
dict(name='a',attrs={'class':['last-comment','button-bold2']}),
dict(name=['base','object','link','a','script','noscript'])
]
extra_css = '''
h1 {font: sans-serif large;}
h2 {font: sans-serif medium;}
h3 {font: sans-serif small;}
h4 {font: sans-serif bold small;}
p {font:10pt helvetica}
dd {font:8pt helvetica}
'''
feeds = [
(u'Calcio',u'http://www.gazzetta.it/rss/Calcio.xml'),
(u'Formula 1',u'http://www.gazzetta.it/rss/Formula1.xml'),
(u'Motomodiale',u'http://www.gazzetta.it/rss/Motomondiale.xml'),
(u'Motori',u'http://www.gazzetta.it/rss/Motori.xml'),
(u'Ciclismo',u'http://www.gazzetta.it/rss/Ciclismo.xml'),
(u'Basket',u'http://www.gazzetta.it/rss/Basket.xml'),
(u'Tennis',u'http://www.gazzetta.it/rss/Tennis.xml'),
(u'Pallavolo',u'http://www.gazzetta.it/rss/Pallavolo.xml'),
(u'Vela',u'http://www.gazzetta.it/rss/Vela.xml'),
(u'Atletica',u'http://www.gazzetta.it/rss/Atletica.xml'),
(u'Altri Sport',u'http://www.gazzetta.it/rss/Sport_Vari.xml')
]
def print_version(self, url):
    """Return the URL of the print version of the article at *url*.

    The URL is split on '/'. The first three segments (scheme, empty
    segment, host) and the following four path segments are kept as-is.
    The last segment loses its final six characters (presumably the
    '.shtml' extension -- TODO confirm against live feed URLs), is cut at
    its first '.', and gets '_print.html' appended.
    """
    segments = url.split('/')
    base = '/'.join(segments[:3]) + '/'
    sub_path = '/'.join(segments[3:7]) + '/'
    # Drop the six-character suffix first, then anything after the first dot.
    article = segments[-1][:-6].split('.')[0]
    # The former debug print of the computed URL was removed: it wrote to
    # stdout for every article downloaded.
    return base + sub_path + article + '_print.html'

View File

@ -1,29 +1,55 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini, based on Darko Miletic'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>'
description = 'Italian daily newspaper - v1.01 (04, January 2010)'
'''
http://www.repubblica.it/
'''
from calibre.web.feeds.news import BasicNewsRecipe
class LaRepublica(BasicNewsRecipe):
title = u'la Repubblica'
oldest_article = 1
language = 'it'
author = 'Lorenzo Vigentini, based on Darko Miletic'
description = 'Italian daily newspaper'
author = 'Darko Miletic'
cover_url = 'http://www.repubblica.it/images/homepage/la_repubblica_logo.gif'
title = u'La Repubblica'
publisher = 'Gruppo editoriale L\'Espresso'
category = 'News, politics, culture, economy, general interest'
language = 'it'
timefmt = '[%a, %d %b, %Y]'
oldest_article = 1
max_articles_per_feed = 100
use_embedded_content = False
recursion = 10
remove_javascript = True
no_stylesheets = True
keep_only_tags = [dict(name='div', attrs={'class':'articolo'})]
remove_tags = [
dict(name=['object','link'])
,dict(name='span',attrs={'class':'linkindice'})
,dict(name='div',attrs={'class':'bottom-mobile'})
,dict(name='div',attrs={'id':['rssdiv','blocco']})
dict(name=['object','link']),
dict(name='span',attrs={'class':'linkindice'}),
dict(name='div',attrs={'class':'bottom-mobile'}),
dict(name='div',attrs={'id':['rssdiv','blocco']})
]
feeds = [
(u'Repubblica homepage', u'http://www.repubblica.it/rss/homepage/rss2.0.xml'),
(u'Repubblica Rilievo', u'http://www.repubblica.it/rss/homepage/rss2.0.xml'),
(u'Repubblica Cronaca', u'http://www.repubblica.it/rss/cronaca/rss2.0.xml'),
(u'Repubblica Esteri', u'http://www.repubblica.it/rss/esteri/rss2.0.xml'),
(u'Repubblica Economia', u'http://www.repubblica.it/rss/economia/rss2.0.xml'),
(u'Repubblica Politica', u'http://www.repubblica.it/rss/politica/rss2.0.xml'),
(u'Repubblica Scienze', u'http://www.repubblica.it/rss/scienze/rss2.0.xml'),
(u'Repubblica Tecnologia', u'http://www.repubblica.it/rss/tecnologia/rss2.0.xml'),
(u'Repubblica Esteri', u'http://www.repubblica.it/rss/esteri/rss2.0.xml')
(u'Repubblica Scuola e Universita', u'http://www.repubblica.it/rss/scuola_e_universita/rss2.0.xml'),
(u'Repubblica Ambiente', u'http://www.repubblica.it/rss/ambiente/rss2.0.xml'),
(u'Repubblica Cultura', u'http://www.repubblica.it/rss/spettacoli_e_cultura/rss2.0.xml'),
(u'Repubblica Persone', u'http://www.repubblica.it/rss/persone/rss2.0.xml'),
(u'Repubblica Sport', u'http://www.repubblica.it/rss/sport/rss2.0.xml'),
(u'Repubblica Calcio', u'http://www.repubblica.it/rss/sport/calcio/rss2.0.xml')
]

View File

@ -1,4 +1,3 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
@ -20,29 +19,25 @@ class Nzz(BasicNewsRecipe):
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
lang = 'de-CH'
language = 'de'
language = 'de'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [dict(name='div', attrs={'class':'article'})]
remove_tags = [
dict(name=['object','link','base','script'])
dict(name=['object','link','base'])
,dict(name='div',attrs={'class':['more','teaser','advXertXoriXals','legal']})
,dict(name='div',attrs={'id':['popup-src','readercomments','google-ad','advXertXoriXals']})
]
feeds = [
(u'Neuste Artikel', u'http://www.nzz.ch/feeds/recent/' )
,(u'International' , u'http://www.nzz.ch/nachrichten/international?rss=true')
(u'International' , u'http://www.nzz.ch/nachrichten/international?rss=true')
,(u'Schweiz' , u'http://www.nzz.ch/nachrichten/schweiz?rss=true')
,(u'Wirtschaft' , u'http://www.nzz.ch/nachrichten/wirtschaft/aktuell?rss=true')
,(u'Finanzmaerkte' , u'http://www.nzz.ch/finanzen/nachrichten?rss=true')
@ -55,13 +50,7 @@ class Nzz(BasicNewsRecipe):
,(u'Reisen' , u'http://www.nzz.ch/magazin/reisen?rss=true')
]
def preprocess_html(self, soup):
soup.html['xml:lang'] = self.lang
soup.html['lang'] = self.lang
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
soup.head.insert(0,mtag)
return soup
def print_version(self, url):
return url + '?printview=true'

View File

@ -0,0 +1,51 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini'
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
__version__ = 'v1.01'
__date__ = '10, January 2010'
__description__ = 'Italian weekly magazine'
'''
http://www.panorama.it/
'''
from calibre.web.feeds.news import BasicNewsRecipe
# Recipe for the Italian weekly magazine Panorama (www.panorama.it).
class panorama(BasicNewsRecipe):
    # --- Publication metadata ---
    title = u'Panorama '
    __author__ = 'Lorenzo Vigentini, based on Darko Miletic'
    description = 'Italian weekly magazine'
    publisher = 'Mondadori'
    category = 'News, politics, culture, economy, general interest'
    language = 'it'
    cover_url = 'http://www.panorama.it/panorama/images/panorama_large.gif'
    timefmt = '[%a, %d %b, %Y]'

    # --- Download options ---
    oldest_article = 7
    max_articles_per_feed = 100
    recursion = 10
    use_embedded_content = False
    remove_javascript = True

    # --- Tag selectors: what to keep and what to strip ---
    keep_only_tags = [
        dict(name='div', attrs={'class':['post','article']})
    ]
    remove_tags = [
        dict(name=['object','link']),
        dict(name='div',attrs={'class':['post-meta','sharing-tools','related','comments','prev-next']}),
        dict(name='div',attrs={'id':['related-posts','footer']})
    ]

    # --- Sections: (title, feed URL) ---
    feeds = [
        (u'Panorama Italia', u'http://blog.panorama.it/italia/feed'),
        (u'Panorama Mondo', u'http://blog.panorama.it/mondo/feed'),
        (u'Panorama Cultura e societa', u'http://blog.panorama.it/culturaesocieta/feed'),
        (u'Panorama Hitech e scienza', u'http://blog.panorama.it/hitechescienza/feed'),
        (u'Panorama Motori', u'http://blog.panorama.it/autoemoto/feed'),
        (u'Panorama libri', u'http://blog.panorama.it/libri/feed'),
        (u'Panorama Opinioni', u'http://blog.panorama.it/opinioni/feed'),
    ]

View File

@ -0,0 +1,52 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini'
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
__version__ = 'v1.01'
__date__ = '10, January 2010'
__description__ = 'Italian News Agency'
'''
http://www.quotidianonet.ilsole24ore.com/
'''
from calibre.web.feeds.news import BasicNewsRecipe
# Recipe for Quotidiano Net (quotidianonet.ilsole24ore.com).
# NOTE(review): the class is named `panorama`, the same name the Panorama
# recipe uses; it is kept unchanged here.
class panorama(BasicNewsRecipe):
    # --- Publication metadata ---
    title = u'Quotidiano Net '
    __author__ = 'Lorenzo Vigentini, based on Darko Miletic'
    description = 'Italian News Agency'
    publisher = 'italiaNews'
    category = 'News, politics, culture, economy, general interest'
    language = 'it'
    cover_url = 'http://quotidianonet.ilsole24ore.com/file_generali/img/logo_quotidianonet-top.gif'
    timefmt = '[%a, %d %b, %Y]'

    # --- Download options ---
    oldest_article = 7
    max_articles_per_feed = 100
    recursion = 10
    use_embedded_content = False
    remove_javascript = True

    # --- Tag selectors: what to keep and what to strip ---
    keep_only_tags = [
        dict(name='div', attrs={'class':'box_contenuto articolo'})
    ]
    remove_tags = [
        dict(name=['object','link']),
        dict(name='div',attrs={'class':['post-meta','sharing-tools','related','comments','prev-next','box_contenuto adsense']}),
        dict(name='div',attrs={'id':['strumenti','related-posts','footer','inline_boxes','inline_boxes_header','inline_boxes_body','bottom']}),
        dict(name='span',attrs={'class':'titolosezione default'})
    ]

    # --- Sections: (title, feed URL) ---
    feeds = [
        (u'Prima pagina', u'http://quotidianonet.ilsole24ore.com/rss/home.xml'),
        (u'Cronaca', u'http://quotidianonet.ilsole24ore.com/rss/cronaca.xml'),
        (u'Economia', u'http://quotidianonet.ilsole24ore.com/rss/economia.xml'),
        (u'Esteri', u'http://quotidianonet.ilsole24ore.com/rss/esteri.xml'),
        (u'Politica', u'http://quotidianonet.ilsole24ore.com/rss/politica.xml'),
        (u'Salute', u'http://quotidianonet.ilsole24ore.com/rss/salute.xml'),
        (u'Tecnologia', u'http://quotidianonet.ilsole24ore.com/rss/tecnologia.xml'),
    ]

View File

@ -0,0 +1,57 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class TheForce(BasicNewsRecipe):
title = u'The Force'
language = 'en'
__author__ = 'Krittika Goyal'
oldest_article = 1 #days
max_articles_per_feed = 25
encoding = 'cp1252'
remove_stylesheets = True
#remove_javascripts = True
conversion_options = { 'linearize_tables' : True }
remove_tags_after= dict(name='div', attrs={'class':'KonaBody'})
keep_only_tags = dict(name='td', attrs={'background':'/images/span/tile_story_bgtile.gif'})
#keep_only_tags = dict(name='div', attrs={'class':'KonaBody'})
remove_tags = [
dict(name='iframe'),
#dict(name='div', attrs={'class':['pt-box-title', 'pt-box-content', 'blog-entry-footer', 'item-list', 'article-sub-meta']}),
#dict(name='div', attrs={'id':['block-td_search_160', 'block-cam_search_160']}),
#dict(name='table', attrs={'cellspacing':'0'}),
#dict(name='ul', attrs={'class':'articleTools'}),
]
feeds = [
('The Force',
'http://www.theforce.net/outnews/tfnrdf.xml'),
]
# Edit a downloaded page in place before conversion and return the same soup:
# cut trailing material after two marker elements, then rewrite linked images.
# NOTE(review): indentation is not visible in this view, so the nesting
# described in the comments below is inferred from statement order -- confirm
# against the real file.
def preprocess_html(self, soup):
# Step 1: look for an <i> element whose text contains the Facebook reminder;
# extract everything findAllNext() returns for it, then the element itself.
for tag in soup.findAll(name='i'):
if 'Remember to join the Star Wars Insider Facebook' in self.tag_to_string(tag):
for x in tag.findAllNext():
x.extract()
tag.extract()
# presumably stops at the first matching <i> -- TODO confirm nesting
break
# Step 2: find an element with class "articleoption", climb to its enclosing
# <table>, and extract everything findAllNext() returns plus the table itself.
tag = soup.find(attrs={'class':'articleoption'})
if tag is not None:
tag = tag.findParent('table')
if tag is not None:
for x in tag.findAllNext():
x.extract()
tag.extract()
# Step 3: for every <img src=...> that sits inside an <a href=...>.
for img in soup.findAll('img', src=True):
a = img.findParent('a', href=True)
if a is None: continue
# Take the part of the href after its last '?', then whatever follows the
# first '=' in that part; empty when there is no '='.
url = a.get('href').split('?')[-1].partition('=')[-1]
if url:
# Remove the original image and turn the surrounding link itself into an
# <img> whose src is the extracted URL.
img.extract()
a.name = 'img'
a['src'] = url
del a['href']
# NOTE(review): this also writes url to the original <img>; whether it runs
# only when url is non-empty cannot be determined from this view.
img['src'] = url
return soup

View File

@ -0,0 +1,145 @@
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from datetime import date, timedelta
class WaPoCartoonsRecipe(BasicNewsRecipe):
__license__ = 'GPL v3'
__author__ = 'kwetal'
language = 'en'
version = 2
title = u'Washington Post Cartoons'
publisher = u'Washington Post'
category = u'News, Cartoons'
description = u'Cartoons from the Washington Post'
oldest_article = 7
max_articles_per_feed = 100
use_embedded_content = False
no_stylesheets = True
feeds = []
feeds.append((u'Anderson', u'http://www.uclick.com/client/wpc/wpnan/'))
feeds.append((u'Auth', u'http://www.uclick.com/client/wpc/ta/'))
feeds.append((u'Bok', u'http://www.creators.com/featurepages/11_editorialcartoons_chip-bok.html?name=cb'))
feeds.append((u'Carlson', u'http://www.uclick.com/client/wpc/sc/'))
feeds.append((u'Luckovich', u'http://www.creators.com/featurepages/11_editorialcartoons_mike-luckovich.html?name=lk'))
feeds.append((u'McCoy', u'http://www.uclick.com/client/wpc/gm/'))
feeds.append((u'Pat Oliphant', u'http://www.uclick.com/client/wpc/po/'))
feeds.append((u'Sargent', u'http://wpcomics.washingtonpost.com/client/wpc/bs/'))
feeds.append((u'Wilkinson', u'http://www.uclick.com/client/wpc/wpswi/'))
extra_css = '''
body {font-family: verdana, arial, helvetica, geneva, sans-serif;}
h1 {font-size: medium; font-weight: bold; margin-bottom: -0.1em; padding: 0em; text-align: left;}
#name {margin-bottom: 0.2em}
#copyright {font-size: xx-small; color: #696969; text-align: right; margin-top: 0.2em;}
'''
def parse_index(self):
    """Build the article index, one section per entry in ``self.feeds``.

    Every section starts with a 'Current' entry that points at the feed
    page itself. If that page contains a <select> named 'url' or 'dest',
    the entries produced by the matching candidate generator are added
    after it ('url' -> cartoonCandidatesWaPo, otherwise
    cartoonCandidatesCreatorsCom).

    Returns a list of ``[feed title, list of article dicts]`` pairs.
    """
    # Cutoff in compact YYYYMMDD form; handed to the candidate generators.
    cutoff = date.today() - timedelta(days=self.oldest_article)
    oldest = cutoff.strftime('%Y%m%d')

    index = []
    for feed in self.feeds:
        feed_title, feed_url = feed[0], feed[1]
        soup = self.index_to_soup(feed_url)
        cartoons = [{'title': 'Current', 'date': None, 'url': feed_url, 'description': ''}]

        select = soup.find('select', attrs={'name': ['url', 'dest']})
        if select:
            if select['name'] == 'url':
                candidates = self.cartoonCandidatesWaPo(select, oldest)
            else:
                candidates = self.cartoonCandidatesCreatorsCom(select, oldest)
            cartoons.extend(candidates)

        index.append([feed_title, cartoons])
    return index
# Rebuild a downloaded cartoon page as a minimal document: elements for the
# cartoon name, the image and the copyright line are moved from the old soup
# into a fresh one, which is returned.
# NOTE(review): indentation is not visible in this view, so which statements
# belong to the else branch is inferred from statement order -- confirm
# against the real file.
def preprocess_html(self, soup):
# New html/head/body skeleton built by getFreshSoup from the old page.
freshSoup = self.getFreshSoup(soup)
# First layout: the page has <div id="name">.
div = soup.find('div', attrs = {'id': 'name'})
if div:
freshSoup.body.append(div)
# NOTE(review): comic and img are used without a None check; a page that has
# <div id="name"> but no <div id="comic_full"> or no <img> would raise here.
comic = soup.find('div', attrs = {'id': 'comic_full'})
img = comic.find('img')
# Keep only the part of the image URL that precedes its last '&'.
if '&' in img['src']:
img['src'], sep, bad = img['src'].rpartition('&')
freshSoup.body.append(comic)
# NOTE(review): the result of find() is appended unchecked, so None is
# appended when there is no <div id="copyright">.
freshSoup.body.append(soup.find('div', attrs = {'id': 'copyright'}))
else:
# Second layout: <span class="title"> is renamed into <div id="name">.
span = soup.find('span', attrs = {'class': 'title'})
if span:
del span['class']
span['id'] = 'name'
span.name = 'div'
freshSoup.body.append(span)
# The parent of <img class="pic_big"> becomes <div id="comic_full">, with
# any inline style attribute removed.
img = soup.find('img', attrs = {'class': 'pic_big'})
if img:
td = img.parent
if td.has_key('style'):
del td['style']
td.name = 'div'
td['id'] = 'comic_full'
freshSoup.body.append(td)
# <td class="copy"> becomes <div id="copyright">.
td = soup.find('td', attrs = {'class': 'copy'})
if td:
# NOTE(review): find() returns a single match or None, so this loop walks
# that one tag rather than every <a>, and fails when there is no <a>;
# findAll('a') may have been intended -- confirm.
for a in td.find('a'):
a.extract()
del td['class']
td['id'] = 'copyright'
td.name = 'div'
freshSoup.body.append(td)
return freshSoup
def getFreshSoup(self, oldSoup):
    """Return an empty html/head/title/body soup carrying over oldSoup's title text."""
    skeleton = BeautifulSoup('<html><head><title></title></head><body></body></html>')
    oldTitle = oldSoup.head.title
    if oldTitle:
        # Copy the title as plain text rather than moving the tag itself.
        titleText = self.tag_to_string(oldTitle)
        skeleton.head.title.append(titleText)
    return skeleton
def cartoonCandidatesWaPo(self, select, oldest):
    """Yield article dicts for the options of a 'url'-style archive menu.

    The first ``<option>`` is skipped.  For every other option the last
    three path segments of its ``value`` URL (trailing slashes removed) are
    concatenated and compared, as a string, with ``oldest``.  Generation
    stops at the first option whose stamp sorts before ``oldest``.
    """
    for option in select.findAll('option')[1:]:
        link = option['value'].rstrip('/')
        tail = link.split('/')[-3:]
        stamp = str(tail[0]) + str(tail[1]) + str(tail[2])
        if not stamp >= oldest:
            # Everything after the first too-old entry is ignored as well.
            return
        yield {'title': self.tag_to_string(option), 'date': None, 'url': link, 'description': ''}
def cartoonCandidatesCreatorsCom(self, select, oldest):
    """Yield article dicts for the options of a 'dest'-style archive menu.

    The first ``<option>`` and any option carrying a ``selected`` attribute
    are skipped.  The option text is split at its last ``', '`` into a
    leading part and a year; the leading part is split on spaces, with the
    month name at index 1 and the day at index 2.  Year, two-digit month
    and zero-padded day are concatenated and compared, as a string, with
    ``oldest``.  Generation stops at the first option that sorts before
    ``oldest``.
    """
    monthNumbers = {
        'January': '01', 'February': '02', 'March': '03', 'April': '04',
        'May': '05', 'June': '06', 'July': '07', 'August': '08',
        'September': '09', 'October': '10', 'November': '11', 'December': '12',
    }
    for option in select.findAll('option')[1:]:
        if option.has_key('selected'):
            continue
        label = self.tag_to_string(option)
        pieces = label.rpartition(', ')
        year = pieces[2]
        words = pieces[0].split(' ')
        day = words[2].rjust(2, '0')
        month = monthNumbers[words[1]]
        stamp = str(year) + month + str(day)
        if not stamp >= oldest:
            # Everything after the first too-old entry is ignored as well.
            return
        yield {'title': label, 'date': None, 'url': option['value'], 'description': ''}

View File

@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = 'calibre'
__version__ = '0.6.32'
__version__ = '0.6.33'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
import re

View File

@ -264,6 +264,11 @@ class EPUBOutput(OutputFormatPlugin):
if body:
body = body[0]
# Add id attribute to <a> tags that have name
for x in XPath('//h:a[@name]')(body):
if not x.get('id', False):
x.set('id', x.get('name'))
# Replace <br> that are children of <body> as ADE doesn't handle them
if hasattr(body, 'xpath'):
for br in XPath('./h:br')(body):

View File

@ -237,6 +237,8 @@ class Stylizer(object):
style.update(self._normalize_edge(prop.cssValue, name))
elif name == 'font':
style.update(self._normalize_font(prop.cssValue))
elif name == 'list-style':
style.update(self._normalize_list_style(prop.cssValue))
else:
style[name] = prop.value
if 'font-size' in style:
@ -269,6 +271,31 @@ class Stylizer(object):
style["%s-%s" % (name, edge)] = value
return style
def _normalize_list_style(self, cssvalue):
    """Expand a ``list-style`` shorthand into its three longhand properties.

    ``cssvalue`` must expose ``cssText``; when it is iterable, each
    component's ``cssText`` is treated as one shorthand value, otherwise
    the whole ``cssText`` is the single value.

    Returns a dict with ``list-style-type``, ``list-style-position`` and
    ``list-style-image``.  ``inherit`` is copied to all three.  Otherwise
    each value goes to the first still-unset longhand that
    ``cssprofiles.validate`` accepts it for, and longhands left unset get
    their entry from ``DEFAULTS``.
    """
    composition = ('list-style-type', 'list-style-position',
                   'list-style-image')
    style = {}
    if cssvalue.cssText == 'inherit':
        for key in composition:
            style[key] = 'inherit'
        return style
    try:
        primitives = [v.cssText for v in cssvalue]
    except TypeError:
        # A value that cannot be iterated is a single component.
        primitives = [cssvalue.cssText]
    # The shorthand's components may come in any order, so every value is
    # tried against every longhand not yet filled.  Matching strictly
    # left-to-right dropped values such as the 'square' in 'inside square'.
    for value in primitives:
        for key in composition:
            if key not in style and cssprofiles.validate(key, value):
                style[key] = value
                break
    for key in composition:
        if key not in style:
            style[key] = DEFAULTS[key]
    return style
def _normalize_font(self, cssvalue):
composition = ('font-style', 'font-variant', 'font-weight',
'font-size', 'line-height', 'font-family')

File diff suppressed because it is too large Load Diff

View File

@ -4,9 +4,9 @@
#
msgid ""
msgstr ""
"Project-Id-Version: calibre 0.6.32\n"
"POT-Creation-Date: 2010-01-09 10:18+MST\n"
"PO-Revision-Date: 2010-01-09 10:18+MST\n"
"Project-Id-Version: calibre 0.6.33\n"
"POT-Creation-Date: 2010-01-10 16:40+MST\n"
"PO-Revision-Date: 2010-01-10 16:40+MST\n"
"Last-Translator: Automatically generated\n"
"Language-Team: LANGUAGE\n"
"MIME-Version: 1.0\n"
@ -123,11 +123,11 @@ msgstr ""
#: /home/kovid/work/calibre/src/calibre/library/database.py:913
#: /home/kovid/work/calibre/src/calibre/library/database2.py:703
#: /home/kovid/work/calibre/src/calibre/library/database2.py:715
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1135
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1172
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1509
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1511
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1622
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1143
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1180
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1517
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1519
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1630
#: /home/kovid/work/calibre/src/calibre/library/server.py:645
#: /home/kovid/work/calibre/src/calibre/library/server.py:717
#: /home/kovid/work/calibre/src/calibre/library/server.py:764
@ -585,7 +585,7 @@ msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/tag_view.py:132
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1068
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1072
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1409
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1417
msgid "News"
msgstr ""
@ -1955,6 +1955,10 @@ msgstr ""
msgid "Run the text input through the markdown pre-processor. To learn more about markdown see"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/txt/input.py:35
msgid "Do not insert a Table of Contents into the output text."
msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/txt/output.py:24
msgid "Type of newline to use. Options are %s. Default is 'system'. Use 'old_mac' for compatibility with Mac OS 9 and earlier. For Mac OS X use 'unix'. 'system' will default to the newline type used by this OS."
msgstr ""
@ -2203,7 +2207,7 @@ msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/convert/rb_output_ui.py:28
#: /home/kovid/work/calibre/src/calibre/gui2/convert/structure_detection_ui.py:59
#: /home/kovid/work/calibre/src/calibre/gui2/convert/toc_ui.py:62
#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:38
#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:42
#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_output_ui.py:45
#: /home/kovid/work/calibre/src/calibre/gui2/convert/xexp_edit_ui.py:49
#: /home/kovid/work/calibre/src/calibre/gui2/convert/xpath_wizard_ui.py:67
@ -2752,12 +2756,12 @@ msgid "PDB Input"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/convert/pdb_input_ui.py:32
#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:39
#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:43
msgid "Treat each &line as a paragraph"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/convert/pdb_input_ui.py:33
#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:42
#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:44
msgid "Assume print formatting"
msgstr ""
@ -2975,14 +2979,18 @@ msgstr ""
msgid "TXT Input"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:40
#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:45
msgid "Process using markdown"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:41
#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:46
msgid "<p>Markdown is a simple markup language for text files, that allows for advanced formatting. To learn more visit <a href=\"http://daringfireball.net/projects/markdown\">markdown</a>."
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:47
msgid "Do not insert Table of Contents into output text when using markdown"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_output.py:16
msgid "TXT Output"
msgstr ""
@ -6639,27 +6647,27 @@ msgid ""
"For help on an individual command: %%prog command --help\n"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1648
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1656
msgid "<p>Migrating old database to ebook library in %s<br><center>"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1677
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1685
msgid "Copying <b>%s</b>"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1694
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1702
msgid "Compacting database"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1787
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1795
msgid "Checking SQL integrity..."
msgstr ""
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1824
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1832
msgid "Checking for missing files."
msgstr ""
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1846
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1854
msgid "Checked id"
msgstr ""

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -158,7 +158,7 @@ class RecursiveFetcher(object):
pass
def remove_beyond(tag, next):
while tag is not None and tag.name != 'body':
while tag is not None and getattr(tag, 'name', None) != 'body':
after = getattr(tag, next)
while after is not None:
ns = getattr(tag, next)