mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Sync to trunk.
This commit is contained in:
commit
5f89c14c19
@ -4,6 +4,98 @@
|
||||
# for important features/bug fixes.
|
||||
# Also, each release can have new and improved recipes.
|
||||
|
||||
- version: 0.6.33
|
||||
date: 2010-01-10
|
||||
|
||||
new features:
|
||||
- title: "The e-book viewer now has built-in dictionary lookup"
|
||||
type: major
|
||||
description: >
|
||||
"You can now right click on a word to look up its meaning in an online dictionary.
|
||||
calibre uses the public domain dictionaries available at dict.org"
|
||||
|
||||
- title: "RTF Output: Add support for unicode characters"
|
||||
|
||||
- title: "Allow the metadata that is used to create collections when sending books to SONY readers to be customized"
|
||||
description: >
|
||||
"By default collections are created on the SONY reader corresponding to series and tags. Now you can add other
|
||||
metadata fields, like author, or remove ones you dislike, by going to Preferences->Plugins and customizing
|
||||
the device interface plugin corresponding to your device."
|
||||
|
||||
- title: "TXT Input: Add option to disable insertion of Table of Contents into output text."
|
||||
tickets: [4506]
|
||||
|
||||
- title: "Remember state of cover and tag browsing views on restart"
|
||||
|
||||
bug fixes:
|
||||
- title: "EPUB Output: Add id attributes to anchors that have only name, as Adobe Digital Editions apparently can't handle only name attributes"
|
||||
tickets: [4474]
|
||||
|
||||
- title: "Conversion pipeline: Handle the list-style shortcut CSS property correctly"
|
||||
tickets: [4418]
|
||||
|
||||
- title: "EPUB Output: Fix generation of comics with PNG images for the Nook"
|
||||
tickets: [4492]
|
||||
|
||||
- title: "Fix bug that could prevent loading of some custom plugins"
|
||||
tickets: [4414]
|
||||
|
||||
- title: "News download: Handle URLs with both commas and non-ASCII characters correctly"
|
||||
|
||||
- title: "Ignore invalid metadata when adding books from command line instead of erroring out"
|
||||
tickets: [4496]
|
||||
|
||||
- title: Fix remove header/footer assistant when converting HTML files
|
||||
tickets: [4484]
|
||||
|
||||
- title: "Workaround for browsers like iPhone Safari that send extra arguments when downloading books from the content server"
|
||||
|
||||
- title: "Content server: Recognize the HTC HD2 as a mobile browser and add series information to the mobile version of the web page."
|
||||
tickets: [4488]
|
||||
|
||||
- title: "FB2 Output: Properly escape metadata before inserting it into the file"
|
||||
|
||||
- title: "Don't accept rich text in the comments fields of the edit meta information dialog"
|
||||
|
||||
- title: "Fix device detection for Cybook gen 3 with firmware 2.0"
|
||||
|
||||
- title: "Send to device: Use default save template when driver specific one is empty or unspecified"
|
||||
|
||||
- title: "Fix framework for running post-process and preprocess file type plugins"
|
||||
|
||||
- title: "Linux develop/install commands: Use bindir instead of staging bindir in the launchers"
|
||||
tickets: [4437]
|
||||
|
||||
- title: "E-book viewer: Sanitize file names when unzipping EPUB files"
|
||||
tickets: [4426]
|
||||
|
||||
|
||||
new recipes:
|
||||
- title: The Escapist
|
||||
author: Lorenzo Vigentini
|
||||
|
||||
- title: Washington Post cartoons
|
||||
author: kwetal
|
||||
|
||||
- title: The Dallas Morning News
|
||||
author: Krittika Goyal
|
||||
|
||||
- title: sg.hu
|
||||
author: davotibarna
|
||||
|
||||
- title: The New Zealand Herald
|
||||
author: Krittika Goyal
|
||||
|
||||
- title: Nature News
|
||||
author: Krittika Goyal
|
||||
|
||||
improved recipes:
|
||||
- El Pais
|
||||
- The Economist
|
||||
- The New York Times
|
||||
- Entrepreneur Magazine
|
||||
- CNN
|
||||
|
||||
- version: 0.6.32
|
||||
date: 2010-01-03
|
||||
|
||||
|
BIN
resources/images/news/wapo_cartoons.png
Normal file
BIN
resources/images/news/wapo_cartoons.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 311 B |
@ -1,27 +1,35 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__author__ = 'Lorenzo Vigentini, based on Darko Miletic'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>'
|
||||
__version__ = 'v1.01'
|
||||
__date__ = '10, January 2010'
|
||||
__description__ = 'Italian daily newspaper (english version)'
|
||||
'''
|
||||
www.corriere.it/english
|
||||
http://www.corriere.it/
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Corriere_en(BasicNewsRecipe):
|
||||
title = 'Corriere della Sera in English'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'News from Milan and Italy'
|
||||
oldest_article = 15
|
||||
publisher = 'Corriere della Sera'
|
||||
category = 'news, politics, Italy'
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'cp1252'
|
||||
remove_javascript = True
|
||||
language = 'en'
|
||||
class ilCorriere(BasicNewsRecipe):
|
||||
__author__ = 'Lorenzo Vigentini, based on Darko Miletic'
|
||||
description = 'Italian daily newspaper (english version)'
|
||||
|
||||
cover_url = 'http://images.corriereobjects.it/images/static/common/logo_home.gif?v=200709121520'
|
||||
title = u'Il Corriere della sera (english) '
|
||||
publisher = 'RCS Digital'
|
||||
category = 'News, politics, culture, economy, general interest'
|
||||
|
||||
language = 'en'
|
||||
timefmt = '[%a, %d %b, %Y]'
|
||||
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 100
|
||||
use_embedded_content = False
|
||||
recursion = 10
|
||||
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
@ -35,12 +43,13 @@ class Corriere_en(BasicNewsRecipe):
|
||||
keep_only_tags = [dict(name='div', attrs={'class':['news-dettaglio article','article']})]
|
||||
|
||||
remove_tags = [
|
||||
dict(name=['base','object','link','embed','img'])
|
||||
,dict(name='div', attrs={'class':'news-goback'})
|
||||
,dict(name='ul', attrs={'class':'toolbar'})
|
||||
dict(name=['base','object','link','embed']),
|
||||
dict(name='div', attrs={'class':'news-goback'}),
|
||||
dict(name='ul', attrs={'class':'toolbar'})
|
||||
]
|
||||
|
||||
remove_tags_after = dict(name='p', attrs={'class':'footnotes'})
|
||||
|
||||
feeds = [(u'Italian Life', u'http://www.corriere.it/rss/english.xml')]
|
||||
|
||||
feeds = [
|
||||
(u'News' , u'http://www.corriere.it/rss/english.xml' )
|
||||
]
|
||||
|
@ -1,26 +1,36 @@
|
||||
#!/usr/bin/env python
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'Lorenzo Vigentini, based on Darko Miletic'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>'
|
||||
__version__ = 'v1.01'
|
||||
__date__ = '10, January 2010'
|
||||
__description__ = 'Italian daily newspaper'
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.corriere.it
|
||||
http://www.corriere.it/
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class Corriere_it(BasicNewsRecipe):
|
||||
title = 'Corriere della Sera'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'News from Milan and Italy'
|
||||
oldest_article = 7
|
||||
publisher = 'Corriere della Sera'
|
||||
category = 'news, politics, Italy'
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'cp1252'
|
||||
remove_javascript = True
|
||||
language = 'it'
|
||||
|
||||
class ilCorriere(BasicNewsRecipe):
|
||||
__author__ = 'Lorenzo Vigentini, based on Darko Miletic'
|
||||
description = 'Italian daily newspaper'
|
||||
|
||||
cover_url = 'http://images.corriereobjects.it/images/static/common/logo_home.gif?v=200709121520'
|
||||
title = u'Il Corriere della sera '
|
||||
publisher = 'RCS Digital'
|
||||
category = 'News, politics, culture, economy, general interest'
|
||||
|
||||
language = 'it'
|
||||
timefmt = '[%a, %d %b, %Y]'
|
||||
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 100
|
||||
use_embedded_content = False
|
||||
recursion = 10
|
||||
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
@ -28,29 +38,30 @@ class Corriere_it(BasicNewsRecipe):
|
||||
, '--publisher', publisher
|
||||
, '--ignore-tables'
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':['news-dettaglio article','article']})]
|
||||
|
||||
remove_tags = [
|
||||
dict(name=['base','object','link','embed','img'])
|
||||
,dict(name='div', attrs={'class':'news-goback'})
|
||||
,dict(name='ul', attrs={'class':'toolbar'})
|
||||
dict(name=['base','object','link','embed']),
|
||||
dict(name='div', attrs={'class':'news-goback'}),
|
||||
dict(name='ul', attrs={'class':'toolbar'})
|
||||
]
|
||||
|
||||
remove_tags_after = dict(name='p', attrs={'class':'footnotes'})
|
||||
|
||||
feeds = [
|
||||
(u'Ultimora' , u'http://www.corriere.it/rss/ultimora.xml' )
|
||||
,(u'Cronache' , u'http://www.corriere.it/rss/cronache.xml' )
|
||||
,(u'Economia' , u'http://www.corriere.it/rss/economia.xml' )
|
||||
,(u'Editoriali', u'http://www.corriere.it/rss/editoriali.xml')
|
||||
,(u'Esteri' , u'http://www.corriere.it/rss/esteri.xml' )
|
||||
,(u'Politica' , u'http://www.corriere.it/rss/politica.xml' )
|
||||
,(u'Salute' , u'http://www.corriere.it/rss/salute.xml' )
|
||||
,(u'Scienze' , u'http://www.corriere.it/rss/scienze.xml' )
|
||||
,(u'Spettacolo', u'http://www.corriere.it/rss/spettacoli.xml')
|
||||
,(u'Sport' , u'http://www.corriere.it/rss/sport.xml' )
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u'Ultimora' , u'http://www.corriere.it/rss/ultimora.xml' ),
|
||||
(u'Editoriali', u'http://www.corriere.it/rss/editoriali.xml'),
|
||||
(u'Cronache' , u'http://www.corriere.it/rss/cronache.xml' ),
|
||||
(u'Politica' , u'http://www.corriere.it/rss/politica.xml' ),
|
||||
(u'Esteri' , u'http://www.corriere.it/rss/esteri.xml' ),
|
||||
(u'Economia' , u'http://www.corriere.it/rss/economia.xml' ),
|
||||
(u'Cultura' , u'http://www.corriere.it/rss/cultura.xml' ),
|
||||
(u'Scienze' , u'http://www.corriere.it/rss/scienze.xml' ),
|
||||
(u'Salute' , u'http://www.corriere.it/rss/salute.xml' ),
|
||||
(u'Spettacolo', u'http://www.corriere.it/rss/spettacoli.xml'),
|
||||
(u'Cinema e TV', u'http://www.corriere.it/rss/cinema.xml' ),
|
||||
(u'Sport' , u'http://www.corriere.it/rss/sport.xml' )
|
||||
]
|
||||
|
@ -106,7 +106,7 @@ class Economist(BasicNewsRecipe):
|
||||
return ans
|
||||
|
||||
def eco_find_image_tables(self, soup):
|
||||
for x in soup.findAll('table', align='right'):
|
||||
for x in soup.findAll('table', align=['right', 'center']):
|
||||
if len(x.findAll('font')) in (1,2) and len(x.findAll('img')) == 1:
|
||||
yield x
|
||||
|
||||
|
@ -107,7 +107,7 @@ class Economist(BasicNewsRecipe):
|
||||
self.log.debug(tb)
|
||||
|
||||
def eco_find_image_tables(self, soup):
|
||||
for x in soup.findAll('table', align='right'):
|
||||
for x in soup.findAll('table', align=['right', 'center']):
|
||||
if len(x.findAll('font')) in (1,2) and len(x.findAll('img')) == 1:
|
||||
yield x
|
||||
|
||||
|
@ -1,6 +1,8 @@
|
||||
#!/usr/bin/env python
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'Lorenzo Vigentini, based on earlier version by Kovid Goyal'
|
||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||
description = 'Main daily newspaper from Spain - v1.02 (10, January 2010)'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
'''
|
||||
@ -10,18 +12,54 @@ elpais.es
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class ElPais(BasicNewsRecipe):
|
||||
title = u'EL PAIS'
|
||||
language = 'es'
|
||||
__author__ = 'Kovid Goyal & Lorenzo Vigentini'
|
||||
description = 'Main daily newspaper from Spain'
|
||||
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
cover_url = 'http://www.elpais.com/im/tit_logo_global.gif'
|
||||
title = u'El Pais'
|
||||
publisher = 'Ediciones El Pais SL'
|
||||
category = 'News, politics, culture, economy, general interest'
|
||||
|
||||
language = 'es'
|
||||
timefmt = '[%a, %d %b, %Y]'
|
||||
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 15
|
||||
|
||||
use_embedded_content = False
|
||||
recursion = 5
|
||||
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
|
||||
remove_tags = [dict(name='div', attrs={'class':'zona_superior'}), dict(name='div', attrs={'class':'limpiar'}), dict(name='div', attrs={'id':'pie'})]
|
||||
extra_css = 'h1 {font: sans-serif large;} \n h2 {font: sans-serif medium;} \n h3 {font: sans-serif small;} \n h4 {font: sans-serif bold small;} \n p{ font:10pt serif}'
|
||||
keep_only_tags = [ dict(name='div', attrs={'class':['cabecera_noticia','cabecera_noticia_reportaje','contenido_noticia','caja_despiece','presentacion']})]
|
||||
extra_css = '''
|
||||
p{style:normal size:12 serif}
|
||||
|
||||
feeds = [(u'Internacional', u'http://www.elpais.es/rss/rss_section.html?anchor=elpporint'), (u'Espana', u'http://www.elpais.es/rss/rss_section.html?anchor=elppornac'), (u'Deportes', u'http://www.elpais.es/rss/rss_section.html?anchor=elppordep'), (u'Economia', u'http://www.elpais.es/rss/rss_section.html?anchor=elpporeco'), (u'Tecnologia', u'http://www.elpais.es/rss/rss_section.html?anchor=elpportec'), (u'Cultura', u'http://www.elpais.es/rss/rss_section.html?anchor=elpporcul'), (u'Gente', u'http://www.elpais.es/rss/rss_section.html?anchor=elpporgen'), (u'Sociedad', u'http://www.elpais.es/rss/rss_section.html?anchor=elpporsoc'), (u'Opinion', u'http://www.elpais.es/rss/rss_section.html?anchor=elpporopi')]
|
||||
'''
|
||||
|
||||
def print_version(self, url):
|
||||
url = url+'?print=1'
|
||||
return url
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':['zona_superior','pie_enlaces_inferiores','contorno_f','ampliar']}),
|
||||
dict(name='div', attrs={'class':['limpiar','mod_apoyo','borde_sup','votos','info_complementa','info_relacionada']}),
|
||||
dict(name='div', attrs={'id':['suscribirse suscrito','google_noticia','utilidades','coment','foros_not','pie','lomas']})
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u'Titulares de portada', u'http://www.elpais.com/rss/feed.html?feedId=1022'),
|
||||
(u'Internacional', u'http://www.elpais.com/rss/feed.html?feedId=1001'),
|
||||
(u'Espana', u'http://www.elpais.com/rss/feed.html?feedId=1002'),
|
||||
(u'Deportes', u'http://www.elpais.com/rss/feed.html?feedId=1007'),
|
||||
(u'Economia', u'http://www.elpais.com/rss/feed.html?feedId=1006'),
|
||||
(u'Politica', u'http://www.elpais.com/rss/feed.html?feedId=17073'),
|
||||
(u'Tecnologia', u'http://www.elpais.com/rss/feed.html?feedId=1005'),
|
||||
(u'Cultura', u'http://www.elpais.com/rss/feed.html?feedId=1008'),
|
||||
(u'Gente', u'http://www.elpais.com/rss/feed.html?feedId=1009'),
|
||||
(u'Sociedad', u'http://www.elpais.com/rss/feed.html?feedId=1004'),
|
||||
(u'Opinion', u'http://www.elpais.com/rss/feed.html?feedId=1003'),
|
||||
(u'Ciencia', u'http://www.elpais.com/rss/feed.html?feedId=17068'),
|
||||
(u'Justicia y leyes', u'http://www.elpais.com/rss/feed.html?feedId=17069'),
|
||||
]
|
||||
|
||||
def print_version(self, url):
|
||||
url = url+'?print=1'
|
||||
return url
|
||||
|
67
resources/recipes/l_espresso.recipe
Normal file
67
resources/recipes/l_espresso.recipe
Normal file
@ -0,0 +1,67 @@
|
||||
#!/usr/bin/env python
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'Lorenzo Vigentini'
|
||||
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
|
||||
__version__ = 'v1.02'
|
||||
__date__ = '10, January 2010'
|
||||
__description__ = 'Italian weekly magazine'
|
||||
|
||||
'''espresso.repubblica.it'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class laGazzetta(BasicNewsRecipe):
    """News recipe for the Italian weekly magazine l'Espresso.

    Declares the recipe metadata, the RSS feeds to read, the page elements
    to keep or strip, and the stylesheet applied to downloaded articles.
    """

    # NOTE(review): this class name is identical to the one used by the
    # Gazzetta dello Sport recipe and looks like a copy-paste leftover.  It
    # is kept unchanged so that nothing referring to it by name breaks.

    # --- descriptive metadata -------------------------------------------
    title = 'l Espresso '
    __author__ = 'Lorenzo Vigentini'
    description = 'Italian weekly magazine'
    publisher = 'Gruppo editoriale lEspresso'
    category = 'News, politics, culture, economy, general interest'
    cover_url = 'http://espresso.repubblica.it/images/logo_espresso.gif'

    # --- locale / text handling -----------------------------------------
    language = 'it'
    encoding = 'cp1252'
    timefmt = '[%a, %d %b, %Y]'

    # --- download limits ------------------------------------------------
    oldest_article = 16
    max_articles_per_feed = 100
    use_embedded_content = False
    recursion = 10

    # --- clean-up switches ----------------------------------------------
    remove_javascript = True
    no_stylesheets = True

    # RSS sources as (section title, feed URL) pairs.
    feeds = [
        (u'Espresso Homepage',
         u'http://kpm.data.kataweb.it/kpm3eolx/rss/home'),
        (u'Espresso Local',
         u'http://kpm.data.kataweb.it/kpm3eolx/rss/local'),
        (u'Espresso Style & Design',
         u'http://kpm.data.kataweb.it/kpm3eolx/rss/style_design'),
        (u'Espresso Opinioni',
         u'http://kpm.data.kataweb.it/kpm3eolx/rss/opinioni'),
        (u'Espresso Rubriche',
         u'http://kpm.data.kataweb.it/kpm3eolx/rss/rubriche'),
        (u'Espresso Limes',
         u'http://temi.repubblica.it/limes/feed/'),
    ]

    def print_version(self, url):
        """Return *url* with the ``/&print=true`` suffix appended."""
        suffix = '/&print=true'
        return url + suffix

    # Tag specifications for the parts of a page that are kept.
    # NOTE(review): several class names appear in more than one entry; the
    # entries are reproduced as-is because their order may matter.
    keep_only_tags = [
        {'name': 'div',
         'attrs': {'class': ['testo', 'copertina', 'occhiello', 'firma',
                             'didascalia', 'content-second-right',
                             'detail-articles', 'titolo-local',
                             'generic-articles']}},
        {'name': 'div',
         'attrs': {'class': ['generic-articles', 'summary',
                             'detail-articles']}},
        {'name': 'div', 'attrs': {'id': 'content-second-right'}},
    ]

    # Tag specifications for the parts of a page that are stripped.
    remove_tags = [
        {'name': 'div',
         'attrs': {'class': ['servizi', 'aggiungi', 'label-web',
                             'bottom-mobile', 'box-abbonamenti', 'box-cerca',
                             'big', 'little', 'stampaweb']}},
        {'name': 'div',
         'attrs': {'id': ['topheader', 'header', 'navigation-new',
                          'navigation', 'content-second-left']}},
        {'name': ['script', 'noscript', 'iframe']},
    ]

    # Stylesheet for headings, bylines (.firma) and body text (.testo).
    extra_css = '''
        h1 {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:18px;}
        h2 {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:18px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; }
        h3 {color:#333333;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;}
        h4 {color:#333333; font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; }
        h5 {color:#333333; font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;}
        .firma {color:#333333;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:italic; font-variant:normal; font-weight:bold; line-height:15px; text-decoration:none;}
        .testo {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:10px;}
    '''
|
||||
|
79
resources/recipes/la_gazzeta_dello_sport.recipe
Normal file
79
resources/recipes/la_gazzeta_dello_sport.recipe
Normal file
@ -0,0 +1,79 @@
|
||||
#!/usr/bin/env python
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'Lorenzo Vigentini'
|
||||
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
|
||||
__version__ = 'v1.02'
|
||||
__date__ = '10, January 2010'
|
||||
__description__ = 'Sport news from the most read sport newspaper in Italy'
|
||||
|
||||
'''www.gazzetta.it'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class laGazzetta(BasicNewsRecipe):
    """News recipe for La Gazzetta dello Sport (www.gazzetta.it).

    Declares the recipe metadata, the per-sport RSS feeds, the page elements
    to keep or strip, a small stylesheet, and the rewrite from an article
    URL to its printer-friendly counterpart.
    """

    __author__ = 'Lorenzo Vigentini'
    description = 'Sport news from the most read sport newspaper in Italy'

    cover_url = 'http://www.gazzetta.it/primapagina/images/prima_pagina_grande.png'
    title = 'La Gazzetta dello Sport '
    publisher = 'RCS Digital'
    category = 'Sport News'

    language = 'it'
    encoding = 'cp1252'
    timefmt = '[%a, %d %b, %Y]'

    oldest_article = 2
    max_articles_per_feed = 20
    use_embedded_content = False
    recursion = 10

    remove_javascript = True
    no_stylesheets = True

    # Only the main article container is kept.
    keep_only_tags = [dict(name='div', attrs={'id': 'articolo'})]

    # Page furniture stripped from the article.
    # NOTE(review): the last entry matches every <a> tag, which makes the two
    # more specific <a> entries above it redundant; kept as in the original.
    remove_tags = [
        dict(name='ul', attrs={'id': ['service-toolbar', 'sections-menu']}),
        dict(name='div', attrs={'id': ['header', 'rightcol', 'sponsored',
                                       'vxFlashPlayer', 'footer',
                                       'print-box']}),
        dict(name='iframe', attrs={'id': 'mirago-feed'}),
        dict(name='a', attrs={'id': 'commenta-up'}),
        dict(name='cite', attrs={'class': ['signature', 'parag-title']}),
        dict(name='a', attrs={'class': ['last-comment', 'button-bold2']}),
        dict(name=['base', 'object', 'link', 'a', 'script', 'noscript']),
    ]

    extra_css = '''
        h1 {font: sans-serif large;}
        h2 {font: sans-serif medium;}
        h3 {font: sans-serif small;}
        h4 {font: sans-serif bold small;}
        p {font:10pt helvetica}
        dd {font:8pt helvetica}
    '''

    # RSS sources as (section title, feed URL) pairs.
    feeds = [
        (u'Calcio', u'http://www.gazzetta.it/rss/Calcio.xml'),
        (u'Formula 1', u'http://www.gazzetta.it/rss/Formula1.xml'),
        # Title previously misspelled 'Motomodiale'; the URL was already right.
        (u'Motomondiale', u'http://www.gazzetta.it/rss/Motomondiale.xml'),
        (u'Motori', u'http://www.gazzetta.it/rss/Motori.xml'),
        (u'Ciclismo', u'http://www.gazzetta.it/rss/Ciclismo.xml'),
        (u'Basket', u'http://www.gazzetta.it/rss/Basket.xml'),
        (u'Tennis', u'http://www.gazzetta.it/rss/Tennis.xml'),
        (u'Pallavolo', u'http://www.gazzetta.it/rss/Pallavolo.xml'),
        (u'Vela', u'http://www.gazzetta.it/rss/Vela.xml'),
        (u'Atletica', u'http://www.gazzetta.it/rss/Atletica.xml'),
        (u'Altri Sport', u'http://www.gazzetta.it/rss/Sport_Vari.xml'),
    ]

    def print_version(self, url):
        """Return the printer-friendly URL derived from the article *url*.

        The first three '/'-separated segments (scheme and host) and the
        following four path segments are kept unchanged.  The final segment
        has its last six characters dropped (the length of a ``.shtml``
        extension), is cut at the first '.', and gets ``_print.html``
        appended.

        The debug ``print`` statement that used to echo the URL on every
        call has been removed: it polluted stdout and is not valid syntax
        on Python 3.
        """
        segments = url.split('/')
        basename = '/'.join(segments[:3]) + '/'
        sub_path = '/'.join(segments[3:7]) + '/'
        article = segments[-1][:-6].split('.')[0]
        return basename + sub_path + article + '_print.html'
|
@ -1,29 +1,55 @@
|
||||
#!/usr/bin/env python
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'Lorenzo Vigentini, based on Darko Miletic'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>'
|
||||
description = 'Italian daily newspaper - v1.01 (04, January 2010)'
|
||||
|
||||
'''
|
||||
http://www.repubblica.it/
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class LaRepublica(BasicNewsRecipe):
|
||||
title = u'la Repubblica'
|
||||
oldest_article = 1
|
||||
language = 'it'
|
||||
author = 'Lorenzo Vigentini, based on Darko Miletic'
|
||||
description = 'Italian daily newspaper'
|
||||
|
||||
author = 'Darko Miletic'
|
||||
cover_url = 'http://www.repubblica.it/images/homepage/la_repubblica_logo.gif'
|
||||
title = u'La Repubblica'
|
||||
publisher = 'Gruppo editoriale L\'Espresso'
|
||||
category = 'News, politics, culture, economy, general interest'
|
||||
|
||||
language = 'it'
|
||||
timefmt = '[%a, %d %b, %Y]'
|
||||
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 100
|
||||
use_embedded_content = False
|
||||
recursion = 10
|
||||
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'articolo'})]
|
||||
|
||||
|
||||
remove_tags = [
|
||||
dict(name=['object','link'])
|
||||
,dict(name='span',attrs={'class':'linkindice'})
|
||||
,dict(name='div',attrs={'class':'bottom-mobile'})
|
||||
,dict(name='div',attrs={'id':['rssdiv','blocco']})
|
||||
dict(name=['object','link']),
|
||||
dict(name='span',attrs={'class':'linkindice'}),
|
||||
dict(name='div',attrs={'class':'bottom-mobile'}),
|
||||
dict(name='div',attrs={'id':['rssdiv','blocco']})
|
||||
]
|
||||
|
||||
|
||||
feeds = [
|
||||
(u'Repubblica homepage', u'http://www.repubblica.it/rss/homepage/rss2.0.xml'),
|
||||
(u'Repubblica Rilievo', u'http://www.repubblica.it/rss/homepage/rss2.0.xml'),
|
||||
(u'Repubblica Cronaca', u'http://www.repubblica.it/rss/cronaca/rss2.0.xml'),
|
||||
(u'Repubblica Esteri', u'http://www.repubblica.it/rss/esteri/rss2.0.xml'),
|
||||
(u'Repubblica Economia', u'http://www.repubblica.it/rss/economia/rss2.0.xml'),
|
||||
(u'Repubblica Politica', u'http://www.repubblica.it/rss/politica/rss2.0.xml'),
|
||||
(u'Repubblica Scienze', u'http://www.repubblica.it/rss/scienze/rss2.0.xml'),
|
||||
(u'Repubblica Tecnologia', u'http://www.repubblica.it/rss/tecnologia/rss2.0.xml'),
|
||||
(u'Repubblica Esteri', u'http://www.repubblica.it/rss/esteri/rss2.0.xml')
|
||||
(u'Repubblica Scuola e Universita', u'http://www.repubblica.it/rss/scuola_e_universita/rss2.0.xml'),
|
||||
(u'Repubblica Ambiente', u'http://www.repubblica.it/rss/ambiente/rss2.0.xml'),
|
||||
(u'Repubblica Cultura', u'http://www.repubblica.it/rss/spettacoli_e_cultura/rss2.0.xml'),
|
||||
(u'Repubblica Persone', u'http://www.repubblica.it/rss/persone/rss2.0.xml'),
|
||||
(u'Repubblica Sport', u'http://www.repubblica.it/rss/sport/rss2.0.xml'),
|
||||
(u'Repubblica Calcio', u'http://www.repubblica.it/rss/sport/calcio/rss2.0.xml')
|
||||
]
|
||||
|
||||
|
@ -1,4 +1,3 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
@ -20,29 +19,25 @@ class Nzz(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
encoding = 'utf-8'
|
||||
use_embedded_content = False
|
||||
lang = 'de-CH'
|
||||
language = 'de'
|
||||
language = 'de'
|
||||
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
|
||||
conversion_options = {
|
||||
'comments' : description
|
||||
,'tags' : category
|
||||
,'language' : language
|
||||
,'publisher' : publisher
|
||||
}
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'article'})]
|
||||
|
||||
remove_tags = [
|
||||
dict(name=['object','link','base','script'])
|
||||
dict(name=['object','link','base'])
|
||||
,dict(name='div',attrs={'class':['more','teaser','advXertXoriXals','legal']})
|
||||
,dict(name='div',attrs={'id':['popup-src','readercomments','google-ad','advXertXoriXals']})
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u'Neuste Artikel', u'http://www.nzz.ch/feeds/recent/' )
|
||||
,(u'International' , u'http://www.nzz.ch/nachrichten/international?rss=true')
|
||||
(u'International' , u'http://www.nzz.ch/nachrichten/international?rss=true')
|
||||
,(u'Schweiz' , u'http://www.nzz.ch/nachrichten/schweiz?rss=true')
|
||||
,(u'Wirtschaft' , u'http://www.nzz.ch/nachrichten/wirtschaft/aktuell?rss=true')
|
||||
,(u'Finanzmaerkte' , u'http://www.nzz.ch/finanzen/nachrichten?rss=true')
|
||||
@ -55,13 +50,7 @@ class Nzz(BasicNewsRecipe):
|
||||
,(u'Reisen' , u'http://www.nzz.ch/magazin/reisen?rss=true')
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
soup.html['xml:lang'] = self.lang
|
||||
soup.html['lang'] = self.lang
|
||||
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
|
||||
soup.head.insert(0,mtag)
|
||||
return soup
|
||||
|
||||
def print_version(self, url):
|
||||
return url + '?printview=true'
|
||||
|
||||
|
||||
|
51
resources/recipes/panorama.recipe
Normal file
51
resources/recipes/panorama.recipe
Normal file
@ -0,0 +1,51 @@
|
||||
#!/usr/bin/env python
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'Lorenzo Vigentini'
|
||||
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
|
||||
__version__ = 'v1.01'
|
||||
__date__ = '10, January 2010'
|
||||
__description__ = 'Italian weekly magazine'
|
||||
|
||||
'''
|
||||
http://www.panorama.it/
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class panorama(BasicNewsRecipe):
    """News recipe for the Italian weekly magazine Panorama.

    Declares the recipe metadata, the blog.panorama.it RSS feeds to read and
    the page elements to keep or strip.
    """

    # --- descriptive metadata -------------------------------------------
    title = u'Panorama '
    __author__ = 'Lorenzo Vigentini, based on Darko Miletic'
    description = 'Italian weekly magazine'
    publisher = 'Mondadori'
    category = 'News, politics, culture, economy, general interest'
    cover_url = 'http://www.panorama.it/panorama/images/panorama_large.gif'

    # --- locale ---------------------------------------------------------
    language = 'it'
    timefmt = '[%a, %d %b, %Y]'

    # --- download limits ------------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    recursion = 10

    # --- clean-up switches ----------------------------------------------
    remove_javascript = True

    # Tag specifications for the parts of a page that are kept.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': ['post', 'article']}},
    ]

    # Tag specifications for the parts of a page that are stripped.
    remove_tags = [
        {'name': ['object', 'link']},
        {'name': 'div',
         'attrs': {'class': ['post-meta', 'sharing-tools', 'related',
                             'comments', 'prev-next']}},
        {'name': 'div', 'attrs': {'id': ['related-posts', 'footer']}},
    ]

    # RSS sources as (section title, feed URL) pairs.
    feeds = [
        (u'Panorama Italia',
         u'http://blog.panorama.it/italia/feed'),
        (u'Panorama Mondo',
         u'http://blog.panorama.it/mondo/feed'),
        (u'Panorama Cultura e societa',
         u'http://blog.panorama.it/culturaesocieta/feed'),
        (u'Panorama Hitech e scienza',
         u'http://blog.panorama.it/hitechescienza/feed'),
        (u'Panorama Motori',
         u'http://blog.panorama.it/autoemoto/feed'),
        (u'Panorama libri',
         u'http://blog.panorama.it/libri/feed'),
        (u'Panorama Opinioni',
         u'http://blog.panorama.it/opinioni/feed'),
    ]
|
52
resources/recipes/quotidiano.recipe
Normal file
52
resources/recipes/quotidiano.recipe
Normal file
@ -0,0 +1,52 @@
|
||||
#!/usr/bin/env python
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'Lorenzo Vigentini'
|
||||
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
|
||||
__version__ = 'v1.01'
|
||||
__date__ = '10, January 2010'
|
||||
__description__ = 'Italian News Agency'
|
||||
|
||||
'''
|
||||
http://www.quotidianonet.ilsole24ore.com/
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class panorama(BasicNewsRecipe):
    """News recipe for the Italian news site Quotidiano Net.

    Declares the recipe metadata, the quotidianonet.ilsole24ore.com RSS
    feeds to read and the page elements to keep or strip.
    """

    # NOTE(review): the class is named ``panorama`` although it describes
    # Quotidiano Net -- apparently copied from the Panorama recipe.  The
    # name is left unchanged so nothing referring to it by name breaks.

    # --- descriptive metadata -------------------------------------------
    title = u'Quotidiano Net '
    __author__ = 'Lorenzo Vigentini, based on Darko Miletic'
    description = 'Italian News Agency'
    publisher = 'italiaNews'
    category = 'News, politics, culture, economy, general interest'
    cover_url = 'http://quotidianonet.ilsole24ore.com/file_generali/img/logo_quotidianonet-top.gif'

    # --- locale ---------------------------------------------------------
    language = 'it'
    timefmt = '[%a, %d %b, %Y]'

    # --- download limits ------------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    recursion = 10

    # --- clean-up switches ----------------------------------------------
    remove_javascript = True

    # Tag specifications for the parts of a page that are kept.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'box_contenuto articolo'}},
    ]

    # Tag specifications for the parts of a page that are stripped.
    remove_tags = [
        {'name': ['object', 'link']},
        {'name': 'div',
         'attrs': {'class': ['post-meta', 'sharing-tools', 'related',
                             'comments', 'prev-next',
                             'box_contenuto adsense']}},
        {'name': 'div',
         'attrs': {'id': ['strumenti', 'related-posts', 'footer',
                          'inline_boxes', 'inline_boxes_header',
                          'inline_boxes_body', 'bottom']}},
        {'name': 'span', 'attrs': {'class': 'titolosezione default'}},
    ]

    # RSS sources as (section title, feed URL) pairs.
    feeds = [
        (u'Prima pagina',
         u'http://quotidianonet.ilsole24ore.com/rss/home.xml'),
        (u'Cronaca',
         u'http://quotidianonet.ilsole24ore.com/rss/cronaca.xml'),
        (u'Economia',
         u'http://quotidianonet.ilsole24ore.com/rss/economia.xml'),
        (u'Esteri',
         u'http://quotidianonet.ilsole24ore.com/rss/esteri.xml'),
        (u'Politica',
         u'http://quotidianonet.ilsole24ore.com/rss/politica.xml'),
        (u'Salute',
         u'http://quotidianonet.ilsole24ore.com/rss/salute.xml'),
        (u'Tecnologia',
         u'http://quotidianonet.ilsole24ore.com/rss/tecnologia.xml'),
    ]
|
57
resources/recipes/starwars.recipe
Normal file
57
resources/recipes/starwars.recipe
Normal file
@ -0,0 +1,57 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
|
||||
class TheForce(BasicNewsRecipe):
    # Recipe for the theforce.net news feed.

    title = u'The Force'
    language = 'en'
    __author__ = 'Krittika Goyal'
    oldest_article = 1  # days
    max_articles_per_feed = 25
    encoding = 'cp1252'

    # ``no_stylesheets`` is the BasicNewsRecipe option for dropping site
    # stylesheets; the previous ``remove_stylesheets`` name is not an option
    # the framework reads, so it had no effect.
    no_stylesheets = True
    conversion_options = {'linearize_tables': True}

    remove_tags_after = dict(name='div', attrs={'class': 'KonaBody'})
    # keep_only_tags is documented as a list of tag specifications.
    keep_only_tags = [
        dict(name='td', attrs={'background': '/images/span/tile_story_bgtile.gif'}),
    ]
    remove_tags = [
        dict(name='iframe'),
    ]

    feeds = [
        ('The Force',
         'http://www.theforce.net/outnews/tfnrdf.xml'),
    ]

    def _remove_from(self, tag):
        '''
        Detach every element returned by ``tag.findAllNext()`` and then
        ``tag`` itself from the document.
        '''
        for x in tag.findAllNext():
            x.extract()
        tag.extract()

    def preprocess_html(self, soup):
        '''
        Trim trailing boilerplate and replace linked thumbnails.

        :param soup: parsed article page
        :return: the same ``soup`` object, modified in place
        '''
        # Cut the page at the first <i> carrying the Facebook reminder.
        marker = 'Remember to join the Star Wars Insider Facebook'
        for tag in soup.findAll(name='i'):
            if marker in self.tag_to_string(tag):
                self._remove_from(tag)
                break

        # Cut the page at the table enclosing the "articleoption" element.
        tag = soup.find(attrs={'class': 'articleoption'})
        if tag is not None:
            table = tag.findParent('table')
            if table is not None:
                self._remove_from(table)

        # For an image wrapped in a link, take whatever follows the first
        # '=' after the last '?' of the href as the image URL and turn the
        # link itself into the <img>.
        for img in soup.findAll('img', src=True):
            a = img.findParent('a', href=True)
            if a is None:
                continue
            url = a.get('href').split('?')[-1].partition('=')[-1]
            if url:
                img.extract()
                a.name = 'img'
                a['src'] = url
                del a['href']
        return soup
|
145
resources/recipes/wapo_cartoons.recipe
Normal file
145
resources/recipes/wapo_cartoons.recipe
Normal file
@ -0,0 +1,145 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
from datetime import date, timedelta
|
||||
|
||||
class WaPoCartoonsRecipe(BasicNewsRecipe):
    '''
    Collect recent editorial cartoons from the cartoonist pages in ``feeds``.

    The entries in ``feeds`` are ordinary web pages, not RSS feeds; the
    article list is built by :meth:`parse_index` from the date ``<select>``
    found on each page.
    '''
    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    language = 'en'
    version = 2

    title = u'Washington Post Cartoons'
    publisher = u'Washington Post'
    category = u'News, Cartoons'
    description = u'Cartoons from the Washington Post'

    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True

    # (cartoonist, landing page URL)
    feeds = [
        (u'Anderson', u'http://www.uclick.com/client/wpc/wpnan/'),
        (u'Auth', u'http://www.uclick.com/client/wpc/ta/'),
        (u'Bok', u'http://www.creators.com/featurepages/11_editorialcartoons_chip-bok.html?name=cb'),
        (u'Carlson', u'http://www.uclick.com/client/wpc/sc/'),
        (u'Luckovich', u'http://www.creators.com/featurepages/11_editorialcartoons_mike-luckovich.html?name=lk'),
        (u'McCoy', u'http://www.uclick.com/client/wpc/gm/'),
        (u'Pat Oliphant', u'http://www.uclick.com/client/wpc/po/'),
        (u'Sargent', u'http://wpcomics.washingtonpost.com/client/wpc/bs/'),
        (u'Wilkinson', u'http://www.uclick.com/client/wpc/wpswi/'),
    ]

    extra_css = '''
                body {font-family: verdana, arial, helvetica, geneva, sans-serif;}
                h1 {font-size: medium; font-weight: bold; margin-bottom: -0.1em; padding: 0em; text-align: left;}
                #name {margin-bottom: 0.2em}
                #copyright {font-size: xx-small; color: #696969; text-align: right; margin-top: 0.2em;}
                '''

    def parse_index(self):
        '''
        Build the article index.

        :return: list of ``[cartoonist, articles]`` pairs; every article is
                 a dict with ``title``, ``date``, ``url`` and
                 ``description`` keys. The landing page itself is always
                 the first article, titled ``'Current'``.
        '''
        index = []
        # Dates are compared as zero-padded YYYYMMDD strings.
        oldestDate = date.today() - timedelta(days=self.oldest_article)
        oldest = oldestDate.strftime('%Y%m%d')
        for feed in self.feeds:
            name, url = feed[0], feed[1]
            soup = self.index_to_soup(url)

            cartoons = [{'title': 'Current', 'date': None, 'url': url, 'description': ''}]

            # The name of the date selector decides which parser is used.
            select = soup.find('select', attrs={'name': ['url', 'dest']})
            if select is not None:
                if select['name'] == 'url':
                    candidates = self.cartoonCandidatesWaPo(select, oldest)
                else:
                    candidates = self.cartoonCandidatesCreatorsCom(select, oldest)
                cartoons.extend(candidates)

            index.append([name, cartoons])

        return index

    def preprocess_html(self, soup):
        '''
        Rebuild the page as a minimal document holding only the cartoon
        title (``div#name``), the image (``div#comic_full``) and the
        copyright notice (``div#copyright``).

        Elements that cannot be found on the page are simply left out.

        :param soup: parsed cartoon page
        :return: a new soup containing the extracted elements
        '''
        freshSoup = self.getFreshSoup(soup)
        body = freshSoup.body

        div = soup.find('div', attrs={'id': 'name'})
        if div is not None:
            # Layout that already uses the ids targeted by extra_css.
            body.append(div)

            comic = soup.find('div', attrs={'id': 'comic_full'})
            if comic is not None:
                img = comic.find('img')
                # Drop everything from the last '&' of the image URL on.
                if img is not None and '&' in img.get('src', ''):
                    img['src'] = img['src'].rpartition('&')[0]
                body.append(comic)

            notice = soup.find('div', attrs={'id': 'copyright'})
            if notice is not None:
                body.append(notice)
        else:
            # Other layout: rename the relevant elements to the same
            # div#name / div#comic_full / div#copyright structure.
            span = soup.find('span', attrs={'class': 'title'})
            if span is not None:
                del span['class']
                span['id'] = 'name'
                span.name = 'div'
                body.append(span)

            img = soup.find('img', attrs={'class': 'pic_big'})
            if img is not None:
                td = img.parent
                if td.has_key('style'):
                    del td['style']
                td.name = 'div'
                td['id'] = 'comic_full'
                body.append(td)

            td = soup.find('td', attrs={'class': 'copy'})
            if td is not None:
                # findAll() yields every anchor (possibly none); find()
                # would return a single tag or None, and iterating that
                # walks its children or raises TypeError.
                for a in td.findAll('a'):
                    a.extract()
                del td['class']
                td['id'] = 'copyright'
                td.name = 'div'
                body.append(td)

        return freshSoup

    def getFreshSoup(self, oldSoup):
        '''
        Create an empty HTML document that carries over the title text of
        ``oldSoup`` when it has one.
        '''
        freshSoup = BeautifulSoup('<html><head><title></title></head><body></body></html>')
        head = oldSoup.head
        if head is not None and head.title:
            freshSoup.head.title.append(self.tag_to_string(head.title))
        return freshSoup

    def cartoonCandidatesWaPo(self, select, oldest):
        '''
        Yield article dicts from a ``<select name="url">`` date selector.

        The date is taken from the last three path components of each
        option value. The first option is skipped, and iteration stops at
        the first option older than ``oldest`` (a YYYYMMDD string).
        '''
        # NOTE(review): stopping early presumes options are listed newest
        # first -- confirm against the live pages.
        for opt in select.findAll('option')[1:]:
            url = opt['value'].rstrip('/')
            dateparts = url.split('/')[-3:]
            datenum = str(dateparts[0]) + str(dateparts[1]) + str(dateparts[2])
            if datenum < oldest:
                return
            yield {'title': self.tag_to_string(opt), 'date': None, 'url': url, 'description': ''}

    def cartoonCandidatesCreatorsCom(self, select, oldest):
        '''
        Yield article dicts from the other kind of date selector.

        The option text is parsed as ``<word> <Month> <day>, <year>``. The
        first option and any option marked ``selected`` are skipped, and
        iteration stops at the first option older than ``oldest`` (a
        YYYYMMDD string).
        '''
        monthNames = {'January': '01', 'February': '02', 'March': '03', 'April': '04', 'May': '05',
                      'June': '06', 'July': '07', 'August': '08', 'September': '09', 'October': '10',
                      'November': '11', 'December': '12'}

        for opt in select.findAll('option')[1:]:
            if opt.has_key('selected'):
                continue

            dateString = self.tag_to_string(opt)
            rest, sep, year = dateString.rpartition(', ')
            parts = rest.split(' ')
            day = parts[2].rjust(2, '0')
            month = monthNames[parts[1]]
            datenum = str(year) + month + str(day)
            if datenum < oldest:
                return
            yield {'title': dateString, 'date': None, 'url': opt['value'], 'description': ''}
|
||||
|
||||
|
@ -2,7 +2,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
__appname__ = 'calibre'
|
||||
__version__ = '0.6.32'
|
||||
__version__ = '0.6.33'
|
||||
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
|
||||
|
||||
import re
|
||||
|
@ -264,6 +264,11 @@ class EPUBOutput(OutputFormatPlugin):
|
||||
if body:
|
||||
body = body[0]
|
||||
|
||||
# Add id attribute to <a> tags that have name
|
||||
for x in XPath('//h:a[@name]')(body):
|
||||
if not x.get('id', False):
|
||||
x.set('id', x.get('name'))
|
||||
|
||||
# Replace <br> that are children of <body> as ADE doesn't handle them
|
||||
if hasattr(body, 'xpath'):
|
||||
for br in XPath('./h:br')(body):
|
||||
|
@ -237,6 +237,8 @@ class Stylizer(object):
|
||||
style.update(self._normalize_edge(prop.cssValue, name))
|
||||
elif name == 'font':
|
||||
style.update(self._normalize_font(prop.cssValue))
|
||||
elif name == 'list-style':
|
||||
style.update(self._normalize_list_style(prop.cssValue))
|
||||
else:
|
||||
style[name] = prop.value
|
||||
if 'font-size' in style:
|
||||
@ -269,6 +271,31 @@ class Stylizer(object):
|
||||
style["%s-%s" % (name, edge)] = value
|
||||
return style
|
||||
|
||||
def _normalize_list_style(self, cssvalue):
    # Expand the ``list-style`` shorthand into its three longhand
    # properties and return them as a dict.
    longhands = ('list-style-type', 'list-style-position',
                 'list-style-image')

    # ``inherit`` applies to every longhand at once.
    if cssvalue.cssText == 'inherit':
        return dict.fromkeys(longhands, 'inherit')

    # A value that cannot be iterated is treated as a single primitive.
    try:
        pending = [item.cssText for item in cssvalue]
    except TypeError:
        pending = [cssvalue.cssText]
    pending.reverse()

    # Walk the longhands in order; each one that validates the current
    # primitive consumes it and moves on to the next primitive.
    expanded = {}
    current = pending.pop()
    for prop in longhands:
        if not cssprofiles.validate(prop, current):
            continue
        expanded[prop] = current
        if not pending:
            break
        current = pending.pop()

    # Longhands that received no primitive fall back to their defaults.
    for prop in longhands:
        if prop not in expanded:
            expanded[prop] = DEFAULTS[prop]

    return expanded
|
||||
|
||||
def _normalize_font(self, cssvalue):
|
||||
composition = ('font-style', 'font-variant', 'font-weight',
|
||||
'font-size', 'line-height', 'font-family')
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -4,9 +4,9 @@
|
||||
#
|
||||
msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: calibre 0.6.32\n"
|
||||
"POT-Creation-Date: 2010-01-09 10:18+MST\n"
|
||||
"PO-Revision-Date: 2010-01-09 10:18+MST\n"
|
||||
"Project-Id-Version: calibre 0.6.33\n"
|
||||
"POT-Creation-Date: 2010-01-10 16:40+MST\n"
|
||||
"PO-Revision-Date: 2010-01-10 16:40+MST\n"
|
||||
"Last-Translator: Automatically generated\n"
|
||||
"Language-Team: LANGUAGE\n"
|
||||
"MIME-Version: 1.0\n"
|
||||
@ -123,11 +123,11 @@ msgstr ""
|
||||
#: /home/kovid/work/calibre/src/calibre/library/database.py:913
|
||||
#: /home/kovid/work/calibre/src/calibre/library/database2.py:703
|
||||
#: /home/kovid/work/calibre/src/calibre/library/database2.py:715
|
||||
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1135
|
||||
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1172
|
||||
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1509
|
||||
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1511
|
||||
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1622
|
||||
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1143
|
||||
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1180
|
||||
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1517
|
||||
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1519
|
||||
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1630
|
||||
#: /home/kovid/work/calibre/src/calibre/library/server.py:645
|
||||
#: /home/kovid/work/calibre/src/calibre/library/server.py:717
|
||||
#: /home/kovid/work/calibre/src/calibre/library/server.py:764
|
||||
@ -585,7 +585,7 @@ msgstr ""
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/tag_view.py:132
|
||||
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1068
|
||||
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1072
|
||||
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1409
|
||||
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1417
|
||||
msgid "News"
|
||||
msgstr ""
|
||||
|
||||
@ -1955,6 +1955,10 @@ msgstr ""
|
||||
msgid "Run the text input through the markdown pre-processor. To learn more about markdown see"
|
||||
msgstr ""
|
||||
|
||||
#: /home/kovid/work/calibre/src/calibre/ebooks/txt/input.py:35
|
||||
msgid "Do not insert a Table of Contents into the output text."
|
||||
msgstr ""
|
||||
|
||||
#: /home/kovid/work/calibre/src/calibre/ebooks/txt/output.py:24
|
||||
msgid "Type of newline to use. Options are %s. Default is 'system'. Use 'old_mac' for compatibility with Mac OS 9 and earlier. For Mac OS X use 'unix'. 'system' will default to the newline type used by this OS."
|
||||
msgstr ""
|
||||
@ -2203,7 +2207,7 @@ msgstr ""
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/convert/rb_output_ui.py:28
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/convert/structure_detection_ui.py:59
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/convert/toc_ui.py:62
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:38
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:42
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_output_ui.py:45
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/convert/xexp_edit_ui.py:49
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/convert/xpath_wizard_ui.py:67
|
||||
@ -2752,12 +2756,12 @@ msgid "PDB Input"
|
||||
msgstr ""
|
||||
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/convert/pdb_input_ui.py:32
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:39
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:43
|
||||
msgid "Treat each &line as a paragraph"
|
||||
msgstr ""
|
||||
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/convert/pdb_input_ui.py:33
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:42
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:44
|
||||
msgid "Assume print formatting"
|
||||
msgstr ""
|
||||
|
||||
@ -2975,14 +2979,18 @@ msgstr ""
|
||||
msgid "TXT Input"
|
||||
msgstr ""
|
||||
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:40
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:45
|
||||
msgid "Process using markdown"
|
||||
msgstr ""
|
||||
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:41
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:46
|
||||
msgid "<p>Markdown is a simple markup language for text files, that allows for advanced formatting. To learn more visit <a href=\"http://daringfireball.net/projects/markdown\">markdown</a>."
|
||||
msgstr ""
|
||||
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:47
|
||||
msgid "Do not insert Table of Contents into output text when using markdown"
|
||||
msgstr ""
|
||||
|
||||
#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_output.py:16
|
||||
msgid "TXT Output"
|
||||
msgstr ""
|
||||
@ -6639,27 +6647,27 @@ msgid ""
|
||||
"For help on an individual command: %%prog command --help\n"
|
||||
msgstr ""
|
||||
|
||||
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1648
|
||||
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1656
|
||||
msgid "<p>Migrating old database to ebook library in %s<br><center>"
|
||||
msgstr ""
|
||||
|
||||
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1677
|
||||
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1685
|
||||
msgid "Copying <b>%s</b>"
|
||||
msgstr ""
|
||||
|
||||
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1694
|
||||
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1702
|
||||
msgid "Compacting database"
|
||||
msgstr ""
|
||||
|
||||
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1787
|
||||
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1795
|
||||
msgid "Checking SQL integrity..."
|
||||
msgstr ""
|
||||
|
||||
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1824
|
||||
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1832
|
||||
msgid "Checking for missing files."
|
||||
msgstr ""
|
||||
|
||||
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1846
|
||||
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1854
|
||||
msgid "Checked id"
|
||||
msgstr ""
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -158,7 +158,7 @@ class RecursiveFetcher(object):
|
||||
pass
|
||||
|
||||
def remove_beyond(tag, next):
|
||||
while tag is not None and tag.name != 'body':
|
||||
while tag is not None and getattr(tag, 'name', None) != 'body':
|
||||
after = getattr(tag, next)
|
||||
while after is not None:
|
||||
ns = getattr(tag, next)
|
||||
|
Loading…
x
Reference in New Issue
Block a user