diff --git a/Changelog.yaml b/Changelog.yaml index 478e6a311b..21b92493a7 100644 --- a/Changelog.yaml +++ b/Changelog.yaml @@ -19,6 +19,63 @@ # new recipes: # - title: +- version: 0.8.28 + date: 2011-11-25 + + new features: + - title: "Get Books: Add litres.ru store" + + - title: "Change the algorithm that generates title sort strings to strip leading articles from both english and the current language set for the calibre user interface. In addition, in the edit metadata dialog, calibre will use the book's language when calculating the sort string. This behavior can be adjusted via Preferences->Tweaks." + tickets: [886763] + + - title: "Driver for Cybook Odyssey." + tickets: [893457] + + - title: "Irex driver: Put books into the top level directory instead of into /ebooks or /Books." + tickets: [883616] + + bug fixes: + - title: "Have downloaded periodicals recognized when transferred via USB to the Kindle Fire" + + - title: "MOBI Output: Fix underline and strikethrough properties declared on parents not being rendered on child tags." + tickets: [894245] + + - title: "Template language: Fix regression that broke ordering of items when formatting a list" + + - title: "Conversion pipeline: When removing obsolete tags convert them to
<div> instead of <span> if they contain block level tags." + tickets: [892525] + + - title: "When downloading metadata, fix the case normalization of double-barrelled author names." + tickets: [893257] + + - title: "Template language: Fix regression that broke using general program mode in save to disk templates" + + - title: "calibredb: Fix use of ranges when specifying ids for the remove command" + + - title: "Apple driver: Add ids for iPhone 4S. More robust against iTunes automation errors when adding artwork." + tickets: [892468] + + - title: "Fix encoding of comments incorrectly detected when downloading metadata from ozon.ru" + + - title: "Fix calibre not getting list of books on the Kindle Fire" + + improved recipes: + - El Mundo + - BBC + - NIN Online + - ABC Australia + - Salon.com + - Expansion (Spanish) + - The Week + - Heise Online + + new recipes: + - title: Give me something to read and Let's get Critical + author: Barty + + - title: Worldcrunch + author: Krittika Goyal + - version: 0.8.27 date: 2011-11-18 diff --git a/recipes/buffalo_news.recipe b/recipes/buffalo_news.recipe index 51985a3c51..ae84e4433e 100644 --- a/recipes/buffalo_news.recipe +++ b/recipes/buffalo_news.recipe @@ -10,49 +10,39 @@ http://www.buffalonews.com/RSS/ from calibre.web.feeds.news import BasicNewsRecipe -class AdvancedUserRecipe1298680852(BasicNewsRecipe): +class BuffaloNews(BasicNewsRecipe): title = u'Buffalo News' oldest_article = 2 language = 'en' - __author__ = 'ChappyOnIce' + __author__ = 'ChappyOnIce, Krittika Goyal' max_articles_per_feed = 20 encoding = 'utf-8' masthead_url = 'http://www.buffalonews.com/buffalonews/skins/buffalonews/images/masthead/the_buffalo_news_logo.png' - remove_javascript = True - extra_css = 'body {text-align: justify;}\n \ - p {text-indent: 20px;}' + auto_cleanup = True + remove_empty_feeds = True - keep_only_tags = [ - dict(name='div', attrs={'class':['main-content-left']}) - ] - - remove_tags = [ - dict(name='div', attrs={'id':['commentCount']}), - 
dict(name='div', attrs={'class':['story-list-links']}) - ] - - remove_tags_after = dict(name='div', attrs={'class':['body storyContent']}) - - feeds = [(u'City of Buffalo', u'http://www.buffalonews.com/city/communities/buffalo/?widget=rssfeed&view=feed&contentId=77944'), - (u'Southern Erie County', u'http://www.buffalonews.com/city/communities/southern-erie/?widget=rssfeed&view=feed&contentId=77944'), - (u'Eastern Erie County', u'http://www.buffalonews.com/city/communities/eastern-erie/?widget=rssfeed&view=feed&contentId=77944'), - (u'Southern Tier', u'http://www.buffalonews.com/city/communities/southern-tier/?widget=rssfeed&view=feed&contentId=77944'), - (u'Niagara County', u'http://www.buffalonews.com/city/communities/niagara-county/?widget=rssfeed&view=feed&contentId=77944'), - (u'Business', u'http://www.buffalonews.com/business/?widget=rssfeed&view=feed&contentId=77944'), - (u'MoneySmart', u'http://www.buffalonews.com/business/moneysmart/?widget=rssfeed&view=feed&contentId=77944'), - (u'Bills & NFL', u'http://www.buffalonews.com/sports/bills-nfl/?widget=rssfeed&view=feed&contentId=77944'), - (u'Sabres & NHL', u'http://www.buffalonews.com/sports/sabres-nhl/?widget=rssfeed&view=feed&contentId=77944'), - (u'Bob DiCesare', u'http://www.buffalonews.com/sports/columns/bob-dicesare/?widget=rssfeed&view=feed&contentId=77944'), - (u'Bucky Gleason', u'http://www.buffalonews.com/sports/columns/bucky-gleason/?widget=rssfeed&view=feed&contentId=77944'), - (u'Mark Gaughan', u'http://www.buffalonews.com/sports/bills-nfl/inside-the-nfl/?widget=rssfeed&view=feed&contentId=77944'), - (u'Mike Harrington', u'http://www.buffalonews.com/sports/columns/mike-harrington/?widget=rssfeed&view=feed&contentId=77944'), - (u'Jerry Sullivan', u'http://www.buffalonews.com/sports/columns/jerry-sullivan/?widget=rssfeed&view=feed&contentId=77944'), - (u'Other Sports Columns', u'http://www.buffalonews.com/sports/columns/other-sports-columns/?widget=rssfeed&view=feed&contentId=77944'), - (u'Life', 
u'http://www.buffalonews.com/life/?widget=rssfeed&view=feed&contentId=77944'), - (u'Bruce Andriatch', u'http://www.buffalonews.com/city/columns/bruce-andriatch/?widget=rssfeed&view=feed&contentId=77944'), - (u'Donn Esmonde', u'http://www.buffalonews.com/city/columns/donn-esmonde/?widget=rssfeed&view=feed&contentId=77944'), - (u'Rod Watson', u'http://www.buffalonews.com/city/columns/rod-watson/?widget=rssfeed&view=feed&contentId=77944'), - (u'Entertainment', u'http://www.buffalonews.com/entertainment/?widget=rssfeed&view=feed&contentId=77944'), - (u'Off Main Street', u'http://www.buffalonews.com/city/columns/off-main-street/?widget=rssfeed&view=feed&contentId=77944'), - (u'Editorials', u'http://www.buffalonews.com/editorial-page/buffalo-news-editorials/?widget=rssfeed&view=feed&contentId=77944') + feeds = [ + (u'City of Buffalo', u'http://www.buffalonews.com/city/communities/buffalo/?widget=rssfeed&view=feed&contentId=77944'), + (u'Southern Erie County', u'http://www.buffalonews.com/city/communities/southern-erie/?widget=rssfeed&view=feed&contentId=77944'), + (u'Eastern Erie County', u'http://www.buffalonews.com/city/communities/eastern-erie/?widget=rssfeed&view=feed&contentId=77944'), + (u'Southern Tier', u'http://www.buffalonews.com/city/communities/southern-tier/?widget=rssfeed&view=feed&contentId=77944'), + (u'Niagara County', u'http://www.buffalonews.com/city/communities/niagara-county/?widget=rssfeed&view=feed&contentId=77944'), + (u'Business', u'http://www.buffalonews.com/business/?widget=rssfeed&view=feed&contentId=77944'), + (u'MoneySmart', u'http://www.buffalonews.com/business/moneysmart/?widget=rssfeed&view=feed&contentId=77944'), + (u'Bills & NFL', u'http://www.buffalonews.com/sports/bills-nfl/?widget=rssfeed&view=feed&contentId=77944'), + (u'Sabres & NHL', u'http://www.buffalonews.com/sports/sabres-nhl/?widget=rssfeed&view=feed&contentId=77944'), + (u'Bob DiCesare', 
u'http://www.buffalonews.com/sports/columns/bob-dicesare/?widget=rssfeed&view=feed&contentId=77944'), + (u'Bucky Gleason', u'http://www.buffalonews.com/sports/columns/bucky-gleason/?widget=rssfeed&view=feed&contentId=77944'), + (u'Mark Gaughan', u'http://www.buffalonews.com/sports/bills-nfl/inside-the-nfl/?widget=rssfeed&view=feed&contentId=77944'), + (u'Mike Harrington', u'http://www.buffalonews.com/sports/columns/mike-harrington/?widget=rssfeed&view=feed&contentId=77944'), + (u'Jerry Sullivan', u'http://www.buffalonews.com/sports/columns/jerry-sullivan/?widget=rssfeed&view=feed&contentId=77944'), + (u'Other Sports Columns', u'http://www.buffalonews.com/sports/columns/other-sports-columns/?widget=rssfeed&view=feed&contentId=77944'), + (u'Life', u'http://www.buffalonews.com/life/?widget=rssfeed&view=feed&contentId=77944'), + (u'Bruce Andriatch', u'http://www.buffalonews.com/city/columns/bruce-andriatch/?widget=rssfeed&view=feed&contentId=77944'), + (u'Donn Esmonde', u'http://www.buffalonews.com/city/columns/donn-esmonde/?widget=rssfeed&view=feed&contentId=77944'), + (u'Rod Watson', u'http://www.buffalonews.com/city/columns/rod-watson/?widget=rssfeed&view=feed&contentId=77944'), + (u'Entertainment', u'http://www.buffalonews.com/entertainment/?widget=rssfeed&view=feed&contentId=77944'), + (u'Off Main Street', u'http://www.buffalonews.com/city/columns/off-main-street/?widget=rssfeed&view=feed&contentId=77944'), + (u'Editorials', u'http://www.buffalonews.com/editorial-page/buffalo-news-editorials/?widget=rssfeed&view=feed&contentId=77944') ] + diff --git a/recipes/cosmopolitan_uk.recipe b/recipes/cosmopolitan_uk.recipe new file mode 100644 index 0000000000..21317063ab --- /dev/null +++ b/recipes/cosmopolitan_uk.recipe @@ -0,0 +1,51 @@ +import re +from calibre.web.feeds.news import BasicNewsRecipe +#from calibre import __appname__ +from calibre.utils.magick import Image +class AdvancedUserRecipe1306097511(BasicNewsRecipe): + title = u'Cosmopolitan UK' + description = 
'Fashion, beauty and Gossip for women from COSMOPOLITAN -UK' + + __author__ = 'Dave Asbury' + # greyscale code by Starson + cover_url = 'http://www.cosmopolitan.magazine.co.uk/files/4613/2085/8988/Cosmo_Cover3.jpg' + no_stylesheets = True + oldest_article = 7 + max_articles_per_feed = 20 + remove_empty_feeds = True + remove_javascript = True + + preprocess_regexps = [ + (re.compile(r'.*?', re.IGNORECASE | re.DOTALL), lambda match: '')] + language = 'en_GB' + + + masthead_url = 'http://www.cosmopolitan.co.uk/cm/cosmopolitanuk/site_images/header/cosmouk_logo_home.gif' + + + keep_only_tags = [ + dict(attrs={'class' : ['dateAuthor', 'publishDate']}), + dict(name='div',attrs ={'id' : ['main_content']}) + ] + remove_tags = [ + dict(name='div',attrs={'class' : ['blogInfo','viral_toolbar','comment_number','prevEntry nav']}), + dict(name='div',attrs={'class' : 'blog_module_about_the_authors'}), + dict(attrs={'id': ['breadcrumbs','comment','related_links_list','right_rail','content_sec_fb_more','content_sec_mostpopularstories','content-sec_fb_frame_viewfb_bot']}), + dict(attrs={'class' : ['read_liked_that_header','fb_back_next_area']}) + ] + + feeds = [ + (u'Love & Sex', u'http://www.cosmopolitan.co.uk/love-sex/rss/'), (u'Men', u'http://cosmopolitan.co.uk/men/rss/'), (u'Fashion', u'http://cosmopolitan.co.uk/fashion/rss/'), (u'Hair & Beauty', u'http://cosmopolitan.co.uk/beauty-hair/rss/'), (u'LifeStyle', u'http://cosmopolitan.co.uk/lifestyle/rss/'), (u'Cosmo On Campus', u'http://cosmopolitan.co.uk/campus/rss/'), (u'Celebrity Gossip', u'http://cosmopolitan.co.uk/celebrity-gossip/rss/')] + + def postprocess_html(self, soup, first): + #process all the images + for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')): + iurl = tag['src'] + img = Image() + img.open(iurl) + if img < 0: + raise RuntimeError('Out of memory') + img.type = "GrayscaleType" + img.save(iurl) + return soup + diff --git a/recipes/elmundo.recipe b/recipes/elmundo.recipe index 
76ce785ba3..4f04f68575 100644 --- a/recipes/elmundo.recipe +++ b/recipes/elmundo.recipe @@ -4,7 +4,8 @@ __copyright__ = '2009-2011, Darko Miletic ' ''' elmundo.es ''' - +import re +import time from calibre.web.feeds.news import BasicNewsRecipe class ElMundo(BasicNewsRecipe): @@ -18,12 +19,15 @@ class ElMundo(BasicNewsRecipe): no_stylesheets = True use_embedded_content = False encoding = 'iso8859_15' + remove_javascript = True + remove_empty_feeds = True language = 'es' masthead_url = 'http://estaticos03.elmundo.es/elmundo/iconos/v4.x/v4.01/bg_h1.png' publication_type = 'newspaper' extra_css = """ body{font-family: Arial,Helvetica,sans-serif} .metadata_noticia{font-size: small} + .pestana_GDP{font-size: small; font-weight:bold} h1,h2,h3,h4,h5,h6,.subtitulo {color: #3F5974} .hora{color: red} .update{color: gray} @@ -41,22 +45,43 @@ class ElMundo(BasicNewsRecipe): remove_tags_after = dict(name='div' , attrs={'id':['desarrollo_noticia','tamano']}) remove_attributes = ['lang','border'] remove_tags = [ - dict(name='div', attrs={'class':['herramientas','publicidad_google']}) - ,dict(name='div', attrs={'id':'modulo_multimedia' }) + dict(name='div', attrs={'class':['herramientas','publicidad_google','comenta','col col-2b','apoyos','no-te-pierdas']}) + ,dict(name='div', attrs={'class':['publicidad publicidad_cuerpo_noticia','comentarios_nav','mensaje_privado','interact']}) + ,dict(name='div', attrs={'class':['num_comentarios estirar']}) + ,dict(name='span', attrs={'class':['links_comentar']}) + ,dict(name='div', attrs={'id':['comentar']}) ,dict(name='ul', attrs={'class':'herramientas' }) ,dict(name=['object','link','embed','iframe','base','meta']) ] feeds = [ - (u'Portada' , u'http://estaticos.elmundo.es/elmundo/rss/portada.xml' ) + (u'Portada' , u'http://estaticos.elmundo.es/elmundo/rss/portada.xml' ) ,(u'Deportes' , u'http://estaticos.elmundo.es/elmundodeporte/rss/portada.xml') - ,(u'Economia' , u'http://estaticos.elmundo.es/elmundo/rss/economia.xml' ) - ,(u'Espana' , 
u'http://estaticos.elmundo.es/elmundo/rss/espana.xml' ) + ,(u'Econom\xeda' , u'http://estaticos.elmundo.es/elmundo/rss/economia.xml' ) + ,(u'Espa\xf1a' , u'http://estaticos.elmundo.es/elmundo/rss/espana.xml' ) ,(u'Internacional' , u'http://estaticos.elmundo.es/elmundo/rss/internacional.xml' ) ,(u'Cultura' , u'http://estaticos.elmundo.es/elmundo/rss/cultura.xml' ) - ,(u'Ciencia/Ecologia', u'http://estaticos.elmundo.es/elmundo/rss/ciencia.xml' ) - ,(u'Comunicacion' , u'http://estaticos.elmundo.es/elmundo/rss/comunicacion.xml' ) - ,(u'Television' , u'http://estaticos.elmundo.es/elmundo/rss/television.xml' ) + ,(u'Ciencia/Ecolog\xeda', u'http://estaticos.elmundo.es/elmundo/rss/ciencia.xml' ) + ,(u'Comunicaci\xf3n' , u'http://estaticos.elmundo.es/elmundo/rss/comunicacion.xml' ) + ,(u'Televisi\xf3n' , u'http://estaticos.elmundo.es/elmundo/rss/television.xml' ) + + ,(u'Salud' , u'http://estaticos.elmundo.es/elmundosalud/rss/portada.xml' ) + ,(u'Solidaridad' , u'http://estaticos.elmundo.es/elmundo/rss/solidaridad.xml' ) + ,(u'Su vivienda' , u'http://estaticos.elmundo.es/elmundo/rss/suvivienda.xml' ) + ,(u'Motor' , u'http://estaticos.elmundo.es/elmundomotor/rss/portada.xml' ) + + ,(u'Madrid' , u'http://estaticos.elmundo.es/elmundo/rss/madrid.xml' ) + ,(u'Barcelona' , u'http://estaticos.elmundo.es/elmundo/rss/barcelona.xml' ) + ,(u'Pa\xeds Vasco' , u'http://estaticos.elmundo.es/elmundo/rss/paisvasco.xml' ) + ,(u'Baleares' , u'http://estaticos.elmundo.es/elmundo/rss/baleares.xml' ) + ,(u'Castilla y Le\xf3n' , u'http://estaticos.elmundo.es/elmundo/rss/castillayleon.xml' ) + ,(u'Valladolid' , u'http://estaticos.elmundo.es/elmundo/rss/valladolid.xml' ) + ,(u'Valencia' , u'http://estaticos.elmundo.es/elmundo/rss/valencia.xml' ) + ,(u'Alicante' , u'http://estaticos.elmundo.es/elmundo/rss/alicante.xml' ) + ,(u'Castell\xf3n' , u'http://estaticos.elmundo.es/elmundo/rss/castellon.xml' ) + ,(u'Andaluc\xeda' , u'http://estaticos.elmundo.es/elmundo/rss/andalucia.xml' ) + ,(u'Sevilla' , 
u'http://estaticos.elmundo.es/elmundo/rss/andalucia_sevilla.xml' ) + ,(u'M\xe1laga' , u'http://estaticos.elmundo.es/elmundo/rss/andalucia_malaga.xml' ) ] def preprocess_html(self, soup): @@ -67,3 +92,34 @@ class ElMundo(BasicNewsRecipe): def get_article_url(self, article): return article.get('guid', None) + + preprocess_regexps = [ + # Para presentar la imagen de los videos incrustados + + (re.compile(r'var imagen', re.DOTALL|re.IGNORECASE), lambda match: '-->'), + (re.compile(r'var video=', re.DOTALL|re.IGNORECASE), lambda match: '