diff --git a/resources/images/news/zerohedge.png b/resources/images/news/zerohedge.png new file mode 100644 index 0000000000..a2bc6cde14 Binary files /dev/null and b/resources/images/news/zerohedge.png differ diff --git a/resources/recipes/expansion_spanish.recipe b/resources/recipes/expansion_spanish.recipe index 31a1504eb0..f2229e90e6 100644 --- a/resources/recipes/expansion_spanish.recipe +++ b/resources/recipes/expansion_spanish.recipe @@ -1,59 +1,79 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- - __license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' +__author__ = 'Gerardo Diez' +__copyright__ = 'Gerardo Diez' +description = 'Main daily newspaper from Spain - v1.00 (05, Enero 2011)' +__docformat__ = 'restructuredtext en' + ''' -www.expansion.com +expansion.es ''' +from calibre.web.feeds.recipes import BasicNewsRecipe +class Publico(BasicNewsRecipe): + title =u'Expansion.com' + __author__ ='Gerardo Diez' + publisher =u'Unidad Editorial Información Económica, S.L.' + category ='finances, catalunya' + oldest_article =1 + max_articles_per_feed =100 + simultaneous_downloads =10 + cover_url =u'http://estaticos01.expansion.com/iconos/v2.x/v2.0/cabeceras/logo_expansion.png' + timefmt ='[%A, %d %B, %Y]' + encoding ='latin' + language ='es' + remove_javascript =True + no_stylesheets =True + keep_only_tags =dict(name='div', attrs={'class':['noticia primer_elemento']}) + remove_tags =[ + dict(name='div', attrs={'class':['compartir', 'metadata_desarrollo_noticia', 'relacionadas', 'mas_info','publicidad publicidad_textlink', 'ampliarfoto']}), + dict(name='ul', attrs={'class':['bolos_desarrollo_noticia']}), + dict(name='span', attrs={'class':['comentarios']}), + dict(name='p', attrs={'class':['cintillo_comentarios', 'cintillo_comentarios formulario']}), + dict(name='div', attrs={'id':['comentarios_lectores_listado']}) + ] + feeds =[ + (u'Portada', u'http://estaticos.expansion.com/rss/portada.xml'), + (u'Portada: Bolsas', u'http://estaticos.expansion.com/rss/mercados.xml'), + (u'Divisas', u'http://estaticos.expansion.com/rss/mercadosdivisas.xml'), + (u'Euribor', u'http://estaticos.expansion.com/rss/mercadoseuribor.xml'), + (u'Materias Primas', u'http://estaticos.expansion.com/rss/mercadosmateriasprimas.xml'), + (u'Renta Fija', u'http://estaticos.expansion.com/rss/mercadosrentafija.xml'), -from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import Tag + (u'Portada: Mi Dinero', u'http://estaticos.expansion.com/rss/midinero.xml'), + (u'Hipotecas', u'http://estaticos.expansion.com/rss/midinerohipotecas.xml'), + (u'Créditos', u'http://estaticos.expansion.com/rss/midinerocreditos.xml'), + (u'Pensiones', u'http://estaticos.expansion.com/rss/midineropensiones.xml'), + (u'Fondos de Inversión', u'http://estaticos.expansion.com/rss/midinerofondos.xml'), + (u'Motor', u'http://estaticos.expansion.com/rss/midineromotor.xml'), -class Expansion(BasicNewsRecipe): - title = 'Diario Expansion' - __author__ = 'Darko Miletic' - description = 'Lider de informacion de mercados, economica y politica' - publisher = 'expansion.com' - category = 'news, politics, Spain' - oldest_article = 2 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - delay = 1 - encoding = 'iso-8859-15' - language = 'es' + (u'Portada: Empresas', u'http://estaticos.expansion.com/rss/empresas.xml'), + (u'Banca', u'http://estaticos.expansion.com/rss/empresasbanca.xml'), + (u'TMT', u'http://estaticos.expansion.com/rss/empresastmt.xml'), + (u'Energía', u'http://estaticos.expansion.com/rss/empresasenergia.xml'), + (u'Inmobiliario y Construcción', u'http://estaticos.expansion.com/rss/empresasinmobiliario.xml'), + (u'Transporte y Turismo', u'http://estaticos.expansion.com/rss/empresastransporte.xml'), + (u'Automoción e Industria', u'http://estaticos.expansion.com/rss/empresasauto-industria.xml'), + (u'Distribución', u'http://estaticos.expansion.com/rss/empresasdistribucion.xml'), + (u'Deporte y Negocio', u' http://estaticos.expansion.com/rss/empresasdeporte.xml'), + (u'Mi Negocio', u'http://estaticos.expansion.com/rss/empresasminegocio.xml'), + (u'Interiores', u'http://estaticos.expansion.com/rss/empresasinteriores.xml'), + (u'Digitech', u'http://estaticos.expansion.com/rss/empresasdigitech.xml'), - direction = 'ltr' + (u'Portada: Economía y Política', u'http://estaticos.expansion.com/rss/economiapolitica.xml'), + (u'Política', u'http://estaticos.expansion.com/rss/economia.xml'), + (u'Portada: Sociedad', u'http://estaticos.expansion.com/rss/entorno.xml'), - html2lrf_options = [ - '--comment' , description - , '--category' , category - , '--publisher', publisher - ] + (u'Portada: Opinión', u'http://estaticos.expansion.com/rss/opinion.xml'), + (u'Llaves y editoriales', u'http://estaticos.expansion.com/rss/opinioneditorialyllaves.xml'), + (u'Tribunas', u'http://estaticos.expansion.com/rss/opiniontribunas.xml'), - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + (u'Portada: Jurídico', u'http://estaticos.expansion.com/rss/juridico.xml'), + (u'Entrevistas', u'http://estaticos.expansion.com/rss/juridicoentrevistas.xml'), + (u'Opinión', u'http://estaticos.expansion.com/rss/juridicoopinion.xml'), + (u'Sentencias', u'http://estaticos.expansion.com/rss/juridicosentencias.xml'), - feeds = [ - (u'Ultimas noticias', u'http://rss.expansion.com/rss/descarga.htm?data2=178') - ,(u'Temas del dia' , u'http://rss.expansion.com/rss/descarga.htm?data2=178') - ] - - - keep_only_tags = [dict(name='div', attrs={'id':'principal'})] - - remove_tags = [ - dict(name=['object','link','script']) - ,dict(name='div', attrs={'class':['utilidades','tit_relacionadas']}) - ] - - remove_tags_after = [dict(name='div', attrs={'class':'tit_relacionadas'})] - - def preprocess_html(self, soup): - soup.html['dir' ] = self.direction - mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")]) - soup.head.insert(0,mcharset) - for item in soup.findAll(style=True): - del item['style'] - return soup + (u'Mujer', u'http://estaticos.expansion.com/rss/mujer-empresa.xml'), + (u'Cataluña', u'http://estaticos.expansion.com/rss/catalunya.xml'), + (u'Función pública', u'http://estaticos.expansion.com/rss/funcion-publica.xml') + ] diff --git a/resources/recipes/msnbc.recipe b/resources/recipes/msnbc.recipe index 6e2fc50aaa..f093479e2f 100644 --- a/resources/recipes/msnbc.recipe +++ b/resources/recipes/msnbc.recipe @@ -1,5 +1,5 @@ __license__ = 'GPL v3' -__copyright__ = '2010, Darko Miletic ' +__copyright__ = '2010-2011, Darko Miletic ' ''' msnbc.msn.com ''' @@ -19,7 +19,16 @@ class MsNBC(BasicNewsRecipe): publisher = 'msnbc.com' category = 'news, USA, world' language = 'en' - extra_css = ' body{ font-family: sans-serif } .head{font-family: serif; font-size: xx-large; font-weight: bold; color: #CC0000} .abstract{font-weight: bold} .source{font-size: small} .updateTime{font-size: small} ' + extra_css = """ + body{ font-family: Georgia,Times,serif } + .hide{display: none} + .caption{font-family: Arial,sans-serif; font-size: x-small} + .entry-summary{font-family: Arial,sans-serif} + .copyright{font-size: 0.95em; font-style: italic} + .source-org{font-size: small; font-family: Arial,sans-serif} + img{display: block; margin-bottom: 0.5em} + span.byline{display: none} + """ conversion_options = { 'comments' : description @@ -28,14 +37,20 @@ class MsNBC(BasicNewsRecipe): ,'publisher': publisher } - preprocess_regexps = [ - (re.compile(r'', re.DOTALL|re.IGNORECASE),lambda match: '') - ,(re.compile(r'
', re.DOTALL|re.IGNORECASE),lambda match: '
'), - ] + remove_tags_before = dict(name='h1', attrs={'id':'headline'}) + remove_tags_after = dict(name='span', attrs={'class':['copyright','Linear copyright']}) + keep_only_tags=[ + dict(attrs={'id':['headline','deck','byline','source','intelliTXT']}) + ,dict(attrs={'class':['gl_headline','articleText','drawer-content Linear','v-center3','byline','textBodyBlack']}) + ] + remove_attributes=['property','lang','rel','xmlns:fb','xmlns:v','xmlns:dc','xmlns:dcmitype','xmlns:og','xmlns:media','xmlns:vcard','typeof','itemscope','itemtype','itemprop','about','type','size','width','height','onreadystatechange','data','border','hspace','vspace'] + + remove_tags = [ + dict(name=['iframe','object','link','embed','meta','table']) + ,dict(name='span', attrs={'class':['copyright','Linear copyright']}) + ,dict(name='div', attrs={'class':'social'}) + ] - remove_tags_before = dict(name='div', attrs={'class':'head'}) - remove_tags_after = dict(name='div', attrs={'class':'copyright'}) - remove_tags = [dict(name=['iframe','object','link','script','form'])] feeds = [ (u'US News' , u'http://rss.msnbc.msn.com/id/3032524/device/rss/rss.xml' ) @@ -48,11 +63,26 @@ class MsNBC(BasicNewsRecipe): ,(u'Tech & Science', u'http://rss.msnbc.msn.com/id/3032117/device/rss/rss.xml' ) ] - def print_version(self, url): - return url + 'print/1/displaymode/1098/' - def preprocess_html(self, soup): - for item in soup.head.findAll('div'): - item.extract() + for item in soup.body.findAll('html'): + item.name='div' + for item in soup.body.findAll('div'): + if item.has_key('id') and item['id'].startswith('vine-'): + item.extract() + if item.has_key('class') and ( item['class'].startswith('ad') or item['class'].startswith('vine')): + item.extract() + for item in soup.body.findAll('img'): + if not item.has_key('alt'): + item['alt'] = 'image' + for item in soup.body.findAll('ol'): + if item.has_key('class') and item['class'].startswith('grid'): + item.extract() + for item in soup.body.findAll('span'): + if ( item.has_key('id') and item['id'].startswith('byLine') and item.string is None) or ( item.has_key('class') and item['class'].startswith('inline') ): + item.extract() + for alink in soup.findAll('a'): + if alink.string is not None: + tstr = alink.string + alink.replaceWith(tstr) return soup diff --git a/resources/recipes/technology_review.recipe b/resources/recipes/technology_review.recipe index cc8f13733e..e7cc6700d7 100644 --- a/resources/recipes/technology_review.recipe +++ b/resources/recipes/technology_review.recipe @@ -35,7 +35,6 @@ class TechnologyReview(BasicNewsRecipe): def get_article_url(self, article): return article.get('guid', article.get('id', None)) - def print_version(self, url): baseurl='http://www.technologyreview.com/printer_friendly_article.aspx?id=' split1 = string.split(url,"/") @@ -43,3 +42,25 @@ class TechnologyReview(BasicNewsRecipe): split2= string.split(xxx,"/") s = baseurl + split2[0] return s + + + def postprocess_html(self,soup, True): + #remove picture + headerhtml = soup.find(True, {'class':'header'}) + headerhtml.replaceWith("") + + #remove close button + closehtml = soup.find(True, {'class':'close'}) + closehtml.replaceWith("") + + #remove banner advertisement + bannerhtml = soup.find(True, {'class':'bannerad'}) + bannerhtml.replaceWith("") + + #thanks kiklop74! This code removes all links from the text + for alink in soup.findAll('a'): + if alink.string is not None: + tstr = alink.string + alink.replaceWith(tstr) + + return soup diff --git a/resources/recipes/zerohedge.recipe b/resources/recipes/zerohedge.recipe new file mode 100644 index 0000000000..09f62e5b52 --- /dev/null +++ b/resources/recipes/zerohedge.recipe @@ -0,0 +1,33 @@ +__license__ = 'GPL v3' +__copyright__ = '2011, Darko Miletic ' +''' +www.zerohedge.com +''' + +from calibre.web.feeds.recipes import BasicNewsRecipe + +class ZeroHedge(BasicNewsRecipe): + title = 'Zero Hedge' + __author__ = 'Darko Miletic' + description = 'On a long enough timeline the survival rate for everyone drops to zero' + oldest_article = 10 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = True + encoding = 'utf8' + publisher = 'zero hedge' + category = 'news, USA, world, economy, politics' + language = 'en' + masthead_url = 'http://www.zerohedge.com/themes/newsflash/logo.png' + publication_type = 'blog' + extra_css = 'body{ font-family: sans-serif }' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher': publisher + } + + + feeds = [(u'Articles', u'http://feeds.feedburner.com/zerohedge/feed')] diff --git a/src/calibre/gui2/layout.py b/src/calibre/gui2/layout.py index 4009f99201..2edf19d0c4 100644 --- a/src/calibre/gui2/layout.py +++ b/src/calibre/gui2/layout.py @@ -195,7 +195,9 @@ class SearchBar(QWidget): # {{{ x.setToolTip(_("Reset Quick Search")) x = parent.search_highlight_only = QCheckBox() - x.setText(_('Highlight')) + x.setText(_('&Highlight')) + x.setToolTip(_('Highlight matched books in the book list, instead ' + 'of restricting the book list to the matches.')) l.addWidget(x) x = parent.saved_search = SavedSearchBox(self) diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py index 349800c8ba..98cc4b7ecd 100644 --- a/src/calibre/library/catalog.py +++ b/src/calibre/library/catalog.py @@ -1524,19 +1524,19 @@ class EPUB_MOBI(CatalogPlugin): this_title['formats'] = formats # Add user notes to be displayed in header - # Special case handling for datetime fields + # Special case handling for datetime fields and lists if self.opts.header_note_source_field: field_md = self.__db.metadata_for_field(self.opts.header_note_source_field) notes = self.__db.get_field(record['id'], self.opts.header_note_source_field, index_is_id=True) - if notes and field_md['datatype'] == 'datetime': - # Reformat date fields to match UI presentation: dd MMM YYYY - notes = format_date(notes,'dd MMM yyyy') - if notes: + if field_md['datatype'] == 'text' and isinstance(notes,list): + notes = ' · '.join(notes) + elif field_md['datatype'] == 'datetime': + notes = format_date(notes,'dd MMM yyyy') this_title['notes'] = {'source':field_md['name'], - 'content':notes} + 'content':notes} titles.append(this_title)