diff --git a/resources/images/news/la_jornada.png b/resources/images/news/la_jornada.png new file mode 100644 index 0000000000..718731a380 Binary files /dev/null and b/resources/images/news/la_jornada.png differ diff --git a/resources/recipes/clarin.recipe b/resources/recipes/clarin.recipe index 7bbb663d1d..cf9440ad55 100644 --- a/resources/recipes/clarin.recipe +++ b/resources/recipes/clarin.recipe @@ -18,7 +18,7 @@ class Clarin(BasicNewsRecipe): use_embedded_content = False no_stylesheets = True encoding = 'utf8' - language = 'es_AR' + language = 'es' publication_type = 'newspaper' INDEX = 'http://www.clarin.com' masthead_url = 'http://www.clarin.com/static/CLAClarin/images/logo-clarin-print.jpg' diff --git a/resources/recipes/europasur.recipe b/resources/recipes/europasur.recipe index 3179c83234..cda111e995 100644 --- a/resources/recipes/europasur.recipe +++ b/resources/recipes/europasur.recipe @@ -20,7 +20,7 @@ class Europasur(BasicNewsRecipe): delay = 2 no_stylesheets = True encoding = 'cp1252' - language = 'es_ES' + language = 'es' publication_type = 'newspaper' extra_css = """ body{font-family: Verdana,Arial,Helvetica,sans-serif} h2{font-family: Georgia,Times New Roman,Times,serif} diff --git a/resources/recipes/la_jornada.recipe b/resources/recipes/la_jornada.recipe index edcd1ec9a7..2e1a3bb50d 100644 --- a/resources/recipes/la_jornada.recipe +++ b/resources/recipes/la_jornada.recipe @@ -1,120 +1,64 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' -__copyright__ = '2010, Rogelio Dominguez ' +__copyright__ = '2010, Darko Miletic ' ''' www.jornada.unam.mx ''' +from calibre import strftime from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup -import re +class LaJornada_mx(BasicNewsRecipe): + title = 'La Jornada (Mexico)' + __author__ = 'Darko Miletic' + description = 'Noticias del diario mexicano La Jornada' + publisher = 'DEMOS, Desarrollo de Medios, S.A. de C.V.' 
+ category = 'news, Mexico' + oldest_article = 2 + max_articles_per_feed = 200 + no_stylesheets = True + encoding = 'utf8' + use_embedded_content = False + language = 'es' + remove_empty_feeds = True + cover_url = strftime("http://www.jornada.unam.mx/%Y/%m/%d/planitas/portadita.jpg") + masthead_url = 'http://www.jornada.unam.mx/v7.0/imagenes/la-jornada-trans.png' + extra_css = """ + body{font-family: "Times New Roman",serif } + .cabeza{font-size: xx-large; font-weight: bold } + .credito-articulo{font-size: 1.3em} + """ -class LaJornada(BasicNewsRecipe): - title = u'La Jornada' - language = 'es' - oldest_article = 1 - __author__ = 'rogeliodh' - max_articles_per_feed = 100 - remove_tags = [dict(name='div', attrs={'class':['go gui','go gui top','comment-cont',]})] - remove_tags_before = dict(id='article-cont') - remove_tags_after = dict(id='article-cont') - no_stylesheets = True - extra_css = ' .series{ \ - border-bottom: 1px solid #626366; \ - font-weight: bold; \ - } \ - .sumario{ \ - font-weight: bold; \ - margin-top: 2em; \ - text-align: center \ - } \ - p.sumario{ \ - text-align: center \ - } \ - .sumarios{font-weight: bold} \ - .cabeza{ font-size: 1.5em} \ - .pie-foto { \ - text-align: justify; \ - font-size: 0.8em; \ - text-align: justify; \ - } \ - .pie-foto .credito { \ - font-weight: bold; \ - display: block \ - } \ - .credito-autor{ \ - margin-top: 1.5em; \ - padding-left: 0.6em; \ - border-bottom: 1px solid #626366; \ - font-variant: small-caps; \ - font-weight: bold \ - } \ - .credito-articulo{ \ - margin-top: 1.5em; \ - padding-left: 0.6em; \ - border-bottom: 1px solid #626366; \ - font-variant: small-caps; \ - font-weight: bold \ - } \ - .credito-titulo{text-align: right} \ - .hemero { \ - text-align: right; \ - font-size: 0.9em; \ - margin-bottom: 8px; \ - } \ - .loc { \ - font-weight: bold; \ - } \ - .carton { \ - text-align: center; \ - } \ - .credit { \ - font-weight: bold; \ - } \ - ' + conversion_options = { + 'comment' : description + , 'tags' : 
category + , 'publisher' : publisher + , 'language' : language + } - preprocess_regexps = [ - # Remove capitalized initial letter on some articles (editorial) - (re.compile(r'
(.*)

', re.DOTALL|re.IGNORECASE), - lambda match: match.group(1)), - # Cartons section uses a class instead of a div to identify the main content. Change it. - (re.compile(r'class="carton"', re.DOTALL|re.IGNORECASE), - lambda match: 'id="article-cont" class="carton"'), - # Remove as calibre has a bug (to report) - (re.compile(r'', re.DOTALL|re.IGNORECASE), - lambda match: ''), - ] - - INDEX = 'http://www.jornada.unam.mx/rss/edicion.xml' - feeds = [ - (u'Opinion','http://www.jornada.unam.mx/rss/opinion.xml'), - (u'Cartones','http://www.jornada.unam.mx/rss/cartones.xml'), - (u'Política','http://www.jornada.unam.mx/rss/politica.xml'), - (u'Economía','http://www.jornada.unam.mx/rss/economia.xml'), - (u'Mundo','http://www.jornada.unam.mx/rss/mundo.xml'), - (u'Estados','http://www.jornada.unam.mx/rss/estados.xml'), - (u'Capital','http://www.jornada.unam.mx/rss/capital.xml'), - (u'Sociedad','http://www.jornada.unam.mx/rss/sociedad.xml'), - (u'Ciencias','http://www.jornada.unam.mx/rss/ciencias.xml'), - (u'Cultura','http://www.jornada.unam.mx/rss/cultura.xml'), - (u'Gastronomia','http://www.jornada.unam.mx/rss/gastronomia.xml'), - (u'Espectáculos','http://www.jornada.unam.mx/rss/espectaculos.xml'), - (u'Deportes','http://www.jornada.unam.mx/rss/deportes.xml'), + keep_only_tags = [ + dict(name='div', attrs={'class':['documentContent','cabeza','sumarios','text']}) + ,dict(name='div', attrs={'id':'renderComments'}) ] + remove_tags = [dict(name='div', attrs={'class':'buttonbar'})] - def get_cover_url(self): - ''' - Cover URL is http://www.jornada.unam.mx/YYYY/MM/DD/portada.pdf - ''' - cover_url = None - soup = self.index_to_soup(self.INDEX) - soupstone = BeautifulStoneSoup(str(soup)) - urlbase = str(soupstone('link')[0]) - r= re.compile(r'.*http://www.jornada.unam.mx/([0-9]{4})/([0-9]{2})/([0-9]{2})', re.DOTALL|re.IGNORECASE) - m = r.match(urlbase) - if m: - cover_url = 'http://www.jornada.unam.mx/' + m.groups()[0] + '/' + m.groups()[1] + '/' + m.groups()[2] + '/portada.pdf' + feeds 
= [ + (u'Ultimas noticias' , u'http://www.jornada.unam.mx/ultimas/news/RSS' ) + ,(u'Opinion' , u'http://www.jornada.unam.mx/rss/opinion.xml' ) + ,(u'Politica' , u'http://www.jornada.unam.mx/rss/politica.xml' ) + ,(u'Economia' , u'http://www.jornada.unam.mx/rss/economia.xml' ) + ,(u'Mundo' , u'http://www.jornada.unam.mx/rss/mundo.xml' ) + ,(u'Estados' , u'http://www.jornada.unam.mx/rss/estados.xml' ) + ,(u'Capital' , u'http://www.jornada.unam.mx/rss/capital.xml' ) + ,(u'Sociedad y justicia' , u'http://www.jornada.unam.mx/rss/sociedad.xml' ) + ,(u'Ciencias' , u'http://www.jornada.unam.mx/rss/ciencias.xml' ) + ,(u'Cultura' , u'http://www.jornada.unam.mx/rss/cultura.xml' ) + ,(u'Gastronomia' , u'http://www.jornada.unam.mx/rss/gastronomia.xml' ) + ,(u'Espectaculos' , u'http://www.jornada.unam.mx/rss/espectaculos.xml' ) + ,(u'Deportes' , u'http://www.jornada.unam.mx/rss/deportes.xml' ) + ] + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup - return cover_url diff --git a/src/calibre/manual/gui.rst b/src/calibre/manual/gui.rst index 6016b072de..e9573e91be 100644 --- a/src/calibre/manual/gui.rst +++ b/src/calibre/manual/gui.rst @@ -166,7 +166,7 @@ Search & Sort The Search & Sort section allows you to perform several powerful actions on your book collections. - * You can sort them by title, author, date, rating etc. by clicking on the column titles. + * You can sort them by title, author, date, rating etc. by clicking on the column titles. You can also sub-sort (i.e. sort on multiple columns). For example, if you click on the title column and then the author column, the books will be sorted by author and then all the entries for the same author will be sorted by title. * You can search for a particular book or set of books using the search bar. More on that below.