From a3bb2c06fb2d4f1b1da2214295f59334d621e092 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 30 Aug 2010 18:59:15 -0600 Subject: [PATCH] Updated La Jornada --- resources/images/news/la_jornada.png | Bin 0 -> 943 bytes resources/recipes/clarin.recipe | 2 +- resources/recipes/europasur.recipe | 2 +- resources/recipes/la_jornada.recipe | 160 +++++++++------------------ src/calibre/manual/gui.rst | 2 +- 5 files changed, 55 insertions(+), 111 deletions(-) create mode 100644 resources/images/news/la_jornada.png diff --git a/resources/images/news/la_jornada.png b/resources/images/news/la_jornada.png new file mode 100644 index 0000000000000000000000000000000000000000..718731a380702fbcf3de68792f80a1bf94f6ebc8 GIT binary patch literal 943 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b zK-vS0-A-oPfdtD69Mgd`SU*F|v9*U87?^H&x;TbdoNk>Qn=`po;%NQ<`siI{hqsAD z%4%;|X>g2da$({jwwMLSHaLd##7XsB+2{1elqZ zbLN~1cTdP(y`gZUOzprY=0)g)C z<@@?x?z``?=dawt^H!S6)eVmw)8vUe?zN1efaQhtA(o!>v4YjW3igsrjp zzCl&>mCjZ`_@^P8ilxy5? zCM5|nXwN(($;Ft^bH`D7@nVVXd5?Km3fPz|R%um=O_}$5udQ{r@s7>0p;xY-O!@A~ zv_4GWWcSnD#uQN@;^ML&fdFhTk`YI9f6CI zYK4=gZ&bVV;KCJIyCZv4!uEVoT7O6Cm^7d9p1;01x0e;@dR<*6W+OdY^Qdsd_jirg zuE<*Uub)qS`D)eGqW?bc_ZaC~B%BwK7o2pw@ATHT zuR_(YV?F2oaLhS8;s59Vtg82VUGIzQ_yH5VYKdz^NlIc#s#S7PDv)9@GB7mIHL%b% zHV82^u`)5RGBVUPFtai+xaAwT1Vuw`eoAIqC5i?!D-&}oBQuDGH|CWf4Gf;HelF{r G5}E)MpsW%A literal 0 HcmV?d00001 diff --git a/resources/recipes/clarin.recipe b/resources/recipes/clarin.recipe index 7bbb663d1d..cf9440ad55 100644 --- a/resources/recipes/clarin.recipe +++ b/resources/recipes/clarin.recipe @@ -18,7 +18,7 @@ class Clarin(BasicNewsRecipe): use_embedded_content = False no_stylesheets = True encoding = 'utf8' - language = 'es_AR' + language = 'es' publication_type = 'newspaper' INDEX = 'http://www.clarin.com' masthead_url = 'http://www.clarin.com/static/CLAClarin/images/logo-clarin-print.jpg' diff --git a/resources/recipes/europasur.recipe b/resources/recipes/europasur.recipe index 3179c83234..cda111e995 100644 --- a/resources/recipes/europasur.recipe +++ b/resources/recipes/europasur.recipe @@ -20,7 +20,7 @@ class Europasur(BasicNewsRecipe): delay = 2 no_stylesheets = True encoding = 'cp1252' - language = 'es_ES' + language = 'es' publication_type = 'newspaper' extra_css = """ body{font-family: Verdana,Arial,Helvetica,sans-serif} h2{font-family: Georgia,Times New Roman,Times,serif} diff --git a/resources/recipes/la_jornada.recipe b/resources/recipes/la_jornada.recipe index edcd1ec9a7..2e1a3bb50d 100644 --- a/resources/recipes/la_jornada.recipe +++ b/resources/recipes/la_jornada.recipe @@ -1,120 +1,64 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' -__copyright__ = '2010, Rogelio Dominguez ' +__copyright__ = '2010, Darko Miletic ' ''' www.jornada.unam.mx ''' +from calibre import strftime from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup -import re +class LaJornada_mx(BasicNewsRecipe): + title = 'La Jornada (Mexico)' + __author__ = 'Darko Miletic' + description = 'Noticias del diario mexicano La Jornada' + publisher = 'DEMOS, Desarrollo de Medios, S.A. de C.V.' + category = 'news, Mexico' + oldest_article = 2 + max_articles_per_feed = 200 + no_stylesheets = True + encoding = 'utf8' + use_embedded_content = False + language = 'es' + remove_empty_feeds = True + cover_url = strftime("http://www.jornada.unam.mx/%Y/%m/%d/planitas/portadita.jpg") + masthead_url = 'http://www.jornada.unam.mx/v7.0/imagenes/la-jornada-trans.png' + extra_css = """ + body{font-family: "Times New Roman",serif } + .cabeza{font-size: xx-large; font-weight: bold } + .credito-articulo{font-size: 1.3em} + """ -class LaJornada(BasicNewsRecipe): - title = u'La Jornada' - language = 'es' - oldest_article = 1 - __author__ = 'rogeliodh' - max_articles_per_feed = 100 - remove_tags = [dict(name='div', attrs={'class':['go gui','go gui top','comment-cont',]})] - remove_tags_before = dict(id='article-cont') - remove_tags_after = dict(id='article-cont') - no_stylesheets = True - extra_css = ' .series{ \ - border-bottom: 1px solid #626366; \ - font-weight: bold; \ - } \ - .sumario{ \ - font-weight: bold; \ - margin-top: 2em; \ - text-align: center \ - } \ - p.sumario{ \ - text-align: center \ - } \ - .sumarios{font-weight: bold} \ - .cabeza{ font-size: 1.5em} \ - .pie-foto { \ - text-align: justify; \ - font-size: 0.8em; \ - text-align: justify; \ - } \ - .pie-foto .credito { \ - font-weight: bold; \ - display: block \ - } \ - .credito-autor{ \ - margin-top: 1.5em; \ - padding-left: 0.6em; \ - border-bottom: 1px solid #626366; \ - font-variant: small-caps; \ - font-weight: bold \ - } \ - .credito-articulo{ \ - margin-top: 1.5em; \ - padding-left: 0.6em; \ - border-bottom: 1px solid #626366; \ - font-variant: small-caps; \ - font-weight: bold \ - } \ - .credito-titulo{text-align: right} \ - .hemero { \ - text-align: right; \ - font-size: 0.9em; \ - margin-bottom: 8px; \ - } \ - .loc { \ - font-weight: bold; \ - } \ - .carton { \ - text-align: center; \ - } \ - .credit { \ - font-weight: bold; \ - } \ - ' + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + } - preprocess_regexps = [ - # Remove capitalized initial letter on some articles (editorial) - (re.compile(r'
(.*)

', re.DOTALL|re.IGNORECASE), - lambda match: match.group(1)), - # Cartons section uses a class instead of a div to identify the main content. Change it. - (re.compile(r'class="carton"', re.DOTALL|re.IGNORECASE), - lambda match: 'id="article-cont" class="carton"'), - # Remove as calibre has a bug (to report) - (re.compile(r'', re.DOTALL|re.IGNORECASE), - lambda match: ''), - ] - - INDEX = 'http://www.jornada.unam.mx/rss/edicion.xml' - feeds = [ - (u'Opinion','http://www.jornada.unam.mx/rss/opinion.xml'), - (u'Cartones','http://www.jornada.unam.mx/rss/cartones.xml'), - (u'Política','http://www.jornada.unam.mx/rss/politica.xml'), - (u'Economía','http://www.jornada.unam.mx/rss/economia.xml'), - (u'Mundo','http://www.jornada.unam.mx/rss/mundo.xml'), - (u'Estados','http://www.jornada.unam.mx/rss/estados.xml'), - (u'Capital','http://www.jornada.unam.mx/rss/capital.xml'), - (u'Sociedad','http://www.jornada.unam.mx/rss/sociedad.xml'), - (u'Ciencias','http://www.jornada.unam.mx/rss/ciencias.xml'), - (u'Cultura','http://www.jornada.unam.mx/rss/cultura.xml'), - (u'Gastronomia','http://www.jornada.unam.mx/rss/gastronomia.xml'), - (u'Espectáculos','http://www.jornada.unam.mx/rss/espectaculos.xml'), - (u'Deportes','http://www.jornada.unam.mx/rss/deportes.xml'), + keep_only_tags = [ + dict(name='div', attrs={'class':['documentContent','cabeza','sumarios','text']}) + ,dict(name='div', attrs={'id':'renderComments'}) ] + remove_tags = [dict(name='div', attrs={'class':'buttonbar'})] - def get_cover_url(self): - ''' - Cover URL is http://www.jornada.unam.mx/YYYY/MM/DD/portada.pdf - ''' - cover_url = None - soup = self.index_to_soup(self.INDEX) - soupstone = BeautifulStoneSoup(str(soup)) - urlbase = str(soupstone('link')[0]) - r= re.compile(r'.*http://www.jornada.unam.mx/([0-9]{4})/([0-9]{2})/([0-9]{2})', re.DOTALL|re.IGNORECASE) - m = r.match(urlbase) - if m: - cover_url = 'http://www.jornada.unam.mx/' + m.groups()[0] + '/' + m.groups()[1] + '/' + m.groups()[2] + '/portada.pdf' + feeds = [ + (u'Ultimas noticias' , u'http://www.jornada.unam.mx/ultimas/news/RSS' ) + ,(u'Opinion' , u'http://www.jornada.unam.mx/rss/opinion.xml' ) + ,(u'Politica' , u'http://www.jornada.unam.mx/rss/politica.xml' ) + ,(u'Economia' , u'http://www.jornada.unam.mx/rss/economia.xml' ) + ,(u'Mundo' , u'http://www.jornada.unam.mx/rss/mundo.xml' ) + ,(u'Estados' , u'http://www.jornada.unam.mx/rss/estados.xml' ) + ,(u'Capital' , u'http://www.jornada.unam.mx/rss/capital.xml' ) + ,(u'Sociedad y justicia' , u'http://www.jornada.unam.mx/rss/sociedad.xml' ) + ,(u'Ciencias' , u'http://www.jornada.unam.mx/rss/ciencias.xml' ) + ,(u'Cultura' , u'http://www.jornada.unam.mx/rss/cultura.xml' ) + ,(u'Gastronomia' , u'http://www.jornada.unam.mx/rss/gastronomia.xml' ) + ,(u'Espectaculos' , u'http://www.jornada.unam.mx/rss/espectaculos.xml' ) + ,(u'Deportes' , u'http://www.jornada.unam.mx/rss/deportes.xml' ) + ] + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup - return cover_url diff --git a/src/calibre/manual/gui.rst b/src/calibre/manual/gui.rst index 6016b072de..e9573e91be 100644 --- a/src/calibre/manual/gui.rst +++ b/src/calibre/manual/gui.rst @@ -166,7 +166,7 @@ Search & Sort The Search & Sort section allows you to perform several powerful actions on your book collections. - * You can sort them by title, author, date, rating etc. by clicking on the column titles. + * You can sort them by title, author, date, rating etc. by clicking on the column titles. You can also sub-sort (i.e. sort on multiple columns). For example, if you click on the title column and then the author column, the book will be sorted by author and then all the entries for the same author will be sorted by title. * You can search for a particular book or set of books using the search bar. More on that below.