From a3bb2c06fb2d4f1b1da2214295f59334d621e092 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 30 Aug 2010 18:59:15 -0600
Subject: [PATCH] Updated La Jornada

---
 resources/images/news/la_jornada.png | Bin 0 -> 943 bytes
 resources/recipes/clarin.recipe      |   2 +-
 resources/recipes/europasur.recipe   |   2 +-
 resources/recipes/la_jornada.recipe  | 160 +++++++++------------------
 src/calibre/manual/gui.rst           |   2 +-
 5 files changed, 55 insertions(+), 111 deletions(-)
 create mode 100644 resources/images/news/la_jornada.png

diff --git a/resources/images/news/la_jornada.png b/resources/images/news/la_jornada.png
new file mode 100644
index 0000000000000000000000000000000000000000..718731a380702fbcf3de68792f80a1bf94f6ebc8
GIT binary patch
literal 943
zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b
zK-vS0-A-oPfdtD69Mgd`SU*F|v9*U87?^H&x;TbdoNk>Qn=`po;%NQ<`siI{hqsAD
z%4%;|X>g2da$({jwwMLSHaLd##7Xs<tIX1w$r&Rd+;NOW&p<-BN%6=L>B+2{1elqZ
zbLN~1cTdP(y`gZU<Lg})&ux2sd++<($Gbm1lxdl4Q~tcN=+m6yDIFh7QqIkr-1%7j
zgWIf#O{<$vc}$LS5Sm!|`TW%tcm6($*1WyVMOpV!|L==DdoSu4D=+>OzprY=0)g)C
z<@@?x?z``?=dawt^H!S6)eVmw)8vUe?zN1efaQ<Lmy*ZXch@j{aaO%~?{mybzR5c+
z9E#sq&6}#d;^iwBO^x#+h3m6je$P96F2%ECk8|OdK8AkLC+*3fzF&MF`SogDUS2Kl
z`CDv9HqEfR-)?Kl+ED%V^o)}tPh<G%kMl`Y@0zNm8eH`Fs@as&M?anaK5@T6L-Ebe
zSISC*I{1t}d8+p<bxixWqi2TNyJK6YBozj9^(q&BSro7}>htA(o!>v4YjW3igsrjp
zz<RDBeyXpq^1KBF3+H!v2s4zaHGiveXnCVw`{Gc5&$0b>Cl&>mCjZ`_@^P8ilxy5?
zCM5|nXwN(($;Ft^bH`D7@nVVXd5?Km3fPz|R%um=O_}$5udQ{r@s7>0p;xY-O!@A~
zv_4<GzWBvS@2U4JrJEhsu={^q!?F16Nl(u=x6Xc<8F%K5$;@EeJNDJ@3T?J7%epOg
zX5;Q+_CI!Se8d`#uALqD?ai7QFHMdfiGR}GR{3x-e}`F)!jFG>GWWcSnD<Y9E$`69
zwyr=oNGs8I$w#yN1(hErUApx&=-n)-P@g|1Bd*6C>#uQN@;^ML&fdFhTk`YI9f6CI
zYK4=gZ&bVV;KCJIyCZv4!uEVoT7O6Cm^7d9p1;01x0e;@dR<*6W+OdY^Qdsd_jirg
zuE<*U<Y&*WN}9RM@4wFW?=@Osdg|d>ub)qS`D)eGqW?bc_ZaC~B%BwK7o2pw@ATHT
zuR_(YV?F2oaLhS8;s59Vtg82VUGIzQ_yH5VYKdz^NlIc#s#S7PDv)9@GB7mIHL%b%
zHV82^u`)5RGBVUPFtai+xaAwT1Vuw`eoAIqC5i?!D-&}oBQuDGH|CWf4Gf;HelF{r
G5}E)MpsW%A

literal 0
HcmV?d00001

diff --git a/resources/recipes/clarin.recipe b/resources/recipes/clarin.recipe
index 7bbb663d1d..cf9440ad55 100644
--- a/resources/recipes/clarin.recipe
+++ b/resources/recipes/clarin.recipe
@@ -18,7 +18,7 @@ class Clarin(BasicNewsRecipe):
     use_embedded_content  = False
     no_stylesheets        = True
     encoding              = 'utf8'
-    language              = 'es_AR'
+    language              = 'es'
     publication_type      = 'newspaper'
     INDEX                 = 'http://www.clarin.com'
     masthead_url          = 'http://www.clarin.com/static/CLAClarin/images/logo-clarin-print.jpg'
diff --git a/resources/recipes/europasur.recipe b/resources/recipes/europasur.recipe
index 3179c83234..cda111e995 100644
--- a/resources/recipes/europasur.recipe
+++ b/resources/recipes/europasur.recipe
@@ -20,7 +20,7 @@ class Europasur(BasicNewsRecipe):
     delay                 = 2
     no_stylesheets        = True
     encoding              = 'cp1252'
-    language              = 'es_ES'
+    language              = 'es'
     publication_type      = 'newspaper'
     extra_css             = """ body{font-family: Verdana,Arial,Helvetica,sans-serif}
                                 h2{font-family: Georgia,Times New Roman,Times,serif}
diff --git a/resources/recipes/la_jornada.recipe b/resources/recipes/la_jornada.recipe
index edcd1ec9a7..2e1a3bb50d 100644
--- a/resources/recipes/la_jornada.recipe
+++ b/resources/recipes/la_jornada.recipe
@@ -1,120 +1,64 @@
-#!/usr/bin/env  python
-
 __license__   = 'GPL v3'
-__copyright__ = '2010, Rogelio Dominguez <rogelio.dominguez at gmail.com>'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 www.jornada.unam.mx
 '''
 
+from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
 
-import re
+class LaJornada_mx(BasicNewsRecipe):
+    title                 = 'La Jornada (Mexico)'
+    __author__            = 'Darko Miletic'
+    description           = 'Noticias del diario mexicano La Jornada'
+    publisher             = 'DEMOS, Desarrollo de Medios, S.A. de C.V.'
+    category              = 'news, Mexico'
+    oldest_article        = 2
+    max_articles_per_feed = 200
+    no_stylesheets        = True
+    encoding              = 'utf8'
+    use_embedded_content  = False
+    language              = 'es'
+    remove_empty_feeds    = True
+    cover_url             = strftime("http://www.jornada.unam.mx/%Y/%m/%d/planitas/portadita.jpg")
+    masthead_url          = 'http://www.jornada.unam.mx/v7.0/imagenes/la-jornada-trans.png'
+    extra_css             = """
+                                body{font-family: "Times New Roman",serif }
+                                .cabeza{font-size: xx-large; font-weight: bold }
+                                .credito-articulo{font-size: 1.3em}
+                            """
 
-class LaJornada(BasicNewsRecipe):
-    title          = u'La Jornada'
-    language       = 'es'
-    oldest_article = 1
-    __author__ = 'rogeliodh'
-    max_articles_per_feed = 100
-    remove_tags    = [dict(name='div', attrs={'class':['go gui','go gui top','comment-cont',]})]
-    remove_tags_before = dict(id='article-cont')
-    remove_tags_after = dict(id='article-cont')
-    no_stylesheets = True
-    extra_css      = ' .series{ \
-                               border-bottom: 1px solid #626366; \
-                               font-weight: bold; \
-                               } \
-                       .sumario{ \
-                               font-weight: bold; \
-                               margin-top: 2em; \
-                               text-align: center \
-                               } \
-                       p.sumario{ \
-                               text-align: center \
-                               } \
-                       .sumarios{font-weight: bold} \
-                       .cabeza{	font-size: 1.5em} \
-                       .pie-foto { \
-                               text-align: justify; \
-                               font-size: 0.8em; \
-                               text-align: justify; \
-                               } \
-                        .pie-foto .credito { \
-                               font-weight: bold; \
-                               display: block \
-                               } \
-                       .credito-autor{ \
-                               margin-top: 1.5em; \
-                               padding-left: 0.6em; \
-                               border-bottom: 1px solid #626366; \
-                               font-variant: small-caps; \
-                               font-weight: bold \
-                               } \
-                       .credito-articulo{ \
-                               margin-top: 1.5em; \
-                               padding-left: 0.6em; \
-                               border-bottom: 1px solid #626366; \
-                               font-variant: small-caps; \
-                               font-weight: bold \
-                               } \
-                       .credito-titulo{text-align: right} \
-                       .hemero { \
-                               text-align: right; \
-                               font-size: 0.9em; \
-                               margin-bottom: 8px; \
-                               } \
-                       .loc    { \
-                               font-weight: bold; \
-                               } \
-                       .carton  { \
-                               text-align: center; \
-                               } \
-                       .credit { \
-                               font-weight: bold; \
-                               } \
-                              '
+    conversion_options = {
+                          'comment'   : description
+                        , 'tags'      : category
+                        , 'publisher' : publisher
+                        , 'language'  : language
+                        }
 
-    preprocess_regexps = [
-                         # Remove capitalized initial letter on some articles (editorial)
-    		       	 (re.compile(r'<div class="inicial">(.*)</div><p class="s-s">', re.DOTALL|re.IGNORECASE),
-        		  lambda match: match.group(1)),
-                         # Cartons section uses a class instead of a div to identify the main content. Change it.
-    		       	 (re.compile(r'class="carton"', re.DOTALL|re.IGNORECASE),
-        		  lambda match: 'id="article-cont" class="carton"'),
-                         # Remove <link rel="alternate"> as calibre has a bug (to report)
-    		       	 (re.compile(r'<link rel="alternate".*?/>', re.DOTALL|re.IGNORECASE),
-        		  lambda match: ''),
-        		 ]
-
-    INDEX          = 'http://www.jornada.unam.mx/rss/edicion.xml'
-    feeds          = [
-                     (u'Opinion','http://www.jornada.unam.mx/rss/opinion.xml'),
-                     (u'Cartones','http://www.jornada.unam.mx/rss/cartones.xml'),
-                     (u'Política','http://www.jornada.unam.mx/rss/politica.xml'),
-                     (u'Economía','http://www.jornada.unam.mx/rss/economia.xml'),
-                     (u'Mundo','http://www.jornada.unam.mx/rss/mundo.xml'),
-                     (u'Estados','http://www.jornada.unam.mx/rss/estados.xml'),
-                     (u'Capital','http://www.jornada.unam.mx/rss/capital.xml'),
-                     (u'Sociedad','http://www.jornada.unam.mx/rss/sociedad.xml'),
-                     (u'Ciencias','http://www.jornada.unam.mx/rss/ciencias.xml'),
-                     (u'Cultura','http://www.jornada.unam.mx/rss/cultura.xml'),
-                     (u'Gastronomia','http://www.jornada.unam.mx/rss/gastronomia.xml'),
-                     (u'Espectáculos','http://www.jornada.unam.mx/rss/espectaculos.xml'),
-                     (u'Deportes','http://www.jornada.unam.mx/rss/deportes.xml'),
+    keep_only_tags = [
+                         dict(name='div', attrs={'class':['documentContent','cabeza','sumarios','text']})
+                        ,dict(name='div', attrs={'id':'renderComments'})
                      ]
+    remove_tags = [dict(name='div', attrs={'class':'buttonbar'})]
 
-    def get_cover_url(self):
-        '''
-        Cover URL is http://www.jornada.unam.mx/YYYY/MM/DD/portada.pdf
-        '''
-        cover_url = None
-        soup = self.index_to_soup(self.INDEX)
-        soupstone = BeautifulStoneSoup(str(soup))
-        urlbase = str(soupstone('link')[0])
-        r= re.compile(r'.*http://www.jornada.unam.mx/([0-9]{4})/([0-9]{2})/([0-9]{2})', re.DOTALL|re.IGNORECASE)
-        m = r.match(urlbase)
-        if m:
-              cover_url = 'http://www.jornada.unam.mx/' + m.groups()[0] + '/' + m.groups()[1] + '/' + m.groups()[2] + '/portada.pdf'
+    feeds = [
+              (u'Ultimas noticias'    , u'http://www.jornada.unam.mx/ultimas/news/RSS'     )
+             ,(u'Opinion'             , u'http://www.jornada.unam.mx/rss/opinion.xml'      )
+             ,(u'Politica'            , u'http://www.jornada.unam.mx/rss/politica.xml'     )
+             ,(u'Economia'            , u'http://www.jornada.unam.mx/rss/economia.xml'     )
+             ,(u'Mundo'               , u'http://www.jornada.unam.mx/rss/mundo.xml'        )
+             ,(u'Estados'             , u'http://www.jornada.unam.mx/rss/estados.xml'      )
+             ,(u'Capital'             , u'http://www.jornada.unam.mx/rss/capital.xml'      )
+             ,(u'Sociedad y justicia' , u'http://www.jornada.unam.mx/rss/sociedad.xml'     )
+             ,(u'Ciencias'            , u'http://www.jornada.unam.mx/rss/ciencias.xml'     )
+             ,(u'Cultura'             , u'http://www.jornada.unam.mx/rss/cultura.xml'      )
+             ,(u'Gastronomia'         , u'http://www.jornada.unam.mx/rss/gastronomia.xml'  )
+             ,(u'Espectaculos'        , u'http://www.jornada.unam.mx/rss/espectaculos.xml' )
+             ,(u'Deportes'            , u'http://www.jornada.unam.mx/rss/deportes.xml'     )
+            ]
+
+    def preprocess_html(self, soup):  # Strip inline style attributes from the parsed page; returns the same soup object.
+        for item in soup.findAll(style=True):  # style=True matches only tags that carry a style attribute
+            del item['style']  # drop the inline styling, keep the tag and its content
+        return soup
 
-        return cover_url
diff --git a/src/calibre/manual/gui.rst b/src/calibre/manual/gui.rst
index 6016b072de..e9573e91be 100644
--- a/src/calibre/manual/gui.rst
+++ b/src/calibre/manual/gui.rst
@@ -166,7 +166,7 @@ Search & Sort
 
 The Search & Sort section allows you to perform several powerful actions on your book collections.
 
-    * You can sort them by title, author, date, rating etc. by clicking on the column titles.
+    * You can sort them by title, author, date, rating etc. by clicking on the column titles. You can also sub-sort (i.e. sort on multiple columns). For example, if you click on the title column and then the author column, the books will be sorted by author, and all the entries for the same author will then be sorted by title.
                 
     * You can search for a particular book or set of books using the search bar. More on that below.