Merge from trunk

2025-08-30 23:00:21 -04:00 · 2011-10-02 08:09:01 +02:00 · 2011-10-02 08:09:01 +02:00 · 2e4533811a
commit 2e4533811a
parent c39fe2f531 fd483b05f9
199 changed files with 147109 additions and 143638 deletions
--- a/Changelog.yaml
+++ b/Changelog.yaml
@ -19,6 +19,97 @@
 #  new recipes:
 #    - title: 

+- version: 0.8.21
+  date: 2011-09-30
+
+  new features:
+    - title: "A Tips and Tricks blog at http://blog.calibre-ebook.com to introduce less well known calibre features in a simple way"
+
+    - title: "News download: Add list of articles in the downloaded issue to the comments metadata of the generated ebook. Makes it possible to search for a particular article in the calibre library."
+      tickets: [851717]
+
+    - title: "Toolbar buttons: You can now also right click the buttons to bring the popup of extra actions, in addition to clicking the small arrow next to the button." 
+
+    - title: "Amazon metadata download plugin: Add option to download metadata from amazon.es"
+
+    - title: Driver for Vizio and iRobot A9 Android tablets
+      tickets: [854408,862175] 
+
+    - title: "When switching to/starting with a library with a corrupted database, offer the user the option of rebuilding the database instead of erroring out."
+
+    - title: "Template language: Add list_equals function"
+
+    - title: "Add a special output profile for the PocketBook 900 as it does not resize images correctly by itself"
+  
+  bug fixes:
+    - title: "Fix regression that caused PDF Output to generate very large files"
+
+    - title: Fix Title Sort field not being displayed in Book details panel
+
+    - title: Prevent renaming of languages in the Tag browser
+      tickets: [860943]
+
+    - title: "Get books: Fix getting price from Foyles"
+
+    - title: "Content server: When a search matches no queries, do not show an error message"
+
+    - title: "ODT Input: Add workaround for ADE to fix centering of block level images when converting to EPUB"
+      tickets: [859343]
+
+    - title: "Content server: When WSGI embedding fix handling of empty URL"
+
+    - title: "RTF Input: Fix spurious spaces inserted after some unicode characters"
+      tickets: [851215]
+
+    - title: "Fix regression that broke clicking on the first letter of author names in the Tag Browser when grouped"
+      tickets: [860615]
+
+    - title: "Fix reading metadata from filenames when the author regexp does not match anything"
+
+    - title: "Fix incorrect display of the month September in Finnish calibre"
+      tickets: [858737]
+
+    - title: "Do not delete the file when the user tries to add a format to a book from a file already in the books directory"
+      tickets: [856158]
+
+    - title: "Fix regression that broke customization of Kobo device plugin"
+
+    - title: "Allow user defined templates to be used in save to disk"
+
+  improved recipes:
+    - Read It Later
+    - American Spectator
+    - Sydney Morning Herald
+    - Chicago Tribune
+    - American Prospect
+    - DNA India
+    - Times of India
+    - Kurier
+    - xkcd
+    - Cnet
+
+  new recipes:
+    - title: Various Colombian news sources
+      author: BIGO-CAVA
+
+    - title: Gosc Niedzielny
+      author: Piotr Kontek
+
+    - title: Leipziger Volkszeitung
+      author: a.peter
+
+    - title: Folha de Sao Paulo (full edition)
+      author: fluzao
+
+    - title: Den of Geek
+      author: Jaded
+
+    - title: Republica
+      author: Manish Bhattarai
+
+    - title: Sign on San Diego
+      author: Jay Kindle
+
 - version: 0.8.20
  date: 2011-09-23

--- a/recipes/amspec.recipe
+++ b/recipes/amspec.recipe
@ -18,25 +18,16 @@ class TheAmericanSpectator(BasicNewsRecipe):
    use_embedded_content  = False
    language              = 'en'
    INDEX                 = 'http://spectator.org'
-      
-    conversion_options = {  
+    auto_cleanup = True
+    encoding = 'utf-8'
+
+    conversion_options = {
                             'comments'        : description
                            ,'tags'            : category
                            ,'language'        : language
                            ,'publisher'       : publisher
                         }

-    keep_only_tags   = [
-                             dict(name='div', attrs={'class':'post inner'})
-                            ,dict(name='div', attrs={'class':'author-bio'})
-                         ]
-
-    remove_tags     = [
-                             dict(name='object')
-                            ,dict(name='div', attrs={'class':['col3','post-options','social']})
-                            ,dict(name='p'  , attrs={'class':['letter-editor','meta']})
-                        ]
-                         
    feeds = [ (u'Articles', u'http://feeds.feedburner.com/amspecarticles')]

    def get_cover_url(self):
@ -48,10 +39,10 @@ class TheAmericanSpectator(BasicNewsRecipe):
            link_item2 = soup2.find('div',attrs={'class':'post inner issues'})
            cover_url = self.INDEX + link_item2.img['src']
        return cover_url
-          
+
    def print_version(self, url):
        return url + '/print'
-        
+
    def get_article_url(self, article):
        return article.get('guid', None)
-        
+
--- a/recipes/aprospect.recipe
+++ b/recipes/aprospect.recipe
@ -1,26 +1,18 @@
-import re
 from calibre.web.feeds.news import BasicNewsRecipe

 class AmericanProspect(BasicNewsRecipe):
    title          = u'American Prospect'
-    __author__     = u'Michael Heinz'
-    oldest_article = 30
-    language = 'en'
-    max_articles_per_feed = 100
-    recursions = 0
-    no_stylesheets = True
-    remove_javascript = True
+    __author__     = u'Michael Heinz, a.peter'
+    version        = 2

-    preprocess_regexps = [
-        (re.compile(r'<body.*?<div class="pad_10L10R">', re.DOTALL|re.IGNORECASE), lambda match: '<body><div>'),
-        (re.compile(r'</div>.*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</div></body>'),
-        (re.compile('\r'),lambda match: ''),
-        (re.compile(r'<!-- .+? -->', re.DOTALL|re.IGNORECASE), lambda match: ''),
-        (re.compile(r'<link .+?>', re.DOTALL|re.IGNORECASE), lambda match: ''),
-        (re.compile(r'<script.*?</script>', re.DOTALL|re.IGNORECASE), lambda match: ''),
-        (re.compile(r'<noscript.*?</noscript>', re.DOTALL|re.IGNORECASE), lambda match: ''),
-        (re.compile(r'<meta .*?/>', re.DOTALL|re.IGNORECASE), lambda match: ''),
-    ]
+    oldest_article        = 30
+    language              = 'en'
+    max_articles_per_feed = 100
+    recursions            = 0
+    no_stylesheets        = True
+    remove_javascript     = True
+
+    keep_only_tags = [dict(name='div', attrs={'class':'pad_10L10R'})]
+    remove_tags    = [dict(name='form'), dict(name='div', attrs={'class':['bkt_caption','sharebox noprint','badgebox']})]

    feeds       = [(u'Articles', u'feed://www.prospect.org/articles_rss.jsp')]
-
--- a/recipes/chicago_tribune.recipe
+++ b/recipes/chicago_tribune.recipe
@ -8,21 +8,25 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class ChicagoTribune(BasicNewsRecipe):

    title       = 'Chicago Tribune'
-    __author__  = 'Kovid Goyal and Sujata Raman'
+    __author__  = 'Kovid Goyal and Sujata Raman, a.peter'
    description = 'Politics, local and business news from Chicago'
-    language = 'en'
+    language    = 'en'
+    version     = 2

-    use_embedded_content    = False
-    no_stylesheets        = True
-    remove_javascript = True
+    use_embedded_content = False
+    no_stylesheets       = True
+    remove_javascript    = True
+    recursions           = 1

    keep_only_tags = [dict(name='div', attrs={'class':["story","entry-asset asset hentry"]}),
                      dict(name='div', attrs={'id':["pagebody","story","maincontentcontainer"]}),
                           ]
-    remove_tags_after = [    {'class':['photo_article',]} ]
+    remove_tags_after = [{'class':['photo_article',]}]

-    remove_tags = [{'id':["moduleArticleTools","content-bottom","rail","articleRelates module","toolSet","relatedrailcontent","div-wrapper","beta","atp-comments","footer"]},
-                   {'class':["clearfix","relatedTitle","articleRelates module","asset-footer","tools","comments","featurePromo","featurePromo fp-topjobs brownBackground","clearfix fullSpan brownBackground","curvedContent"]},
+    match_regexps = [r'page=[0-9]+']
+
+    remove_tags = [{'id':["moduleArticleTools","content-bottom","rail","articleRelates module","toolSet","relatedrailcontent","div-wrapper","beta","atp-comments","footer",'gallery-subcontent','subFooter']},
+                   {'class':["clearfix","relatedTitle","articleRelates module","asset-footer","tools","comments","featurePromo","featurePromo fp-topjobs brownBackground","clearfix fullSpan brownBackground","curvedContent",'nextgen-share-tools','outbrainTools', 'google-ad-story-bottom']},
                   dict(name='font',attrs={'id':["cr-other-headlines"]})]
    extra_css = '''
                    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
@ -37,7 +41,7 @@ class ChicagoTribune(BasicNewsRecipe):
                    .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
                    .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
                    body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
-		'''
+                '''
    feeds = [
             ('Latest news', 'http://feeds.chicagotribune.com/chicagotribune/news/'),
             ('Local news', 'http://feeds.chicagotribune.com/chicagotribune/news/local/'),
@ -76,8 +80,12 @@ class ChicagoTribune(BasicNewsRecipe):
        print article.get('feedburner_origlink', article.get('guid', article.get('link')))
        return article.get('feedburner_origlink', article.get('guid', article.get('link')))

-
    def postprocess_html(self, soup, first_fetch):
+        # Remove the navigation bar. It was kept until now to be able to follow
+        # the links to further pages. But now we don't need them anymore.
+        for nav in soup.findAll(attrs={'class':['toppaginate','article-nav clearfix']}):
+            nav.extract()
+
        for t in soup.findAll(['table', 'tr', 'td']):
            t.name = 'div'

@ -88,4 +96,3 @@ class ChicagoTribune(BasicNewsRecipe):

        return soup

-
--- a/recipes/diario_la_republica.recipe
+++ b/recipes/diario_la_republica.recipe
@ -0,0 +1,11 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1317341449(BasicNewsRecipe):
+    # Recipe built on a single RSS feed of Diario La Republica.
+    title                 = u'Diario La Republica'
+    __author__            = 'CAVALENCIA'
+    language              = 'es_CO'
+
+    # No hand-written tag filters are declared; auto_cleanup is enabled instead.
+    auto_cleanup          = True
+
+    # Feed limits.
+    max_articles_per_feed = 100
+    oldest_article        = 7
+
+    feeds = [
+        (u'Diario La Republica',
+         u'http://www.larepublica.com.co/rss/larepublica.xml'),
+    ]
--- a/recipes/el_colombiano.recipe
+++ b/recipes/el_colombiano.recipe
@ -2,12 +2,10 @@

 from calibre.web.feeds.news import BasicNewsRecipe

-
-
 class AdvancedUserRecipe1311790237(BasicNewsRecipe):
    title          = u'Periódico El Colombiano'
-    language = 'es_CO'
    __author__  = 'BIGO-CAVA'
+    language = 'es_CO'
    cover_url     = 'http://www.elcolombiano.com/images/logoElColombiano348x46.gif'
    remove_tags_before = dict(id='contenidoArt')
    remove_tags_after  = dict(id='enviaTips')
--- a/recipes/el_espectador.recipe
+++ b/recipes/el_espectador.recipe
@ -0,0 +1,54 @@
+# coding=utf-8
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class ColombiaElEspectador(BasicNewsRecipe):
+    # Recipe for the per-section RSS feeds of elespectador.com.
+    title                 = u'Periódico el Espectador'
+    __author__            = 'BIGO-CAVA'
+    language              = 'es_CO'
+    publication_type      = 'newspaper'
+
+    # The site logo is used both as cover and as masthead.
+    cover_url             = 'http://www.elespectador.com/sites/elespectador.com/themes/elespectador/images/logo.gif'
+    masthead_url          = 'http://www.elespectador.com/sites/elespectador.com/themes/elespectador/images/logo.gif'
+
+    # Keep what lies between the element with id "content" and the
+    # "paginacion" div, then drop the listed divs from what remains.
+    remove_tags_before    = dict(id='content')
+    remove_tags_after     = [dict(name='div', attrs={'class':'paginacion'})]
+    remove_tags           = [dict(name='div', attrs={'class':'herramientas_nota'}),
+                             dict(name='div', attrs={'class':'relpauta'}),
+                             dict(name='div', attrs={'class':'recursosrelacionados'}),
+                             dict(name='div', attrs={'class':'nav_negocios'})]
+
+    oldest_article        = 2
+    max_articles_per_feed = 100
+    remove_javascript     = True
+    no_stylesheets        = True
+    use_embedded_content  = False
+    remove_empty_feeds    = True
+
+    extra_css             = """
+                               p{text-align: justify; font-size: 100%}
+                               body{ text-align: left; font-size:100% }
+                               h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
+                               h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; }
+                                 """
+
+    # (section title, feed URL) pairs. Titles and URLs carry no surrounding
+    # whitespace: the first entry used to have a URL starting with a space
+    # and a title padded with trailing spaces.
+    feeds = [(u'Política', u'http://www.elespectador.com/noticias/politica/feed'),
+             (u'Judicial', u'http://www.elespectador.com/noticias/judicial/feed'),
+             (u'Paz', u'http://www.elespectador.com/noticias/paz/feed'),
+             (u'Economía', u'http://www.elespectador.com/economia/feed'),
+             (u'Soy Periodista', u'http://www.elespectador.com/noticias/soyperiodista/feed'),
+             (u'Investigación', u'http://www.elespectador.com/noticias/investigacion/feed'),
+             (u'Educación', u'http://www.elespectador.com/noticias/educacion/feed'),
+             (u'Salud', u'http://www.elespectador.com/noticias/salud/feed'),
+             (u'El Mundo', u'http://www.elespectador.com/noticias/elmundo/feed'),
+             (u'Nacional', u'http://www.elespectador.com/noticias/nacional/feed'),
+             (u'Bogotá', u'http://www.elespectador.com/noticias/bogota/feed'),
+             (u'Deportes', u'http://www.elespectador.com/deportes/feed'),
+             (u'Tecnología', u'http://www.elespectador.com/tecnologia/feed'),
+             (u'Actualidad', u'http://www.elespectador.com/noticias/actualidad/feed'),
+             (u'Opinión', u'http://www.elespectador.com/opinion/feed'),
+             (u'Editorial', u'http://www.elespectador.com/opinion/editorial/feed')]
--- a/recipes/el_mundo_co.recipe
+++ b/recipes/el_mundo_co.recipe
@ -0,0 +1,50 @@
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class ColombiaElMundo02(BasicNewsRecipe):
+    # Recipe for the per-section RSS feeds of elmundo.com.
+    title                 = u'Periódico El Mundo'
+    __author__            = 'BIGO-CAVA'
+    language              = 'es_CO'
+    publication_type      = 'newspaper'
+
+    # The site logo is used both as cover and as masthead.
+    cover_url             = 'http://www.elmundo.com/portal/img/logo_mundo2.png'
+    masthead_url          = 'http://www.elmundo.com/portal/img/logo_mundo2.png'
+
+    # Keep what lies between the element with id "miga_pan" and the
+    # "cuadro_opciones_new1" div, then drop the listed elements.
+    remove_tags_before    = dict(id='miga_pan')
+    remove_tags_after     = [dict(name='div', attrs={'class':'cuadro_opciones_new1'})]
+    remove_tags           = [dict(name='div', attrs={'class':'ruta'}),
+                             dict(name='div', attrs={'class':'buscador'}),
+                             dict(name='div', attrs={'class':'iconos'}),
+                             dict(name='div', attrs={'class':'otros_iconos'}),
+                             dict(name='div', attrs={'class':'cuadro_opciones_new1'}),
+                             dict(name='div', attrs={'class':'otras_noticias'}),
+                             dict(name='div', attrs={'class':'notas_relacionadas'}),
+                             dict(name='div', attrs={'id':'lateral_2'})]
+
+    oldest_article        = 2
+    max_articles_per_feed = 100
+    remove_javascript     = True
+    no_stylesheets        = True
+    use_embedded_content  = False
+    remove_empty_feeds    = True
+
+    extra_css             = """
+                               p{text-align: justify; font-size: 100%}
+                               body{ text-align: left; font-size:100% }
+                               h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
+                               h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; }
+                                 """
+
+    # (section title, feed URL) pairs. Titles carry no trailing padding, and
+    # the mobility section is spelled "Movilidad", as in its feed URL.
+    feeds = [(u'Opinión', u'http://www.elmundo.com/images/rss/opinion.xml'),
+             (u'Economía', u'http://www.elmundo.com/images/rss/noticias_economia.xml'),
+             (u'Deportes', u'http://www.elmundo.com/images/rss/deportes.xml'),
+             (u'Política', u'http://www.elmundo.com/images/rss/noticias_politica.xml'),
+             (u'Antioquia', u'http://www.elmundo.com/images/rss/noticias_antioquia.xml'),
+             (u'Nacional', u'http://www.elmundo.com/images/rss/noticias_nacional.xml'),
+             (u'Internacional', u'http://www.elmundo.com/images/rss/noticias_internacional.xml'),
+             (u'Servicios Públicos', u'http://www.elmundo.com/images/rss/noticias_servicios_publicos.xml'),
+             (u'Infraestructura', u'http://www.elmundo.com/images/rss/noticias_infraestructura.xml'),
+             (u'Movilidad', u'http://www.elmundo.com/images/rss/noticias_movilidad.xml'),
+             (u'Derechos Humanos', u'http://www.elmundo.com/images/rss/noticias_derechos_humanos.xml'),
+             (u'Vida', u'http://www.elmundo.com/images/rss/vida.xml'),
+             (u'Cultura', u'http://www.elmundo.com/images/rss/cultura.xml')]
--- a/recipes/el_tiempo.recipe
+++ b/recipes/el_tiempo.recipe
@ -2,18 +2,17 @@

 from calibre.web.feeds.news import BasicNewsRecipe

-
-
-
 class ColombiaElTiempo02(BasicNewsRecipe):
    title          = u'Periódico el Tiempo'
-    language = 'es_CO'
    __author__  = 'BIGO-CAVA'
+    language = 'es_CO'
    cover_url     = 'http://www.eltiempo.com/media/css/images/logo_footer.png'
-    remove_tags_before = dict(id='fb-root')
+    #remove_tags_before = dict(id='fb-root')
+    remove_tags_before = dict(id='contenidoArt')
    remove_tags_after  = [dict(name='div', attrs={'class':'modulo reporte'})]
    keep_only_tags = [dict(name='div', id='contenidoArt')]
    remove_tags        = [dict(name='div', attrs={'class':'social-media'}),
+          dict(name='div', attrs={'class':'recomend-art'}),
                          dict(name='div', attrs={'class':'caja-facebook'}),
                          dict(name='div', attrs={'class':'caja-twitter'}),
                          dict(name='div', attrs={'class':'caja-buzz'}),
--- a/recipes/folhadesaopaulo_sub.recipe
+++ b/recipes/folhadesaopaulo_sub.recipe
@ -0,0 +1,96 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+import re
+
+class FSP(BasicNewsRecipe):
+    # Recipe for the printed edition of Folha de Sao Paulo. The list of
+    # sections and articles is scraped from the edition index page (INDEX)
+    # after logging in with the configured UOL username and password.
+
+    title      = u'Folha de S\xE3o Paulo'
+    __author__ = 'fluzao'
+    description = (u'Printed edition contents. UOL subscription required (Folha subscription currently not supported).' +
+                   u' [Conte\xfado completo da edi\xe7\xe3o impressa. Somente para assinantes UOL.]')
+    INDEX = 'http://www1.folha.uol.com.br/fsp/indices/'
+    language = 'pt'
+    no_stylesheets = True
+    max_articles_per_feed  = 40
+    remove_javascript     = True
+    needs_subscription = True
+    remove_tags_before = dict(name='b')
+    remove_tags  = [dict(name='td', attrs={'align':'center'})]
+    remove_attributes = ['height','width']
+    masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif'
+
+    # Maps the anchor names found on the index page to the section titles
+    # that should be shown instead (fixes the problem with the section names).
+    section_dict = {'cotidian' : 'cotidiano', 'ilustrad': 'ilustrada',
+                    'quadrin': 'quadrinhos' , 'opiniao' : u'opini\xE3o',
+                    'ciencia' : u'ci\xeancia' , 'saude' : u'sa\xfade',
+                    'ribeirao' : u'ribeir\xE3o' , 'equilibrio' : u'equil\xedbrio'}
+
+    # this solves the problem with truncated content in Kindle
+    conversion_options = {'linearize_tables' : True}
+
+    # These patterns remove the footer that holds the links for Proximo
+    # Texto, Texto Anterior, Indice and Comunicar Erros.
+    preprocess_regexps = [(re.compile(r'<BR><BR>Texto Anterior:.*<!--/NOTICIA-->',
+                                      re.DOTALL|re.IGNORECASE), lambda match: r''),
+                          (re.compile(r'<BR><BR>Pr&oacute;ximo Texto:.*<!--/NOTICIA-->',
+                                      re.DOTALL|re.IGNORECASE), lambda match: r'')]
+
+    def get_browser(self):
+        # Return a browser; when both username and password are set, submit
+        # them through the first form of the UOL login page first.
+        # NOTE(review): get_browser is called on the class without an
+        # instance, exactly as before - confirm this matches the signature
+        # of BasicNewsRecipe.get_browser in the targeted calibre version.
+        br = BasicNewsRecipe.get_browser()
+        if self.username is not None and self.password is not None:
+            br.open('https://acesso.uol.com.br/login.html')
+            br.form = next(br.forms())
+            br['user'] = self.username
+            br['pass'] = self.password
+            # NOTE(review): the response is read but not inspected, so a
+            # rejected login is not detected here.
+            br.submit().read()
+        return br
+
+    def parse_index(self):
+        # Build the list of (section title, articles) feeds from the index
+        # page and set self.cover_url as a side effect.
+        #
+        # Raises ValueError when the index page does not yield the leading
+        # section with at least two links plus at least one further section.
+        soup = self.index_to_soup(self.INDEX)
+        feeds = []
+        articles = []
+        section_title = "Preambulo"
+        for post in soup.findAll('a'):
+            strpost = str(post)
+            # an anchor carrying a name attribute starts a new section
+            if strpost.startswith('<a name'):
+                if articles:
+                    feeds.append((section_title, articles))
+                    self.log()
+                    self.log('--> new section found, creating old section feed: ', section_title)
+                section_title = post['name']
+                if section_title in self.section_dict:
+                    section_title = self.section_dict[section_title]
+                articles = []
+                self.log('--> new section title:   ', section_title)
+            if strpost.startswith('<a href'):
+                url = post['href']
+                # only links below /fsp are collected as articles
+                if url.startswith('/fsp'):
+                    url = 'http://www1.folha.uol.com.br'+url
+                    title = self.tag_to_string(post)
+                    self.log()
+                    self.log('--> post:  ', post)
+                    self.log('--> url:   ', url)
+                    self.log('--> title: ', title)
+                    articles.append({'title':title, 'url':url})
+        if articles:
+            feeds.append((section_title, articles))
+
+        # The steps below index into the first two feeds; fail with a clear
+        # message instead of a bare IndexError when they are not there.
+        if len(feeds) < 2 or len(feeds[0][1]) < 2:
+            raise ValueError('Could not find the expected sections in the index page: ' + self.INDEX)
+
+        # keeping the front page url (second link of the leading section)
+        minha_capa = feeds[0][1][1]['url']
+
+        # removing the leading ('Preambulo') section
+        del feeds[0]
+
+        # the cover image url is derived from the url of the first article
+        # of the first remaining section
+        coverurl = feeds[0][1][0]['url']
+        coverurl = coverurl.replace('/opiniao/fz', '/images/cp')
+        coverurl = coverurl.replace('01.htm', '.jpg')
+        self.cover_url = coverurl
+
+        # inserting the cover page as the first article (nicer for kindle users)
+        feeds.insert(0,(u'primeira p\xe1gina', [{'title':u'Primeira p\xe1gina' , 'url':minha_capa}]))
+        return feeds
--- a/recipes/gosc_niedzielny.recipe
+++ b/recipes/gosc_niedzielny.recipe
@ -0,0 +1,112 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__copyright__ = '2011, Piotr Kontek, piotr.kontek@gmail.com'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ptempfile import PersistentTemporaryFile
+import re
+
+class GN(BasicNewsRecipe):
+    # Recipe for the weekly magazine "Gosc niedzielny" (gosc.pl). Issue,
+    # title and cover are discovered at download time by find_last_issue();
+    # every article is rebuilt into a temporary HTML file by
+    # get_obfuscated_article().
+
+    # href of the issue to download; overwritten by find_last_issue()
+    EDITION = 0
+
+    __author__ = 'Piotr Kontek'
+    title = u'Gość niedzielny'
+    description = 'Weekly magazine'
+    encoding = 'utf-8'
+    no_stylesheets = True
+    language = 'pl'
+    remove_javascript = True
+    # NOTE(review): class-level list, so it is shared by all instances.
+    temp_files = []
+
+    articles_are_obfuscated = True
+
+    def get_obfuscated_article(self, url):
+        # Fetch `url`, keep only title, authors, lead and body paragraphs of
+        # the article, write them to a persistent temporary file and return
+        # that file's name.
+        br = self.get_browser()
+        br.open(url)
+        source = br.response().read()
+        page = self.index_to_soup(source)
+
+        main_section = page.find('div', attrs={'class':'txt doc_prnt_prv'})
+
+        title = main_section.find('h2')
+        info = main_section.find('div', attrs={'class' : 'cf doc_info'})
+        authors = info.find(attrs={'class':'l'})
+        article = str(main_section.find('p', attrs={'class' : 'doc_lead'}))
+
+        # image paths on the site are relative; make them absolute
+        relative_src = 'src="/files/'
+        absolute_src = 'src="http://www.gosc.pl/files/'
+
+        first = True
+        for p in main_section.findAll('p', attrs={'class':None}, recursive=False):
+            img = p.find('img') if first else None
+            if img is not None:
+                # first paragraph holding an image: emit the image followed
+                # by the text of its spans in a small font
+                article += '<p>'
+                article += str(img).replace(relative_src, absolute_src)
+                article += '<font size="-2">'
+                for s in p.findAll('span'):
+                    article += self.tag_to_string(s)
+                article += '</font></p>'
+            else:
+                article += str(p).replace(relative_src, absolute_src)
+            first = False
+
+        html = unicode(title) + unicode(authors) + unicode(article)
+
+        tmp = PersistentTemporaryFile('_temparse.html')
+        self.temp_files.append(tmp)
+        tmp.write(html)
+        tmp.close()
+        return tmp.name
+
+    def find_last_issue(self):
+        # Look for the cover image and the link of the previous full issue.
+        # Sets self.EDITION, self.title and self.cover_url from the second
+        # preview block that has an image, or from the first one when there
+        # is no second.
+        soup = self.index_to_soup('http://gosc.pl/wyszukaj/wydania/3.Gosc-Niedzielny')
+        first = True
+        for d in soup.findAll('div', attrs={'class':'l release_preview_l'}):
+            img = d.find('img')
+            if img is None:
+                continue
+            a = img.parent
+            self.EDITION = a['href']
+            self.title = img['alt']
+            self.cover_url = 'http://www.gosc.pl' + img['src']
+            if not first:
+                break
+            first = False
+
+    def parse_index(self):
+        # Return the list of (section title, articles) feeds of the issue
+        # selected by find_last_issue().
+        self.find_last_issue()
+        soup = self.index_to_soup('http://www.gosc.pl' + self.EDITION)
+        feeds = []
+        # editorial
+        a = soup.find('div', attrs={'class':'release-wp-b'}).find('a')
+        articles = [
+            {'title' : self.tag_to_string(a),
+             'url'   : 'http://www.gosc.pl' + a['href'].replace('/doc/','/doc_pr/'),
+             'date'  : '',
+             'description' : ''}
+        ]
+        feeds.append((u'Wstępniak', articles))
+        # categories
+        for addr in soup.findAll('a', attrs={'href':re.compile('kategoria')}):
+            # skip the links whose text is the "all articles from this
+            # category" label
+            if addr.string != u'wszystkie artyku\u0142y z tej kategorii \xbb':
+                main_block = self.index_to_soup('http://www.gosc.pl' + addr['href'])
+                articles = list(self.find_articles(main_block))
+                if articles:
+                    section = addr.string
+                    feeds.append((section, articles))
+        return feeds
+
+    def find_articles(self, main_block):
+        # Yield one article dict per div of class "prev_doc2", then one per
+        # div of class "sr-document", pointing each url at the /doc_pr/
+        # variant of the linked page.
+        for css_class in ('prev_doc2', 'sr-document'):
+            for a in main_block.findAll('div', attrs={'class':css_class}):
+                art = a.find('a')
+                yield {
+                    'title' : self.tag_to_string(art),
+                    'url'   : 'http://www.gosc.pl' + art['href'].replace('/doc/','/doc_pr/'),
+                    'date'  : '',
+                    'description' : ''
+                }
+
--- a/recipes/leipzer_volkszeitung.recipe
+++ b/recipes/leipzer_volkszeitung.recipe
@ -0,0 +1,34 @@
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+'''Calibre recipe to convert the RSS feeds of the Leipziger Volkszeitung to an ebook.'''
+
+class SportsIllustratedRecipe(BasicNewsRecipe) :
+    # Recipe for the RSS feeds of the Leipziger Volkszeitung.
+    # NOTE(review): the class name does not match the publication; it looks
+    # like a leftover from another recipe. Kept unchanged here.
+    title            = u'Leipziger Volkszeitung Online RSS'
+    description      = 'Leipziger Volkszeitung Online RSS'
+    language         = 'de'
+    publication_type = 'newspaper'
+    timefmt          = ' [%d.%m.%Y]'
+    version          = 1
+
+    __author__       = 'a.peter'
+    __copyright__    = 'a.peter'
+    __license__      = 'GPL v3'
+
+    use_embedded_content = False
+    remove_javascript    = True
+    no_stylesheets       = True
+
+    # Only the "article" div is kept; the listed divs inside it are dropped.
+    keep_only_tags = [dict(name='div', attrs={'class':'article'})]
+    remove_tags    = [dict(name='div', attrs={'class':['ARTICLE_MORE', 'clearfloat']})]
+
+    feeds = [
+        (u'Leipzig', u'http://nachrichten.lvz-online.de/rss/leipzig-rss.xml'),
+        (u'Mitteldeutschland', u'http://nachrichten.lvz-online.de/rss/mitteldeutschland-rss.xml'),
+        (u'Brennpunkte', u'http://nachrichten.lvz-online.de/rss/brennpunkte-rss.xml'),
+        (u'Polizeiticker', u'http://nachrichten.lvz-online.de/rss/polizeiticker-rss.xml'),
+        (u'Boulevard', u'http://nachrichten.lvz-online.de/rss/boulevard-rss.xml'),
+        (u'Kultur', u'http://nachrichten.lvz-online.de/rss/kultur-rss.xml'),
+        (u'Sport', u'http://nachrichten.lvz-online.de/rss/sport-rss.xml'),
+        (u'Regionalsport', u'http://nachrichten.lvz-online.de/rss/regionalsport-rss.xml'),
+        (u'Knipser', u'http://nachrichten.lvz-online.de/rss/knipser-rss.xml'),
+    ]
+
+    def get_masthead_url(self):
+        # The masthead is the fixed logo image of the site.
+        logo = 'http://www.lvz-online.de/resources/themes/standard/images/global/logo.gif'
+        return logo
--- a/recipes/portafolio.recipe
+++ b/recipes/portafolio.recipe
@ -4,13 +4,13 @@ from calibre.web.feeds.news import BasicNewsRecipe

 class AdvancedUserRecipe1311799898(BasicNewsRecipe):
    title          = u'Periódico Portafolio Colombia'
-    language = 'es_CO'
    __author__  = 'BIGO-CAVA'
+    language = 'es_CO'
    cover_url     = 'http://www.portafolio.co/sites/portafolio.co/themes/portafolio_2011/logo.png'
    remove_tags_before = dict(id='contenidoArt')
    remove_tags_after  = [dict(name='div', attrs={'class':'articulo-mas'})]
    keep_only_tags = [dict(name='div', id='contenidoArt')]
-    oldest_article = 1
+    oldest_article = 2
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets        = True
--- a/recipes/readitlater.recipe
+++ b/recipes/readitlater.recipe
@ -1,5 +1,8 @@
 __license__   = 'GPL v3'
-__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '''
+2010, Darko Miletic <darko.miletic at gmail.com>
+2011, Przemyslaw Kryger <pkryger at gmail.com>
+'''
 '''
 readitlaterlist.com
 '''
@ -9,7 +12,7 @@ from calibre.web.feeds.news import BasicNewsRecipe

 class Readitlater(BasicNewsRecipe):
    title                 = 'Read It Later'
-    __author__            = 'Darko Miletic'
+    __author__            = 'Darko Miletic, Przemyslaw Kryger'
    description           = '''Personalized news feeds. Go to readitlaterlist.com to
                               setup up your news. Fill in your account
                               username, and optionally you can add password.'''
@ -23,9 +26,6 @@ class Readitlater(BasicNewsRecipe):
    INDEX                 = u'http://readitlaterlist.com'
    LOGIN                 = INDEX + u'/l'

-
-    feeds = [(u'Unread articles' , INDEX + u'/unread')]
-
    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        if self.username is not None:
@ -37,12 +37,31 @@ class Readitlater(BasicNewsRecipe):
            br.submit()
        return br

+    def get_feeds(self):
+        '''
+        Build the list of feeds by walking the paginated unread list.
+
+        Starting at INDEX + '/unread/1', every page is recorded as a
+        (title, url) tuple and then fetched to look for an active "next"
+        link; the walk ends at the first page that has none.
+
+        When self.test is set at most two feeds are returned, so the walk
+        stops as soon as two have been collected instead of downloading
+        every remaining page only to throw the result away.
+        '''
+        self.report_progress(0, ('Fetching list of feeds...'))
+        max_feeds = 2 if self.test else None
+        lfeeds = []
+        feedurl = self.INDEX + u'/unread/1'
+        while True:
+            page = len(lfeeds) + 1
+            lfeeds.append((u'Unread articles, page ' + str(page), feedurl))
+            self.report_progress(0, ('Got ') + str(page) + (' feeds'))
+            if max_feeds is not None and len(lfeeds) >= max_feeds:
+                # Test mode: enough feeds, skip fetching further pages.
+                break
+            soup = self.index_to_soup(feedurl)
+            ritem = soup.find('a',attrs={'id':'next', 'class':'active'})
+            if ritem is None:
+                # No active "next" link on this page: it was the last one.
+                break
+            feedurl = self.INDEX + ritem['href']
+        return lfeeds
+
+
    def parse_index(self):
        totalfeeds = []
        lfeeds = self.get_feeds()
        for feedobj in lfeeds:
            feedtitle, feedurl = feedobj
-            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
+            self.report_progress(0, ('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
            articles = []
            soup = self.index_to_soup(feedurl)
            ritem = soup.find('ul',attrs={'id':'list'})
--- a/recipes/revista_semana.recipe
+++ b/recipes/revista_semana.recipe
@ -0,0 +1,11 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class AdvancedUserRecipe1317341570(BasicNewsRecipe):
+    '''
+    Recipe for Revista Semana, built from a single RSS feed.
+    '''
+    title          = u'Revista Semana'
+    __author__ = 'BIGO-CAVA'
+    language = 'es_CO'
+    # NOTE(review): presumably the maximum article age in days -- confirm
+    # against the BasicNewsRecipe documentation.
+    oldest_article = 7
+    max_articles_per_feed = 100
+
+    # List of (feed title, feed URL) pairs.
+    feeds          = [(u'Revista Semana', u'http://www.semana.com/rss/Semana_OnLine.xml')]
--- a/recipes/smh.recipe
+++ b/recipes/smh.recipe
@ -22,10 +22,10 @@ class Smh_au(BasicNewsRecipe):
    remove_empty_feeds    = True
    masthead_url          = 'http://images.smh.com.au/2010/02/02/1087188/smh-620.jpg'
    publication_type      = 'newspaper'
-    extra_css             = """ 
-                                h1{font-family: Georgia,"Times New Roman",Times,serif } 
-                                body{font-family: Arial,Helvetica,sans-serif} 
-                                .cT-imageLandscape,.cT-imagePortrait{font-size: x-small} 
+    extra_css             = """
+                                h1{font-family: Georgia,"Times New Roman",Times,serif }
+                                body{font-family: Arial,Helvetica,sans-serif}
+                                .cT-imageLandscape,.cT-imagePortrait{font-size: x-small}
                            """

    conversion_options = {
@ -35,16 +35,16 @@ class Smh_au(BasicNewsRecipe):
                        , 'language'  : language
                        }

-    remove_tags = [
-                     dict(name='div', attrs={'id':['googleAds','moreGoogleAds','comments']})
-                    ,dict(name='div', attrs={'class':'cT-imageMultimedia'})
-                    ,dict(name=['object','embed','iframe'])
-                  ]
    remove_tags_after = [dict(name='div',attrs={'class':'articleBody'})]
    keep_only_tags    = [dict(name='div',attrs={'id':'content'})]
-    remove_tags       = [ 
-                          dict(attrs={'class':'hidden'}), 
-                          dict(name=['link','meta','base','embed','object','iframe'])
+    remove_tags       = [
+                        dict(name='div',
+                            attrs={'id':['googleAds','moreGoogleAds','comments',
+                                'video-player-content']}),
+                        dict(name='div', attrs={'class':'cT-imageMultimedia'}),
+                        dict(name=['object','embed','iframe']),
+                        dict(attrs={'class':'hidden'}),
+                        dict(name=['link','meta','base','embed','object','iframe'])
                        ]
    remove_attributes = ['width','height','lang']

@ -84,4 +84,4 @@ class Smh_au(BasicNewsRecipe):
            if not item.has_key('alt'):
               item['alt'] = 'image'
        return soup
-        
+
--- a/resources/content_server/browse/browse.js
+++ b/resources/content_server/browse/browse.js
@ -285,6 +285,15 @@ function booklist(hide_sort) {
    first_page(); 
 }

+function search_result() {
+    // If the #page0 element inside #booklist yields no HTML, show a
+    // "nothing found" message in place of the list; otherwise hand over
+    // to booklist().
+    var page_html = $("#booklist #page0").html();
+    if (page_html) {
+        booklist();
+    } else {
+        $("#booklist").html("No books found matching this query");
+    }
+}
+
 function show_details(a_dom) {
    var book = $(a_dom).closest('div.summary');
    var bd = $('#book_details_dialog');
--- a/session.vim
+++ b/session.vim
@ -2,7 +2,7 @@
 let g:pyflakes_builtins = ["_", "dynamic_property", "__", "P", "I", "lopen", "icu_lower", "icu_upper", "icu_title", "ngettext"]

 python << EOFPY
-import os
+import os, sys

 import vipy

@ -11,15 +11,20 @@ project_dir = os.path.dirname(source_file)
 src_dir = os.path.abspath(os.path.join(project_dir, 'src'))
 base_dir = os.path.join(src_dir, 'calibre')

+sys.path.insert(0, src_dir)
+sys.resources_location = os.path.join(project_dir, 'resources')
+sys.extensions_location = os.path.join(base_dir, 'plugins')
+sys.executables_location = os.environ.get('CALIBRE_EXECUTABLES_PATH', '/usr/bin')
+
 vipy.session.initialize(project_name='calibre', src_dir=src_dir,
-            project_dir=project_dir, base_dir=base_dir)
+            project_dir=project_dir, base_dir=project_dir)

 def recipe_title_callback(raw):
    # Evaluate the UTF-8 decoded text and replace spaces in the result with
    # underscores. Presumably `raw` is the text captured by the `title`
    # group of the regexp matcher this callback is passed to -- confirm.
    # NOTE(review): eval() executes whatever expression that text contains;
    # this is only acceptable while the scanned .recipe files are trusted.
    return eval(raw.decode('utf-8')).replace(' ', '_')

-vipy.session.add_content_browser('.r', ',r', 'Recipe',
+vipy.session.add_content_browser('<leader>r', 'Recipe',
    vipy.session.glob_based_iterator(os.path.join(project_dir, 'recipes', '*.recipe')),
    vipy.session.regexp_based_matcher(r'title\s*=\s*(?P<title>.+)', 'title', recipe_title_callback))
 EOFPY

-nmap \log :enew<CR>:read ! bzr log -l 500 ../.. <CR>:e ../../Changelog.yaml<CR>:e constants.py<CR>
+nmap \log :enew<CR>:read ! bzr log -l 500 <CR>:e Changelog.yaml<CR>:e src/calibre/constants.py<CR>
--- a/setup/build_environment.py
+++ b/setup/build_environment.py
@ -177,6 +177,7 @@ fc_error = None if os.path.exists(os.path.join(fc_inc, 'fontconfig.h')) else \


 poppler_error = None
+poppler_cflags = ['-DPNG_SKIP_SETJMP_CHECK'] if islinux else []
 if not poppler_inc_dirs or not os.path.exists(
        os.path.join(poppler_inc_dirs[0], 'OutputDev.h')):
    poppler_error = \
@ -186,6 +187,10 @@ if not poppler_inc_dirs or not os.path.exists(
    ' the poppler XPDF headers. If your distro does not '
    ' include them you will have to re-compile poppler '
    ' by hand with --enable-xpdf-headers')
+else:
+    # Inspect Link.h from the poppler headers: when it does not declare
+    # `class AnnotLink`, add -DPOPPLER_OLD_LINK_TYPE to the cflags
+    # (presumably this identifies an older poppler API -- confirm).
+    lh = os.path.join(poppler_inc_dirs[0], 'Link.h')
+    # Use a context manager so the header file is closed deterministically.
+    with open(lh, 'rb') as f:
+        if 'class AnnotLink' not in f.read():
+            poppler_cflags.append('-DPOPPLER_OLD_LINK_TYPE')

 magick_error = None
 if not magick_inc_dirs or not os.path.exists(os.path.join(magick_inc_dirs[0],
--- a/setup/extensions.py
+++ b/setup/extensions.py
@ -11,15 +11,15 @@ from distutils import sysconfig

 from PyQt4.pyqtconfig import QtGuiModuleMakefile

-from setup import Command, islinux, isfreebsd, isbsd, isosx, SRC, iswindows
-from setup.build_environment import fc_inc, fc_lib, chmlib_inc_dirs, \
-        fc_error, poppler_libs, poppler_lib_dirs, poppler_inc_dirs, podofo_inc, \
-        podofo_lib, podofo_error, poppler_error, pyqt, OSX_SDK, NMAKE, \
-        QMAKE, msvc, MT, win_inc, win_lib, png_inc_dirs, win_ddk, \
-        magick_inc_dirs, magick_lib_dirs, png_lib_dirs, png_libs, \
-        magick_error, magick_libs, ft_lib_dirs, ft_libs, jpg_libs, \
-        jpg_lib_dirs, chmlib_lib_dirs, sqlite_inc_dirs, icu_inc_dirs, \
-        icu_lib_dirs
+from setup import Command, islinux, isbsd, isosx, SRC, iswindows
+from setup.build_environment import (fc_inc, fc_lib, chmlib_inc_dirs,
+        fc_error, poppler_libs, poppler_lib_dirs, poppler_inc_dirs, podofo_inc,
+        podofo_lib, podofo_error, poppler_error, pyqt, OSX_SDK, NMAKE,
+        QMAKE, msvc, MT, win_inc, win_lib, png_inc_dirs, win_ddk,
+        magick_inc_dirs, magick_lib_dirs, png_lib_dirs, png_libs,
+        magick_error, magick_libs, ft_lib_dirs, ft_libs, jpg_libs,
+        jpg_lib_dirs, chmlib_lib_dirs, sqlite_inc_dirs, icu_inc_dirs,
+        icu_lib_dirs, poppler_cflags)
 MT
 isunix = islinux or isosx or isbsd

@ -114,7 +114,7 @@ extensions = [
                lib_dirs=poppler_lib_dirs+magick_lib_dirs+png_lib_dirs+ft_lib_dirs+jpg_lib_dirs,
                inc_dirs=poppler_inc_dirs+magick_inc_dirs+png_inc_dirs,
                error=reflow_error,
-                cflags=['-DPNG_SKIP_SETJMP_CHECK'] if islinux else []
+                cflags=poppler_cflags
                ),

    Extension('lzx',
--- a/setup/iso_639/af.po
+++ b/setup/iso_639/af.po
--- a/setup/iso_639/am.po
+++ b/setup/iso_639/am.po
--- a/setup/iso_639/ar.po
+++ b/setup/iso_639/ar.po
--- a/setup/iso_639/az.po
+++ b/setup/iso_639/az.po
--- a/setup/iso_639/bg.po
+++ b/setup/iso_639/bg.po
--- a/setup/iso_639/bn_IN.po
+++ b/setup/iso_639/bn_IN.po
--- a/setup/iso_639/br.po
+++ b/setup/iso_639/br.po
--- a/setup/iso_639/bs.po
+++ b/setup/iso_639/bs.po
--- a/setup/iso_639/byn.po
+++ b/setup/iso_639/byn.po
--- a/setup/iso_639/ca.po
+++ b/setup/iso_639/ca.po
--- a/setup/iso_639/crh.po
+++ b/setup/iso_639/crh.po
--- a/setup/iso_639/cs.po
+++ b/setup/iso_639/cs.po
--- a/setup/iso_639/cy.po
+++ b/setup/iso_639/cy.po
--- a/setup/iso_639/da.po
+++ b/setup/iso_639/da.po
--- a/setup/iso_639/de.po
+++ b/setup/iso_639/de.po
--- a/setup/iso_639/el.po
+++ b/setup/iso_639/el.po
--- a/setup/iso_639/eo.po
+++ b/setup/iso_639/eo.po
--- a/setup/iso_639/es.po
+++ b/setup/iso_639/es.po
--- a/setup/iso_639/et.po
+++ b/setup/iso_639/et.po
--- a/setup/iso_639/eu.po
+++ b/setup/iso_639/eu.po
--- a/setup/iso_639/fa.po
+++ b/setup/iso_639/fa.po
--- a/setup/iso_639/fi.po
+++ b/setup/iso_639/fi.po
--- a/setup/iso_639/fr.po
+++ b/setup/iso_639/fr.po
--- a/setup/iso_639/ga.po
+++ b/setup/iso_639/ga.po
--- a/setup/iso_639/gez.po
+++ b/setup/iso_639/gez.po
--- a/setup/iso_639/gl.po
+++ b/setup/iso_639/gl.po
--- a/setup/iso_639/gu.po
+++ b/setup/iso_639/gu.po
--- a/setup/iso_639/he.po
+++ b/setup/iso_639/he.po
--- a/setup/iso_639/hi.po
+++ b/setup/iso_639/hi.po
--- a/setup/iso_639/hr.po
+++ b/setup/iso_639/hr.po
--- a/setup/iso_639/hu.po
+++ b/setup/iso_639/hu.po
--- a/setup/iso_639/id.po
+++ b/setup/iso_639/id.po
--- a/setup/iso_639/is.po
+++ b/setup/iso_639/is.po
--- a/setup/iso_639/iso_639_3.pot
+++ b/setup/iso_639/iso_639_3.pot
--- a/setup/iso_639/iso_639_3.xml
+++ b/setup/iso_639/iso_639_3.xml
--- a/setup/iso_639/it.po
+++ b/setup/iso_639/it.po
--- a/setup/iso_639/ja.po
+++ b/setup/iso_639/ja.po
--- a/setup/iso_639/kn.po
+++ b/setup/iso_639/kn.po
--- a/setup/iso_639/ko.po
+++ b/setup/iso_639/ko.po
--- a/setup/iso_639/kok.po
+++ b/setup/iso_639/kok.po
--- a/setup/iso_639/lt.po
+++ b/setup/iso_639/lt.po
--- a/setup/iso_639/lv.po
+++ b/setup/iso_639/lv.po
--- a/setup/iso_639/mi.po
+++ b/setup/iso_639/mi.po
--- a/setup/iso_639/mk.po
+++ b/setup/iso_639/mk.po
--- a/setup/iso_639/mn.po
+++ b/setup/iso_639/mn.po
--- a/setup/iso_639/mr.po
+++ b/setup/iso_639/mr.po
--- a/setup/iso_639/ms.po
+++ b/setup/iso_639/ms.po
--- a/setup/iso_639/mt.po
+++ b/setup/iso_639/mt.po
--- a/setup/iso_639/nb.po
+++ b/setup/iso_639/nb.po
--- a/setup/iso_639/nl.po
+++ b/setup/iso_639/nl.po
--- a/setup/iso_639/nn.po
+++ b/setup/iso_639/nn.po
--- a/setup/iso_639/nso.po
+++ b/setup/iso_639/nso.po
--- a/setup/iso_639/oc.po
+++ b/setup/iso_639/oc.po
--- a/setup/iso_639/or.po
+++ b/setup/iso_639/or.po
--- a/setup/iso_639/pa.po
+++ b/setup/iso_639/pa.po
--- a/setup/iso_639/pl.po
+++ b/setup/iso_639/pl.po
--- a/setup/iso_639/ps.po
+++ b/setup/iso_639/ps.po
--- a/setup/iso_639/pt.po
+++ b/setup/iso_639/pt.po
--- a/setup/iso_639/pt_BR.po
+++ b/setup/iso_639/pt_BR.po
--- a/setup/iso_639/ro.po
+++ b/setup/iso_639/ro.po
--- a/setup/iso_639/ru.po
+++ b/setup/iso_639/ru.po
--- a/setup/iso_639/rw.po
+++ b/setup/iso_639/rw.po
--- a/setup/iso_639/sk.po
+++ b/setup/iso_639/sk.po
--- a/setup/iso_639/sl.po
+++ b/setup/iso_639/sl.po
--- a/setup/iso_639/sr.po
+++ b/setup/iso_639/sr.po
--- a/setup/iso_639/sr@latin.po
+++ b/setup/iso_639/sr@latin.po
--- a/setup/iso_639/sv.po
+++ b/setup/iso_639/sv.po
--- a/setup/iso_639/ta.po
+++ b/setup/iso_639/ta.po
--- a/setup/iso_639/th.po
+++ b/setup/iso_639/th.po
--- a/setup/iso_639/ti.po
+++ b/setup/iso_639/ti.po
--- a/setup/iso_639/tig.po
+++ b/setup/iso_639/tig.po
--- a/setup/iso_639/tr.po
+++ b/setup/iso_639/tr.po
--- a/setup/iso_639/tt.po
+++ b/setup/iso_639/tt.po
--- a/setup/iso_639/uk.po
+++ b/setup/iso_639/uk.po
--- a/setup/iso_639/ve.po
+++ b/setup/iso_639/ve.po
--- a/setup/iso_639/vi.po
+++ b/setup/iso_639/vi.po
--- a/setup/iso_639/wa.po
+++ b/setup/iso_639/wa.po
--- a/setup/iso_639/xh.po
+++ b/setup/iso_639/xh.po
--- a/setup/iso_639/zh_CN.po
+++ b/setup/iso_639/zh_CN.po
--- a/setup/iso_639/zh_TW.po
+++ b/setup/iso_639/zh_TW.po
--- a/Show More
+++ b/Show More