Merge from trunk

Sengian committed 2010-11-23 06:35:48 +01:00
commit 9653087ea0
22 changed files with 695 additions and 49 deletions

View File

@@ -0,0 +1,50 @@
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
180.com.uy
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Noticias(BasicNewsRecipe):
    title = '180.com.uy'
    __author__ = 'Gustavo Azambuja'
    description = 'Noticias de Uruguay'
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'
    use_embedded_content = False
    recursion = 5
    encoding = 'utf-8'
    remove_javascript = True
    no_stylesheets = True

    oldest_article = 2
    max_articles_per_feed = 100
    keep_only_tags = [dict(name='div', attrs={'class':'tef-md tef-md-seccion-sociedad'})]
    remove_tags = [
        dict(name=['object','link'])
    ]
    remove_attributes = ['width','height', 'style', 'font', 'color']

    extra_css = '''
        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
        p {font-family:Arial,Helvetica,sans-serif;}
    '''

    feeds = [
        (u'Titulares', u'http://www.180.com.uy/feed.php')
    ]

    def get_cover_url(self):
        return 'http://www.180.com.uy/tplef/img/logo.gif'

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup
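
Any of the recipes added in this commit can be smoke-tested from the command line; a minimal check, assuming calibre's standard ebook-convert tool is on PATH (the file names are examples, and --test restricts the fetch to a couple of articles per feed):

    ebook-convert 180_uy.recipe 180_uy.epub --test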

View File

@@ -0,0 +1,58 @@
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
bitacora.com.uy
'''

from calibre.web.feeds.news import BasicNewsRecipe

class General(BasicNewsRecipe):
    title = 'bitacora.com.uy'
    __author__ = 'Gustavo Azambuja'
    description = 'Noticias de Uruguay'
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'
    use_embedded_content = False
    recursion = 5
    encoding = 'iso-8859-1'
    remove_javascript = True
    no_stylesheets = True

    oldest_article = 2
    max_articles_per_feed = 100
    keep_only_tags = [dict(id=['txt'])]
    remove_tags = [
        dict(name='div', attrs={'class':'tablafoot'}),
        dict(name=['object','h4']),
        dict(name=['object','link'])
    ]
    remove_attributes = ['width','height', 'style', 'font', 'color']

    extra_css = '''
        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
        p {font-family:Arial,Helvetica,sans-serif;}
    '''

    feeds = [
        (u'Titulares', u'http://www.bitacora.com.uy/anxml.cgi?15')
    ]

    def get_cover_url(self):
        cover_url = None
        index = 'http://www.bitacora.com.uy'
        soup = self.index_to_soup(index)
        link_item = soup.find('img', attrs={'class':'imgtapa'})
        if link_item:
            cover_url = "http://www.bitacora.com.uy/" + link_item['src']
        return cover_url

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup

View File

@@ -0,0 +1,69 @@
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
http://www.cosmohispano.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class General(BasicNewsRecipe):
    title = 'Cosmopolitan'
    __author__ = 'Gustavo Azambuja'
    description = 'Revista Cosmopolitan, Edicion Espanola'
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'
    use_embedded_content = False
    recursion = 1
    encoding = 'utf8'
    remove_javascript = True
    no_stylesheets = True
    conversion_options = {'linearize_tables': True}

    oldest_article = 180
    max_articles_per_feed = 100
    keep_only_tags = [
        dict(id=['contenido']),
        dict(name='td', attrs={'class':['contentheading', 'txt_articulo']})
    ]
    remove_tags = [
        dict(name='div', attrs={'class':['breadcrumb', 'bloque1', 'article', 'bajo_title', 'tags_articles', 'otrosenlaces_title', 'otrosenlaces_parent', 'compartir']}),
        dict(name='div', attrs={'id':'comment'}),
        dict(name='table', attrs={'class':'pagenav'}),
        dict(name=['object','link'])
    ]
    remove_attributes = ['width','height', 'style', 'font', 'color']

    extra_css = '''
        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
        img {float:left; clear:both; margin:10px}
        p {font-family:Arial,Helvetica,sans-serif;}
    '''

    feeds = [
        (u'Articulos', u'http://feeds.feedburner.com/cosmohispano/FSSt')
    ]

    def preprocess_html(self, soup):
        # Flatten table markup into divs and strip layout attributes
        attribs = ['style','font','valign','colspan','width','height',
                   'rowspan','summary','align','cellspacing','cellpadding',
                   'frames','rules','border']
        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
            item.name = 'div'
            for attrib in attribs:
                if item.has_key(attrib):
                    del item[attrib]
        return soup

    def get_cover_url(self):
        cover_url = None  # avoid an unbound local if the cover image is missing
        index = 'http://www.cosmohispano.com/revista'
        soup = self.index_to_soup(index)
        link_item = soup.find('img', attrs={'class':'img_portada'})
        if link_item:
            cover_url = "http://www.cosmohispano.com" + link_item['src']
        return cover_url

View File

@@ -0,0 +1,67 @@
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
http://www.elpais.com.uy/
'''

from calibre.web.feeds.news import BasicNewsRecipe

class General(BasicNewsRecipe):
    title = 'Diario El Pais'
    __author__ = 'Gustavo Azambuja'
    description = 'Noticias | Uruguay'
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'
    use_embedded_content = False
    recursion = 2
    encoding = 'iso-8859-1'
    remove_javascript = True
    no_stylesheets = True

    oldest_article = 2
    max_articles_per_feed = 100
    keep_only_tags = [
        dict(name='h1'),
        dict(name='div', attrs={'id':'Contenido'})
    ]
    remove_tags = [
        dict(name='div', attrs={'class':['date_text', 'comments', 'form_section', 'share_it']}),
        dict(name='div', attrs={'id':['relatedPosts', 'spacer', 'banner_izquierda', 'right_container']}),
        dict(name='p', attrs={'class':'FacebookLikeButton'}),
        dict(name=['object','form','table'])
    ]

    extra_css = '''
        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
        p {font-family:Arial,Helvetica,sans-serif;}
    '''

    feeds = [
        (u'Ultimo Momento', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=umomento'),
        (u'Editorial', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=editorial'),
        (u'Nacional', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=nacional'),
        (u'Internacional', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=internacional'),
        (u'Espectaculos', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=espectaculos'),
        (u'Deportes', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=deportes'),
        (u'Ciudades', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=ciudades'),
        (u'Economia', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=economia')
    ]

    def get_cover_url(self):
        cover_url = None
        index = 'http://www.elpais.com.uy'
        soup = self.index_to_soup(index)
        link_item = soup.find('div', attrs={'class':'boxmedio box257'})
        if link_item:
            cover_url = 'http://www.elpais.com.uy' + link_item.img['src']
        return cover_url

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup

View File

@@ -0,0 +1,100 @@
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
http://freeway.com.uy
'''

from calibre.web.feeds.news import BasicNewsRecipe

class General(BasicNewsRecipe):
    title = 'freeway.com.uy'
    __author__ = 'Gustavo Azambuja'
    description = 'Revista Freeway, Montevideo, Uruguay'
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'
    use_embedded_content = False
    recursion = 1
    encoding = 'utf8'
    remove_javascript = True
    no_stylesheets = True
    conversion_options = {'linearize_tables': True}

    oldest_article = 180
    max_articles_per_feed = 100
    keep_only_tags = [
        dict(id=['contenido']),
        dict(name='a', attrs={'class':'titulo_art_ppal'}),
        dict(name='img', attrs={'class':'recuadro'}),
        dict(name='td', attrs={'class':'txt_art_ppal'})
    ]
    remove_tags = [
        dict(name=['object','link'])
    ]
    remove_attributes = ['width','height', 'style', 'font', 'color']

    extra_css = '''
        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
        img {float:left; clear:both; margin:10px}
        p {font-family:Arial,Helvetica,sans-serif;}
    '''

    def parse_index(self):
        feeds = []
        for title, url in [('Articulos', 'http://freeway.com.uy/revista/')]:
            articles = self.art_parse_section(url)
            if articles:
                feeds.append((title, articles))
        return feeds

    def art_parse_section(self, url):
        soup = self.index_to_soup(url)
        div = soup.find(attrs={'id': 'tbl_1'})

        current_articles = []
        for tag in div.findAllNext(attrs={'class': 'ancho_articulos'}):
            if tag.get('class') == 'link-list-heading':
                break
            for td in tag.findAll('td'):
                a = td.find('a', attrs={'class': 'titulo_articulos'})
                if a is None:
                    continue
                title = self.tag_to_string(a)
                url = a.get('href', False)
                if not url or not title:
                    continue
                if url.startswith('/'):
                    url = 'http://freeway.com.uy' + url
                p = td.find('p', attrs={'class': 'txt_articulos'})
                description = self.tag_to_string(p)
                self.log('\t\tFound article:', title)
                self.log('\t\t\t', url)
                self.log('\t\t\t', description)
                current_articles.append({'title': title, 'url': url, 'description': description, 'date': ''})

        return current_articles

    def preprocess_html(self, soup):
        # Flatten table markup into divs and strip layout attributes
        attribs = ['style','font','valign','colspan','width','height',
                   'rowspan','summary','align','cellspacing','cellpadding',
                   'frames','rules','border']
        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
            item.name = 'div'
            for attrib in attribs:
                if item.has_key(attrib):
                    del item[attrib]
        return soup

    def get_cover_url(self):
        # Cover discovery is not implemented for this site; link the
        # current issue's cover directly.
        return 'http://freeway.com.uy/_upload/_n_foto_grande/noticia_1792_tapanoviembre2010.jpg'

View File

@@ -0,0 +1,48 @@
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
ladiaria.com.uy
'''

from calibre.web.feeds.news import BasicNewsRecipe

class General(BasicNewsRecipe):
    title = 'La Diaria'
    __author__ = 'Gustavo Azambuja'
    description = 'Noticias de Uruguay'
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'
    use_embedded_content = False
    recursion = 5
    encoding = 'utf8'
    remove_javascript = True
    no_stylesheets = True

    oldest_article = 2
    max_articles_per_feed = 100
    keep_only_tags = [dict(id=['article'])]
    remove_tags = [
        dict(name='div', attrs={'class':['byline', 'hr', 'titlebar', 'volver-arriba-right']}),
        dict(name='div', attrs={'id':'discussion'}),
        dict(name=['object','link'])
    ]

    extra_css = '''
        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
        p {font-family:Arial,Helvetica,sans-serif;}
    '''

    feeds = [
        (u'Articulos', u'http://ladiaria.com/feeds/articulos')
    ]

    def get_cover_url(self):
        return 'http://ladiaria.com/edicion/imagenportada/'

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup

View File

@@ -8,7 +8,7 @@ from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe

 class LaRazon_Bol(BasicNewsRecipe):
-    title = 'La Razón - Bolivia'
+    title = u'La Razón - Bolivia'
     __author__ = 'Darko Miletic'
     description = 'El diario nacional de Bolivia'
     publisher = 'Praxsis S.R.L.'

View File

@@ -0,0 +1,56 @@
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
http://www.montevideo.com.uy
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Noticias(BasicNewsRecipe):
    title = 'Montevideo COMM'
    __author__ = 'Gustavo Azambuja'
    description = 'Noticias de Uruguay'
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'
    use_embedded_content = False
    recursion = 5
    encoding = 'utf-8'
    remove_javascript = True
    no_stylesheets = True

    oldest_article = 2
    max_articles_per_feed = 100
    keep_only_tags = [dict(id=['txt'])]
    remove_tags = [
        dict(name=['object','link'])
    ]
    remove_attributes = ['width','height', 'style', 'font', 'color']

    extra_css = '''
        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
        p {font-family:Arial,Helvetica,sans-serif;}
    '''

    feeds = [
        (u'Destacados', u'http://www.montevideo.com.uy/anxml.aspx?58'),
        (u'Noticias', u'http://www.montevideo.com.uy/anxml.aspx?59'),
        (u'Tecnologia', u'http://www.montevideo.com.uy/anxml.aspx?133'),
        (u'Tiempo Libre', u'http://www.montevideo.com.uy/anxml.aspx?60'),
        # (u'Deportes', u'http://www.montevideo.com.uy/anxml.aspx?968'),
        # (u'Pantallazo', u'http://www.montevideo.com.uy/anxml.aspx?1022'),
        (u'Gastronomia', u'http://www.montevideo.com.uy/anxml.aspx?1023')
    ]

    def get_cover_url(self):
        return 'http://sphotos.ak.fbcdn.net/hphotos-ak-snc1/hs276.snc1/10319_147339559330_147337559330_2625816_6636564_n.jpg'

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup

View File

@@ -0,0 +1,63 @@
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
observa.com.uy
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Noticias(BasicNewsRecipe):
    title = 'Observa Digital'
    __author__ = 'Gustavo Azambuja'
    description = 'Noticias desde Uruguay'
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'
    use_embedded_content = False
    recursion = 5
    encoding = 'utf8'
    remove_javascript = True
    no_stylesheets = True

    oldest_article = 2
    max_articles_per_feed = 100
    keep_only_tags = [dict(id=['contenido'])]
    remove_tags = [
        dict(name='div', attrs={'id':'contenedorVinculadas'}),
        dict(name='p', attrs={'id':'nota_firma'}),
        dict(name=['object','link'])
    ]
    remove_attributes = ['width','height', 'style', 'font', 'color']

    extra_css = '''
        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
        p {font-family:Arial,Helvetica,sans-serif;}
    '''

    feeds = [
        (u'Actualidad', u'http://www.observa.com.uy/RSS/actualidad.xml'),
        (u'Deportes', u'http://www.observa.com.uy/RSS/deportes.xml'),
        (u'Vida', u'http://www.observa.com.uy/RSS/vida.xml'),
        (u'Ciencia y Tecnologia', u'http://www.observa.com.uy/RSS/ciencia.xml')
    ]

    def get_cover_url(self):
        cover_url = None
        index = 'http://www.elobservador.com.uy/elobservador/nav_portada.asp?suplemento=dia'
        soup = self.index_to_soup(index)
        link_item = soup.find('img', attrs={'usemap':'#mapeo_imagenes'})
        if link_item:
            cover_url = 'http://www.elobservador.com.uy' + link_item['src'].strip()
        return cover_url

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup

View File

@@ -0,0 +1,54 @@
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
http://www.revistabla.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Noticias(BasicNewsRecipe):
    title = 'Revista Bla'
    __author__ = 'Gustavo Azambuja'
    description = 'Moda | Uruguay'
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'
    use_embedded_content = False
    recursion = 5
    encoding = 'utf8'
    remove_javascript = True
    no_stylesheets = True

    oldest_article = 20
    max_articles_per_feed = 100
    keep_only_tags = [dict(id=['body_container'])]
    remove_tags = [
        dict(name='div', attrs={'class':['date_text', 'comments', 'form_section', 'share_it']}),
        dict(name='div', attrs={'id':['relatedPosts', 'spacer', 'banner_izquierda', 'right_container']}),
        dict(name='p', attrs={'class':'FacebookLikeButton'}),
        dict(name=['object','link'])
    ]

    extra_css = '''
        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
        p {font-family:Arial,Helvetica,sans-serif;}
    '''

    feeds = [
        (u'Articulos', u'http://www.revistabla.com/feed/')
    ]

    def get_cover_url(self):
        cover_url = None
        index = 'http://www.revistabla.com'
        soup = self.index_to_soup(index)
        link_item = soup.find('div', attrs={'class':'header_right'})
        if link_item:
            cover_url = link_item.img['src']
        return cover_url

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup

View File

@@ -108,3 +108,10 @@ class RevistaMuyInteresante(BasicNewsRecipe):
             feeds.append((title, articles))
         return feeds
+
+    def get_cover_url(self):
+        index = 'http://www.muyinteresante.es/revista'
+        soup = self.index_to_soup(index)
+        link_item = soup.find('img',attrs={'class':'img_portada'})
+        if link_item:
+            cover_url = "http://www.muyinteresante.es"+link_item['src']
+        return cover_url

View File

@@ -3,12 +3,12 @@
 __license__ = 'GPL v3'
 __copyright__ = '2009, Gerhard Aigner <gerhard.aigner at gmail.com>'

-''' http://www.derstandard.at - Austrian Newspaper '''

 import re
 from calibre.web.feeds.news import BasicNewsRecipe

 class TelepolisNews(BasicNewsRecipe):
-    title = u'Telepolis (News)'
+    title = u'Telepolis (News+Artikel)'
     __author__ = 'Gerhard Aigner'
     publisher = 'Heise Zeitschriften Verlag GmbH & Co KG'
     description = 'News from telepolis'

@@ -26,10 +26,10 @@ class TelepolisNews(BasicNewsRecipe):
     preprocess_regexps = [(re.compile(r'<a[^>]*>', re.DOTALL|re.IGNORECASE), lambda match: ''),
                           (re.compile(r'</a>', re.DOTALL|re.IGNORECASE), lambda match: ''),]

-    keep_only_tags = [dict(name = 'table',attrs={'class':'blogtable'})]
-    remove_tags = [dict(name='img'), dict(name='td',attrs={'class':'blogbottom'})]
-    feeds = [(u'News', u'http://www.heise.de/tp/news.rdf')]
+    keep_only_tags = [dict(name = 'td',attrs={'class':'bloghead'}),dict(name = 'td',attrs={'class':'blogfliess'})]
+    remove_tags = [dict(name='img'), dict(name='td',attrs={'class':'blogbottom'}), dict(name='td',attrs={'class':'forum'})]
+    feeds = [(u'News', u'http://www.heise.de/tp/news-atom.xml')]

     html2lrf_options = [
         '--comment' , description

@@ -41,7 +41,7 @@ class TelepolisNews(BasicNewsRecipe):
     def get_article_url(self, article):
         '''if the linked article is of kind artikel don't take it'''
-        if (article.link.count('artikel') > 0) :
+        if (article.link.count('artikel') > 1) :
             return None
         return article.link

@@ -49,3 +49,5 @@ class TelepolisNews(BasicNewsRecipe):
         mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
         soup.head.insert(0,mtag)
         return soup

View File

@@ -132,7 +132,7 @@ class Win32Freeze(Command, WixMixIn):
         shutil.copytree(self.j(comext, 'shell'), self.j(sp_dir, 'win32com', 'shell'))
         shutil.rmtree(comext)

-        for pat in (r'numpy', r'PyQt4\uic\port_v3'):
+        for pat in (r'PyQt4\uic\port_v3', ):
             x = glob.glob(self.j(self.lib_dir, 'site-packages', pat))[0]
             shutil.rmtree(x)

View File

@@ -19,7 +19,7 @@ Set CMAKE_PREFIX_PATH environment variable to C:\cygwin\home\kovid\sw
 This is where all dependencies will be installed.

-Add C:\Python26\Scripts and C:\Python26 to PATH
+Add C:\Python27\Scripts and C:\Python27 to PATH

 Install setuptools from http://pypi.python.org/pypi/setuptools

 If there are no windows binaries already compiled for the version of python you are using then download the source and run the following command in the folder where the source has been unpacked::

@@ -28,7 +28,7 @@ If there are no windows binaries already compiled for the version of python you

 Run the following command to install python dependencies::

-  easy_install --always-unzip -U ipython mechanize pyreadline python-dateutil dnspython cssutils clientform
+  easy_install --always-unzip -U ipython mechanize pyreadline python-dateutil dnspython cssutils clientform pycrypto

 Install BeautifulSoup 3.0.x manually into site-packages (3.1.x parses broken HTML very poorly)

View File

@@ -229,6 +229,10 @@ class KOBO(USBMS):
             #Delete the volume_shortcovers second
             cursor.execute('delete from volume_shortcovers where volumeid = ?', t)

+            # Delete the rows from content_keys
+            if self.dbversion >= 8:
+                cursor.execute('delete from content_keys where volumeid = ?', t)
+
             # Delete the chapters associated with the book next
             t = (ContentID,ContentID,)
             cursor.execute('delete from content where BookID = ? or ContentID = ?', t)

View File

@@ -140,7 +140,7 @@ def create_books(opts, args, timeout=5.):
     tans = [ISBNDBMetadata(book) for book in fetch_metadata(url, timeout=timeout)]

     #remove duplicates ISBN
-    return dict((book.isbn, book) for book in tans).values()
+    return list(dict((book.isbn, book) for book in tans).values())

 def main(args=sys.argv):
     parser = option_parser()

View File

@@ -6,3 +6,53 @@ def db(path=None):
     from calibre.library.database2 import LibraryDatabase2
     from calibre.utils.config import prefs
     return LibraryDatabase2(path if path else prefs['library_path'])
+
+def generate_test_db(library_path,
+        num_of_records=20000,
+        num_of_authors=6000,
+        num_of_tags=10000,
+        tag_length=7,
+        author_length=7,
+        title_length=10,
+        max_authors=10,
+        max_tags=10
+        ):
+    import random, string, os, sys, time
+
+    if not os.path.exists(library_path):
+        os.makedirs(library_path)
+
+    def randstr(length):
+        return ''.join(random.choice(string.letters) for i in
+                xrange(length))
+
+    all_tags = [randstr(tag_length) for j in xrange(num_of_tags)]
+    print 'Generated', num_of_tags, 'tags'
+    all_authors = [randstr(author_length) for j in xrange(num_of_authors)]
+    print 'Generated', num_of_authors, 'authors'
+    all_titles = [randstr(title_length) for j in xrange(num_of_records)]
+    print 'Generated', num_of_records, 'titles'
+
+    testdb = db(library_path)
+
+    print 'Creating', num_of_records, 'records...'
+    start = time.time()
+
+    for i, title in enumerate(all_titles):
+        print i+1,
+        sys.stdout.flush()
+        authors = random.randint(1, max_authors)
+        authors = [random.choice(all_authors) for i in xrange(authors)]
+        tags = random.randint(0, max_tags)
+        tags = [random.choice(all_tags) for i in xrange(tags)]
+
+        from calibre.ebooks.metadata.book.base import Metadata
+        mi = Metadata(title, authors)
+        mi.tags = tags
+
+        testdb.import_book(mi, [])
+
+    t = time.time() - start
+    print '\nGenerated', num_of_records, 'records in:', t, 'seconds'
+    print 'Time per record:', t/float(num_of_records)
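
For orientation, a minimal sketch of how the new helper might be driven (the path and the small sizes are illustrative, not part of the commit):

    from calibre.library import generate_test_db
    # build a small throwaway library to time metadata inserts
    generate_test_db('/tmp/test_library', num_of_records=100,
            num_of_authors=50, num_of_tags=100)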

View File

@@ -405,9 +405,6 @@ class BIBTEX(CatalogPlugin):
             else :
                 template_citation = u'%s' % str(entry["id"])

-            if asccii_bibtex :
-                return bibtexclass.ValidateCitationKey(template_citation.encode('ascii', 'replace'))
-            else :
             return bibtexclass.ValidateCitationKey(template_citation)

         self.fmt = path_to_output.rpartition('.')[2]

View File

@@ -1248,15 +1248,20 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
                 traceback.print_exc()
             else:
                 raise
+        path_changed = False
         if set_title and mi.title:
-            self.set_title(id, mi.title, commit=False)
+            self._set_title(id, mi.title)
+            path_changed = True
         if set_authors:
             if not mi.authors:
                 mi.authors = [_('Unknown')]
             authors = []
             for a in mi.authors:
                 authors += string_to_authors(a)
-            self.set_authors(id, authors, notify=False, commit=False)
+            self._set_authors(id, authors)
+            path_changed = True
+        if path_changed:
+            self.set_path(id, index_is_id=True)
         if mi.author_sort:
             doit(self.set_author_sort, id, mi.author_sort, notify=False,
                  commit=False)

@@ -1348,13 +1353,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
             result.append(r)
         return ' & '.join(result).replace('|', ',')

-    def set_authors(self, id, authors, notify=True, commit=True):
-        '''
-        Note that even if commit is False, the db will still be committed to
-        because this causes the location of files to change
-
-        :param authors: A list of authors.
-        '''
+    def _set_authors(self, id, authors):
         if not authors:
             authors = [_('Unknown')]
         self.conn.execute('DELETE FROM books_authors_link WHERE book=?',(id,))

@@ -1379,25 +1378,30 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
             ss = self.author_sort_from_book(id, index_is_id=True)
             self.conn.execute('UPDATE books SET author_sort=? WHERE id=?',
                               (ss, id))
-        self.dirtied([id], commit=False)
-        if commit:
-            self.conn.commit()
         self.data.set(id, self.FIELD_MAP['authors'],
                       ','.join([a.replace(',', '|') for a in authors]),
                       row_is_id=True)
         self.data.set(id, self.FIELD_MAP['author_sort'], ss, row_is_id=True)

+    def set_authors(self, id, authors, notify=True, commit=True):
+        '''
+        Note that even if commit is False, the db will still be committed to
+        because this causes the location of files to change
+
+        :param authors: A list of authors.
+        '''
+        self._set_authors(id, authors)
+        self.dirtied([id], commit=False)
+        if commit:
+            self.conn.commit()
         self.set_path(id, index_is_id=True)
         if notify:
             self.notify('metadata', [id])

-    def set_title(self, id, title, notify=True, commit=True):
-        '''
-        Note that even if commit is False, the db will still be committed to
-        because this causes the location of files to change
-        '''
+    def _set_title(self, id, title):
         if not title:
-            return
-        if not isinstance(title, unicode):
+            return False
+        if isbytestring(title):
             title = title.decode(preferred_encoding, 'replace')
         self.conn.execute('UPDATE books SET title=? WHERE id=?', (title, id))
         self.data.set(id, self.FIELD_MAP['title'], title, row_is_id=True)

@@ -1405,6 +1409,15 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
             self.data.set(id, self.FIELD_MAP['sort'], title_sort(title), row_is_id=True)
         else:
             self.data.set(id, self.FIELD_MAP['sort'], title, row_is_id=True)
+        return True
+
+    def set_title(self, id, title, notify=True, commit=True):
+        '''
+        Note that even if commit is False, the db will still be committed to
+        because this causes the location of files to change
+        '''
+        if not self._set_title(id, title):
+            return
         self.set_path(id, index_is_id=True)
         self.dirtied([id], commit=False)
         if commit:

@@ -2072,13 +2085,11 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
                           (id, title, series_index, aus))
         self.data.books_added([id], self)
-        self.set_path(id, True)
-        self.conn.commit()
         if mi.timestamp is None:
             mi.timestamp = utcnow()
         if mi.pubdate is None:
             mi.pubdate = utcnow()
-        self.set_metadata(id, mi, ignore_errors=True)
+        self.set_metadata(id, mi, ignore_errors=True, commit=True)
         if cover is not None:
             try:
                 self.set_cover(id, cover)

@@ -2114,13 +2125,11 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
             id = obj.lastrowid
             self.data.books_added([id], self)
             ids.append(id)
-            self.set_path(id, True)
-            self.conn.commit()
             if mi.timestamp is None:
                 mi.timestamp = utcnow()
             if mi.pubdate is None:
                 mi.pubdate = utcnow()
-            self.set_metadata(id, mi)
+            self.set_metadata(id, mi, commit=True, ignore_errors=True)
             npath = self.run_import_plugins(path, format)
             format = os.path.splitext(npath)[-1].lower().replace('.', '').upper()
             stream = lopen(npath, 'rb')

@@ -2154,12 +2163,11 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
                           (title, series_index, aus))
         id = obj.lastrowid
         self.data.books_added([id], self)
-        self.set_path(id, True)
         if mi.timestamp is None:
             mi.timestamp = utcnow()
         if mi.pubdate is None:
             mi.pubdate = utcnow()
-        self.set_metadata(id, mi, ignore_errors=True)
+        self.set_metadata(id, mi, ignore_errors=True, commit=True)
         if preserve_uuid and mi.uuid:
             self.set_uuid(id, mi.uuid, commit=False)
         for path in formats:

View File

@@ -129,7 +129,7 @@ if not _run_once:
         def __getattribute__(self, attr):
             if attr in ('name', '__enter__', '__str__', '__unicode__',
-                    '__repr__'):
+                    '__repr__', '__exit__'):
                 return object.__getattribute__(self, attr)
             fobject = object.__getattribute__(self, 'fobject')
             return getattr(fobject, attr)

@@ -155,6 +155,11 @@ if not _run_once:
             fobject.__enter__()
             return self

+        def __exit__(self, *args):
+            fobject = object.__getattribute__(self, 'fobject')
+            return fobject.__exit__(*args)
+
         m = mode[0]
         random = len(mode) > 1 and mode[1] == '+'
         binary = mode[-1] == 'b'
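
The point of the added __exit__ is that Python resolves special methods on the type rather than through an instance's __getattribute__, so the proxy must define __exit__ itself for with-statement support. A minimal sketch of the now-working pattern (the file name is an example; lopen is calibre's open wrapper around this proxy):

    # __exit__ runs when the block ends, closing the underlying file object
    with lopen('book.epub', 'rb') as f:
        data = f.read()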

View File

@@ -69,6 +69,9 @@ from UserDict import UserDict
 from calibre.constants import preferred_encoding
 from calibre.utils.mreplace import MReplace

+from calibre.constants import preferred_encoding
+from calibre.utils.mreplace import MReplace
+
 utf8enc2latex_mapping = {
     # This is a mapping of Unicode characters to LaTeX equivalents.
     # The information has been extracted from

View File

@@ -61,6 +61,11 @@ def serialize_recipe(urn, recipe_class):
 def serialize_collection(mapping_of_recipe_classes):
     collection = E.recipe_collection()
+    '''for u, x in mapping_of_recipe_classes.items():
+        print 11111, u, repr(x.title)
+        if isinstance(x.title, str):
+            x.title.decode('ascii')
+    '''
     for urn in sorted(mapping_of_recipe_classes.keys(),
             key=lambda key: getattr(mapping_of_recipe_classes[key], 'title',
                 'zzz')):