From d6edba76cdbb0729beac475c8de4324aa7a7245c Mon Sep 17 00:00:00 2001 From: Timothy Legge Date: Sun, 21 Nov 2010 22:06:41 -0400 Subject: [PATCH 1/8] Fix missing table in deleting books process for Kobo WiFi and Kobo-O 1.8 Beta --- src/calibre/devices/kobo/driver.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/calibre/devices/kobo/driver.py b/src/calibre/devices/kobo/driver.py index 934dc0879e..174441c276 100644 --- a/src/calibre/devices/kobo/driver.py +++ b/src/calibre/devices/kobo/driver.py @@ -229,6 +229,10 @@ class KOBO(USBMS): #Delete the volume_shortcovers second cursor.execute('delete from volume_shortcovers where volumeid = ?', t) + # Delete the rows from content_keys + if self.dbversion >= 8: + cursor.execute('delete from content_keys where volumeid = ?', t) + # Delete the chapters associated with the book next t = (ContentID,ContentID,) cursor.execute('delete from content where BookID = ? or ContentID = ?', t) From 27d52c02eb32b5268aabc1b9d6fc51b9d6af2580 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 21 Nov 2010 19:18:33 -0700 Subject: [PATCH 2/8] Improved telepolis --- resources/recipes/telepolis.recipe | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/resources/recipes/telepolis.recipe b/resources/recipes/telepolis.recipe index 1009dca275..4ca57f8275 100644 --- a/resources/recipes/telepolis.recipe +++ b/resources/recipes/telepolis.recipe @@ -3,12 +3,12 @@ __license__ = 'GPL v3' __copyright__ = '2009, Gerhard Aigner ' -''' http://www.derstandard.at - Austrian Newspaper ''' + import re from calibre.web.feeds.news import BasicNewsRecipe class TelepolisNews(BasicNewsRecipe): - title = u'Telepolis (News)' + title = u'Telepolis (News+Artikel)' __author__ = 'Gerhard Aigner' publisher = 'Heise Zeitschriften Verlag GmbH & Co KG' description = 'News from telepolis' @@ -20,16 +20,16 @@ class TelepolisNews(BasicNewsRecipe): encoding = "utf-8" language = 'de_AT' - use_embedded_content = False + 
use_embedded_content =False remove_empty_feeds = True preprocess_regexps = [(re.compile(r']*>', re.DOTALL|re.IGNORECASE), lambda match: ''), (re.compile(r'', re.DOTALL|re.IGNORECASE), lambda match: ''),] - keep_only_tags = [dict(name = 'table',attrs={'class':'blogtable'})] - remove_tags = [dict(name='img'), dict(name='td',attrs={'class':'blogbottom'})] + keep_only_tags = [dict(name = 'td',attrs={'class':'bloghead'}),dict(name = 'td',attrs={'class':'blogfliess'})] + remove_tags = [dict(name='img'), dict(name='td',attrs={'class':'blogbottom'}), dict(name='td',attrs={'class':'forum'})] - feeds = [(u'News', u'http://www.heise.de/tp/news.rdf')] + feeds = [(u'News', u'http://www.heise.de/tp/news-atom.xml')] html2lrf_options = [ '--comment' , description @@ -41,7 +41,7 @@ class TelepolisNews(BasicNewsRecipe): def get_article_url(self, article): '''if the linked article is of kind artikel don't take it''' - if (article.link.count('artikel') > 0) : + if (article.link.count('artikel') > 1) : return None return article.link @@ -49,3 +49,5 @@ class TelepolisNews(BasicNewsRecipe): mtag = '' soup.head.insert(0,mtag) return soup + + From 2dbc7058f2c559427f2d6eeb71f88ad3d1e786e5 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 22 Nov 2010 12:06:50 -0700 Subject: [PATCH 3/8] Improve Revista Muy Intersante --- resources/recipes/revista_muy.recipe | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/resources/recipes/revista_muy.recipe b/resources/recipes/revista_muy.recipe index e452a6f053..b101fe97ce 100644 --- a/resources/recipes/revista_muy.recipe +++ b/resources/recipes/revista_muy.recipe @@ -108,3 +108,10 @@ class RevistaMuyInteresante(BasicNewsRecipe): feeds.append((title, articles)) return feeds + def get_cover_url(self): + index = 'http://www.muyinteresante.es/revista' + soup = self.index_to_soup(index) + link_item = soup.find('img',attrs={'class':'img_portada'}) + if link_item: + cover_url = "http://www.muyinteresante.es"+link_item['src'] + return cover_url From 
0503a2f6523c93122c8eaf3cf10516b2d9f7d23b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 22 Nov 2010 12:24:35 -0700 Subject: [PATCH 4/8] Various Spanish news sources by Gustavo Azambuja --- resources/recipes/180.recipe | 50 ++++++++++ resources/recipes/bitacora.recipe | 58 ++++++++++++ resources/recipes/cosmopolitan.recipe | 69 ++++++++++++++ resources/recipes/el_pais_uy.recipe | 67 +++++++++++++ resources/recipes/freeway.recipe | 100 ++++++++++++++++++++ resources/recipes/la_diaria.recipe | 48 ++++++++++ resources/recipes/la_razon_bo.recipe | 2 +- resources/recipes/montevideo_com.recipe | 56 +++++++++++ resources/recipes/observa_digital.recipe | 63 ++++++++++++ resources/recipes/revista_bla.recipe | 54 +++++++++++ src/calibre/web/feeds/recipes/collection.py | 5 + 11 files changed, 571 insertions(+), 1 deletion(-) create mode 100644 resources/recipes/180.recipe create mode 100644 resources/recipes/bitacora.recipe create mode 100644 resources/recipes/cosmopolitan.recipe create mode 100644 resources/recipes/el_pais_uy.recipe create mode 100644 resources/recipes/freeway.recipe create mode 100644 resources/recipes/la_diaria.recipe create mode 100644 resources/recipes/montevideo_com.recipe create mode 100644 resources/recipes/observa_digital.recipe create mode 100644 resources/recipes/revista_bla.recipe diff --git a/resources/recipes/180.recipe b/resources/recipes/180.recipe new file mode 100644 index 0000000000..5158bb99e0 --- /dev/null +++ b/resources/recipes/180.recipe @@ -0,0 +1,50 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = '2010, Gustavo Azambuja ' +''' +180.com.uy +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Noticias(BasicNewsRecipe): + title = '180.com.uy' + __author__ = 'Gustavo Azambuja' + description = 'Noticias de Uruguay' + language = 'es' + timefmt = '[%a, %d %b, %Y]' + use_embedded_content = False + recursion = 5 + encoding = 'utf-8' + remove_javascript = True + no_stylesheets = True + + oldest_article = 
2 + max_articles_per_feed = 100 + keep_only_tags = [dict(name='div', attrs={'class':'tef-md tef-md-seccion-sociedad'})] + remove_tags = [ + dict(name=['object','link']) + ] + + remove_attributes = ['width','height', 'style', 'font', 'color'] + + extra_css = ''' + h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;} + h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;} + h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;} + p {font-family:Arial,Helvetica,sans-serif;} + ''' + feeds = [ + (u'Titulares', u'http://www.180.com.uy/feed.php') + ] + + def get_cover_url(self): + return 'http://www.180.com.uy/tplef/img/logo.gif' + + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup + diff --git a/resources/recipes/bitacora.recipe b/resources/recipes/bitacora.recipe new file mode 100644 index 0000000000..a36eb52988 --- /dev/null +++ b/resources/recipes/bitacora.recipe @@ -0,0 +1,58 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = '2010, Gustavo Azambuja ' +''' +bitacora.com.uy +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class General(BasicNewsRecipe): + title = 'bitacora.com.uy' + __author__ = 'Gustavo Azambuja' + description = 'Noticias de Uruguay' + language = 'es' + timefmt = '[%a, %d %b, %Y]' + use_embedded_content = False + recursion = 5 + encoding = 'iso-8859-1' + remove_javascript = True + no_stylesheets = True + + oldest_article = 2 + max_articles_per_feed = 100 + keep_only_tags = [dict(id=['txt'])] + remove_tags = [ + dict(name='div', attrs={'class':'tablafoot'}), + dict(name=['object','h4']), + dict(name=['object','link']) + ] + + remove_attributes = ['width','height', 'style', 'font', 'color'] + + extra_css = ''' + h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;} + h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, 
sans-serif;font-weight: bold;} + h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;} + p {font-family:Arial,Helvetica,sans-serif;} + ''' + feeds = [ + (u'Titulares', u'http://www.bitacora.com.uy/anxml.cgi?15') + ] + + def get_cover_url(self): + cover_url = None + index = 'http://www.bitacora.com.uy' + soup = self.index_to_soup(index) + link_item = soup.find('img',attrs={'class':'imgtapa'}) + if link_item: + cover_url = "http://www.bitacora.com.uy/"+link_item['src'] + return cover_url + + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup + diff --git a/resources/recipes/cosmopolitan.recipe b/resources/recipes/cosmopolitan.recipe new file mode 100644 index 0000000000..d7d3db7e0c --- /dev/null +++ b/resources/recipes/cosmopolitan.recipe @@ -0,0 +1,69 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = '2010, Gustavo Azambuja ' +''' +Muy Interesante +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class General(BasicNewsRecipe): + title = 'Cosmopolitan' + __author__ = 'Gustavo Azambuja' + description = 'Revista Cosmopolitan, Edicion Espanola' + language = 'es' + timefmt = '[%a, %d %b, %Y]' + use_embedded_content = False + recursion = 1 + encoding = 'utf8' + remove_javascript = True + no_stylesheets = True + conversion_options = {'linearize_tables': True} + + oldest_article = 180 + max_articles_per_feed = 100 + keep_only_tags = [ + dict(id=['contenido']), + dict(name='td', attrs={'class':['contentheading', 'txt_articulo']}) + ] + remove_tags = [ + dict(name='div', attrs={'class':['breadcrumb', 'bloque1', 'article', 'bajo_title', 'tags_articles', 'otrosenlaces_title', 'otrosenlaces_parent', 'compartir']}), + dict(name='div', attrs={'id':'comment'}), + dict(name='table', attrs={'class':'pagenav'}), + dict(name=['object','link']) + ] + remove_attributes = ['width','height', 'style', 'font', 'color'] + + extra_css = ''' + h1{font-family:Geneva, Arial, 
Helvetica, sans-serif;color:#154B7A;} + h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;} + h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;} + img {float:left; clear:both; margin:10px} + p {font-family:Arial,Helvetica,sans-serif;} + ''' + feeds = [ + (u'Articulos', u'http://feeds.feedburner.com/cosmohispano/FSSt') + ] + + def preprocess_html(self, soup): + attribs = [ 'style','font','valign' + ,'colspan','width','height' + ,'rowspan','summary','align' + ,'cellspacing','cellpadding' + ,'frames','rules','border' + ] + for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']): + item.name = 'div' + for attrib in attribs: + if item.has_key(attrib): + del item[attrib] + return soup + + def get_cover_url(self): + index = 'http://www.cosmohispano.com/revista' + soup = self.index_to_soup(index) + link_item = soup.find('img',attrs={'class':'img_portada'}) + if link_item: + cover_url = "http://www.cosmohispano.com"+link_item['src'] + return cover_url diff --git a/resources/recipes/el_pais_uy.recipe b/resources/recipes/el_pais_uy.recipe new file mode 100644 index 0000000000..b474b5c232 --- /dev/null +++ b/resources/recipes/el_pais_uy.recipe @@ -0,0 +1,67 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = '2010, Gustavo Azambuja ' +''' +http://www.elpais.com.uy/ +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class General(BasicNewsRecipe): + title = 'Diario El Pais' + __author__ = 'Gustavo Azambuja' + description = 'Noticias | Uruguay' + language = 'es' + timefmt = '[%a, %d %b, %Y]' + use_embedded_content = False + recursion = 2 + encoding = 'iso-8859-1' + remove_javascript = True + no_stylesheets = True + + oldest_article = 2 + max_articles_per_feed = 100 + keep_only_tags = [ + dict(name='h1'), + dict(name='div', attrs={'id':'Contenido'}) + ] + remove_tags = [ + dict(name='div', 
attrs={'class':['date_text', 'comments', 'form_section', 'share_it']}), + dict(name='div', attrs={'id':['relatedPosts', 'spacer', 'banner_izquierda', 'right_container']}), + dict(name='p', attrs={'class':'FacebookLikeButton'}), + dict(name=['object','form']), + dict(name=['object','table']) ] + + extra_css = ''' + h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;} + h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;} + h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;} + p {font-family:Arial,Helvetica,sans-serif;} + ''' + feeds = [ + (u'Ultimo Momento', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=umomento'), + (u'Editorial', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=editorial'), + (u'Nacional', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=nacional'), + (u'Internacional', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=internacional'), + (u'Espectaculos', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=espectaculos'), + (u'Deportes', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=deportes'), + (u'Ciudades', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=ciudades'), + (u'Economia', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=economia') + ] + + def get_cover_url(self): + cover_url = None + index = 'http://www.elpais.com.uy' + soup = self.index_to_soup(index) + link_item = soup.find('div',attrs={'class':'boxmedio box257'}) + print link_item + if link_item: + cover_url = 'http://www.elpais.com.uy'+link_item.img['src'] + return cover_url + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup + diff --git a/resources/recipes/freeway.recipe b/resources/recipes/freeway.recipe new file mode 100644 index 0000000000..cb6d41ebb2 --- /dev/null +++ b/resources/recipes/freeway.recipe @@ -0,0 +1,100 @@ +#!/usr/bin/env python + 
+__license__ = 'GPL v3' +__author__ = '2010, Gustavo Azambuja ' +''' +http://freeway.com.uy +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class General(BasicNewsRecipe): + title = 'freeway.com.uy' + __author__ = 'Gustavo Azambuja' + description = 'Revista Freeway, Montevideo, Uruguay' + language = 'es' + timefmt = '[%a, %d %b, %Y]' + use_embedded_content = False + recursion = 1 + encoding = 'utf8' + remove_javascript = True + no_stylesheets = True + conversion_options = {'linearize_tables': True} + + oldest_article = 180 + max_articles_per_feed = 100 + keep_only_tags = [ + dict(id=['contenido']), + dict(name='a', attrs={'class':'titulo_art_ppal'}), + dict(name='img', attrs={'class':'recuadro'}), + dict(name='td', attrs={'class':'txt_art_ppal'}) + ] + remove_tags = [ + dict(name=['object','link']) + ] + remove_attributes = ['width','height', 'style', 'font', 'color'] + + extra_css = ''' + h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;} + h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;} + h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;} + img {float:left; clear:both; margin:10px} + p {font-family:Arial,Helvetica,sans-serif;} + ''' + + def parse_index(self): + feeds = [] + for title, url in [('Articulos', 'http://freeway.com.uy/revista/')]: + articles = self.art_parse_section(url) + if articles: + feeds.append((title, articles)) + return feeds + + def art_parse_section(self, url): + soup = self.index_to_soup(url) + div = soup.find(attrs={'id': 'tbl_1'}) + + current_articles = [] + for tag in div.findAllNext(attrs = {'class': 'ancho_articulos'}): + if tag.get('class') == 'link-list-heading': + break + for td in tag.findAll('td'): + a = td.find('a', attrs= {'class': 'titulo_articulos'}) + if a is None: + continue + title = self.tag_to_string(a) + url = a.get('href', False) + if not url or not title: + continue + if url.startswith('/'): + url = 
'http://freeway.com.uy'+url + p = td.find('p', attrs= {'class': 'txt_articulos'}) + description = self.tag_to_string(p) + self.log('\t\tFound article:', title) + self.log('\t\t\t', url) + self.log('\t\t\t', description) + current_articles.append({'title': title, 'url': url, 'description':description, 'date':''}) + + return current_articles + + def preprocess_html(self, soup): + attribs = [ 'style','font','valign' + ,'colspan','width','height' + ,'rowspan','summary','align' + ,'cellspacing','cellpadding' + ,'frames','rules','border' + ] + for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']): + item.name = 'div' + for attrib in attribs: + if item.has_key(attrib): + del item[attrib] + return soup + + def get_cover_url(self): + #index = 'http://www.cosmohispano.com/revista' + #soup = self.index_to_soup(index) + #link_item = soup.find('img',attrs={'class':'img_portada'}) + #if link_item: + # cover_url = "http://www.cosmohispano.com"+link_item['src'] + return 'http://freeway.com.uy/_upload/_n_foto_grande/noticia_1792_tapanoviembre2010.jpg' diff --git a/resources/recipes/la_diaria.recipe b/resources/recipes/la_diaria.recipe new file mode 100644 index 0000000000..d89eb465dd --- /dev/null +++ b/resources/recipes/la_diaria.recipe @@ -0,0 +1,48 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = '2010, Gustavo Azambuja ' +''' +ladiaria.com.uy +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class General(BasicNewsRecipe): + title = 'La Diaria' + __author__ = 'Gustavo Azambuja' + description = 'Noticias de Uruguay' + language = 'es' + timefmt = '[%a, %d %b, %Y]' + use_embedded_content = False + recursion = 5 + encoding = 'utf8' + remove_javascript = True + no_stylesheets = True + + oldest_article = 2 + max_articles_per_feed = 100 + keep_only_tags = [dict(id=['article'])] + remove_tags = [ + dict(name='div', attrs={'class':['byline', 'hr', 'titlebar', 'volver-arriba-right']}), + 
dict(name='div', attrs={'id':'discussion'}), + dict(name=['object','link']) + ] + + extra_css = ''' + h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;} + h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;} + h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;} + p {font-family:Arial,Helvetica,sans-serif;} + ''' + feeds = [ + (u'Articulos', u'http://ladiaria.com/feeds/articulos') + ] + + def get_cover_url(self): + return 'http://ladiaria.com/edicion/imagenportada/' + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup diff --git a/resources/recipes/la_razon_bo.recipe b/resources/recipes/la_razon_bo.recipe index b7cd59d043..18a00d6763 100644 --- a/resources/recipes/la_razon_bo.recipe +++ b/resources/recipes/la_razon_bo.recipe @@ -8,7 +8,7 @@ from calibre import strftime from calibre.web.feeds.news import BasicNewsRecipe class LaRazon_Bol(BasicNewsRecipe): - title = 'La Razón - Bolivia' + title = u'La Razón - Bolivia' __author__ = 'Darko Miletic' description = 'El diario nacional de Bolivia' publisher = 'Praxsis S.R.L.' 
diff --git a/resources/recipes/montevideo_com.recipe b/resources/recipes/montevideo_com.recipe new file mode 100644 index 0000000000..cabd4181d6 --- /dev/null +++ b/resources/recipes/montevideo_com.recipe @@ -0,0 +1,56 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = '2010, Gustavo Azambuja ' +''' +http://www.montevideo.com.uy +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Noticias(BasicNewsRecipe): + title = 'Montevideo COMM' + __author__ = 'Gustavo Azambuja' + description = 'Noticias de Uruguay' + language = 'es' + timefmt = '[%a, %d %b, %Y]' + use_embedded_content = False + recursion = 5 + encoding = 'utf-8' + remove_javascript = True + no_stylesheets = True + + oldest_article = 2 + max_articles_per_feed = 100 + keep_only_tags = [dict(id=['txt'])] + remove_tags = [ + dict(name=['object','link']) + ] + + remove_attributes = ['width','height', 'style', 'font', 'color'] + + extra_css = ''' + h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;} + h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;} + h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;} + p {font-family:Arial,Helvetica,sans-serif;} + ''' + feeds = [ + (u'Destacados', u'http://www.montevideo.com.uy/anxml.aspx?58'), + (u'Noticias', u'http://www.montevideo.com.uy/anxml.aspx?59'), + (u'Tecnologia', u'http://www.montevideo.com.uy/anxml.aspx?133'), + (u'Tiempo Libre', u'http://www.montevideo.com.uy/anxml.aspx?60'), + # (u'Deportes', u'http://www.montevideo.com.uy/anxml.aspx?968'), + # (u'Pantallazo', u'http://www.montevideo.com.uy/anxml.aspx?1022'), + (u'Gastronomia', u'http://www.montevideo.com.uy/anxml.aspx?1023') + ] + + def get_cover_url(self): + return 'http://sphotos.ak.fbcdn.net/hphotos-ak-snc1/hs276.snc1/10319_147339559330_147337559330_2625816_6636564_n.jpg' + + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return 
soup + diff --git a/resources/recipes/observa_digital.recipe b/resources/recipes/observa_digital.recipe new file mode 100644 index 0000000000..375d67236c --- /dev/null +++ b/resources/recipes/observa_digital.recipe @@ -0,0 +1,63 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = '2010, Gustavo Azambuja ' +''' +observa.com.uy +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Noticias(BasicNewsRecipe): + title = 'Observa Digital' + __author__ = '2010, Gustavo Azambuja ' + description = 'Noticias desde Uruguay' + language = 'es' + timefmt = '[%a, %d %b, %Y]' + use_embedded_content = False + recursion = 5 + encoding = 'utf8' + remove_javascript = True + no_stylesheets = True + + oldest_article = 2 + max_articles_per_feed = 100 + keep_only_tags = [dict(id=['contenido'])] + remove_tags = [ + dict(name='div', attrs={'id':'contenedorVinculadas'}), + dict(name='p', attrs={'id':'nota_firma'}), + dict(name=['object','link']) + ] + + remove_attributes = ['width','height', 'style', 'font', 'color'] + + extra_css = ''' + h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;} + h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;} + h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;} + p {font-family:Arial,Helvetica,sans-serif;} + ''' + feeds = [ + (u'Actualidad', u'http://www.observa.com.uy/RSS/actualidad.xml'), + (u'Deportes', u'http://www.observa.com.uy/RSS/deportes.xml'), + (u'Vida', u'http://www.observa.com.uy/RSS/vida.xml'), + (u'Ciencia y Tecnologia', u'http://www.observa.com.uy/RSS/ciencia.xml') + ] + + def get_cover_url(self): + cover_url = None + index = 'http://www.elobservador.com.uy/elobservador/nav_portada.asp?suplemento=dia' + soup = self.index_to_soup(index) + link_item = soup.find('img',attrs={'usemap':'#mapeo_imagenes'}) + if link_item: + cover_url = 'http://www.elobservador.com.uy'+link_item['src'].strip() + + print cover_url + + 
return cover_url + + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup diff --git a/resources/recipes/revista_bla.recipe b/resources/recipes/revista_bla.recipe new file mode 100644 index 0000000000..15c7e7fb3f --- /dev/null +++ b/resources/recipes/revista_bla.recipe @@ -0,0 +1,54 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = '2010, Gustavo Azambuja ' +''' +http://www.revistabla.com +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Noticias(BasicNewsRecipe): + title = 'Revista Bla' + __author__ = 'Gustavo Azambuja' + description = 'Moda | Uruguay' + language = 'es' + timefmt = '[%a, %d %b, %Y]' + use_embedded_content = False + recursion = 5 + encoding = 'utf8' + remove_javascript = True + no_stylesheets = True + + oldest_article = 20 + max_articles_per_feed = 100 + keep_only_tags = [dict(id=['body_container'])] + remove_tags = [ + dict(name='div', attrs={'class':['date_text', 'comments', 'form_section', 'share_it']}), + dict(name='div', attrs={'id':['relatedPosts', 'spacer', 'banner_izquierda', 'right_container']}), + dict(name='p', attrs={'class':'FacebookLikeButton'}), + dict(name=['object','link']) ] + + extra_css = ''' + h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;} + h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;} + h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;} + p {font-family:Arial,Helvetica,sans-serif;} + ''' + feeds = [ + (u'Articulos', u'http://www.revistabla.com/feed/') + ] + + def get_cover_url(self): + cover_url = None + index = 'http://www.revistabla.com' + soup = self.index_to_soup(index) + link_item = soup.find('div',attrs={'class':'header_right'}) + if link_item: + cover_url = link_item.img['src'] + return cover_url + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup diff --git 
a/src/calibre/web/feeds/recipes/collection.py b/src/calibre/web/feeds/recipes/collection.py index 5b34ddab0b..a513cf3880 100644 --- a/src/calibre/web/feeds/recipes/collection.py +++ b/src/calibre/web/feeds/recipes/collection.py @@ -61,6 +61,11 @@ def serialize_recipe(urn, recipe_class): def serialize_collection(mapping_of_recipe_classes): collection = E.recipe_collection() + '''for u, x in mapping_of_recipe_classes.items(): + print 11111, u, repr(x.title) + if isinstance(x.title, str): + x.title.decode('ascii') + ''' for urn in sorted(mapping_of_recipe_classes.keys(), key=lambda key: getattr(mapping_of_recipe_classes[key], 'title', 'zzz')): From 7ff09a5d0c1357614bad3dbb7caf844bccfc3b24 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 22 Nov 2010 12:40:56 -0700 Subject: [PATCH 5/8] Speedup for bibtex catalog generation. Only show HTML comment customization if the download plugin says it gets HTML comments --- src/calibre/ebooks/metadata/fetch.py | 15 ++-- src/calibre/ebooks/metadata/isbndb.py | 17 +--- src/calibre/library/catalog.py | 42 ++++----- src/calibre/utils/bibtex.py | 124 +++++++++++++------------- src/calibre/utils/mreplace.py | 32 +++++++ 5 files changed, 128 insertions(+), 102 deletions(-) create mode 100644 src/calibre/utils/mreplace.py diff --git a/src/calibre/ebooks/metadata/fetch.py b/src/calibre/ebooks/metadata/fetch.py index b6b3fb9c40..b797a477d6 100644 --- a/src/calibre/ebooks/metadata/fetch.py +++ b/src/calibre/ebooks/metadata/fetch.py @@ -145,18 +145,21 @@ class MetadataSource(Plugin): # {{{ setattr(w, '_'+x, cb) cb.setChecked(c.get(x, True)) w._layout.addWidget(cb) - - cb = QCheckBox(_('Convert comments downloaded from %s to plain text')%(self.name)) - setattr(w, '_textcomments', cb) - cb.setChecked(c.get('textcomments', False)) - w._layout.addWidget(cb) + + if self.has_html_comments: + cb = QCheckBox(_('Convert comments downloaded from %s to plain text')%(self.name)) + setattr(w, '_textcomments', cb) + cb.setChecked(c.get('textcomments', 
False)) + w._layout.addWidget(cb) return w def save_settings(self, w): dl_settings = {} - for x in ('rating', 'tags', 'comments', 'textcomments'): + for x in ('rating', 'tags', 'comments'): dl_settings[x] = getattr(w, '_'+x).isChecked() + if self.has_html_comments: + dl_settings['textcomments'] = getattr(w, '_textcomments').isChecked() c = self.config_store() c.set(self.name, dl_settings) if hasattr(w, '_sc'): diff --git a/src/calibre/ebooks/metadata/isbndb.py b/src/calibre/ebooks/metadata/isbndb.py index 83cf6ee0ed..9169227326 100644 --- a/src/calibre/ebooks/metadata/isbndb.py +++ b/src/calibre/ebooks/metadata/isbndb.py @@ -90,10 +90,8 @@ def build_isbn(base_url, opts): return base_url + 'index1=isbn&value1='+opts.isbn def build_combined(base_url, opts): - query = '' - for e in (opts.title, opts.author, opts.publisher): - if e is not None: - query += ' ' + e + query = ' '.join([e for e in (opts.title, opts.author, opts.publisher) \ + if e is not None ]) query = query.strip() if len(query) == 0: raise ISBNDBError('You must specify at least one of --author, --title or --publisher') @@ -141,15 +139,8 @@ def create_books(opts, args, timeout=5.): print ('ISBNDB query: '+url) tans = [ISBNDBMetadata(book) for book in fetch_metadata(url, timeout=timeout)] - ans = [] - for x in tans: - add = True - for y in ans: - if y.isbn == x.isbn: - add = False - if add: - ans.append(x) - return ans + #remove duplicates ISBN + return list(dict((book.isbn, book) for book in tans).values()) def main(args=sys.argv): parser = option_parser() diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py index 19519d6d71..33525f6540 100644 --- a/src/calibre/library/catalog.py +++ b/src/calibre/library/catalog.py @@ -278,10 +278,10 @@ class BIBTEX(CatalogPlugin): from calibre.library.save_to_disk import preprocess_template #Bibtex functions - from calibre.utils.bibtex import bibtex_author_format, utf8ToBibtex, ValidateCitationKey + from calibre.utils.bibtex import BibTeX def 
create_bibtex_entry(entry, fields, mode, template_citation, - asccii_bibtex = True, citation_bibtex = True): + bibtexdict, citation_bibtex = True): #Bibtex doesn't like UTF-8 but keep unicode until writing #Define starting chain or if book valid strict and not book return a Fail string @@ -297,7 +297,8 @@ class BIBTEX(CatalogPlugin): if citation_bibtex : # Citation tag - bibtex_entry.append(make_bibtex_citation(entry, template_citation, asccii_bibtex)) + bibtex_entry.append(make_bibtex_citation(entry, template_citation, + bibtexdict)) bibtex_entry = [u' '.join(bibtex_entry)] for field in fields: @@ -312,11 +313,11 @@ class BIBTEX(CatalogPlugin): pass if field == 'authors' : - bibtex_entry.append(u'author = "%s"' % bibtex_author_format(item)) + bibtex_entry.append(u'author = "%s"' % bibtexdict.bibtex_author_format(item)) elif field in ['title', 'publisher', 'cover', 'uuid', 'author_sort', 'series'] : - bibtex_entry.append(u'%s = "%s"' % (field, utf8ToBibtex(item, asccii_bibtex))) + bibtex_entry.append(u'%s = "%s"' % (field, bibtexdict.utf8ToBibtex(item))) elif field == 'id' : bibtex_entry.append(u'calibreid = "%s"' % int(item)) @@ -329,13 +330,13 @@ class BIBTEX(CatalogPlugin): elif field == 'tags' : #A list to flatten - bibtex_entry.append(u'tags = "%s"' % utf8ToBibtex(u', '.join(item), asccii_bibtex)) + bibtex_entry.append(u'tags = "%s"' % bibtexdict.utf8ToBibtex(u', '.join(item))) elif field == 'comments' : #\n removal item = item.replace(u'\r\n',u' ') item = item.replace(u'\n',u' ') - bibtex_entry.append(u'note = "%s"' % utf8ToBibtex(item, asccii_bibtex)) + bibtex_entry.append(u'note = "%s"' % bibtexdict.utf8ToBibtex(item)) elif field == 'isbn' : # Could be 9, 10 or 13 digits @@ -353,8 +354,7 @@ class BIBTEX(CatalogPlugin): elif field == 'pubdate' : bibtex_entry.append(u'year = "%s"' % item.year) - bibtex_entry.append(u'month = "%s"' % utf8ToBibtex(strftime("%b", item), - asccii_bibtex)) + bibtex_entry.append(u'month = "%s"' % 
bibtexdict.utf8ToBibtex(strftime("%b", item))) bibtex_entry = u',\n '.join(bibtex_entry) bibtex_entry += u' }\n\n' @@ -371,7 +371,7 @@ class BIBTEX(CatalogPlugin): else : return True - def make_bibtex_citation(entry, template_citation, asccii_bibtex): + def make_bibtex_citation(entry, template_citation, bibtexclass): #define a function to replace the template entry by its value def tpl_replace(objtplname) : @@ -392,8 +392,9 @@ class BIBTEX(CatalogPlugin): return u'' if len(template_citation) >0 : - tpl_citation = utf8ToBibtex(ValidateCitationKey(re.sub(u'\{[^{}]*\}', - tpl_replace, template_citation)), asccii_bibtex) + tpl_citation = bibtexclass.utf8ToBibtex( + bibtexclass.ValidateCitationKey(re.sub(u'\{[^{}]*\}', + tpl_replace, template_citation))) if len(tpl_citation) >0 : return tpl_citation @@ -404,10 +405,7 @@ class BIBTEX(CatalogPlugin): else : template_citation = u'%s' % str(entry["id"]) - if asccii_bibtex : - return ValidateCitationKey(template_citation.encode('ascii', 'replace')) - else : - return ValidateCitationKey(template_citation) + return bibtexclass.ValidateCitationKey(template_citation) self.fmt = path_to_output.rpartition('.')[2] self.notification = notification @@ -475,13 +473,16 @@ class BIBTEX(CatalogPlugin): if not len(data): log.error("\nNo matching database entries for search criteria '%s'" % opts.search_text) + #Initialize BibTeX class + bibtexc = BibTeX() + #Entries writing after Bibtex formating (or not) if bibfile_enc != 'ascii' : - asccii_bibtex = False + bibtexc.ascii_bibtex = False else : - asccii_bibtex = True + bibtexc.ascii_bibtex = True - #Check and go to default in case of bad CLI + #Check citation choice and go to default in case of bad CLI if isinstance(opts.impcit, (StringType, UnicodeType)) : if opts.impcit == 'False' : citation_bibtex= False @@ -493,6 +494,7 @@ class BIBTEX(CatalogPlugin): else : citation_bibtex= opts.impcit + #Preprocess for error and light correction template_citation = preprocess_template(opts.bib_cit) 
#Open output and write entries @@ -514,7 +516,7 @@ class BIBTEX(CatalogPlugin): for entry in data: outfile.write(create_bibtex_entry(entry, fields, bib_entry, template_citation, - asccii_bibtex, citation_bibtex)) + bibtexc, citation_bibtex)) outfile.close() diff --git a/src/calibre/utils/bibtex.py b/src/calibre/utils/bibtex.py index 1328aa9157..d19a6b05fe 100644 --- a/src/calibre/utils/bibtex.py +++ b/src/calibre/utils/bibtex.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """ Collection of python utility-methodes commonly used by other bibliograph packages. From http://pypi.python.org/pypi/bibliograph.core/ @@ -62,11 +60,14 @@ DAMAGE. """ -__docformat__ = 'reStructuredText' __author__ = 'sengian ' +__docformat__ = 'restructuredtext en' import re, string +from calibre.constants import preferred_encoding +from calibre.utils.mreplace import MReplace + utf8enc2latex_mapping = { # This is a mapping of Unicode characters to LaTeX equivalents. # The information has been extracted from @@ -2463,7 +2464,7 @@ utf8enc2latex_mapping = { u'\U0001d7fd': '$\\mathtt{7}$', u'\U0001d7fe': '$\\mathtt{8}$', u'\U0001d7ff': '$\\mathtt{9}$', - + #Items from simple list u'\u0106': "{\\a\\'C}", u'\u0408': '{\\CYRJE}', @@ -2842,69 +2843,66 @@ entity_mapping = { '"':'{"}', } -def ValidateCitationKey(text): - """ - removes characters not allowed in BibTeX keys +class BibTeX: + def __init__(self): + self.rep_utf8 = MReplace(utf8enc2latex_mapping) + self.rep_ent = MReplace(entity_mapping) + #Set default conversion to ASCII BibTeX + self.ascii_bibtex = True + # This substitution is based on the description of cite key restrictions at + # http://bibdesk.sourceforge.net/manual/BibDesk%20Help_2.html + self.invalid_cit = re.compile(u'[ "@\',\\#}{~%&$^]') + self.upper = re.compile(u'[' + + string.uppercase.decode(preferred_encoding) + u']') + self.escape = re.compile(u'[~#&%_]') - >>> from bibliograph.core.utils import _validKey - >>> _validKey(DummyEntry('Foo Bar')) - 'FooBar' + def 
ValidateCitationKey(self, text): + """ + removes characters not allowed in BibTeX keys + >>> ValidateCitationKey(DummyEntry('my@id')) + 'myid' + """ + return self.invalid_cit.sub(u'', text) - >>> _validKey(DummyEntry('my@id')) - 'myid' + def braceUppercase(self, text): + """ Convert uppercase letters to bibtex encoded uppercase + >>> braceUppercase('Foo Bar') + '{F}oo {B}ar' + """ + return self.upper.sub(lambda m: u'{%s}' % m.group(), text) - """ - # This substitution is based on the description of cite key restrictions at - # http://bibdesk.sourceforge.net/manual/BibDesk%20Help_2.html - return re.sub(u'[ "@\',\\#}{~%&$^]', u'', text) + def resolveEntities(self, text): + #for entity, entity_map in entity_mapping.iteritems(): + # text = text.replace(entity, entity_map) + #return text + return self.rep_ent.mreplace(text) -def BraceUppercase(text): - """ Convert uppercase letters to bibtex encoded uppercase + def resolveUnicode(self, text): + #UTF-8 text as entry + #for unichar, latexenc in utf8enc2latex_mapping.iteritems() : + # text = text.replace(unichar, latexenc) + text = self.rep_utf8.mreplace(text) + return text.replace(u'$}{$', u'') - >>> from bibliograph.core.utils import _braceUppercase - >>> _braceUppercase('foo bar') - 'foo bar' + def escapeSpecialCharacters(self, text): + """ + latex escaping some (not all) special characters + """ + text.replace('\\', '\\\\') + return self.escape.sub(lambda m: u'\\%s' % m.group(), text) - >>> _braceUppercase('Foo Bar') - '{F}oo {B}ar' - """ - for uc in string.uppercase: - text = text.replace(uc, u'{%s}' % uc) - return text + #Calibre functions + #Option to go to official ASCII Bibtex or unofficial UTF-8 + #Go from an unicode entry to ASCII Bibtex format without encoding + def utf8ToBibtex(self, text): + if len(text) == 0: + return '' + text.replace('\\', '\\\\') + text = self.resolveEntities(text) + if self.ascii_bibtex : + text = self.resolveUnicode(text) + return self.escapeSpecialCharacters(text) -def 
resolveEntities(text): - for entity, entity_map in entity_mapping.iteritems(): - text = text.replace(entity, entity_map) - return text - -def resolveUnicode(text): - #UTF-8 text as entry - for unichar, latexenc in utf8enc2latex_mapping.iteritems() : - text = text.replace(unichar, latexenc) - return text.replace(u'$}{$', u'') - -def escapeSpecialCharacters(text): - """ - latex escaping some (not all) special characters - """ - text.replace('\\', '\\\\') - escape = ['~', '#', '&', '%', '_'] - for c in escape: - text = text.replace(c, '\\' + c ) - return text - -#Calibre functions -#Go from an unicode entry to ASCII Bibtex format without encoding -#Option to go to official ASCII Bibtex or unofficial UTF-8 -def utf8ToBibtex(text, asccii_bibtex = True): - if len(text) == 0: - return '' - text.replace('\\', '\\\\') - text = resolveEntities(text) - if asccii_bibtex : - text = resolveUnicode(text) - return escapeSpecialCharacters(text) - -def bibtex_author_format(item): - #Format authors for Bibtex compliance (get a list as input) - return utf8ToBibtex(u' and'.join([author for author in item])) + def bibtex_author_format(self, item): + #Format authors for Bibtex compliance (get a list as input) + return self.utf8ToBibtex(u' and'.join([author for author in item])) diff --git a/src/calibre/utils/mreplace.py b/src/calibre/utils/mreplace.py new file mode 100644 index 0000000000..b9fbc0bded --- /dev/null +++ b/src/calibre/utils/mreplace.py @@ -0,0 +1,32 @@ +#multiple replace from dictionnary : http://code.activestate.com/recipes/81330/ +__license__ = 'GPL v3' +__copyright__ = '2010, sengian ' +__docformat__ = 'restructuredtext en' + +import re +from UserDict import UserDict + +class MReplace(UserDict): + def __init__(self, dict = None): + UserDict.__init__(self, dict) + self.re = None + self.regex = None + self.compile_regex() + + def compile_regex(self): + if len(self.data) > 0: + keys = sorted(self.data.keys(), key=len) + keys.reverse() + tmp = "(%s)" % 
"|".join(map(re.escape, keys)) + if self.re != tmp: + self.re = tmp + self.regex = re.compile(self.re) + + def __call__(self, mo): + return self[mo.string[mo.start():mo.end()]] + + def mreplace(self, text): + #Replace without regex compile + if len(self.data) < 1 or self.re is None: + return text + return self.regex.sub(self, text) \ No newline at end of file From 7ba2b0d3cf58f5f759aec224fad2030d1537ae1e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 22 Nov 2010 15:37:35 -0700 Subject: [PATCH 6/8] Update windows binary build to use python 2.7 --- setup/installer/windows/freeze.py | 2 +- setup/installer/windows/notes.rst | 4 ++-- src/calibre/startup.py | 7 ++++++- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/setup/installer/windows/freeze.py b/setup/installer/windows/freeze.py index dc3dd33604..118b6690f0 100644 --- a/setup/installer/windows/freeze.py +++ b/setup/installer/windows/freeze.py @@ -132,7 +132,7 @@ class Win32Freeze(Command, WixMixIn): shutil.copytree(self.j(comext, 'shell'), self.j(sp_dir, 'win32com', 'shell')) shutil.rmtree(comext) - for pat in (r'numpy', r'PyQt4\uic\port_v3'): + for pat in (r'PyQt4\uic\port_v3', ): x = glob.glob(self.j(self.lib_dir, 'site-packages', pat))[0] shutil.rmtree(x) diff --git a/setup/installer/windows/notes.rst b/setup/installer/windows/notes.rst index 281cd8668e..45aa4d2afb 100644 --- a/setup/installer/windows/notes.rst +++ b/setup/installer/windows/notes.rst @@ -19,7 +19,7 @@ Set CMAKE_PREFIX_PATH environment variable to C:\cygwin\home\kovid\sw This is where all dependencies will be installed. 
-Add C:\Python26\Scripts and C:\Python26 to PATH +Add C:\Python27\Scripts and C:\Python27 to PATH Install setuptools from http://pypi.python.org/pypi/setuptools If there are no windows binaries already compiled for the version of python you are using then download the source and run the following command in the folder where the source has been unpacked:: @@ -28,7 +28,7 @@ If there are no windows binaries already compiled for the version of python you Run the following command to install python dependencies:: - easy_install --always-unzip -U ipython mechanize pyreadline python-dateutil dnspython cssutils clientform + easy_install --always-unzip -U ipython mechanize pyreadline python-dateutil dnspython cssutils clientform pycrypto Install BeautifulSoup 3.0.x manually into site-packages (3.1.x parses broken HTML very poorly) diff --git a/src/calibre/startup.py b/src/calibre/startup.py index e384153993..e74660d0bc 100644 --- a/src/calibre/startup.py +++ b/src/calibre/startup.py @@ -129,7 +129,7 @@ if not _run_once: def __getattribute__(self, attr): if attr in ('name', '__enter__', '__str__', '__unicode__', - '__repr__'): + '__repr__', '__exit__'): return object.__getattribute__(self, attr) fobject = object.__getattribute__(self, 'fobject') return getattr(fobject, attr) @@ -155,6 +155,11 @@ if not _run_once: fobject.__enter__() return self + def __exit__(self, *args): + fobject = object.__getattribute__(self, 'fobject') + return fobject.__exit__(*args) + + m = mode[0] random = len(mode) > 1 and mode[1] == '+' binary = mode[-1] == 'b' From f445ccaa1432b2803966930aad6a6a2850941e54 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 22 Nov 2010 17:03:05 -0700 Subject: [PATCH 7/8] Remove unnecessary calls to set_path when creating book records.
Speeds up record creation by about 30% on my system --- src/calibre/library/database2.py | 62 ++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 26 deletions(-) diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index 8e7002097a..44e7449295 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -1248,15 +1248,20 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): traceback.print_exc() else: raise + path_changed = False if set_title and mi.title: - self.set_title(id, mi.title, commit=False) + self._set_title(id, mi.title) + path_changed = True if set_authors: if not mi.authors: mi.authors = [_('Unknown')] authors = [] for a in mi.authors: authors += string_to_authors(a) - self.set_authors(id, authors, notify=False, commit=False) + self._set_authors(id, authors) + path_changed = True + if path_changed: + self.set_path(id, index_is_id=True) if mi.author_sort: doit(self.set_author_sort, id, mi.author_sort, notify=False, commit=False) @@ -1348,13 +1353,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): result.append(r) return ' & '.join(result).replace('|', ',') - def set_authors(self, id, authors, notify=True, commit=True): - ''' - Note that even if commit is False, the db will still be committed to - because this causes the location of files to change - - :param authors: A list of authors. - ''' + def _set_authors(self, id, authors): if not authors: authors = [_('Unknown')] self.conn.execute('DELETE FROM books_authors_link WHERE book=?',(id,)) @@ -1379,25 +1378,30 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): ss = self.author_sort_from_book(id, index_is_id=True) self.conn.execute('UPDATE books SET author_sort=? 
WHERE id=?', (ss, id)) - self.dirtied([id], commit=False) - if commit: - self.conn.commit() self.data.set(id, self.FIELD_MAP['authors'], ','.join([a.replace(',', '|') for a in authors]), row_is_id=True) self.data.set(id, self.FIELD_MAP['author_sort'], ss, row_is_id=True) + + def set_authors(self, id, authors, notify=True, commit=True): + ''' + Note that even if commit is False, the db will still be committed to + because this causes the location of files to change + + :param authors: A list of authors. + ''' + self._set_authors(id, authors) + self.dirtied([id], commit=False) + if commit: + self.conn.commit() self.set_path(id, index_is_id=True) if notify: self.notify('metadata', [id]) - def set_title(self, id, title, notify=True, commit=True): - ''' - Note that even if commit is False, the db will still be committed to - because this causes the location of files to change - ''' + def _set_title(self, id, title): if not title: - return - if not isinstance(title, unicode): + return False + if isbytestring(title): title = title.decode(preferred_encoding, 'replace') self.conn.execute('UPDATE books SET title=? 
WHERE id=?', (title, id)) self.data.set(id, self.FIELD_MAP['title'], title, row_is_id=True) @@ -1405,6 +1409,15 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): self.data.set(id, self.FIELD_MAP['sort'], title_sort(title), row_is_id=True) else: self.data.set(id, self.FIELD_MAP['sort'], title, row_is_id=True) + return True + + def set_title(self, id, title, notify=True, commit=True): + ''' + Note that even if commit is False, the db will still be committed to + because this causes the location of files to change + ''' + if not self._set_title(id, title): + return self.set_path(id, index_is_id=True) self.dirtied([id], commit=False) if commit: @@ -2072,13 +2085,12 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): (id, title, series_index, aus)) self.data.books_added([id], self) - self.set_path(id, True) - self.conn.commit() if mi.timestamp is None: mi.timestamp = utcnow() if mi.pubdate is None: mi.pubdate = utcnow() - self.set_metadata(id, mi, ignore_errors=True) + self.set_metadata(id, mi, ignore_errors=True, commit=False) + self.conn.commit() if cover is not None: try: self.set_cover(id, cover) @@ -2114,13 +2126,12 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): id = obj.lastrowid self.data.books_added([id], self) ids.append(id) - self.set_path(id, True) - self.conn.commit() if mi.timestamp is None: mi.timestamp = utcnow() if mi.pubdate is None: mi.pubdate = utcnow() - self.set_metadata(id, mi) + self.set_metadata(id, mi, commit=False) + self.conn.commit() npath = self.run_import_plugins(path, format) format = os.path.splitext(npath)[-1].lower().replace('.', '').upper() stream = lopen(npath, 'rb') @@ -2154,7 +2165,6 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): (title, series_index, aus)) id = obj.lastrowid self.data.books_added([id], self) - self.set_path(id, True) if mi.timestamp is None: mi.timestamp = utcnow() if mi.pubdate is None: From 
81e05df30412fabb180dbe94e17082473f8b5e37 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 22 Nov 2010 17:20:49 -0700 Subject: [PATCH 8/8] Add method to create a dummy library for testing --- src/calibre/library/__init__.py | 50 ++++++++++++++++++++++++++++++++ src/calibre/library/database2.py | 8 ++--- 2 files changed, 53 insertions(+), 5 deletions(-) diff --git a/src/calibre/library/__init__.py b/src/calibre/library/__init__.py index 0f8e5e5496..8ff23c0a0a 100644 --- a/src/calibre/library/__init__.py +++ b/src/calibre/library/__init__.py @@ -6,3 +6,53 @@ def db(path=None): from calibre.library.database2 import LibraryDatabase2 from calibre.utils.config import prefs return LibraryDatabase2(path if path else prefs['library_path']) + + +def generate_test_db(library_path, + num_of_records=20000, + num_of_authors=6000, + num_of_tags=10000, + tag_length=7, + author_length=7, + title_length=10, + max_authors=10, + max_tags=10 + ): + import random, string, os, sys, time + + if not os.path.exists(library_path): + os.makedirs(library_path) + + def randstr(length): + return ''.join(random.choice(string.letters) for i in + xrange(length)) + + all_tags = [randstr(tag_length) for j in xrange(num_of_tags)] + print 'Generated', num_of_tags, 'tags' + all_authors = [randstr(author_length) for j in xrange(num_of_authors)] + print 'Generated', num_of_authors, 'authors' + all_titles = [randstr(title_length) for j in xrange(num_of_records)] + print 'Generated', num_of_records, 'titles' + + testdb = db(library_path) + + print 'Creating', num_of_records, 'records...' 
+ + start = time.time() + + for i, title in enumerate(all_titles): + print i+1, + sys.stdout.flush() + authors = random.randint(1, max_authors) + authors = [random.choice(all_authors) for i in xrange(authors)] + tags = random.randint(0, max_tags) + tags = [random.choice(all_tags) for i in xrange(tags)] + from calibre.ebooks.metadata.book.base import Metadata + mi = Metadata(title, authors) + mi.tags = tags + testdb.import_book(mi, []) + + t = time.time() - start + print '\nGenerated', num_of_records, 'records in:', t, 'seconds' + print 'Time per record:', t/float(num_of_records) + diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index 44e7449295..21a54a4dd6 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -2089,8 +2089,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): mi.timestamp = utcnow() if mi.pubdate is None: mi.pubdate = utcnow() - self.set_metadata(id, mi, ignore_errors=True, commit=False) - self.conn.commit() + self.set_metadata(id, mi, ignore_errors=True, commit=True) if cover is not None: try: self.set_cover(id, cover) @@ -2130,8 +2129,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): mi.timestamp = utcnow() if mi.pubdate is None: mi.pubdate = utcnow() - self.set_metadata(id, mi, commit=False) - self.conn.commit() + self.set_metadata(id, mi, commit=True, ignore_errors=True) npath = self.run_import_plugins(path, format) format = os.path.splitext(npath)[-1].lower().replace('.', '').upper() stream = lopen(npath, 'rb') @@ -2169,7 +2167,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): mi.timestamp = utcnow() if mi.pubdate is None: mi.pubdate = utcnow() - self.set_metadata(id, mi, ignore_errors=True) + self.set_metadata(id, mi, ignore_errors=True, commit=True) if preserve_uuid and mi.uuid: self.set_uuid(id, mi.uuid, commit=False) for path in formats: