Merge from trunk

Sengian committed 2010-11-23 06:35:48 +01:00
commit 9653087ea0
22 changed files with 695 additions and 49 deletions

View File

@@ -0,0 +1,50 @@
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
180.com.uy
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Noticias(BasicNewsRecipe):
    title = '180.com.uy'
    __author__ = 'Gustavo Azambuja'
    description = 'Noticias de Uruguay'
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'
    use_embedded_content = False
    recursion = 5
    encoding = 'utf-8'
    remove_javascript = True
    no_stylesheets = True

    oldest_article = 2
    max_articles_per_feed = 100
    keep_only_tags = [dict(name='div', attrs={'class':'tef-md tef-md-seccion-sociedad'})]
    remove_tags = [
        dict(name=['object','link'])
    ]
    remove_attributes = ['width','height', 'style', 'font', 'color']

    extra_css = '''
        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
        p {font-family:Arial,Helvetica,sans-serif;}
    '''

    feeds = [
        (u'Titulares', u'http://www.180.com.uy/feed.php')
    ]

    def get_cover_url(self):
        return 'http://www.180.com.uy/tplef/img/logo.gif'

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup
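
Any of the recipes added in this commit can be smoke-tested from the command line; a minimal check, assuming calibre's standard ebook-convert tool is on PATH (the file names are examples, and --test restricts the fetch to a couple of articles per feed):

    ebook-convert 180_uy.recipe 180_uy.epub --test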

View File

@@ -0,0 +1,58 @@
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
bitacora.com.uy
'''

from calibre.web.feeds.news import BasicNewsRecipe

class General(BasicNewsRecipe):
    title = 'bitacora.com.uy'
    __author__ = 'Gustavo Azambuja'
    description = 'Noticias de Uruguay'
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'
    use_embedded_content = False
    recursion = 5
    encoding = 'iso-8859-1'
    remove_javascript = True
    no_stylesheets = True

    oldest_article = 2
    max_articles_per_feed = 100
    keep_only_tags = [dict(id=['txt'])]
    remove_tags = [
        dict(name='div', attrs={'class':'tablafoot'}),
        dict(name=['object','h4']),
        dict(name=['object','link'])
    ]
    remove_attributes = ['width','height', 'style', 'font', 'color']

    extra_css = '''
        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
        p {font-family:Arial,Helvetica,sans-serif;}
    '''

    feeds = [
        (u'Titulares', u'http://www.bitacora.com.uy/anxml.cgi?15')
    ]

    def get_cover_url(self):
        cover_url = None
        index = 'http://www.bitacora.com.uy'
        soup = self.index_to_soup(index)
        link_item = soup.find('img', attrs={'class':'imgtapa'})
        if link_item:
            cover_url = "http://www.bitacora.com.uy/" + link_item['src']
        return cover_url

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup

View File

@@ -0,0 +1,69 @@
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
http://www.cosmohispano.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class General(BasicNewsRecipe):
    title = 'Cosmopolitan'
    __author__ = 'Gustavo Azambuja'
    description = 'Revista Cosmopolitan, Edicion Espanola'
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'
    use_embedded_content = False
    recursion = 1
    encoding = 'utf8'
    remove_javascript = True
    no_stylesheets = True
    conversion_options = {'linearize_tables': True}

    oldest_article = 180
    max_articles_per_feed = 100
    keep_only_tags = [
        dict(id=['contenido']),
        dict(name='td', attrs={'class':['contentheading', 'txt_articulo']})
    ]
    remove_tags = [
        dict(name='div', attrs={'class':['breadcrumb', 'bloque1', 'article', 'bajo_title', 'tags_articles', 'otrosenlaces_title', 'otrosenlaces_parent', 'compartir']}),
        dict(name='div', attrs={'id':'comment'}),
        dict(name='table', attrs={'class':'pagenav'}),
        dict(name=['object','link'])
    ]
    remove_attributes = ['width','height', 'style', 'font', 'color']

    extra_css = '''
        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
        img {float:left; clear:both; margin:10px}
        p {font-family:Arial,Helvetica,sans-serif;}
    '''

    feeds = [
        (u'Articulos', u'http://feeds.feedburner.com/cosmohispano/FSSt')
    ]

    def preprocess_html(self, soup):
        # Flatten table markup into divs and strip layout attributes
        attribs = ['style','font','valign','colspan','width','height',
                   'rowspan','summary','align','cellspacing','cellpadding',
                   'frames','rules','border']
        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
            item.name = 'div'
            for attrib in attribs:
                if item.has_key(attrib):
                    del item[attrib]
        return soup

    def get_cover_url(self):
        cover_url = None  # avoid an unbound local if the cover image is missing
        index = 'http://www.cosmohispano.com/revista'
        soup = self.index_to_soup(index)
        link_item = soup.find('img', attrs={'class':'img_portada'})
        if link_item:
            cover_url = "http://www.cosmohispano.com" + link_item['src']
        return cover_url

View File

@@ -0,0 +1,67 @@
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
http://www.elpais.com.uy/
'''

from calibre.web.feeds.news import BasicNewsRecipe

class General(BasicNewsRecipe):
    title = 'Diario El Pais'
    __author__ = 'Gustavo Azambuja'
    description = 'Noticias | Uruguay'
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'
    use_embedded_content = False
    recursion = 2
    encoding = 'iso-8859-1'
    remove_javascript = True
    no_stylesheets = True

    oldest_article = 2
    max_articles_per_feed = 100
    keep_only_tags = [
        dict(name='h1'),
        dict(name='div', attrs={'id':'Contenido'})
    ]
    remove_tags = [
        dict(name='div', attrs={'class':['date_text', 'comments', 'form_section', 'share_it']}),
        dict(name='div', attrs={'id':['relatedPosts', 'spacer', 'banner_izquierda', 'right_container']}),
        dict(name='p', attrs={'class':'FacebookLikeButton'}),
        dict(name=['object','form','table'])
    ]

    extra_css = '''
        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
        p {font-family:Arial,Helvetica,sans-serif;}
    '''

    feeds = [
        (u'Ultimo Momento', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=umomento'),
        (u'Editorial', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=editorial'),
        (u'Nacional', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=nacional'),
        (u'Internacional', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=internacional'),
        (u'Espectaculos', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=espectaculos'),
        (u'Deportes', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=deportes'),
        (u'Ciudades', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=ciudades'),
        (u'Economia', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=economia')
    ]

    def get_cover_url(self):
        cover_url = None
        index = 'http://www.elpais.com.uy'
        soup = self.index_to_soup(index)
        link_item = soup.find('div', attrs={'class':'boxmedio box257'})
        if link_item:
            cover_url = 'http://www.elpais.com.uy' + link_item.img['src']
        return cover_url

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup

View File

@@ -0,0 +1,100 @@
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
http://freeway.com.uy
'''

from calibre.web.feeds.news import BasicNewsRecipe

class General(BasicNewsRecipe):
    title = 'freeway.com.uy'
    __author__ = 'Gustavo Azambuja'
    description = 'Revista Freeway, Montevideo, Uruguay'
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'
    use_embedded_content = False
    recursion = 1
    encoding = 'utf8'
    remove_javascript = True
    no_stylesheets = True
    conversion_options = {'linearize_tables': True}

    oldest_article = 180
    max_articles_per_feed = 100
    keep_only_tags = [
        dict(id=['contenido']),
        dict(name='a', attrs={'class':'titulo_art_ppal'}),
        dict(name='img', attrs={'class':'recuadro'}),
        dict(name='td', attrs={'class':'txt_art_ppal'})
    ]
    remove_tags = [
        dict(name=['object','link'])
    ]
    remove_attributes = ['width','height', 'style', 'font', 'color']

    extra_css = '''
        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
        img {float:left; clear:both; margin:10px}
        p {font-family:Arial,Helvetica,sans-serif;}
    '''

    def parse_index(self):
        feeds = []
        for title, url in [('Articulos', 'http://freeway.com.uy/revista/')]:
            articles = self.art_parse_section(url)
            if articles:
                feeds.append((title, articles))
        return feeds

    def art_parse_section(self, url):
        soup = self.index_to_soup(url)
        div = soup.find(attrs={'id': 'tbl_1'})

        current_articles = []
        for tag in div.findAllNext(attrs={'class': 'ancho_articulos'}):
            if tag.get('class') == 'link-list-heading':
                break
            for td in tag.findAll('td'):
                a = td.find('a', attrs={'class': 'titulo_articulos'})
                if a is None:
                    continue
                title = self.tag_to_string(a)
                url = a.get('href', False)
                if not url or not title:
                    continue
                if url.startswith('/'):
                    url = 'http://freeway.com.uy' + url
                p = td.find('p', attrs={'class': 'txt_articulos'})
                description = self.tag_to_string(p)
                self.log('\t\tFound article:', title)
                self.log('\t\t\t', url)
                self.log('\t\t\t', description)
                current_articles.append({'title': title, 'url': url, 'description': description, 'date': ''})

        return current_articles

    def preprocess_html(self, soup):
        # Flatten table markup into divs and strip layout attributes
        attribs = ['style','font','valign','colspan','width','height',
                   'rowspan','summary','align','cellspacing','cellpadding',
                   'frames','rules','border']
        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
            item.name = 'div'
            for attrib in attribs:
                if item.has_key(attrib):
                    del item[attrib]
        return soup

    def get_cover_url(self):
        # Cover discovery is not implemented for this site; link the
        # current issue's cover directly.
        return 'http://freeway.com.uy/_upload/_n_foto_grande/noticia_1792_tapanoviembre2010.jpg'

View File

@@ -0,0 +1,48 @@
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
ladiaria.com.uy
'''

from calibre.web.feeds.news import BasicNewsRecipe

class General(BasicNewsRecipe):
    title = 'La Diaria'
    __author__ = 'Gustavo Azambuja'
    description = 'Noticias de Uruguay'
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'
    use_embedded_content = False
    recursion = 5
    encoding = 'utf8'
    remove_javascript = True
    no_stylesheets = True

    oldest_article = 2
    max_articles_per_feed = 100
    keep_only_tags = [dict(id=['article'])]
    remove_tags = [
        dict(name='div', attrs={'class':['byline', 'hr', 'titlebar', 'volver-arriba-right']}),
        dict(name='div', attrs={'id':'discussion'}),
        dict(name=['object','link'])
    ]

    extra_css = '''
        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
        p {font-family:Arial,Helvetica,sans-serif;}
    '''

    feeds = [
        (u'Articulos', u'http://ladiaria.com/feeds/articulos')
    ]

    def get_cover_url(self):
        return 'http://ladiaria.com/edicion/imagenportada/'

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup

View File

@@ -8,7 +8,7 @@ from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe

 class LaRazon_Bol(BasicNewsRecipe):
-    title = 'La Razón - Bolivia'
+    title = u'La Razón - Bolivia'
     __author__ = 'Darko Miletic'
     description = 'El diario nacional de Bolivia'
     publisher = 'Praxsis S.R.L.'

View File

@@ -0,0 +1,56 @@
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
http://www.montevideo.com.uy
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Noticias(BasicNewsRecipe):
    title = 'Montevideo COMM'
    __author__ = 'Gustavo Azambuja'
    description = 'Noticias de Uruguay'
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'
    use_embedded_content = False
    recursion = 5
    encoding = 'utf-8'
    remove_javascript = True
    no_stylesheets = True

    oldest_article = 2
    max_articles_per_feed = 100
    keep_only_tags = [dict(id=['txt'])]
    remove_tags = [
        dict(name=['object','link'])
    ]
    remove_attributes = ['width','height', 'style', 'font', 'color']

    extra_css = '''
        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
        p {font-family:Arial,Helvetica,sans-serif;}
    '''

    feeds = [
        (u'Destacados', u'http://www.montevideo.com.uy/anxml.aspx?58'),
        (u'Noticias', u'http://www.montevideo.com.uy/anxml.aspx?59'),
        (u'Tecnologia', u'http://www.montevideo.com.uy/anxml.aspx?133'),
        (u'Tiempo Libre', u'http://www.montevideo.com.uy/anxml.aspx?60'),
        # (u'Deportes', u'http://www.montevideo.com.uy/anxml.aspx?968'),
        # (u'Pantallazo', u'http://www.montevideo.com.uy/anxml.aspx?1022'),
        (u'Gastronomia', u'http://www.montevideo.com.uy/anxml.aspx?1023')
    ]

    def get_cover_url(self):
        return 'http://sphotos.ak.fbcdn.net/hphotos-ak-snc1/hs276.snc1/10319_147339559330_147337559330_2625816_6636564_n.jpg'

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup

View File

@@ -0,0 +1,63 @@
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
observa.com.uy
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Noticias(BasicNewsRecipe):
    title = 'Observa Digital'
    __author__ = 'Gustavo Azambuja'
    description = 'Noticias desde Uruguay'
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'
    use_embedded_content = False
    recursion = 5
    encoding = 'utf8'
    remove_javascript = True
    no_stylesheets = True

    oldest_article = 2
    max_articles_per_feed = 100
    keep_only_tags = [dict(id=['contenido'])]
    remove_tags = [
        dict(name='div', attrs={'id':'contenedorVinculadas'}),
        dict(name='p', attrs={'id':'nota_firma'}),
        dict(name=['object','link'])
    ]
    remove_attributes = ['width','height', 'style', 'font', 'color']

    extra_css = '''
        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
        p {font-family:Arial,Helvetica,sans-serif;}
    '''

    feeds = [
        (u'Actualidad', u'http://www.observa.com.uy/RSS/actualidad.xml'),
        (u'Deportes', u'http://www.observa.com.uy/RSS/deportes.xml'),
        (u'Vida', u'http://www.observa.com.uy/RSS/vida.xml'),
        (u'Ciencia y Tecnologia', u'http://www.observa.com.uy/RSS/ciencia.xml')
    ]

    def get_cover_url(self):
        cover_url = None
        index = 'http://www.elobservador.com.uy/elobservador/nav_portada.asp?suplemento=dia'
        soup = self.index_to_soup(index)
        link_item = soup.find('img', attrs={'usemap':'#mapeo_imagenes'})
        if link_item:
            cover_url = 'http://www.elobservador.com.uy' + link_item['src'].strip()
        return cover_url

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup

View File

@@ -0,0 +1,54 @@
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
http://www.revistabla.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Noticias(BasicNewsRecipe):
    title = 'Revista Bla'
    __author__ = 'Gustavo Azambuja'
    description = 'Moda | Uruguay'
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'
    use_embedded_content = False
    recursion = 5
    encoding = 'utf8'
    remove_javascript = True
    no_stylesheets = True

    oldest_article = 20
    max_articles_per_feed = 100
    keep_only_tags = [dict(id=['body_container'])]
    remove_tags = [
        dict(name='div', attrs={'class':['date_text', 'comments', 'form_section', 'share_it']}),
        dict(name='div', attrs={'id':['relatedPosts', 'spacer', 'banner_izquierda', 'right_container']}),
        dict(name='p', attrs={'class':'FacebookLikeButton'}),
        dict(name=['object','link'])
    ]

    extra_css = '''
        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
        p {font-family:Arial,Helvetica,sans-serif;}
    '''

    feeds = [
        (u'Articulos', u'http://www.revistabla.com/feed/')
    ]

    def get_cover_url(self):
        cover_url = None
        index = 'http://www.revistabla.com'
        soup = self.index_to_soup(index)
        link_item = soup.find('div', attrs={'class':'header_right'})
        if link_item:
            cover_url = link_item.img['src']
        return cover_url

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup

View File

@@ -108,3 +108,10 @@ class RevistaMuyInteresante(BasicNewsRecipe):
             feeds.append((title, articles))
         return feeds
+
+    def get_cover_url(self):
+        index = 'http://www.muyinteresante.es/revista'
+        soup = self.index_to_soup(index)
+        link_item = soup.find('img',attrs={'class':'img_portada'})
+        if link_item:
+            cover_url = "http://www.muyinteresante.es"+link_item['src']
+        return cover_url

View File

@@ -3,12 +3,12 @@
 __license__ = 'GPL v3'
 __copyright__ = '2009, Gerhard Aigner <gerhard.aigner at gmail.com>'

-''' http://www.derstandard.at - Austrian Newspaper '''

 import re
 from calibre.web.feeds.news import BasicNewsRecipe

 class TelepolisNews(BasicNewsRecipe):
-    title = u'Telepolis (News)'
+    title = u'Telepolis (News+Artikel)'
     __author__ = 'Gerhard Aigner'
     publisher = 'Heise Zeitschriften Verlag GmbH & Co KG'
     description = 'News from telepolis'

@@ -26,10 +26,10 @@ class TelepolisNews(BasicNewsRecipe):
     preprocess_regexps = [(re.compile(r'<a[^>]*>', re.DOTALL|re.IGNORECASE), lambda match: ''),
                           (re.compile(r'</a>', re.DOTALL|re.IGNORECASE), lambda match: ''),]

-    keep_only_tags = [dict(name = 'table',attrs={'class':'blogtable'})]
-    remove_tags = [dict(name='img'), dict(name='td',attrs={'class':'blogbottom'})]
-    feeds = [(u'News', u'http://www.heise.de/tp/news.rdf')]
+    keep_only_tags = [dict(name = 'td',attrs={'class':'bloghead'}),dict(name = 'td',attrs={'class':'blogfliess'})]
+    remove_tags = [dict(name='img'), dict(name='td',attrs={'class':'blogbottom'}), dict(name='td',attrs={'class':'forum'})]
+    feeds = [(u'News', u'http://www.heise.de/tp/news-atom.xml')]

     html2lrf_options = [
         '--comment' , description

@@ -41,7 +41,7 @@ class TelepolisNews(BasicNewsRecipe):
     def get_article_url(self, article):
         '''if the linked article is of kind artikel don't take it'''
-        if (article.link.count('artikel') > 0) :
+        if (article.link.count('artikel') > 1) :
             return None
         return article.link

@@ -49,3 +49,5 @@ class TelepolisNews(BasicNewsRecipe):
         mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
         soup.head.insert(0,mtag)
         return soup

View File

@@ -132,7 +132,7 @@ class Win32Freeze(Command, WixMixIn):
         shutil.copytree(self.j(comext, 'shell'), self.j(sp_dir, 'win32com', 'shell'))
         shutil.rmtree(comext)

-        for pat in (r'numpy', r'PyQt4\uic\port_v3'):
+        for pat in (r'PyQt4\uic\port_v3', ):
             x = glob.glob(self.j(self.lib_dir, 'site-packages', pat))[0]
             shutil.rmtree(x)

View File

@@ -19,7 +19,7 @@ Set CMAKE_PREFIX_PATH environment variable to C:\cygwin\home\kovid\sw
 This is where all dependencies will be installed.

-Add C:\Python26\Scripts and C:\Python26 to PATH
+Add C:\Python27\Scripts and C:\Python27 to PATH

 Install setuptools from http://pypi.python.org/pypi/setuptools

 If there are no windows binaries already compiled for the version of python you are using then download the source and run the following command in the folder where the source has been unpacked::

@@ -28,7 +28,7 @@ If there are no windows binaries already compiled for the version of python you

 Run the following command to install python dependencies::

-  easy_install --always-unzip -U ipython mechanize pyreadline python-dateutil dnspython cssutils clientform
+  easy_install --always-unzip -U ipython mechanize pyreadline python-dateutil dnspython cssutils clientform pycrypto

 Install BeautifulSoup 3.0.x manually into site-packages (3.1.x parses broken HTML very poorly)

View File

@@ -229,6 +229,10 @@ class KOBO(USBMS):
             #Delete the volume_shortcovers second
             cursor.execute('delete from volume_shortcovers where volumeid = ?', t)

+            # Delete the rows from content_keys
+            if self.dbversion >= 8:
+                cursor.execute('delete from content_keys where volumeid = ?', t)
+
             # Delete the chapters associated with the book next
             t = (ContentID,ContentID,)
             cursor.execute('delete from content where BookID = ? or ContentID = ?', t)

View File

@@ -140,7 +140,7 @@ def create_books(opts, args, timeout=5.):
     tans = [ISBNDBMetadata(book) for book in fetch_metadata(url, timeout=timeout)]

     #remove duplicates ISBN
-    return dict((book.isbn, book) for book in tans).values()
+    return list(dict((book.isbn, book) for book in tans).values())

 def main(args=sys.argv):
     parser = option_parser()

View File

@@ -6,3 +6,53 @@ def db(path=None):
     from calibre.library.database2 import LibraryDatabase2
     from calibre.utils.config import prefs
     return LibraryDatabase2(path if path else prefs['library_path'])
+
+def generate_test_db(library_path,
+        num_of_records=20000,
+        num_of_authors=6000,
+        num_of_tags=10000,
+        tag_length=7,
+        author_length=7,
+        title_length=10,
+        max_authors=10,
+        max_tags=10
+        ):
+    import random, string, os, sys, time
+
+    if not os.path.exists(library_path):
+        os.makedirs(library_path)
+
+    def randstr(length):
+        return ''.join(random.choice(string.letters) for i in
+                xrange(length))
+
+    all_tags = [randstr(tag_length) for j in xrange(num_of_tags)]
+    print 'Generated', num_of_tags, 'tags'
+    all_authors = [randstr(author_length) for j in xrange(num_of_authors)]
+    print 'Generated', num_of_authors, 'authors'
+    all_titles = [randstr(title_length) for j in xrange(num_of_records)]
+    print 'Generated', num_of_records, 'titles'
+
+    testdb = db(library_path)
+
+    print 'Creating', num_of_records, 'records...'
+    start = time.time()
+
+    for i, title in enumerate(all_titles):
+        print i+1,
+        sys.stdout.flush()
+        authors = random.randint(1, max_authors)
+        authors = [random.choice(all_authors) for i in xrange(authors)]
+        tags = random.randint(0, max_tags)
+        tags = [random.choice(all_tags) for i in xrange(tags)]
+
+        from calibre.ebooks.metadata.book.base import Metadata
+        mi = Metadata(title, authors)
+        mi.tags = tags
+
+        testdb.import_book(mi, [])
+
+    t = time.time() - start
+    print '\nGenerated', num_of_records, 'records in:', t, 'seconds'
+    print 'Time per record:', t/float(num_of_records)
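
For orientation, a minimal sketch of how the new helper might be driven (the path and the small sizes are illustrative, not part of the commit):

    from calibre.library import generate_test_db
    # build a small throwaway library to time metadata inserts
    generate_test_db('/tmp/test_library', num_of_records=100,
            num_of_authors=50, num_of_tags=100)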

View File

@@ -405,9 +405,6 @@ class BIBTEX(CatalogPlugin):
             else :
                 template_citation = u'%s' % str(entry["id"])

-            if asccii_bibtex :
-                return bibtexclass.ValidateCitationKey(template_citation.encode('ascii', 'replace'))
-            else :
             return bibtexclass.ValidateCitationKey(template_citation)

         self.fmt = path_to_output.rpartition('.')[2]

View File

@@ -1248,15 +1248,20 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
                 traceback.print_exc()
             else:
                 raise
+        path_changed = False
         if set_title and mi.title:
-            self.set_title(id, mi.title, commit=False)
+            self._set_title(id, mi.title)
+            path_changed = True
         if set_authors:
             if not mi.authors:
                 mi.authors = [_('Unknown')]
             authors = []
             for a in mi.authors:
                 authors += string_to_authors(a)
-            self.set_authors(id, authors, notify=False, commit=False)
+            self._set_authors(id, authors)
+            path_changed = True
+        if path_changed:
+            self.set_path(id, index_is_id=True)
         if mi.author_sort:
             doit(self.set_author_sort, id, mi.author_sort, notify=False,
                  commit=False)

@@ -1348,13 +1353,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
             result.append(r)
         return ' & '.join(result).replace('|', ',')

-    def set_authors(self, id, authors, notify=True, commit=True):
-        '''
-        Note that even if commit is False, the db will still be committed to
-        because this causes the location of files to change
-
-        :param authors: A list of authors.
-        '''
+    def _set_authors(self, id, authors):
         if not authors:
             authors = [_('Unknown')]
         self.conn.execute('DELETE FROM books_authors_link WHERE book=?',(id,))

@@ -1379,25 +1378,30 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
             ss = self.author_sort_from_book(id, index_is_id=True)
             self.conn.execute('UPDATE books SET author_sort=? WHERE id=?',
                               (ss, id))
-        self.dirtied([id], commit=False)
-        if commit:
-            self.conn.commit()
         self.data.set(id, self.FIELD_MAP['authors'],
                       ','.join([a.replace(',', '|') for a in authors]),
                       row_is_id=True)
         self.data.set(id, self.FIELD_MAP['author_sort'], ss, row_is_id=True)

+    def set_authors(self, id, authors, notify=True, commit=True):
+        '''
+        Note that even if commit is False, the db will still be committed to
+        because this causes the location of files to change
+
+        :param authors: A list of authors.
+        '''
+        self._set_authors(id, authors)
+        self.dirtied([id], commit=False)
+        if commit:
+            self.conn.commit()
         self.set_path(id, index_is_id=True)
         if notify:
             self.notify('metadata', [id])

-    def set_title(self, id, title, notify=True, commit=True):
-        '''
-        Note that even if commit is False, the db will still be committed to
-        because this causes the location of files to change
-        '''
+    def _set_title(self, id, title):
         if not title:
-            return
-        if not isinstance(title, unicode):
+            return False
+        if isbytestring(title):
             title = title.decode(preferred_encoding, 'replace')
         self.conn.execute('UPDATE books SET title=? WHERE id=?', (title, id))
         self.data.set(id, self.FIELD_MAP['title'], title, row_is_id=True)

@@ -1405,6 +1409,15 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
             self.data.set(id, self.FIELD_MAP['sort'], title_sort(title), row_is_id=True)
         else:
             self.data.set(id, self.FIELD_MAP['sort'], title, row_is_id=True)
+        return True
+
+    def set_title(self, id, title, notify=True, commit=True):
+        '''
+        Note that even if commit is False, the db will still be committed to
+        because this causes the location of files to change
+        '''
+        if not self._set_title(id, title):
+            return
         self.set_path(id, index_is_id=True)
         self.dirtied([id], commit=False)
         if commit:

@@ -2072,13 +2085,11 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
                           (id, title, series_index, aus))
         self.data.books_added([id], self)
-        self.set_path(id, True)
-        self.conn.commit()
         if mi.timestamp is None:
             mi.timestamp = utcnow()
         if mi.pubdate is None:
             mi.pubdate = utcnow()
-        self.set_metadata(id, mi, ignore_errors=True)
+        self.set_metadata(id, mi, ignore_errors=True, commit=True)
         if cover is not None:
             try:
                 self.set_cover(id, cover)

@@ -2114,13 +2125,11 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
             id = obj.lastrowid
             self.data.books_added([id], self)
             ids.append(id)
-            self.set_path(id, True)
-            self.conn.commit()
             if mi.timestamp is None:
                 mi.timestamp = utcnow()
             if mi.pubdate is None:
                 mi.pubdate = utcnow()
-            self.set_metadata(id, mi)
+            self.set_metadata(id, mi, commit=True, ignore_errors=True)
             npath = self.run_import_plugins(path, format)
             format = os.path.splitext(npath)[-1].lower().replace('.', '').upper()
             stream = lopen(npath, 'rb')

@@ -2154,12 +2163,11 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
                           (title, series_index, aus))
         id = obj.lastrowid
         self.data.books_added([id], self)
-        self.set_path(id, True)
         if mi.timestamp is None:
             mi.timestamp = utcnow()
         if mi.pubdate is None:
             mi.pubdate = utcnow()
-        self.set_metadata(id, mi, ignore_errors=True)
+        self.set_metadata(id, mi, ignore_errors=True, commit=True)
         if preserve_uuid and mi.uuid:
             self.set_uuid(id, mi.uuid, commit=False)
         for path in formats:

View File

@@ -129,7 +129,7 @@ if not _run_once:
         def __getattribute__(self, attr):
             if attr in ('name', '__enter__', '__str__', '__unicode__',
-                    '__repr__'):
+                    '__repr__', '__exit__'):
                 return object.__getattribute__(self, attr)
             fobject = object.__getattribute__(self, 'fobject')
             return getattr(fobject, attr)

@@ -155,6 +155,11 @@ if not _run_once:
             fobject.__enter__()
             return self

+        def __exit__(self, *args):
+            fobject = object.__getattribute__(self, 'fobject')
+            return fobject.__exit__(*args)
+
         m = mode[0]
         random = len(mode) > 1 and mode[1] == '+'
         binary = mode[-1] == 'b'
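
The point of the added __exit__ is that Python resolves special methods on the type rather than through an instance's __getattribute__, so the proxy must define __exit__ itself for with-statement support. A minimal sketch of the now-working pattern (the file name is an example; lopen is calibre's open wrapper around this proxy):

    # __exit__ runs when the block ends, closing the underlying file object
    with lopen('book.epub', 'rb') as f:
        data = f.read()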

View File

@@ -69,6 +69,9 @@ from UserDict import UserDict
 from calibre.constants import preferred_encoding
 from calibre.utils.mreplace import MReplace

+from calibre.constants import preferred_encoding
+from calibre.utils.mreplace import MReplace
+
 utf8enc2latex_mapping = {
     # This is a mapping of Unicode characters to LaTeX equivalents.
     # The information has been extracted from

View File

@@ -61,6 +61,11 @@ def serialize_recipe(urn, recipe_class):
 def serialize_collection(mapping_of_recipe_classes):
     collection = E.recipe_collection()
+    '''for u, x in mapping_of_recipe_classes.items():
+        print 11111, u, repr(x.title)
+        if isinstance(x.title, str):
+            x.title.decode('ascii')
+    '''
     for urn in sorted(mapping_of_recipe_classes.keys(),
             key=lambda key: getattr(mapping_of_recipe_classes[key], 'title',
                 'zzz')):