diff --git a/recipes/estadao.recipe b/recipes/estadao.recipe index 86ab572398..5b6303ba21 100644 --- a/recipes/estadao.recipe +++ b/recipes/estadao.recipe @@ -1,134 +1,129 @@ -#!/usr/bin/env python -from calibre.web.feeds.news import BasicNewsRecipe -from datetime import datetime, timedelta -from calibre.ebooks.BeautifulSoup import Tag,BeautifulSoup -from calibre.utils.magick import Image, PixelWand -from urllib2 import Request, urlopen, URLError - -class Estadao(BasicNewsRecipe): - THUMBALIZR_API = "0123456789abcdef01234567890" # ---->Get your at http://www.thumbalizr.com/ - LANGUAGE = 'pt_br' - language = 'pt' - LANGHTM = 'pt-br' - ENCODING = 'utf' - ENCHTM = 'utf-8' - directionhtm = 'ltr' - requires_version = (0,8,47) - news = True - publication_type = 'newsportal' - - title = u'Estadao' - __author__ = 'Euler Alves' - description = u'Brazilian news from Estad\xe3o' - publisher = u'Estad\xe3o' - category = 'news, rss' - - oldest_article = 4 - max_articles_per_feed = 100 - summary_length = 1000 - - remove_javascript = True - no_stylesheets = True - use_embedded_content = False - remove_empty_feeds = True - timefmt = ' [%d %b %Y (%a)]' - - html2lrf_options = [ - '--comment', description - ,'--category', category - ,'--publisher', publisher - ] - - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' - - hoje = datetime.now()-timedelta(days=2) - pubdate = hoje.strftime('%a, %d %b') - if hoje.hour<10: - hoje = hoje-timedelta(days=1) - CAPA = 'http://www.estadao.com.br/estadaodehoje/'+hoje.strftime('%Y%m%d')+'/img/capadodia.jpg' - SCREENSHOT = 'http://estadao.com.br/' - cover_margins = (0,0,'white') - masthead_url = 'http://www.estadao.com.br/estadao/novo/img/logo.png' - - keep_only_tags = [dict(name='div', attrs={'class':['bb-md-noticia','corpo']})] - remove_tags = [ - dict(name='div', - attrs={'id':[ - 'bb-md-noticia-tabs' - ]}) - ,dict(name='div', - attrs={'class':[ - 'tags' - ,'discussion' - ,'bb-gg adsense_container' - ]}) - - ,dict(name='a') - ,dict(name='iframe') - ,dict(name='link') - ,dict(name='script') - ] - - feeds = [ - (u'\xDAltimas Not\xEDcias', u'http://www.estadao.com.br/rss/ultimas.xml') - ,(u'Manchetes', u'http://www.estadao.com.br/rss/manchetes.xml') - ,(u'Brasil', u'http://www.estadao.com.br/rss/brasil.xml') - ,(u'Internacional', u'http://www.estadao.com.br/rss/internacional.xml') - ,(u'Cinema', u'http://blogs.estadao.com.br/cinema/feed/') - ,(u'Planeta', u'http://www.estadao.com.br/rss/planeta.xml') - ,(u'Ci\xEAncia', u'http://www.estadao.com.br/rss/ciencia.xml') - ,(u'Sa\xFAde', u'http://www.estadao.com.br/rss/saude.xml') - ,(u'Pol\xEDtica', u'http://www.estadao.com.br/rss/politica.xml') - ] - - conversion_options = { - 'title' : title - ,'comments' : description - ,'publisher' : publisher - ,'tags' : category - ,'language' : LANGUAGE - ,'linearize_tables': True - } - - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - if not soup.find(attrs={'http-equiv':'Content-Language'}): - meta0 = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.LANGHTM)]) - soup.head.insert(0,meta0) - if not soup.find(attrs={'http-equiv':'Content-Type'}): - meta1 = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset="+self.ENCHTM)]) - soup.head.insert(0,meta1) - return soup - - def postprocess_html(self, soup, first): - #process all the images. assumes that the new html has the correct path - for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')): - iurl = tag['src'] - img = Image() - img.open(iurl) - width, height = img.size - print 'img is: ', iurl, 'width is: ', width, 'height is: ', height - pw = PixelWand() - if( width > height and width > 590) : - print 'Rotate image' - img.rotate(pw, -90) - img.save(iurl) - return soup - - def get_cover_url(self): - cover_url = self.CAPA - pedido = Request(self.CAPA) - pedido.add_header('User-agent','Mozilla/5.0 (Windows; U; Windows NT 5.1; '+self.LANGHTM+'; userid='+self.THUMBALIZR_API+') Calibre/0.8.47 (like Gecko)') - pedido.add_header('Accept-Charset',self.ENCHTM) - pedido.add_header('Referer',self.SCREENSHOT) - try: - resposta = urlopen(pedido) - soup = BeautifulSoup(resposta) - cover_item = soup.find('body') - if cover_item: - cover_url='http://api.thumbalizr.com/?api_key='+self.THUMBALIZR_API+'&url='+self.SCREENSHOT+'&width=600&quality=90' - return cover_url - except URLError: - cover_url='http://api.thumbalizr.com/?api_key='+self.THUMBALIZR_API+'&url='+self.SCREENSHOT+'&width=600&quality=90' - return cover_url +from calibre.web.feeds.news import BasicNewsRecipe +from datetime import datetime, timedelta +from calibre.ebooks.BeautifulSoup import Tag,BeautifulSoup +from calibre.utils.magick import Image, PixelWand +from urllib2 import Request, urlopen, URLError + +class Estadao(BasicNewsRecipe): + THUMBALIZR_API = '' # ---->Get your at http://www.thumbalizr.com/ and put here + LANGUAGE = 'pt_br' + language = 'pt' + LANGHTM = 'pt-br' + ENCODING = 'utf' + ENCHTM = 'utf-8' + directionhtm = 'ltr' + requires_version = (0,7,47) + news = True + + title = u'Estad\xe3o' + __author__ = 'Euler Alves' + description = u'Brazilian news from Estad\xe3o' + publisher = u'Estad\xe3o' + category = 'news, rss' + + oldest_article = 4 + max_articles_per_feed = 100 + summary_length = 1000 + + remove_javascript = True + no_stylesheets = True + use_embedded_content = False + remove_empty_feeds = True + timefmt = ' [%d %b %Y (%a)]' + + hoje = datetime.now()-timedelta(days=2) + pubdate = hoje.strftime('%a, %d %b') + if hoje.hour<10: + hoje = hoje-timedelta(days=1) + CAPA = 'http://www.estadao.com.br/estadaodehoje/'+hoje.strftime('%Y%m%d')+'/img/capadodia.jpg' + SCREENSHOT = 'http://estadao.com.br/' + cover_margins = (0,0,'white') + masthead_url = 'http://www.estadao.com.br/estadao/novo/img/logo.png' + + keep_only_tags = [dict(name='div', attrs={'class':['bb-md-noticia','corpo']})] + remove_tags = [ + dict(name='div', + attrs={'id':[ + 'bb-md-noticia-tabs' + ]}) + ,dict(name='div', + attrs={'class':[ + 'tags' + ,'discussion' + ,'bb-gg adsense_container' + ]}) + + ,dict(name='a') + ,dict(name='iframe') + ,dict(name='link') + ,dict(name='script') + ] + + + feeds = [ + (u'\xDAltimas Not\xEDcias', u'http://www.estadao.com.br/rss/ultimas.xml') + ,(u'Manchetes', u'http://www.estadao.com.br/rss/manchetes.xml') + ,(u'Brasil', u'http://www.estadao.com.br/rss/brasil.xml') + ,(u'Internacional', u'http://www.estadao.com.br/rss/internacional.xml') + ,(u'Cinema', u'http://blogs.estadao.com.br/cinema/feed/') + ,(u'Planeta', u'http://www.estadao.com.br/rss/planeta.xml') + ,(u'Ci\xEAncia', u'http://www.estadao.com.br/rss/ciencia.xml') + ,(u'Sa\xFAde', u'http://www.estadao.com.br/rss/saude.xml') + ,(u'Pol\xEDtica', u'http://www.estadao.com.br/rss/politica.xml') + ] + + conversion_options = { + 'title' : title + ,'comments' : description + ,'publisher' : publisher + ,'tags' : category + ,'language' : LANGUAGE + ,'linearize_tables': True + } + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + if not soup.find(attrs={'http-equiv':'Content-Language'}): + meta0 = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.LANGHTM)]) + soup.head.insert(0,meta0) + if not soup.find(attrs={'http-equiv':'Content-Type'}): + meta1 = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset="+self.ENCHTM)]) + soup.head.insert(0,meta1) + return soup + + def postprocess_html(self, soup, first): + #process all the images. assumes that the new html has the correct path + for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')): + iurl = tag['src'] + img = Image() + img.open(iurl) + width, height = img.size + print 'img is: ', iurl, 'width is: ', width, 'height is: ', height + if img < 0: + raise RuntimeError('Out of memory') + pw = PixelWand() + if( width > height and width > 590) : + print 'Rotate image' + img.rotate(pw, -90) + img.save(iurl) + return soup + + def get_cover_url(self): + if self.THUMBALIZR_API: + cover_url = self.CAPA + pedido = Request(self.CAPA) + pedido.add_header('User-agent','Mozilla/5.0 (Windows; U; Windows NT 5.1; '+self.LANGHTM+'; userid='+self.THUMBALIZR_API+') Calibre/0.8.47 (like Gecko)') + pedido.add_header('Accept-Charset',self.ENCHTM) + pedido.add_header('Referer',self.SCREENSHOT) + try: + resposta = urlopen(pedido) + soup = BeautifulSoup(resposta) + cover_item = soup.find('body') + if cover_item: + cover_url='http://api.thumbalizr.com/?api_key='+self.THUMBALIZR_API+'&url='+self.SCREENSHOT+'&width=600&quality=90' + return cover_url + except URLError: + cover_url='http://api.thumbalizr.com/?api_key='+self.THUMBALIZR_API+'&url='+self.SCREENSHOT+'&width=600&quality=90' + return cover_url + diff --git a/recipes/folhadesaopaulo.recipe b/recipes/folhadesaopaulo.recipe index 40898672e6..028513ad3a 100644 --- a/recipes/folhadesaopaulo.recipe +++ b/recipes/folhadesaopaulo.recipe @@ -1,149 +1,151 @@ -from calibre.web.feeds.news import BasicNewsRecipe -from datetime import datetime, timedelta -from calibre.ebooks.BeautifulSoup import Tag,BeautifulSoup -from calibre.utils.magick import Image, PixelWand -from urllib2 import Request, urlopen, URLError - -class FolhaOnline(BasicNewsRecipe): - THUMBALIZR_API = "0123456789abcdef01234567890" # ---->Get your at http://www.thumbalizr.com/ - LANGUAGE = 'pt_br' - language = 'pt' - LANGHTM = 'pt-br' - ENCODING = 'cp1252' - ENCHTM = 'iso-8859-1' - directionhtm = 'ltr' - requires_version = (0,8,47) - news = True - publication_type = 'newsportal' - - title = u'Folha de S\xE3o Paulo' - __author__ = 'Euler Alves' - description = u'Brazilian news from Folha de S\xE3o Paulo' - publisher = u'Folha de S\xE3o Paulo' - category = 'news, rss' - - oldest_article = 4 - max_articles_per_feed = 100 - summary_length = 1000 - - remove_javascript = True - no_stylesheets = True - use_embedded_content = False - remove_empty_feeds = True - timefmt = ' [%d %b %Y (%a)]' - - html2lrf_options = [ - '--comment', description - ,'--category', category - ,'--publisher', publisher - ] - - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' - - hoje = datetime.now() - pubdate = hoje.strftime('%a, %d %b') - if hoje.hour<6: - hoje = hoje-timedelta(days=1) - CAPA = 'http://www1.folha.uol.com.br/fsp/images/cp'+hoje.strftime('%d%m%Y')+'.jpg' - SCREENSHOT = 'http://www1.folha.uol.com.br/' - cover_margins = (0,0,'white') - masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif' - - keep_only_tags = [dict(name='div', attrs={'id':'articleNew'})] - remove_tags = [ - dict(name='div', - attrs={'id':[ - 'articleButton' - ,'bookmarklets' - ,'ad-180x150-1' - ,'contextualAdsArticle' - ,'articleEnd' - ,'articleComments' - ]}) - ,dict(name='div', - attrs={'class':[ - 'openBox adslibraryArticle' - ]}) - - ,dict(name='a') - ,dict(name='iframe') - ,dict(name='link') - ,dict(name='script') - ] - - feeds = [ - (u'Em cima da hora', u'http://feeds.folha.uol.com.br/emcimadahora/rss091.xml') - ,(u'Ambiente', u'http://feeds.folha.uol.com.br/ambiente/rss091.xml') - ,(u'Bichos', u'http://feeds.folha.uol.com.br/bichos/rss091.xml') - ,(u'Ci\xEAncia', u'http://feeds.folha.uol.com.br/ciencia/rss091.xml') - ,(u'Poder', u'http://feeds.folha.uol.com.br/poder/rss091.xml') - ,(u'Equil\xEDbrio e Sa\xFAde', u'http://feeds.folha.uol.com.br/equilibrioesaude/rss091.xml') - ,(u'Turismo', u'http://feeds.folha.uol.com.br/folha/turismo/rss091.xml') - ,(u'Mundo', u'http://feeds.folha.uol.com.br/mundo/rss091.xml') - ,(u'Pelo Mundo', u'http://feeds.folha.uol.com.br/pelomundo.folha.rssblog.uol.com.br/') - ,(u'Circuito integrado', u'http://feeds.folha.uol.com.br/circuitointegrado.folha.rssblog.uol.com.br/') - ,(u'Blog do Fred', u'http://feeds.folha.uol.com.br/blogdofred.folha.rssblog.uol.com.br/') - ,(u'Maria In\xEAs Dolci', u'http://feeds.folha.uol.com.br/mariainesdolci.folha.blog.uol.com.br/') - ,(u'Eduardo Ohata', u'http://feeds.folha.uol.com.br/folha/pensata/eduardoohata/rss091.xml') - ,(u'Kennedy Alencar', u'http://feeds.folha.uol.com.br/folha/pensata/kennedyalencar/rss091.xml') - ,(u'Eliane Catanh\xEAde', u'http://feeds.folha.uol.com.br/folha/pensata/elianecantanhede/rss091.xml') - ,(u'Fernado Canzian', u'http://feeds.folha.uol.com.br/folha/pensata/fernandocanzian/rss091.xml') - ,(u'Gilberto Dimenstein', u'http://feeds.folha.uol.com.br/folha/pensata/gilbertodimenstein/rss091.xml') - ,(u'H\xE9lio Schwartsman', u'http://feeds.folha.uol.com.br/folha/pensata/helioschwartsman/rss091.xml') - ,(u'Jo\xE3o Pereira Coutinho', u'http://http://feeds.folha.uol.com.br/folha/pensata/joaopereiracoutinho/rss091.xml') - ,(u'Luiz Caversan', u'http://http://feeds.folha.uol.com.br/folha/pensata/luizcaversan/rss091.xml') - ,(u'S\xE9rgio Malbergier', u'http://http://feeds.folha.uol.com.br/folha/pensata/sergiomalbergier/rss091.xml') - ,(u'Valdo Cruz', u'http://http://feeds.folha.uol.com.br/folha/pensata/valdocruz/rss091.xml') - ] - - conversion_options = { - 'title' : title - ,'comments' : description - ,'publisher' : publisher - ,'tags' : category - ,'language' : LANGUAGE - ,'linearize_tables': True - } - - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - if not soup.find(attrs={'http-equiv':'Content-Language'}): - meta0 = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.LANGHTM)]) - soup.head.insert(0,meta0) - if not soup.find(attrs={'http-equiv':'Content-Type'}): - meta1 = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset="+self.ENCHTM)]) - soup.head.insert(0,meta1) - return soup - - def postprocess_html(self, soup, first): - #process all the images. assumes that the new html has the correct path - for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')): - iurl = tag['src'] - img = Image() - img.open(iurl) - width, height = img.size - print 'img is: ', iurl, 'width is: ', width, 'height is: ', height - pw = PixelWand() - if( width > height and width > 590) : - print 'Rotate image' - img.rotate(pw, -90) - img.save(iurl) - return soup - - def get_cover_url(self): - cover_url = self.CAPA - pedido = Request(self.CAPA) - pedido.add_header('User-agent','Mozilla/5.0 (Windows; U; Windows NT 5.1; '+self.LANGHTM+'; userid='+self.THUMBALIZR_API+') Calibre/0.8.47 (like Gecko)') - pedido.add_header('Accept-Charset',self.ENCHTM) - pedido.add_header('Referer',self.SCREENSHOT) - try: - resposta = urlopen(pedido) - soup = BeautifulSoup(resposta) - cover_item = soup.find('body') - if cover_item: - cover_url='http://api.thumbalizr.com/?api_key='+self.THUMBALIZR_API+'&url='+self.SCREENSHOT+'&width=600&quality=90' - return cover_url - except URLError: - cover_url='http://api.thumbalizr.com/?api_key='+self.THUMBALIZR_API+'&url='+self.SCREENSHOT+'&width=600&quality=90' - return cover_url +from calibre.web.feeds.news import BasicNewsRecipe +from datetime import datetime, timedelta +from calibre.ebooks.BeautifulSoup import Tag,BeautifulSoup +from calibre.utils.magick import Image, PixelWand +from urllib2 import Request, urlopen, URLError + +class FolhaOnline(BasicNewsRecipe): + THUMBALIZR_API = '' # ---->Get your at http://www.thumbalizr.com/ and put here + LANGUAGE = 'pt_br' + language = 'pt' + LANGHTM = 'pt-br' + ENCODING = 'cp1252' + ENCHTM = 'iso-8859-1' + directionhtm = 'ltr' + requires_version = (0,7,47) + news = True + + title = u'Folha de S\xE3o Paulo' + __author__ = 'Euler Alves' + description = u'Brazilian news from Folha de S\xE3o Paulo' + publisher = u'Folha de S\xE3o Paulo' + category = 'news, rss' + + oldest_article = 4 + max_articles_per_feed = 100 + summary_length = 1000 + + remove_javascript = True + no_stylesheets = True + use_embedded_content = False + remove_empty_feeds = True + timefmt = ' [%d %b %Y (%a)]' + + html2lrf_options = [ + '--comment', description + ,'--category', category + ,'--publisher', publisher + ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + + hoje = datetime.now() + pubdate = hoje.strftime('%a, %d %b') + if hoje.hour<6: + hoje = hoje-timedelta(days=1) + CAPA = 'http://www1.folha.uol.com.br/fsp/images/cp'+hoje.strftime('%d%m%Y')+'.jpg' + SCREENSHOT = 'http://www1.folha.uol.com.br/' + cover_margins = (0,0,'white') + masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif' + + keep_only_tags = [dict(name='div', attrs={'id':'articleNew'})] + remove_tags = [ + dict(name='div', + attrs={'id':[ + 'articleButton' + ,'bookmarklets' + ,'ad-180x150-1' + ,'contextualAdsArticle' + ,'articleEnd' + ,'articleComments' + ]}) + ,dict(name='div', + attrs={'class':[ + 'openBox adslibraryArticle' + ]}) + + ,dict(name='a') + ,dict(name='iframe') + ,dict(name='link') + ,dict(name='script') + ] + + feeds = [ + (u'Em cima da hora', u'http://feeds.folha.uol.com.br/emcimadahora/rss091.xml') + ,(u'Ambiente', u'http://feeds.folha.uol.com.br/ambiente/rss091.xml') + ,(u'Bichos', u'http://feeds.folha.uol.com.br/bichos/rss091.xml') + ,(u'Ci\xEAncia', u'http://feeds.folha.uol.com.br/ciencia/rss091.xml') + ,(u'Poder', u'http://feeds.folha.uol.com.br/poder/rss091.xml') + ,(u'Equil\xEDbrio e Sa\xFAde', u'http://feeds.folha.uol.com.br/equilibrioesaude/rss091.xml') + ,(u'Turismo', u'http://feeds.folha.uol.com.br/folha/turismo/rss091.xml') + ,(u'Mundo', u'http://feeds.folha.uol.com.br/mundo/rss091.xml') + ,(u'Pelo Mundo', u'http://feeds.folha.uol.com.br/pelomundo.folha.rssblog.uol.com.br/') + ,(u'Circuito integrado', u'http://feeds.folha.uol.com.br/circuitointegrado.folha.rssblog.uol.com.br/') + ,(u'Blog do Fred', u'http://feeds.folha.uol.com.br/blogdofred.folha.rssblog.uol.com.br/') + ,(u'Maria In\xEAs Dolci', u'http://feeds.folha.uol.com.br/mariainesdolci.folha.blog.uol.com.br/') + ,(u'Eduardo Ohata', u'http://feeds.folha.uol.com.br/folha/pensata/eduardoohata/rss091.xml') + ,(u'Kennedy Alencar', u'http://feeds.folha.uol.com.br/folha/pensata/kennedyalencar/rss091.xml') + ,(u'Eliane Catanh\xEAde', u'http://feeds.folha.uol.com.br/folha/pensata/elianecantanhede/rss091.xml') + ,(u'Fernado Canzian', u'http://feeds.folha.uol.com.br/folha/pensata/fernandocanzian/rss091.xml') + ,(u'Gilberto Dimenstein', u'http://feeds.folha.uol.com.br/folha/pensata/gilbertodimenstein/rss091.xml') + ,(u'H\xE9lio Schwartsman', u'http://feeds.folha.uol.com.br/folha/pensata/helioschwartsman/rss091.xml') + ,(u'Jo\xE3o Pereira Coutinho', u'http://http://feeds.folha.uol.com.br/folha/pensata/joaopereiracoutinho/rss091.xml') + ,(u'Luiz Caversan', u'http://http://feeds.folha.uol.com.br/folha/pensata/luizcaversan/rss091.xml') + ,(u'S\xE9rgio Malbergier', u'http://http://feeds.folha.uol.com.br/folha/pensata/sergiomalbergier/rss091.xml') + ,(u'Valdo Cruz', u'http://http://feeds.folha.uol.com.br/folha/pensata/valdocruz/rss091.xml') + ] + + + conversion_options = { + 'title' : title + ,'comments' : description + ,'publisher' : publisher + ,'tags' : category + ,'language' : LANGUAGE + ,'linearize_tables': True + } + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + if not soup.find(attrs={'http-equiv':'Content-Language'}): + meta0 = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.LANGHTM)]) + soup.head.insert(0,meta0) + if not soup.find(attrs={'http-equiv':'Content-Type'}): + meta1 = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset="+self.ENCHTM)]) + soup.head.insert(0,meta1) + return soup + + def postprocess_html(self, soup, first): + #process all the images. assumes that the new html has the correct path + for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')): + iurl = tag['src'] + img = Image() + img.open(iurl) + width, height = img.size + print 'img is: ', iurl, 'width is: ', width, 'height is: ', height + if img < 0: + raise RuntimeError('Out of memory') + pw = PixelWand() + if( width > height and width > 590) : + print 'Rotate image' + img.rotate(pw, -90) + img.save(iurl) + return soup + + def get_cover_url(self): + cover_url = self.CAPA + pedido = Request(self.CAPA) + pedido.add_header('User-agent','Mozilla/5.0 (Windows; U; Windows NT 5.1; '+self.LANGHTM+'; userid='+self.THUMBALIZR_API+') Calibre/0.8.47 (like Gecko)') + pedido.add_header('Accept-Charset',self.ENCHTM) + pedido.add_header('Referer',self.SCREENSHOT) + try: + resposta = urlopen(pedido) + soup = BeautifulSoup(resposta) + cover_item = soup.find('body') + if cover_item: + cover_url='http://api.thumbalizr.com/?api_key='+self.THUMBALIZR_API+'&url='+self.SCREENSHOT+'&width=600&quality=90' + return cover_url + except URLError: + cover_url='http://api.thumbalizr.com/?api_key='+self.THUMBALIZR_API+'&url='+self.SCREENSHOT+'&width=600&quality=90' + return cover_url diff --git a/recipes/icons/estadao.png b/recipes/icons/estadao.png index 8f5637ad58..706f101f33 100644 Binary files a/recipes/icons/estadao.png and b/recipes/icons/estadao.png differ diff --git a/recipes/icons/folhadesaopaulo.png b/recipes/icons/folhadesaopaulo.png new file mode 100644 index 0000000000..c895e57d70 Binary files /dev/null and b/recipes/icons/folhadesaopaulo.png differ diff --git a/recipes/icons/prostamerika.png b/recipes/icons/prostamerika.png new file mode 100644 index 0000000000..f88c846bed Binary files /dev/null and b/recipes/icons/prostamerika.png differ diff --git a/recipes/icons/sb_nation.png b/recipes/icons/sb_nation.png new file mode 100644 index 0000000000..9d82e492de Binary files /dev/null and b/recipes/icons/sb_nation.png differ diff --git a/recipes/icons/wvhooligan.png b/recipes/icons/wvhooligan.png new file mode 100644 index 0000000000..81a59118c9 Binary files /dev/null and b/recipes/icons/wvhooligan.png differ diff --git a/recipes/irish_times.recipe b/recipes/irish_times.recipe index 83ea496b2c..1434a25725 100644 --- a/recipes/irish_times.recipe +++ b/recipes/irish_times.recipe @@ -1,5 +1,5 @@ __license__ = 'GPL v3' -__copyright__ = "2008, Derry FitzGerald. 2009 Modified by Ray Kinsella and David O'Callaghan" +__copyright__ = "2008, Derry FitzGerald. 2009 Modified by Ray Kinsella and David O'Callaghan, 2011 Modified by Phil Burns" ''' irishtimes.com ''' @@ -9,17 +9,20 @@ from calibre.web.feeds.news import BasicNewsRecipe class IrishTimes(BasicNewsRecipe): title = u'The Irish Times' - __author__ = "Derry FitzGerald, Ray Kinsella and David O'Callaghan" + encoding = 'ISO-8859-15' + __author__ = "Derry FitzGerald, Ray Kinsella, David O'Callaghan and Phil Burns" language = 'en_IE' timefmt = ' (%A, %B %d, %Y)' - oldest_article = 3 + + oldest_article = 1.0 + max_articles_per_feed = 100 no_stylesheets = True - simultaneous_downloads= 1 + simultaneous_downloads= 5 r = re.compile('.*(?Phttp:\/\/(www.irishtimes.com)|(rss.feedsportal.com\/c)\/.*\.html?).*') remove_tags = [dict(name='div', attrs={'class':'footer'})] - extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt }' + extra_css = 'p, div { margin: 0pt; border: 0pt; text-indent: 0.5em } .headline {font-size: large;} \n .fact { padding-top: 10pt }' feeds = [ ('Frontpage', 'http://www.irishtimes.com/feeds/rss/newspaper/index.rss'), @@ -30,15 +33,29 @@ class IrishTimes(BasicNewsRecipe): ('Sport', 'http://www.irishtimes.com/feeds/rss/newspaper/sport.rss'), ('Opinion', 'http://www.irishtimes.com/feeds/rss/newspaper/opinion.rss'), ('Letters', 'http://www.irishtimes.com/feeds/rss/newspaper/letters.rss'), + ('Magazine', 'http://www.irishtimes.com/feeds/rss/newspaper/magazine.rss'), + ('Health', 'http://www.irishtimes.com/feeds/rss/newspaper/health.rss'), + ('Education & Parenting', 'http://www.irishtimes.com/feeds/rss/newspaper/education.rss'), + ('Motors', 'http://www.irishtimes.com/feeds/rss/newspaper/motors.rss'), + ('An Teanga Bheo', 'http://www.irishtimes.com/feeds/rss/newspaper/anteangabheo.rss'), + ('Commercial Property', 'http://www.irishtimes.com/feeds/rss/newspaper/commercialproperty.rss'), + ('Science Today', 'http://www.irishtimes.com/feeds/rss/newspaper/sciencetoday.rss'), + ('Property', 'http://www.irishtimes.com/feeds/rss/newspaper/property.rss'), + ('The Tickets', 'http://www.irishtimes.com/feeds/rss/newspaper/theticket.rss'), + ('Weekend', 'http://www.irishtimes.com/feeds/rss/newspaper/weekend.rss'), + ('News features', 'http://www.irishtimes.com/feeds/rss/newspaper/newsfeatures.rss'), + ('Obituaries', 'http://www.irishtimes.com/feeds/rss/newspaper/obituaries.rss'), ] + def print_version(self, url): - if url.count('rss.feedsportal.com'): - u = 'http://www.irishtimes.com' + \ - (((url[70:].replace('0C','/')).replace('0A','0'))).replace('0Bhtml/story01.htm','_pf.html') - else: - u = url.replace('.html','_pf.html') - return u + if url.count('rss.feedsportal.com'): + u = url.replace('0Bhtml/story01.htm','_pf0Bhtml/story01.htm') + else: + u = url.replace('.html','_pf.html') + return u def get_article_url(self, article): return article.link + + diff --git a/recipes/lifehacker.recipe b/recipes/lifehacker.recipe index ff95efc50a..e96b031dab 100644 --- a/recipes/lifehacker.recipe +++ b/recipes/lifehacker.recipe @@ -1,37 +1,100 @@ -__license__ = 'GPL v3' -__copyright__ = '2010, NA' -''' -lifehacker.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe - -class Lifehacker(BasicNewsRecipe): - title = 'Lifehacker' - __author__ = 'Kovid Goyal' - description = "Computers make us more productive. Yeah, right. Lifehacker recommends the software downloads and web sites that actually save time. Don't live to geek; geek to live." - publisher = 'lifehacker.com' - category = 'news, IT, Internet, gadgets, tips and tricks, howto, diy' - oldest_article = 2 - max_articles_per_feed = 100 - no_stylesheets = True - encoding = 'utf-8' - use_embedded_content = True - language = 'en' - masthead_url = 'http://cache.gawkerassets.com/assets/lifehacker.com/img/logo.png' - conversion_options = { - 'comment' : description - , 'tags' : category - , 'publisher' : publisher - , 'language' : language - } - - remove_tags = [ - {'class': 'feedflare'}, - ] - - feeds = [(u'Articles', u'http://feeds.gawker.com/lifehacker/vip?format=xml')] - - def preprocess_html(self, soup): - return self.adeify_images(soup) - +from calibre.web.feeds.news import BasicNewsRecipe +from datetime import datetime +from calibre.ebooks.BeautifulSoup import Tag +from calibre.utils.magick import Image, PixelWand + +class LifeHacker(BasicNewsRecipe): + THUMBALIZR_API = '' # ---->Get your at http://www.thumbalizr.com/ and put here + LANGUAGE = 'en' + LANGHTM = 'en' + language = 'en' + ENCODING = 'utf' + ENCHTM = 'utf-8' + requires_version = (0,7,47) + news = True + + title = u'LifeHacker' + __author__ = 'Euler Alves' + description = u'Tips, tricks, and downloads for getting things done.' + publisher = u'lifehacker.com' + author = u'Adam Pash & Kevin Purdy & Adam Dachis & Whitson Gordon & Gina Trapani' + category = 'news, rss' + + oldest_article = 4 + max_articles_per_feed = 20 + summary_length = 1000 + + remove_javascript = True + no_stylesheets = True + use_embedded_content = True + remove_empty_feeds = True + timefmt = ' [%d %b %Y (%a)]' + + hoje = datetime.now() + pubdate = hoje.strftime('%a, %d %b') + cover_url = 'http://api.thumbalizr.com/?api_key='+THUMBALIZR_API+'&url=http://lifehacker.com&width=600&quality=90' + cover_margins = (0,0,'white') + masthead_url = 'http://cache.gawkerassets.com/assets/lifehacker.com/img/logo.png' + + remove_tags = [ + {'class': 'feedflare'}, + dict(name='div', + attrs={'class':[ + 'ad_container' + ,'ad_300x250' + ,'ad_interstitial' + ,'share-wrap' + ,'ad_300x600' + ,'ad_perma-footer-adsense' + ,'ad_perma-panorama' + ,'ad panorama' + ,'ad_container' + ]}) + ,dict(name='div', + attrs={'id':[ + 'agegate_container' + ,'agegate_container_rejected' + ,'sharemenu-wrap' + ]}) + ] + + feeds = [(u'Articles', u'http://feeds.gawker.com/lifehacker/vip?format=xml')] + + conversion_options = { + 'title' : title + ,'comments' : description + ,'publisher' : publisher + ,'tags' : category + ,'language' : LANGUAGE + ,'linearize_tables': True + } + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + if not soup.find(attrs={'http-equiv':'Content-Language'}): + meta0 = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.LANGHTM)]) + soup.head.insert(0,meta0) + if not soup.find(attrs={'http-equiv':'Content-Type'}): + meta1 = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset="+self.ENCHTM)]) + soup.head.insert(0,meta1) + return soup + + def postprocess_html(self, soup, first): + #process all the images. assumes that the new html has the correct path + for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')): + iurl = tag['src'] + img = Image() + img.open(iurl) + width, height = img.size + print 'img is: ', iurl, 'width is: ', width, 'height is: ', height + if img < 0: + raise RuntimeError('Out of memory') + pw = PixelWand() + if( width > height and width > 590) : + print 'Rotate image' + img.rotate(pw, -90) + img.save(iurl) + return soup + + diff --git a/recipes/prostamerika.recipe b/recipes/prostamerika.recipe new file mode 100644 index 0000000000..b216ee469d --- /dev/null +++ b/recipes/prostamerika.recipe @@ -0,0 +1,68 @@ +#!/usr/bin/env python +# encoding: utf-8 + +__license__ = 'GPL 3' +__copyright__ = 'zotzo' + +""" +http://www.prostamerika.com/ +""" + +from calibre.web.feeds.news import BasicNewsRecipe + + +class ProstAmerika(BasicNewsRecipe): + title = 'Prost Amerika' + language = 'en' + __author__ = 'rylsfan' + #authors = + description = 'Seattle soccer with a European accent. News, features, and match reports.' + publisher = 'ProstAmerika' # 4464 fremont avenue n, # 209, Seattle, 98103, United States + category = 'Sports' + + oldest_article = 7 + max_articles_per_feed = 100 + + cover_url = 'http://img17.imageshack.us/img17/9498/prostamerika.jpg' + masthead_url = 'http://www.prostamerika.com/soundersfc/wp-content/uploads/2011/02/PASoccer_taglinewhole.jpg' + + encoding = 'utf-8' + + no_stylesheets = True + use_embedded_content = False + remove_javascript = True + + feeds =[ + (u'Cascadia', u'http://www.prostamerika.com/category/localfootball/feed/' ), + (u'MLS', u'http://www.prostamerika.com/category/mls/feed/'), + (u'EPL', u'http://www.prostamerika.com/category/epl/feed/'), + (u'World', u'http://www.prostamerika.com/category/international-soccer/feed/'), + (u'Fan Culture',u'http://www.prostamerika.com/category/fan-culture/feed/') + + ] + + keep_only_tags = [dict(name='div', attrs={'id':'maincontent'})] + remove_tags = [ + {'class':'tweetmeme_button'}, + {'class':'wp-caption-text'} + ] + + + remove_tags_after =[ + {'class':'tweetmeme_button'} + ] + + extra_css = ''' + h1{font-family:Didot,Helvetica,sans-serif; font-weight:bold;font-size:large;} + h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;} + p{font-family:Arial,Helvetica,sans-serif;font-size:small;} + body{font-family:Helvetica,Arial,sans-serif;font-size:small;} + ''' + + def preprocess_html(self, soup): + return self.adeify_images(soup) + + + + + diff --git a/recipes/sb_nation.recipe b/recipes/sb_nation.recipe new file mode 100644 index 0000000000..19b828cdb7 --- /dev/null +++ b/recipes/sb_nation.recipe @@ -0,0 +1,56 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = 'Zotzo' +''' +http://www.stumptownfooty.com/ +http://www.eightysixforever.com +http://www.sounderatheart.com +http://www.dailysoccerfix.com/ + +''' +from calibre.web.feeds.news import BasicNewsRecipe + +class SBNation(BasicNewsRecipe): + title = u'SBNation' + __author__ = 'rylsfan' + description = u"More than 290 individual communities, each offering high quality year-round coverage and conversation led by fans who are passionate." + oldest_article = 3 + language = 'en' + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + + #cover_url = 'http://img132.imageshack.us/img132/4913/2hyggjegqqdywzn9.png' + + keep_only_tags = [ + dict(name='h2', attrs={'class':'title'}) + ,dict(name='div', attrs={'class':'entry-body'}) + ] + + remove_tags_after = dict(name='div', attrs={'class':'footline entry-actions'}) + remove_tags = [ + dict(name='div', attrs={'class':'footline entry-actions'}), + {'class': 'extend-divide'} + ] + # SBNation has 300 special blogs to choose from. These are just a couple! + feeds = [ + (u'Daily Fix', u'http://www.dailysoccerfix.com/rss/'), + (u"Stumptown Footy", u'http://www.stumptownfooty.com/rss/'), + (u'Sounders', u'http://www.sounderatheart.com/rss/'), + (u'Whitecaps', u'http://www.eightysixforever.com/rss/'), + ] + + extra_css = """ + h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} + h2{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} + p{font-family:Helvetica,sans-serif; display: block; text-align: left; text-decoration: none; text-indent: 0%;} + body{font-family:Helvetica,Arial,sans-serif;font-size:small;} + """ + + def preprocess_html(self, soup): + return self.adeify_images(soup) + + def populate_article_metadata(self, article, soup, first): + h2 = soup.find('h2') + h2.replaceWith(h2.prettify() + '

By ' + article.author + '

') diff --git a/recipes/wvhooligan.recipe b/recipes/wvhooligan.recipe new file mode 100644 index 0000000000..680ac6f244 --- /dev/null +++ b/recipes/wvhooligan.recipe @@ -0,0 +1,61 @@ +#!/usr/bin/env python +__license__ = 'GPL 3' +__copyright__ = 'zotzo' +__docformat__ = 'restructuredtext en' +''' +http://wvhooligan.com/ +''' +from calibre.web.feeds.news import BasicNewsRecipe +#import re + +class wvHooligan(BasicNewsRecipe): + authors = u'Drew Epperley' + __author__ = 'rylsfan' + language = 'en' + version = 2 + + title = u'WV Hooligan' + publisher = u'Drew Epperley' + publication_type = 'Blog' + category = u'Soccer' + description = u'A look at Major League Soccer (MLS) through the eyes of a MLS writer and fan.' + + cover_url = 'http://wvhooligan.com/wp-content/themes/urbanelements/images/logo3.png' + + oldest_article = 15 + max_articles_per_feed = 150 + use_embedded_content = True + no_stylesheets = True + remove_javascript = True + encoding = 'utf8' + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + } + + remove_tags = [ + {'class': 'feedflare'}, + {'class': 'tweetmeme_button'}, + ] + + def preprocess_html(self, soup): + return self.adeify_images(soup) + + feeds =[ + (u'Stories', u'http://feeds2.feedburner.com/wvhooligan'), + (u'MLS', u'http://wvhooligan.com/category/mls/feed/'), + (u'MLS Power Rankings', u'http://wvhooligan.com/category/power-rankings/feed/'), + (u'MLS Expansion', u'http://wvhooligan.com/category/mls/expansion-talk/feed/'), + (u'US National Team', u'http://wvhooligan.com/category/us-national-team/feed/'), + (u'College', u'http://wvhooligan.com/category/college-soccer/feed/'), + ] + + extra_css = ''' + h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} + h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;} + p{font-family:Arial,Helvetica,sans-serif;font-size:small;} + body{font-family:Helvetica,Arial,sans-serif;font-size:small;} + ''' diff --git a/resources/fonts/liberation/LiberationMono-Bold.ttf b/resources/fonts/liberation/LiberationMono-Bold.ttf index 95de75300f..42941e57b1 100644 Binary files a/resources/fonts/liberation/LiberationMono-Bold.ttf and b/resources/fonts/liberation/LiberationMono-Bold.ttf differ diff --git a/resources/fonts/liberation/LiberationMono-BoldItalic.ttf b/resources/fonts/liberation/LiberationMono-BoldItalic.ttf index 11ec3e7793..4682e4de1f 100644 Binary files a/resources/fonts/liberation/LiberationMono-BoldItalic.ttf and b/resources/fonts/liberation/LiberationMono-BoldItalic.ttf differ diff --git a/resources/fonts/liberation/LiberationMono-Italic.ttf b/resources/fonts/liberation/LiberationMono-Italic.ttf index 08f55d6367..e19f08cfb7 100644 Binary files a/resources/fonts/liberation/LiberationMono-Italic.ttf and b/resources/fonts/liberation/LiberationMono-Italic.ttf differ diff --git a/resources/fonts/liberation/LiberationMono-Regular.ttf b/resources/fonts/liberation/LiberationMono-Regular.ttf index e3024a09d8..dea96958a1 100644 Binary files a/resources/fonts/liberation/LiberationMono-Regular.ttf and b/resources/fonts/liberation/LiberationMono-Regular.ttf differ diff --git a/resources/fonts/liberation/LiberationSans-Bold.ttf b/resources/fonts/liberation/LiberationSans-Bold.ttf index 53200d956c..b29a5640e4 100644 Binary files a/resources/fonts/liberation/LiberationSans-Bold.ttf and b/resources/fonts/liberation/LiberationSans-Bold.ttf differ diff --git a/resources/fonts/liberation/LiberationSans-BoldItalic.ttf b/resources/fonts/liberation/LiberationSans-BoldItalic.ttf index d06deca60d..0b0bf94a57 100644 Binary files a/resources/fonts/liberation/LiberationSans-BoldItalic.ttf and b/resources/fonts/liberation/LiberationSans-BoldItalic.ttf differ diff --git a/resources/fonts/liberation/LiberationSans-Italic.ttf b/resources/fonts/liberation/LiberationSans-Italic.ttf index 07275adf6c..4a430cdddd 100644 Binary files a/resources/fonts/liberation/LiberationSans-Italic.ttf and b/resources/fonts/liberation/LiberationSans-Italic.ttf differ diff --git a/resources/fonts/liberation/LiberationSans-Regular.ttf b/resources/fonts/liberation/LiberationSans-Regular.ttf index 09fac2ff94..2de10634e0 100644 Binary files a/resources/fonts/liberation/LiberationSans-Regular.ttf and b/resources/fonts/liberation/LiberationSans-Regular.ttf differ diff --git a/resources/fonts/liberation/LiberationSerif-Bold.ttf b/resources/fonts/liberation/LiberationSerif-Bold.ttf index 3a4ab92ac4..892746e128 100644 Binary files a/resources/fonts/liberation/LiberationSerif-Bold.ttf and b/resources/fonts/liberation/LiberationSerif-Bold.ttf differ diff --git a/resources/fonts/liberation/LiberationSerif-BoldItalic.ttf b/resources/fonts/liberation/LiberationSerif-BoldItalic.ttf index dc75de89c0..ad754700fd 100644 Binary files a/resources/fonts/liberation/LiberationSerif-BoldItalic.ttf and b/resources/fonts/liberation/LiberationSerif-BoldItalic.ttf differ diff --git a/resources/fonts/liberation/LiberationSerif-Italic.ttf b/resources/fonts/liberation/LiberationSerif-Italic.ttf index d92b5e3929..e81544aab2 100644 Binary files a/resources/fonts/liberation/LiberationSerif-Italic.ttf and b/resources/fonts/liberation/LiberationSerif-Italic.ttf differ diff --git a/resources/fonts/liberation/LiberationSerif-Regular.ttf b/resources/fonts/liberation/LiberationSerif-Regular.ttf index d100691a25..155675f711 100644 Binary files a/resources/fonts/liberation/LiberationSerif-Regular.ttf and b/resources/fonts/liberation/LiberationSerif-Regular.ttf differ diff --git a/setup/upload.py b/setup/upload.py index 6cd9ad3eca..4fd388ce43 100644 --- a/setup/upload.py +++ b/setup/upload.py @@ -5,7 +5,8 @@ __license__ = 'GPL v3' __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import os, re, cStringIO, base64, httplib, subprocess, hashlib, shutil, time, glob +import os, re, cStringIO, base64, httplib, subprocess, hashlib, shutil, time, \ + glob, stat from subprocess import check_call from tempfile import NamedTemporaryFile, mkdtemp from zipfile import ZipFile @@ -344,6 +345,8 @@ class UploadUserManual(Command): # {{{ def build_plugin_example(self, path): from calibre import CurrentDir with NamedTemporaryFile(suffix='.zip') as f: + os.fchmod(f.fileno(), + stat.S_IRUSR|stat.S_IRGRP|stat.S_IROTH|stat.S_IWRITE) with CurrentDir(self.d(path)): with ZipFile(f, 'w') as zf: for x in os.listdir('.'): @@ -352,8 +355,8 @@ class UploadUserManual(Command): # {{{ for y in os.listdir(x): zf.write(os.path.join(x, y)) bname = self.b(path) + '_plugin.zip' - subprocess.check_call(['scp', f.name, 'divok:%s/%s'%(DOWNLOADS, - bname)]) + dest = '%s/%s'%(DOWNLOADS, bname) + subprocess.check_call(['scp', f.name, dest]) def run(self, opts): path = self.j(self.SRC, 'calibre', 'manual', 'plugin_examples') diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py index fa1f0a53de..c3aca457ad 100644 --- a/src/calibre/__init__.py +++ b/src/calibre/__init__.py @@ -61,6 +61,9 @@ def osx_version(): if m: return int(m.group(1)), int(m.group(2)), int(m.group(3)) +def confirm_config_name(name): + return name + '_again' + _filename_sanitize = re.compile(r'[\xae\0\\|\?\*<":>\+/]') _filename_sanitize_unicode = frozenset([u'\\', u'|', u'?', u'*', u'<', u'"', u':', u'>', u'+', u'/'] + list(map(unichr, xrange(32)))) diff --git a/src/calibre/devices/apple/driver.py b/src/calibre/devices/apple/driver.py index 0cfa8f1ab2..213f74f816 100644 --- a/src/calibre/devices/apple/driver.py +++ b/src/calibre/devices/apple/driver.py @@ -7,7 +7,7 @@ __docformat__ = 'restructuredtext en' import cStringIO, ctypes, datetime, os, re, shutil, subprocess, sys, tempfile, time from calibre.constants import __appname__, __version__, DEBUG -from calibre import fit_image +from calibre import fit_image, confirm_config_name from calibre.constants import isosx, iswindows from calibre.devices.errors import OpenFeedback, UserFeedback from calibre.devices.usbms.deviceconfig import DeviceConfig @@ -18,12 +18,77 @@ from calibre.ebooks.metadata import authors_to_string, MetaInformation, \ from calibre.ebooks.metadata.book.base import Metadata from calibre.ebooks.metadata.epub import set_metadata from calibre.library.server.utils import strftime -from calibre.utils.config import config_dir, prefs +from calibre.utils.config import config_dir, dynamic, prefs from calibre.utils.date import now, parse_date from calibre.utils.logging import Log from calibre.utils.zipfile import ZipFile +class AppleOpenFeedback(OpenFeedback): + + def __init__(self, plugin): + OpenFeedback.__init__(self, u'') + self.log = plugin.log + self.plugin = plugin + + def custom_dialog(self, parent): + from PyQt4.Qt import (QDialog, QDialogButtonBox, QIcon, + QLabel, QPushButton, QVBoxLayout) + + class Dialog(QDialog): + + def __init__(self, p, cd, pixmap='dialog_information.png'): + QDialog.__init__(self, p) + self.cd = cd + self.setWindowTitle("Apple iDevice detected") + self.l = l = QVBoxLayout() + self.setLayout(l) + msg = QLabel() + msg.setText(_( + '

If you do not want calibre to recognize your Apple iDevice ' + 'when it is connected to your computer, ' + 'click Disable Apple Driver.

' + '

To transfer books to your iDevice, ' + 'click Disable Apple Driver, ' + "then use the 'Connect to iTunes' method recommended in the " + 'Calibre + iDevices FAQ, ' + 'using the Connect/Share|Connect to iTunes menu item.

' + '

Enabling the Apple driver for direct connection to iDevices ' + 'is an unsupported advanced user mode.

' + '

' + )) + msg.setOpenExternalLinks(True) + msg.setWordWrap(True) + l.addWidget(msg) + + self.bb = QDialogButtonBox() + disable_driver = QPushButton(_("Disable Apple driver")) + disable_driver.setDefault(True) + self.bb.addButton(disable_driver, QDialogButtonBox.RejectRole) + + enable_driver = QPushButton(_("Enable Apple driver")) + self.bb.addButton(enable_driver, QDialogButtonBox.AcceptRole) + l.addWidget(self.bb) + self.bb.accepted.connect(self.accept) + self.bb.rejected.connect(self.reject) + + self.setWindowIcon(QIcon(I(pixmap))) + self.resize(self.sizeHint()) + + self.finished.connect(self.do_it) + + def do_it(self, return_code): + if return_code == self.Accepted: + self.cd.log.info(" Apple driver ENABLED") + dynamic[confirm_config_name(self.cd.plugin.DISPLAY_DISABLE_DIALOG)] = False + else: + from calibre.customize.ui import disable_plugin + self.cd.log.info(" Apple driver DISABLED") + disable_plugin(self.cd.plugin) + + return Dialog(parent, self) + + from PIL import Image as PILImage from lxml import etree @@ -54,15 +119,11 @@ class DriverBase(DeviceConfig, DevicePlugin): 'iBooks Category'), _('Cache covers from iTunes/iBooks') + ':::' + - _('Enable to cache and display covers from iTunes/iBooks'), - _("Skip 'Connect to iTunes' recommendation") + - ':::' + - _("Enable to skip the 'Connect to iTunes' recommendation dialog") + _('Enable to cache and display covers from iTunes/iBooks') ] EXTRA_CUSTOMIZATION_DEFAULT = [ True, True, - False, ] @@ -118,12 +179,13 @@ class ITUNES(DriverBase): supported_platforms = ['osx','windows'] author = 'GRiker' #: The version of this plugin as a 3-tuple (major, minor, revision) - version = (0,9,0) + version = (1,0,0) + + DISPLAY_DISABLE_DIALOG = "display_disable_apple_driver_dialog" # EXTRA_CUSTOMIZATION_MESSAGE indexes USE_SERIES_AS_CATEGORY = 0 CACHE_COVERS = 1 - SKIP_CONNECT_TO_ITUNES_DIALOG = 2 OPEN_FEEDBACK_MESSAGE = _( 'Apple device detected, launching iTunes, please wait ...') @@ -739,21 +801,17 @@ class ITUNES(DriverBase): Note that most of the initialization is necessarily performed in can_handle(), as we need to talk to iTunes to discover if there's a connected iPod ''' + if DEBUG: self.log.info("ITUNES.open()") - # Display a dialog recommending using 'Connect to iTunes' - if False and not self.settings().extra_customization[self.SKIP_CONNECT_TO_ITUNES_DIALOG]: - raise OpenFeedback('

' + ('Click the "Connect/Share" button and choose' - ' "Connect to iTunes" to send books from your calibre library' - ' to your Apple iDevice.

For more information, see ' - '' - 'Calibre + Apple iDevices FAQ.

' - 'After following the Quick Start steps outlined in the FAQ, ' - 'restart calibre.')) - - if DEBUG: - self.log.info(" advanced user mode, directly connecting to iDevice") + # Display a dialog recommending using 'Connect to iTunes' if user hasn't + # previously disabled the dialog + if dynamic.get(confirm_config_name(self.DISPLAY_DISABLE_DIALOG),True): + raise AppleOpenFeedback(self) + else: + if DEBUG: + self.log.info(" advanced user mode, directly connecting to iDevice") # Confirm/create thumbs archive if not os.path.exists(self.cache_dir): diff --git a/src/calibre/devices/bambook/libbambookcore.py b/src/calibre/devices/bambook/libbambookcore.py index 35d04ba4ac..e77ac1da7b 100644 --- a/src/calibre/devices/bambook/libbambookcore.py +++ b/src/calibre/devices/bambook/libbambookcore.py @@ -10,7 +10,7 @@ Sanda library wrapper import ctypes, uuid, hashlib, os, sys from threading import Event, Lock -from calibre.constants import iswindows, islinux, isosx +from calibre.constants import iswindows from calibre import load_library try: @@ -29,12 +29,9 @@ try: except: lib_handle = None +text_encoding = 'utf-8' if iswindows: text_encoding = 'mbcs' -elif islinux: - text_encoding = 'utf-8' -elif isosx: - text_encoding = 'utf-8' def is_bambook_lib_ready(): return lib_handle != None diff --git a/src/calibre/devices/errors.py b/src/calibre/devices/errors.py index 3d88eb741f..ecd61a1169 100644 --- a/src/calibre/devices/errors.py +++ b/src/calibre/devices/errors.py @@ -41,6 +41,13 @@ class OpenFeedback(DeviceError): self.feedback_msg = msg DeviceError.__init__(self, msg) + def custom_dialog(self, parent): + ''' + If you need to show the user a custom dialog, instead of just + displaying the feedback_msg, create and return it here. + ''' + raise NotImplementedError + class DeviceBusy(ProtocolError): """ Raised when device is busy """ def __init__(self, uerr=""): diff --git a/src/calibre/ebooks/chardet/__init__.py b/src/calibre/ebooks/chardet/__init__.py index 571ceafe53..c562176ef2 100644 --- a/src/calibre/ebooks/chardet/__init__.py +++ b/src/calibre/ebooks/chardet/__init__.py @@ -110,4 +110,11 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False, if resolve_entities: raw = substitute_entites(raw) + if encoding and encoding.lower().replace('_', '-').strip() in ( + 'gb2312', 'chinese', 'csiso58gb231280', 'euc-cn', 'euccn', + 'eucgb2312-cn', 'gb2312-1980', 'gb2312-80', 'iso-ir-58'): + # Microsoft Word exports to HTML with encoding incorrectly set to + # gb2312 instead of gbk. gbk is a superset of gb2312, anyway. + encoding = 'gbk' + return raw, encoding diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index 6272e7b10b..b26befe075 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -1003,8 +1003,10 @@ OptionRecommendation(name='sr3_replace', self.opts.insert_blank_line = oibl self.opts.remove_paragraph_spacing = orps - from calibre.ebooks.oeb.transforms.page_margin import RemoveFakeMargins + from calibre.ebooks.oeb.transforms.page_margin import \ + RemoveFakeMargins, RemoveAdobeMargins RemoveFakeMargins()(self.oeb, self.log, self.opts) + RemoveAdobeMargins()(self.oeb, self.log, self.opts) pr(0.9) self.flush() diff --git a/src/calibre/ebooks/lrf/input.py b/src/calibre/ebooks/lrf/input.py index 70f3c3a15a..e354bee562 100644 --- a/src/calibre/ebooks/lrf/input.py +++ b/src/calibre/ebooks/lrf/input.py @@ -6,7 +6,7 @@ __license__ = 'GPL v3' __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import os, textwrap +import os, textwrap, sys from copy import deepcopy from lxml import etree @@ -413,7 +413,12 @@ class LRFInput(InputFormatPlugin): ('calibre', 'image-block'): image_block, } transform = etree.XSLT(styledoc, extensions=extensions) - result = transform(doc) + try: + result = transform(doc) + except RuntimeError: + sys.setrecursionlimit(5000) + result = transform(doc) + with open('content.opf', 'wb') as f: f.write(result) styles.write() diff --git a/src/calibre/ebooks/metadata/book/base.py b/src/calibre/ebooks/metadata/book/base.py index 91dcc29230..328ab7be26 100644 --- a/src/calibre/ebooks/metadata/book/base.py +++ b/src/calibre/ebooks/metadata/book/base.py @@ -198,8 +198,10 @@ class Metadata(object): return copy.deepcopy(ans) def _clean_identifier(self, typ, val): - typ = icu_lower(typ).strip().replace(':', '').replace(',', '') - val = val.strip().replace(',', '|').replace(':', '|') + if typ: + typ = icu_lower(typ).strip().replace(':', '').replace(',', '') + if val: + val = val.strip().replace(',', '|').replace(':', '|') return typ, val def set_identifiers(self, identifiers): diff --git a/src/calibre/ebooks/metadata/meta.py b/src/calibre/ebooks/metadata/meta.py index cbd9db3f04..b0c43a8182 100644 --- a/src/calibre/ebooks/metadata/meta.py +++ b/src/calibre/ebooks/metadata/meta.py @@ -182,6 +182,19 @@ def metadata_from_filename(name, pat=None): mi.isbn = si except (IndexError, ValueError): pass + try: + publisher = match.group('publisher') + mi.publisher = publisher + except (IndexError, ValueError): + pass + try: + pubdate = match.group('published') + if pubdate: + from calibre.utils.date import parse_date + mi.pubdate = parse_date(pubdate) + except: + pass + if mi.is_null('title'): mi.title = name return mi diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py index abf35a9465..cfa2b09ea8 100644 --- a/src/calibre/ebooks/metadata/sources/amazon.py +++ b/src/calibre/ebooks/metadata/sources/amazon.py @@ -483,7 +483,6 @@ class Amazon(Source): log.exception('Failed to download cover from:', cached_url) # }}} - if __name__ == '__main__': # tests {{{ # To run these test use: calibre-debug -e # src/calibre/ebooks/metadata/sources/amazon.py @@ -504,7 +503,7 @@ if __name__ == '__main__': # tests {{{ ( # This isbn not on amazon {'identifiers':{'isbn': '8324616489'}, 'title':'Learning Python', 'authors':['Lutz']}, - [title_test('Learning Python: Powerful Object-Oriented Programming', + [title_test('Learning Python, 3rd Edition', exact=True), authors_test(['Mark Lutz']) ] diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py index 9106a0ca52..eb81b04763 100644 --- a/src/calibre/ebooks/metadata/sources/base.py +++ b/src/calibre/ebooks/metadata/sources/base.py @@ -15,6 +15,7 @@ from calibre.customize import Plugin from calibre.utils.logging import ThreadSafeLog, FileStream from calibre.utils.config import JSONConfig from calibre.utils.titlecase import titlecase +from calibre.ebooks.metadata import check_isbn msprefs = JSONConfig('metadata_sources.json') @@ -244,6 +245,7 @@ class Source(Plugin): mi.title = fixcase(mi.title) mi.authors = list(map(fixcase, mi.authors)) mi.tags = list(map(fixcase, mi.tags)) + mi.isbn = check_isbn(mi.isbn) # }}} diff --git a/src/calibre/ebooks/metadata/sources/identify.py b/src/calibre/ebooks/metadata/sources/identify.py index 53fb3a9ea4..1d4d8840e8 100644 --- a/src/calibre/ebooks/metadata/sources/identify.py +++ b/src/calibre/ebooks/metadata/sources/identify.py @@ -14,6 +14,7 @@ from io import BytesIO from calibre.customize.ui import metadata_plugins from calibre.ebooks.metadata.sources.base import create_log +from calibre.ebooks.metadata.xisbn import xisbn # How long to wait for more results after first result is found WAIT_AFTER_FIRST_RESULT = 30 # seconds @@ -42,6 +43,7 @@ def is_worker_alive(workers): return False def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30): + start_time = time.time() plugins = list(metadata_plugins['identify']) kwargs = { @@ -79,7 +81,7 @@ def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30): time.sleep(0.2) if get_results() and first_result_at is None: - first_result_at = time.time() + first_result_at = time.time() if not is_worker_alive(workers): break @@ -105,3 +107,55 @@ def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30): log(plog) log('\n'+'*'*80) + for i, result in enumerate(results): + result.relevance_in_source = i + result.has_cached_cover_url = \ + plugin.get_cached_cover_url(result.identifiers) is not None + result.identify_plugin = plugin + + log('The identify phase took %.2f seconds'%(time.time() - start_time)) + log('Merging results from different sources and finding earliest', + 'publication dates') + start_time = time.time() + merged_results = merge_identify_results(results, log) + log('We have %d merged results, merging took: %.2f seconds' % + (len(merged_results), time.time() - start_time)) + +class ISBNMerge(object): + + def __init__(self): + self.pools = {} + + def isbn_in_pool(self, isbn): + if isbn: + for p in self.pools: + if isbn in p: + return p + return None + + def pool_has_result_from_same_source(self, pool, result): + results = self.pools[pool][1] + for r in results: + if r.identify_plugin is result.identify_plugin: + return True + return False + + def add_result(self, result, isbn): + pool = self.isbn_in_pool(isbn) + if pool is None: + isbns, min_year = xisbn.get_isbn_pool(isbn) + if not isbns: + isbns = frozenset([isbn]) + self.pool[isbns] = pool = (min_year, []) + + if not self.pool_has_result_from_same_source(pool, result): + pool[1].append(result) + +def merge_identify_results(result_map, log): + for plugin, results in result_map.iteritems(): + for result in results: + isbn = result.isbn + if isbn: + isbns, min_year = xisbn.get_isbn_pool(isbn) + + diff --git a/src/calibre/ebooks/metadata/xisbn.py b/src/calibre/ebooks/metadata/xisbn.py index aaeb1c6b98..69cc3f7cb3 100644 --- a/src/calibre/ebooks/metadata/xisbn.py +++ b/src/calibre/ebooks/metadata/xisbn.py @@ -71,14 +71,28 @@ class xISBN(object): ans.add(i) return ans + def get_isbn_pool(self, isbn): + data = self.get_data(isbn) + isbns = frozenset([x.get('isbn') for x in data if 'isbn' in x]) + min_year = 100000 + for x in data: + try: + year = int(x['year']) + if year < min_year: + min_year = year + except: + continue + if min_year == 100000: + min_year = None + return isbns, min_year xisbn = xISBN() if __name__ == '__main__': - import sys + import sys, pprint isbn = sys.argv[-1] - print xisbn.get_data(isbn) + print pprint.pprint(xisbn.get_data(isbn)) print print xisbn.get_associated_isbns(isbn) diff --git a/src/calibre/ebooks/mobi/mobiml.py b/src/calibre/ebooks/mobi/mobiml.py index 189739986d..40ad5e9e78 100644 --- a/src/calibre/ebooks/mobi/mobiml.py +++ b/src/calibre/ebooks/mobi/mobiml.py @@ -102,6 +102,7 @@ class MobiMLizer(object): def __call__(self, oeb, context): oeb.logger.info('Converting XHTML to Mobipocket markup...') self.oeb = oeb + self.log = self.oeb.logger self.opts = context self.profile = profile = context.dest self.fnums = fnums = dict((v, k) for k, v in profile.fnums.items()) @@ -118,6 +119,10 @@ class MobiMLizer(object): del oeb.guide['cover'] item = oeb.manifest.hrefs[href] if item.spine_position is not None: + self.log.warn('Found an HTML cover,', item.href, 'removing it.', + 'If you find some content missing from the output MOBI, it ' + 'is because you misidentified the HTML cover in the input ' + 'document') oeb.spine.remove(item) if item.media_type in OEB_DOCS: self.oeb.manifest.remove(item) @@ -206,7 +211,11 @@ class MobiMLizer(object): vspace = bstate.vpadding + bstate.vmargin bstate.vpadding = bstate.vmargin = 0 if tag not in TABLE_TAGS: - wrapper.attrib['height'] = self.mobimlize_measure(vspace) + if tag in ('ul', 'ol') and vspace > 0: + wrapper.addprevious(etree.Element(XHTML('div'), + height=self.mobimlize_measure(vspace))) + else: + wrapper.attrib['height'] = self.mobimlize_measure(vspace) para.attrib['width'] = self.mobimlize_measure(indent) elif tag == 'table' and vspace > 0: vspace = int(round(vspace / self.profile.fbase)) diff --git a/src/calibre/ebooks/oeb/reader.py b/src/calibre/ebooks/oeb/reader.py index 4a09e0b1d4..ebc2f30d00 100644 --- a/src/calibre/ebooks/oeb/reader.py +++ b/src/calibre/ebooks/oeb/reader.py @@ -103,8 +103,8 @@ class OEBReader(object): data = self.oeb.container.read(None) data = self.oeb.decode(data) data = XMLDECL_RE.sub('', data) - data = data.replace('http://openebook.org/namespaces/oeb-package/1.0', - OPF1_NS) + data = re.sub(r'http://openebook.org/namespaces/oeb-package/1.0(/*)', + OPF1_NS, data) try: opf = etree.fromstring(data) except etree.XMLSyntaxError: diff --git a/src/calibre/ebooks/oeb/transforms/margins.py b/src/calibre/ebooks/oeb/transforms/margins.py deleted file mode 100644 index fbdf2e63fd..0000000000 --- a/src/calibre/ebooks/oeb/transforms/margins.py +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai - -__license__ = 'GPL v3' -__copyright__ = '2010, Kovid Goyal ' -__docformat__ = 'restructuredtext en' - - -class RemoveFakeMargins(object): - ''' - Try to detect and remove fake margins inserted by asinine ebook creation - software on each paragraph/wrapper div. Can be used only after CSS - flattening. - ''' - - def __call__(self, oeb, opts, log): - self.oeb, self.opts, self.log = oeb, opts, log - - from calibre.ebooks.oeb.base import XPath, OEB_STYLES - - stylesheet = None - for item in self.oeb.manifest: - if item.media_type.lower() in OEB_STYLES: - stylesheet = item.data - break - - if stylesheet is None: - return - - - top_level_elements = {} - second_level_elements = {} - - for x in self.oeb.spine: - root = x.data - body = XPath('//h:body')(root) - if body: - body = body[0] - - if not hasattr(body, 'xpath'): - continue - - # Check for margins on top level elements - for lb in XPath('./h:div|./h:p|./*/h:div|./*/h:p')(body): - cls = lb.get('class', '') - level = top_level_elements if lb.getparent() is body else \ - second_level_elements - if cls not in level: - level[cls] = [] - top_level_elements[cls] = [] - level[cls].append(lb) - - - def get_margins(self, stylesheet, cls): - pass - diff --git a/src/calibre/ebooks/oeb/transforms/page_margin.py b/src/calibre/ebooks/oeb/transforms/page_margin.py index 589f004dd1..c415dda0e0 100644 --- a/src/calibre/ebooks/oeb/transforms/page_margin.py +++ b/src/calibre/ebooks/oeb/transforms/page_margin.py @@ -11,6 +11,26 @@ from collections import Counter from calibre.ebooks.oeb.base import OEB_STYLES, barename, XPath +class RemoveAdobeMargins(object): + ''' + Remove margins specified in Adobe's page templates. + ''' + + def __call__(self, oeb, log, opts): + self.oeb, self.opts, self.log = oeb, opts, log + + for item in self.oeb.manifest: + if item.media_type == 'application/vnd.adobe-page-template+xml': + self.log('Removing page margins specified in the' + ' Adobe page template') + for elem in item.data.xpath( + '//*[@margin-bottom or @margin-top ' + 'or @margin-left or @margin-right]'): + for margin in ('left', 'right', 'top', 'bottom'): + attr = 'margin-'+margin + elem.attrib.pop(attr, None) + + class RemoveFakeMargins(object): ''' diff --git a/src/calibre/ebooks/pdf/input.py b/src/calibre/ebooks/pdf/input.py index 14b3552b04..8de3f44d36 100644 --- a/src/calibre/ebooks/pdf/input.py +++ b/src/calibre/ebooks/pdf/input.py @@ -34,7 +34,7 @@ class PDFInput(InputFormatPlugin): from calibre.ebooks.pdf.reflow import PDFDocument if pdfreflow_err: raise RuntimeError('Failed to load pdfreflow: ' + pdfreflow_err) - pdfreflow.reflow(stream.read()) + pdfreflow.reflow(stream.read(), 1, -1) xml = open('index.xml', 'rb').read() PDFDocument(xml, self.opts, self.log) return os.path.join(os.getcwd(), 'metadata.opf') diff --git a/src/calibre/ebooks/pdf/main.cpp b/src/calibre/ebooks/pdf/main.cpp index 4e6ec60388..869204dc1d 100644 --- a/src/calibre/ebooks/pdf/main.cpp +++ b/src/calibre/ebooks/pdf/main.cpp @@ -24,13 +24,14 @@ extern "C" { pdfreflow_reflow(PyObject *self, PyObject *args) { char *pdfdata; Py_ssize_t size; + int first_page, last_page, num = 0; - if (!PyArg_ParseTuple(args, "s#", &pdfdata, &size)) + if (!PyArg_ParseTuple(args, "s#ii", &pdfdata, &size, &first_page, &last_page)) return NULL; try { Reflow reflow(pdfdata, static_cast(size)); - reflow.render(); + num = reflow.render(first_page, last_page); } catch (std::exception &e) { PyErr_SetString(PyExc_RuntimeError, e.what()); return NULL; } catch (...) { @@ -38,7 +39,7 @@ extern "C" { "Unknown exception raised while rendering PDF"); return NULL; } - Py_RETURN_NONE; + return Py_BuildValue("i", num); } static PyObject * @@ -166,8 +167,8 @@ extern "C" { static PyMethodDef pdfreflow_methods[] = { {"reflow", pdfreflow_reflow, METH_VARARGS, - "reflow(pdf_data)\n\n" - "Reflow the specified PDF." + "reflow(pdf_data, first_page, last_page)\n\n" + "Reflow the specified PDF. Returns the number of pages in the PDF. If last_page is -1 renders to end of document." }, {"get_metadata", pdfreflow_get_metadata, METH_VARARGS, "get_metadata(pdf_data, cover)\n\n" diff --git a/src/calibre/ebooks/pdf/reflow.cpp b/src/calibre/ebooks/pdf/reflow.cpp index e444c126ab..c9d42dd671 100644 --- a/src/calibre/ebooks/pdf/reflow.cpp +++ b/src/calibre/ebooks/pdf/reflow.cpp @@ -712,16 +712,18 @@ Reflow::Reflow(char *pdfdata, size_t sz) : } -void -Reflow::render() { +int +Reflow::render(int first_page, int last_page) { if (!this->doc->okToCopy()) cout << "Warning, this document has the copy protection flag set, ignoring." << endl; globalParams->setTextEncoding(encoding); - int first_page = 1; - int last_page = doc->getNumPages(); + int doc_pages = doc->getNumPages(); + if (last_page < 1 or last_page > doc_pages) last_page = doc_pages; + if (first_page < 1) first_page = 1; + if (first_page > last_page) first_page = last_page; XMLOutputDev *xml_out = new XMLOutputDev(this->doc); doc->displayPages(xml_out, first_page, last_page, @@ -733,9 +735,12 @@ Reflow::render() { false //Printing ); - this->dump_outline(); + if (last_page - first_page == doc_pages - 1) + this->dump_outline(); delete xml_out; + + return doc_pages; } void Reflow::dump_outline() { diff --git a/src/calibre/ebooks/pdf/reflow.h b/src/calibre/ebooks/pdf/reflow.h index ad4b79929d..768799f004 100644 --- a/src/calibre/ebooks/pdf/reflow.h +++ b/src/calibre/ebooks/pdf/reflow.h @@ -66,7 +66,7 @@ class Reflow { ~Reflow(); /* Convert the PDF to XML. All files are output to the current directory */ - void render(); + int render(int first_page, int last_page); /* Get the PDF Info Dictionary */ map get_info(); diff --git a/src/calibre/gui2/actions/convert.py b/src/calibre/gui2/actions/convert.py index caf65932d8..36f420c7bb 100644 --- a/src/calibre/gui2/actions/convert.py +++ b/src/calibre/gui2/actions/convert.py @@ -51,7 +51,7 @@ class ConvertAction(InterfaceAction): self.queue_convert_jobs(jobs, changed, bad, rows, previous, self.book_auto_converted, extra_job_args=[on_card]) - def auto_convert_mail(self, to, fmts, delete_from_library, book_ids, format): + def auto_convert_mail(self, to, fmts, delete_from_library, book_ids, format, subject): previous = self.gui.library_view.currentIndex() rows = [x.row() for x in \ self.gui.library_view.selectionModel().selectedRows()] @@ -59,7 +59,7 @@ class ConvertAction(InterfaceAction): if jobs == []: return self.queue_convert_jobs(jobs, changed, bad, rows, previous, self.book_auto_converted_mail, - extra_job_args=[delete_from_library, to, fmts]) + extra_job_args=[delete_from_library, to, fmts, subject]) def auto_convert_news(self, book_ids, format): previous = self.gui.library_view.currentIndex() @@ -145,9 +145,10 @@ class ConvertAction(InterfaceAction): self.gui.sync_to_device(on_card, False, specific_format=fmt, send_ids=[book_id], do_auto_convert=False) def book_auto_converted_mail(self, job): - temp_files, fmt, book_id, delete_from_library, to, fmts = self.conversion_jobs[job] + temp_files, fmt, book_id, delete_from_library, to, fmts, subject = self.conversion_jobs[job] self.book_converted(job) - self.gui.send_by_mail(to, fmts, delete_from_library, specific_format=fmt, send_ids=[book_id], do_auto_convert=False) + self.gui.send_by_mail(to, fmts, delete_from_library, subject=subject, + specific_format=fmt, send_ids=[book_id], do_auto_convert=False) def book_auto_converted_news(self, job): temp_files, fmt, book_id = self.conversion_jobs[job] diff --git a/src/calibre/gui2/actions/device.py b/src/calibre/gui2/actions/device.py index a4ca95a9bb..bfefbc5f64 100644 --- a/src/calibre/gui2/actions/device.py +++ b/src/calibre/gui2/actions/device.py @@ -82,7 +82,8 @@ class ShareConnMenu(QMenu): # {{{ keys = sorted(opts.accounts.keys()) for account in keys: formats, auto, default = opts.accounts[account] - dest = 'mail:'+account+';'+formats + subject = opts.subjects.get(account, '') + dest = 'mail:'+account+';'+formats+';'+subject action1 = DeviceAction(dest, False, False, I('mail.png'), account) action2 = DeviceAction(dest, True, False, I('mail.png'), diff --git a/src/calibre/gui2/actions/fetch_news.py b/src/calibre/gui2/actions/fetch_news.py index f7756efbab..f94dfbc88c 100644 --- a/src/calibre/gui2/actions/fetch_news.py +++ b/src/calibre/gui2/actions/fetch_news.py @@ -5,6 +5,8 @@ __license__ = 'GPL v3' __copyright__ = '2010, Kovid Goyal ' __docformat__ = 'restructuredtext en' +import gc + from PyQt4.Qt import Qt from calibre.gui2 import Dispatcher @@ -53,11 +55,11 @@ class FetchNewsAction(InterfaceAction): def scheduled_recipe_fetched(self, job): temp_files, fmt, arg = self.conversion_jobs.pop(job) - pt = temp_files[0] + fname = temp_files[0].name if job.failed: self.scheduler.recipe_download_failed(arg) return self.gui.job_exception(job) - id = self.gui.library_view.model().add_news(pt.name, arg) + id = self.gui.library_view.model().add_news(fname, arg) # Arg may contain a "keep_issues" variable. If it is non-zero, # delete all but newest x issues. @@ -81,5 +83,6 @@ class FetchNewsAction(InterfaceAction): self.gui.status_bar.show_message(arg['title'] + _(' fetched.'), 3000) self.gui.email_news(id) self.gui.sync_news() + gc.collect() diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py index ab2177cef1..4d4f66eab1 100644 --- a/src/calibre/gui2/device.py +++ b/src/calibre/gui2/device.py @@ -164,7 +164,7 @@ class DeviceManager(Thread): # {{{ dev.open(self.current_library_uuid) except OpenFeedback as e: if dev not in self.ejected_devices: - self.open_feedback_msg(dev.get_gui_name(), e.feedback_msg) + self.open_feedback_msg(dev.get_gui_name(), e) self.ejected_devices.add(dev) continue except: @@ -618,8 +618,11 @@ class DeviceMixin(object): # {{{ if tweaks['auto_connect_to_folder']: self.connect_to_folder_named(tweaks['auto_connect_to_folder']) - def show_open_feedback(self, devname, msg): - self.__of_dev_mem__ = d = info_dialog(self, devname, msg) + def show_open_feedback(self, devname, e): + try: + self.__of_dev_mem__ = d = e.custom_dialog(self) + except NotImplementedError: + self.__of_dev_mem__ = d = info_dialog(self, devname, e.feedback_msg) d.show() def auto_convert_question(self, msg, autos): @@ -884,9 +887,14 @@ class DeviceMixin(object): # {{{ on_card = dest self.sync_to_device(on_card, delete, fmt) elif dest == 'mail': - to, fmts = sub_dest.split(';') + sub_dest_parts = sub_dest.split(';') + while len(sub_dest_parts) < 3: + sub_dest_parts.append('') + to = sub_dest_parts[0] + fmts = sub_dest_parts[1] + subject = ';'.join(sub_dest_parts[2:]) fmts = [x.strip().lower() for x in fmts.split(',')] - self.send_by_mail(to, fmts, delete) + self.send_by_mail(to, fmts, delete, subject=subject) def cover_to_thumbnail(self, data): if self.device_manager.device and \ diff --git a/src/calibre/gui2/dialogs/confirm_delete.py b/src/calibre/gui2/dialogs/confirm_delete.py index 9cdd46712f..16d7bdde2f 100644 --- a/src/calibre/gui2/dialogs/confirm_delete.py +++ b/src/calibre/gui2/dialogs/confirm_delete.py @@ -3,12 +3,11 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __docformat__ = 'restructuredtext en' -from calibre.gui2 import dynamic -from calibre.gui2.dialogs.confirm_delete_ui import Ui_Dialog from PyQt4.Qt import QDialog, Qt, QPixmap, QIcon -def _config_name(name): - return name + '_again' +from calibre import confirm_config_name +from calibre.gui2 import dynamic +from calibre.gui2.dialogs.confirm_delete_ui import Ui_Dialog class Dialog(QDialog, Ui_Dialog): @@ -22,11 +21,11 @@ class Dialog(QDialog, Ui_Dialog): self.buttonBox.setFocus(Qt.OtherFocusReason) def toggle(self, *args): - dynamic[_config_name(self.name)] = self.again.isChecked() + dynamic[confirm_config_name(self.name)] = self.again.isChecked() def confirm(msg, name, parent=None, pixmap='dialog_warning.png'): - if not dynamic.get(_config_name(name), True): + if not dynamic.get(confirm_config_name(name), True): return True d = Dialog(msg, name, parent) d.label.setPixmap(QPixmap(I(pixmap))) diff --git a/src/calibre/gui2/email.py b/src/calibre/gui2/email.py index 81c1d9c255..c6d58fa340 100644 --- a/src/calibre/gui2/email.py +++ b/src/calibre/gui2/email.py @@ -22,6 +22,7 @@ from calibre.customize.ui import available_input_formats, available_output_forma from calibre.ebooks.metadata import authors_to_string from calibre.constants import preferred_encoding from calibre.gui2 import config, Dispatcher, warning_dialog +from calibre.library.save_to_disk import get_components from calibre.utils.config import tweaks class EmailJob(BaseJob): # {{{ @@ -210,7 +211,7 @@ class EmailMixin(object): # {{{ def __init__(self): self.emailer = Emailer(self.job_manager) - def send_by_mail(self, to, fmts, delete_from_library, send_ids=None, + def send_by_mail(self, to, fmts, delete_from_library, subject='', send_ids=None, do_auto_convert=True, specific_format=None): ids = [self.library_view.model().id(r) for r in self.library_view.selectionModel().selectedRows()] if send_ids is None else send_ids if not ids or len(ids) == 0: @@ -239,7 +240,14 @@ class EmailMixin(object): # {{{ remove_ids.append(id) jobnames.append(t) attachments.append(f) - subjects.append(_('E-book:')+ ' '+t) + if not subject: + subjects.append(_('E-book:')+ ' '+t) + else: + components = get_components(subject, mi, id) + if not components: + components = [mi.title] + subject = os.path.join(*components) + subjects.append(subject) a = authors_to_string(mi.authors if mi.authors else \ [_('Unknown')]) texts.append(_('Attached, you will find the e-book') + \ @@ -292,7 +300,7 @@ class EmailMixin(object): # {{{ if self.auto_convert_question( _('Auto convert the following books before sending via ' 'email?'), autos): - self.iactions['Convert Books'].auto_convert_mail(to, fmts, delete_from_library, auto, format) + self.iactions['Convert Books'].auto_convert_mail(to, fmts, delete_from_library, auto, format, subject) if bad: bad = '\n'.join('%s'%(i,) for i in bad) diff --git a/src/calibre/gui2/filename_pattern.ui b/src/calibre/gui2/filename_pattern.ui index 68b3108e06..c8a9b4f6f6 100644 --- a/src/calibre/gui2/filename_pattern.ui +++ b/src/calibre/gui2/filename_pattern.ui @@ -206,6 +206,46 @@ + + + + Publisher: + + + + + + + Regular expression (?P<publisher>) + + + No match + + + true + + + + + + + Published: + + + + + + + Regular expression (?P<published>) + + + No match + + + true + + + diff --git a/src/calibre/gui2/library/models.py b/src/calibre/gui2/library/models.py index c921ea125f..3e1670c7af 100644 --- a/src/calibre/gui2/library/models.py +++ b/src/calibre/gui2/library/models.py @@ -7,7 +7,6 @@ __docformat__ = 'restructuredtext en' import shutil, functools, re, os, traceback from contextlib import closing -from operator import attrgetter from PyQt4.Qt import QAbstractTableModel, Qt, pyqtSignal, QIcon, QImage, \ QModelIndex, QVariant, QDate, QColor @@ -18,7 +17,7 @@ from calibre.ebooks.metadata import fmt_sidx, authors_to_string, string_to_autho from calibre.ptempfile import PersistentTemporaryFile from calibre.utils.config import tweaks, prefs from calibre.utils.date import dt_factory, qt_to_dt, isoformat -from calibre.utils.icu import sort_key, strcmp as icu_strcmp +from calibre.utils.icu import sort_key from calibre.ebooks.metadata.meta import set_metadata as _set_metadata from calibre.utils.search_query_parser import SearchQueryParser from calibre.library.caches import _match, CONTAINS_MATCH, EQUALS_MATCH, \ @@ -984,6 +983,21 @@ class OnDeviceSearch(SearchQueryParser): # {{{ # }}} +class DeviceDBSortKeyGen(object): # {{{ + + def __init__(self, attr, keyfunc, db): + self.attr = attr + self.db = db + self.keyfunc = keyfunc + + def __call__(self, x): + try: + ans = self.keyfunc(getattr(self.db[x], self.attr)) + except: + ans = None + return ans +# }}} + class DeviceBooksModel(BooksModel): # {{{ booklist_dirtied = pyqtSignal() @@ -1089,59 +1103,40 @@ class DeviceBooksModel(BooksModel): # {{{ def sort(self, col, order, reset=True): descending = order != Qt.AscendingOrder - def strcmp(attr): - ag = attrgetter(attr) - def _strcmp(x, y): - x = ag(self.db[x]) - y = ag(self.db[y]) - if x == None: - x = '' - if y == None: - y = '' - return icu_strcmp(x.strip(), y.strip()) - return _strcmp - def datecmp(x, y): - x = self.db[x].datetime - y = self.db[y].datetime - return cmp(dt_factory(x, assume_utc=True), dt_factory(y, - assume_utc=True)) - def sizecmp(x, y): - x, y = int(self.db[x].size), int(self.db[y].size) - return cmp(x, y) - def tagscmp(x, y): - x = ','.join(sorted(getattr(self.db[x], 'device_collections', []),key=sort_key)) - y = ','.join(sorted(getattr(self.db[y], 'device_collections', []),key=sort_key)) - return cmp(x, y) - def libcmp(x, y): - x, y = self.db[x].in_library, self.db[y].in_library - return cmp(x, y) - def authorcmp(x, y): - ax = getattr(self.db[x], 'author_sort', None) - ay = getattr(self.db[y], 'author_sort', None) - if ax and ay: - x = ax - y = ay - else: - x, y = authors_to_string(self.db[x].authors), \ - authors_to_string(self.db[y].authors) - return cmp(x, y) cname = self.column_map[col] - fcmp = { - 'title': strcmp('title_sorter'), - 'authors' : authorcmp, - 'size' : sizecmp, - 'timestamp': datecmp, - 'collections': tagscmp, - 'inlibrary': libcmp, + def author_key(x): + try: + ax = self.db[x].author_sort + if not ax: + raise Exception('') + except: + try: + ax = authors_to_string(self.db[x].authors) + except: + ax = '' + return ax + + keygen = { + 'title': ('title_sorter', lambda x: sort_key(x) if x else ''), + 'authors' : author_key, + 'size' : ('size', int), + 'timestamp': ('datetime', functools.partial(dt_factory, assume_utc=True)), + 'collections': ('device_collections', lambda x:sorted(x, + key=sort_key)), + 'inlibrary': ('in_library', lambda x: x), }[cname] - self.map.sort(cmp=fcmp, reverse=descending) + keygen = keygen if callable(keygen) else DeviceDBSortKeyGen( + keygen[0], keygen[1], self.db) + self.map.sort(key=keygen, reverse=descending) if len(self.map) == len(self.db): self.sorted_map = list(self.map) else: self.sorted_map = list(range(len(self.db))) - self.sorted_map.sort(cmp=fcmp, reverse=descending) + self.sorted_map.sort(key=keygen, reverse=descending) self.sorted_on = (self.column_map[col], order) self.sort_history.insert(0, self.sorted_on) + if hasattr(keygen, 'db'): + keygen.db = None if reset: self.reset() diff --git a/src/calibre/gui2/preferences/emailp.py b/src/calibre/gui2/preferences/emailp.py index 19007dfcf1..ded6891387 100644 --- a/src/calibre/gui2/preferences/emailp.py +++ b/src/calibre/gui2/preferences/emailp.py @@ -5,6 +5,8 @@ __license__ = 'GPL v3' __copyright__ = '2010, Kovid Goyal ' __docformat__ = 'restructuredtext en' +import textwrap + from PyQt4.Qt import QAbstractTableModel, QVariant, QFont, Qt @@ -17,25 +19,30 @@ from calibre.utils.smtp import config as smtp_prefs class EmailAccounts(QAbstractTableModel): # {{{ - def __init__(self, accounts): + def __init__(self, accounts, subjects): QAbstractTableModel.__init__(self) self.accounts = accounts + self.subjects = subjects self.account_order = sorted(self.accounts.keys()) - self.headers = map(QVariant, [_('Email'), _('Formats'), _('Auto send')]) + self.headers = map(QVariant, [_('Email'), _('Formats'), _('Subject'), _('Auto send')]) self.default_font = QFont() self.default_font.setBold(True) self.default_font = QVariant(self.default_font) - self.tooltips =[NONE] + map(QVariant, + self.tooltips =[NONE] + list(map(QVariant, map(textwrap.fill, [_('Formats to email. The first matching format will be sent.'), + _('Subject of the email to use when sending. When left blank ' + 'the title will be used for the subject. Also, the same ' + 'templates used for "Save to disk" such as {title} and ' + '{author_sort} can be used here.'), '

'+_('If checked, downloaded news will be automatically ' 'mailed
to this email address ' - '(provided it is in one of the listed formats).')]) + '(provided it is in one of the listed formats).')]))) def rowCount(self, *args): return len(self.account_order) def columnCount(self, *args): - return 3 + return len(self.headers) def headerData(self, section, orientation, role): if role == Qt.DisplayRole and orientation == Qt.Horizontal: @@ -56,14 +63,16 @@ class EmailAccounts(QAbstractTableModel): # {{{ return QVariant(account) if col == 1: return QVariant(self.accounts[account][0]) + if col == 2: + return QVariant(self.subjects.get(account, '')) if role == Qt.FontRole and self.accounts[account][2]: return self.default_font - if role == Qt.CheckStateRole and col == 2: + if role == Qt.CheckStateRole and col == 3: return QVariant(Qt.Checked if self.accounts[account][1] else Qt.Unchecked) return NONE def flags(self, index): - if index.column() == 2: + if index.column() == 3: return QAbstractTableModel.flags(self, index)|Qt.ItemIsUserCheckable else: return QAbstractTableModel.flags(self, index)|Qt.ItemIsEditable @@ -73,8 +82,10 @@ class EmailAccounts(QAbstractTableModel): # {{{ return False row, col = index.row(), index.column() account = self.account_order[row] - if col == 2: + if col == 3: self.accounts[account][1] ^= True + if col == 2: + self.subjects[account] = unicode(value.toString()) elif col == 1: self.accounts[account][0] = unicode(value.toString()).upper() else: @@ -143,7 +154,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): self.send_email_widget.initialize(self.preferred_to_address) self.send_email_widget.changed_signal.connect(self.changed_signal.emit) opts = self.send_email_widget.smtp_opts - self._email_accounts = EmailAccounts(opts.accounts) + self._email_accounts = EmailAccounts(opts.accounts, opts.subjects) self._email_accounts.dataChanged.connect(lambda x,y: self.changed_signal.emit()) self.email_view.setModel(self._email_accounts) @@ -170,6 +181,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): if not self.send_email_widget.set_email_settings(to_set): raise AbortCommit('abort') self.proxy['accounts'] = self._email_accounts.accounts + self.proxy['subjects'] = self._email_accounts.subjects return ConfigWidgetBase.commit(self) diff --git a/src/calibre/gui2/search_box.py b/src/calibre/gui2/search_box.py index fa3b597636..82173fa0cf 100644 --- a/src/calibre/gui2/search_box.py +++ b/src/calibre/gui2/search_box.py @@ -109,7 +109,7 @@ class SearchBox2(QComboBox): # {{{ def normalize_state(self): self.setToolTip(self.tool_tip_text) self.line_edit.setStyleSheet( - 'QLineEdit{color:black;background-color:%s;}' % self.normal_background) + 'QLineEdit{color:inherit;background-color:%s;}' % self.normal_background) def text(self): return self.currentText() diff --git a/src/calibre/gui2/viewer/documentview.py b/src/calibre/gui2/viewer/documentview.py index 4102aea412..06964cda1c 100644 --- a/src/calibre/gui2/viewer/documentview.py +++ b/src/calibre/gui2/viewer/documentview.py @@ -171,10 +171,11 @@ class Document(QWebPage): # {{{ self.misc_config() self.after_load() - def __init__(self, shortcuts, parent=None): + def __init__(self, shortcuts, parent=None, resize_callback=lambda: None): QWebPage.__init__(self, parent) self.setObjectName("py_bridge") self.debug_javascript = False + self.resize_callback = resize_callback self.current_language = None self.loaded_javascript = False @@ -237,6 +238,12 @@ class Document(QWebPage): # {{{ if self.loaded_javascript: return self.loaded_javascript = True + self.javascript( + ''' + window.onresize = function(event) { + window.py_bridge.window_resized(); + } + ''') if jquery is None: jquery = P('content_server/jquery.js', data=True) self.javascript(jquery) @@ -298,6 +305,10 @@ class Document(QWebPage): # {{{ def debug(self, msg): prints(msg) + @pyqtSignature('') + def window_resized(self): + self.resize_callback() + def reference_mode(self, enable): self.javascript(('enter' if enable else 'leave')+'_reference_mode()') @@ -424,12 +435,19 @@ class Document(QWebPage): # {{{ def xpos(self): return self.mainFrame().scrollPosition().x() - @property + @dynamic_property def scroll_fraction(self): - try: - return float(self.ypos)/(self.height-self.window_height) - except ZeroDivisionError: - return 0. + def fget(self): + try: + return float(self.ypos)/(self.height-self.window_height) + except ZeroDivisionError: + return 0. + def fset(self, val): + npos = val * (self.height - self.window_height) + if npos < 0: + npos = 0 + self.scroll_to(x=self.xpos, y=npos) + return property(fget=fget, fset=fset) @property def hscroll_fraction(self): @@ -493,7 +511,8 @@ class DocumentView(QWebView): # {{{ self._size_hint = QSize(510, 680) self.initial_pos = 0.0 self.to_bottom = False - self.document = Document(self.shortcuts, parent=self) + self.document = Document(self.shortcuts, parent=self, + resize_callback=self.viewport_resized) self.setPage(self.document) self.manager = None self._reference_mode = False @@ -630,9 +649,13 @@ class DocumentView(QWebView): # {{{ def sizeHint(self): return self._size_hint - @property + @dynamic_property def scroll_fraction(self): - return self.document.scroll_fraction + def fget(self): + return self.document.scroll_fraction + def fset(self, val): + self.document.scroll_fraction = float(val) + return property(fget=fget, fset=fset) @property def hscroll_fraction(self): @@ -968,9 +991,11 @@ class DocumentView(QWebView): # {{{ def resizeEvent(self, event): ret = QWebView.resizeEvent(self, event) QTimer.singleShot(10, self.initialize_scrollbar) + return ret + + def viewport_resized(self): if self.manager is not None: self.manager.viewport_resized(self.scroll_fraction) - return ret def event(self, ev): typ = ev.type() diff --git a/src/calibre/gui2/viewer/main.py b/src/calibre/gui2/viewer/main.py index c704b98dc9..303d73dc11 100644 --- a/src/calibre/gui2/viewer/main.py +++ b/src/calibre/gui2/viewer/main.py @@ -240,7 +240,7 @@ class EbookViewer(MainWindow, Ui_EbookViewer): self.connect(self.action_reference_mode, SIGNAL('triggered(bool)'), lambda x: self.view.reference_mode(x)) self.connect(self.action_metadata, SIGNAL('triggered(bool)'), lambda x:self.metadata.setVisible(x)) - self.connect(self.action_table_of_contents, SIGNAL('toggled(bool)'), lambda x:self.toc.setVisible(x)) + self.action_table_of_contents.toggled[bool].connect(self.set_toc_visible) self.connect(self.action_copy, SIGNAL('triggered(bool)'), self.copy) self.connect(self.action_font_size_larger, SIGNAL('triggered(bool)'), self.font_size_larger) @@ -310,6 +310,9 @@ class EbookViewer(MainWindow, Ui_EbookViewer): self.restore_state() + def set_toc_visible(self, yes): + self.toc.setVisible(yes) + def clear_recent_history(self, *args): vprefs.set('viewer_open_history', []) self.build_recent_menu() diff --git a/src/calibre/gui2/widgets.py b/src/calibre/gui2/widgets.py index ea0509b51a..e5f1c94342 100644 --- a/src/calibre/gui2/widgets.py +++ b/src/calibre/gui2/widgets.py @@ -121,6 +121,12 @@ class FilenamePattern(QWidget, Ui_Form): else: self.series_index.setText(_('No match')) + if mi.publisher: + self.publisher.setText(mi.publisher) + + if mi.pubdate: + self.pubdate.setText(mi.pubdate.strftime('%Y-%m-%d')) + self.isbn.setText(_('No match') if mi.isbn is None else str(mi.isbn)) diff --git a/src/calibre/library/custom_columns.py b/src/calibre/library/custom_columns.py index 48960ac871..8eed121b21 100644 --- a/src/calibre/library/custom_columns.py +++ b/src/calibre/library/custom_columns.py @@ -76,6 +76,8 @@ class CustomColumns(object): 'num':record[6], 'is_multiple':record[7], } + if data['display'] is None: + data['display'] = {} table, lt = self.custom_table_names(data['num']) if table not in custom_tables or (data['normalized'] and lt not in custom_tables): diff --git a/src/calibre/trac/bzr_commit_plugin.py b/src/calibre/trac/bzr_commit_plugin.py index 7e5a1367cb..c596425a68 100644 --- a/src/calibre/trac/bzr_commit_plugin.py +++ b/src/calibre/trac/bzr_commit_plugin.py @@ -31,8 +31,11 @@ class cmd_commit(_cmd_commit): summary = '' raw = urllib.urlopen('https://bugs.launchpad.net/calibre/+bug/' + bug).read() - h1 = html.fromstring(raw).xpath('//h1[@id="edit-title"]')[0] - summary = html.tostring(h1, method='text', encoding=unicode).strip() + try: + h1 = html.fromstring(raw).xpath('//h1[@id="edit-title"]')[0] + summary = html.tostring(h1, method='text', encoding=unicode).strip() + except: + summary = 'Private bug' print 'Working on bug:', summary if summary: msg = msg.replace('#%s'%bug, '#%s (%s)'%(bug, summary)) diff --git a/src/calibre/utils/html2text.py b/src/calibre/utils/html2text.py index 0eb84a3d38..3779c68918 100644 --- a/src/calibre/utils/html2text.py +++ b/src/calibre/utils/html2text.py @@ -1,8 +1,14 @@ #!/usr/bin/env python +# -*- coding: utf-8 -*- + """html2text: Turn HTML into equivalent Markdown-structured text.""" -__version__ = "2.39" -__author__ = "Aaron Swartz (me@aaronsw.com)" -__copyright__ = "(C) 2004-2008 Aaron Swartz. GNU GPL 3." +# Last upstream version before changes +#__version__ = "2.39" +__license__ = 'GPL 3' +__copyright__ = ''' +Copyright (c) 2011, John Schember +(C) 2004-2008 Aaron Swartz +''' __contributors__ = ["Martin 'Joey' Schulze", "Ricardo Reyes", "Kevin Jay North"] # TODO: @@ -11,7 +17,6 @@ __contributors__ = ["Martin 'Joey' Schulze", "Ricardo Reyes", "Kevin Jay North"] if not hasattr(__builtins__, 'True'): True, False = 1, 0 import re, sys, urllib, htmlentitydefs, codecs import sgmllib -import urlparse sgmllib.charref = re.compile('&#([xX]?[0-9a-fA-F]+)[^0-9a-fA-F]') try: from textwrap import wrap @@ -145,9 +150,7 @@ class _html2text(sgmllib.SGMLParser): self.outcount = 0 self.start = 1 self.space = 0 - self.a = [] self.astack = [] - self.acount = 0 self.list = [] self.blockquote = 0 self.pre = 0 @@ -181,29 +184,6 @@ class _html2text(sgmllib.SGMLParser): def unknown_endtag(self, tag): self.handle_tag(tag, None, 0) - def previousIndex(self, attrs): - """ returns the index of certain set of attributes (of a link) in the - self.a list - - If the set of attributes is not found, returns None - """ - if not attrs.has_key('href'): return None - - i = -1 - for a in self.a: - i += 1 - match = 0 - - if a.has_key('href') and a['href'] == attrs['href']: - if a.has_key('title') or attrs.has_key('title'): - if (a.has_key('title') and attrs.has_key('title') and - a['title'] == attrs['title']): - match = True - else: - match = True - - if match: return i - def handle_tag(self, tag, attrs, start): attrs = fixattrs(attrs) @@ -268,34 +248,23 @@ class _html2text(sgmllib.SGMLParser): if self.astack: a = self.astack.pop() if a: - i = self.previousIndex(a) - if i is not None: - a = self.a[i] - else: - self.acount += 1 - a['count'] = self.acount - a['outcount'] = self.outcount - self.a.append(a) - self.o("][" + `a['count']` + "]") + title = '' + if a.has_key('title'): + title = ' "%s"' % a['title'] + self.o('](%s%s)' % (a['href'], title)) if tag == "img" and start: attrsD = {} for (x, y) in attrs: attrsD[x] = y attrs = attrsD if attrs.has_key('src'): - attrs['href'] = attrs['src'] alt = attrs.get('alt', '') - i = self.previousIndex(attrs) - if i is not None: - attrs = self.a[i] - else: - self.acount += 1 - attrs['count'] = self.acount - attrs['outcount'] = self.outcount - self.a.append(attrs) self.o("![") self.o(alt) - self.o("]["+`attrs['count']`+"]") + title = '' + if attrs.has_key('title'): + title = ' "%s"' % attrs['title'] + self.o('](%s%s)' % (attrs['src'], title)) if tag == 'dl' and start: self.p() if tag == 'dt' and not start: self.pbr() @@ -373,7 +342,6 @@ class _html2text(sgmllib.SGMLParser): self.out("\n") self.space = 0 - if self.p_p: self.out(('\n'+bq)*self.p_p) self.space = 0 @@ -382,22 +350,6 @@ class _html2text(sgmllib.SGMLParser): if not self.lastWasNL: self.out(' ') self.space = 0 - if self.a and ((self.p_p == 2 and LINKS_EACH_PARAGRAPH) or force == "end"): - if force == "end": self.out("\n") - - newa = [] - for link in self.a: - if self.outcount > link['outcount']: - self.out(" ["+`link['count']`+"]: " + urlparse.urljoin(self.baseurl, link['href'])) - if link.has_key('title'): self.out(" ("+link['title']+")") - self.out("\n") - else: - newa.append(link) - - if self.a != newa: self.out("\n") # Don't need an extra line when nothing was done. - - self.a = newa - if self.abbr_list and force == "end": for abbr, definition in self.abbr_list.items(): self.out(" *[" + abbr + "]: " + definition + "\n") diff --git a/src/calibre/utils/smtp.py b/src/calibre/utils/smtp.py index 81936a8f71..1e05cf8287 100644 --- a/src/calibre/utils/smtp.py +++ b/src/calibre/utils/smtp.py @@ -250,6 +250,7 @@ def config(defaults=None): c = Config('smtp',desc) if defaults is None else StringConfig(defaults,desc) c.add_opt('from_') c.add_opt('accounts', default={}) + c.add_opt('subjects', default={}) c.add_opt('relay_host') c.add_opt('relay_port', default=25) c.add_opt('relay_username')