Sync to trunk.

This commit is contained in:
John Schember 2011-04-02 14:10:07 -04:00
commit 5731cad1be
97 changed files with 35653 additions and 31735 deletions

View File

@ -19,6 +19,77 @@
# new recipes:
# - title:
- version: 0.7.53
date: 2011-04-01
new features:
- title: "Email delivery: You can now specify a subject that calibre will use when sending emails per email account, configured in Preferences->Sending by email. The subject is a template of the same kind used in Save to Disk, etc. So you can specift the title/authors/series/whatever in the template."
tickets: [743535]
- title: "Apple driver: When an iDevice is detected, inform the user about the Connect to iTunes method instead of trying to connect directly to the device, as the latter can be buggy. See http://www.mobileread.com/forums/showthread.php?t=127883 for details"
- title: "SONY driver: Search for books on the device in all directories not just database/media/books. This can be turned off by customizing the SONY plugin in Preferences->Plugins"
- title: "EPUB Output: Remove any margins specified via an Adobe page template in the input document. This means that the margins specified in calibre are more likely to be the actual margins used."
- title: "When reading metadata from filenames, allow publisher and published date to be read from the filename"
tickets: [744020]
- title: "Remove the option to show a second tool bar from Preferences->Look & Feel. Instead go to Preferences->Toolbars and add items to the second toolbar to control exactly what is visible there."
tickets: [742686]
- title: "Add a tweak that can be used to have the calibre content server listen for IPv6 connections."
tickets: [743486]
- title: "When clicking Next or Previous in the edit metadata dialog, then active book in the main book list is also changed"
tickets: [743533]
- title: "Remember the previously used setting for Match all/Match any under the Tag Browser when calibre restarts"
tickets: [743645]
- title: "FB2 Output: Option to set the FB2 genre explicitly."
tickets: [743178]
- title: "Plugin developers: calibre now has a new plugin API, see http://calibre-ebook.com/user_manual/creating_plugins.html. Your existing plugins should continue to work, but it would be good to test them to make sure."
bug fixes:
- title: "Fix text color in the search bar set to black instead of the system font color"
tickets: [746846]
- title: "Workaround for Word bug where Word uses gb2312 as the encoding when exporting CHinese docs to HTML istead of gbk"
tickets: [745428]
- title: "Make sorting on the device view faster and more robust."
tickets: [742626]
- title: "E-book viewer: Fix viewer losing place in very long single file documents when window resized."
tickets: [745001]
- title: "MOBI Output: Workaround for Amazon's MOBI renderer not rendering top margins on ul and ol tags."
tickets: [744365]
- title: "EPUB Input: Workaround for invalid EPUBs produced by someone named 'ibooks, Inc.'."
tickets: [744122]
- title: "RTF Input: Handle RTF files with too many levels of list nesting."
tickets: [743243]
improved recipes:
- Irish Times
- LifeHacker
- Estadao
- Folha de Sao Paulo
new recipes:
- title: Financieele Dagblad
author: marvin_2
- title: "Prost Amerika, WV Hooligan and SB Nation"
author: rylsfan
- title: "Cracked.com"
author: Nudgenudge
- version: 0.7.52
date: 2011-03-25

View File

@ -1,134 +1,129 @@
#!/usr/bin/env python
from calibre.web.feeds.news import BasicNewsRecipe
from datetime import datetime, timedelta
from calibre.ebooks.BeautifulSoup import Tag,BeautifulSoup
from calibre.utils.magick import Image, PixelWand
from urllib2 import Request, urlopen, URLError
class Estadao(BasicNewsRecipe):
THUMBALIZR_API = "0123456789abcdef01234567890" # ---->Get your at http://www.thumbalizr.com/
LANGUAGE = 'pt_br'
language = 'pt'
LANGHTM = 'pt-br'
ENCODING = 'utf'
ENCHTM = 'utf-8'
directionhtm = 'ltr'
requires_version = (0,8,47)
news = True
publication_type = 'newsportal'
title = u'Estadao'
__author__ = 'Euler Alves'
description = u'Brazilian news from Estad\xe3o'
publisher = u'Estad\xe3o'
category = 'news, rss'
oldest_article = 4
max_articles_per_feed = 100
summary_length = 1000
remove_javascript = True
no_stylesheets = True
use_embedded_content = False
remove_empty_feeds = True
timefmt = ' [%d %b %Y (%a)]'
html2lrf_options = [
'--comment', description
,'--category', category
,'--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
hoje = datetime.now()-timedelta(days=2)
pubdate = hoje.strftime('%a, %d %b')
if hoje.hour<10:
hoje = hoje-timedelta(days=1)
CAPA = 'http://www.estadao.com.br/estadaodehoje/'+hoje.strftime('%Y%m%d')+'/img/capadodia.jpg'
SCREENSHOT = 'http://estadao.com.br/'
cover_margins = (0,0,'white')
masthead_url = 'http://www.estadao.com.br/estadao/novo/img/logo.png'
keep_only_tags = [dict(name='div', attrs={'class':['bb-md-noticia','corpo']})]
remove_tags = [
dict(name='div',
attrs={'id':[
'bb-md-noticia-tabs'
]})
,dict(name='div',
attrs={'class':[
'tags'
,'discussion'
,'bb-gg adsense_container'
]})
,dict(name='a')
,dict(name='iframe')
,dict(name='link')
,dict(name='script')
]
feeds = [
(u'\xDAltimas Not\xEDcias', u'http://www.estadao.com.br/rss/ultimas.xml')
,(u'Manchetes', u'http://www.estadao.com.br/rss/manchetes.xml')
,(u'Brasil', u'http://www.estadao.com.br/rss/brasil.xml')
,(u'Internacional', u'http://www.estadao.com.br/rss/internacional.xml')
,(u'Cinema', u'http://blogs.estadao.com.br/cinema/feed/')
,(u'Planeta', u'http://www.estadao.com.br/rss/planeta.xml')
,(u'Ci\xEAncia', u'http://www.estadao.com.br/rss/ciencia.xml')
,(u'Sa\xFAde', u'http://www.estadao.com.br/rss/saude.xml')
,(u'Pol\xEDtica', u'http://www.estadao.com.br/rss/politica.xml')
]
conversion_options = {
'title' : title
,'comments' : description
,'publisher' : publisher
,'tags' : category
,'language' : LANGUAGE
,'linearize_tables': True
}
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
if not soup.find(attrs={'http-equiv':'Content-Language'}):
meta0 = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.LANGHTM)])
soup.head.insert(0,meta0)
if not soup.find(attrs={'http-equiv':'Content-Type'}):
meta1 = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset="+self.ENCHTM)])
soup.head.insert(0,meta1)
return soup
def postprocess_html(self, soup, first):
#process all the images. assumes that the new html has the correct path
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
iurl = tag['src']
img = Image()
img.open(iurl)
width, height = img.size
print 'img is: ', iurl, 'width is: ', width, 'height is: ', height
pw = PixelWand()
if( width > height and width > 590) :
print 'Rotate image'
img.rotate(pw, -90)
img.save(iurl)
return soup
def get_cover_url(self):
cover_url = self.CAPA
pedido = Request(self.CAPA)
pedido.add_header('User-agent','Mozilla/5.0 (Windows; U; Windows NT 5.1; '+self.LANGHTM+'; userid='+self.THUMBALIZR_API+') Calibre/0.8.47 (like Gecko)')
pedido.add_header('Accept-Charset',self.ENCHTM)
pedido.add_header('Referer',self.SCREENSHOT)
try:
resposta = urlopen(pedido)
soup = BeautifulSoup(resposta)
cover_item = soup.find('body')
if cover_item:
cover_url='http://api.thumbalizr.com/?api_key='+self.THUMBALIZR_API+'&url='+self.SCREENSHOT+'&width=600&quality=90'
return cover_url
except URLError:
cover_url='http://api.thumbalizr.com/?api_key='+self.THUMBALIZR_API+'&url='+self.SCREENSHOT+'&width=600&quality=90'
return cover_url
from calibre.web.feeds.news import BasicNewsRecipe
from datetime import datetime, timedelta
from calibre.ebooks.BeautifulSoup import Tag,BeautifulSoup
from calibre.utils.magick import Image, PixelWand
from urllib2 import Request, urlopen, URLError
class Estadao(BasicNewsRecipe):
THUMBALIZR_API = '' # ---->Get your at http://www.thumbalizr.com/ and put here
LANGUAGE = 'pt_br'
language = 'pt'
LANGHTM = 'pt-br'
ENCODING = 'utf'
ENCHTM = 'utf-8'
directionhtm = 'ltr'
requires_version = (0,7,47)
news = True
title = u'Estad\xe3o'
__author__ = 'Euler Alves'
description = u'Brazilian news from Estad\xe3o'
publisher = u'Estad\xe3o'
category = 'news, rss'
oldest_article = 4
max_articles_per_feed = 100
summary_length = 1000
remove_javascript = True
no_stylesheets = True
use_embedded_content = False
remove_empty_feeds = True
timefmt = ' [%d %b %Y (%a)]'
hoje = datetime.now()-timedelta(days=2)
pubdate = hoje.strftime('%a, %d %b')
if hoje.hour<10:
hoje = hoje-timedelta(days=1)
CAPA = 'http://www.estadao.com.br/estadaodehoje/'+hoje.strftime('%Y%m%d')+'/img/capadodia.jpg'
SCREENSHOT = 'http://estadao.com.br/'
cover_margins = (0,0,'white')
masthead_url = 'http://www.estadao.com.br/estadao/novo/img/logo.png'
keep_only_tags = [dict(name='div', attrs={'class':['bb-md-noticia','corpo']})]
remove_tags = [
dict(name='div',
attrs={'id':[
'bb-md-noticia-tabs'
]})
,dict(name='div',
attrs={'class':[
'tags'
,'discussion'
,'bb-gg adsense_container'
]})
,dict(name='a')
,dict(name='iframe')
,dict(name='link')
,dict(name='script')
]
feeds = [
(u'\xDAltimas Not\xEDcias', u'http://www.estadao.com.br/rss/ultimas.xml')
,(u'Manchetes', u'http://www.estadao.com.br/rss/manchetes.xml')
,(u'Brasil', u'http://www.estadao.com.br/rss/brasil.xml')
,(u'Internacional', u'http://www.estadao.com.br/rss/internacional.xml')
,(u'Cinema', u'http://blogs.estadao.com.br/cinema/feed/')
,(u'Planeta', u'http://www.estadao.com.br/rss/planeta.xml')
,(u'Ci\xEAncia', u'http://www.estadao.com.br/rss/ciencia.xml')
,(u'Sa\xFAde', u'http://www.estadao.com.br/rss/saude.xml')
,(u'Pol\xEDtica', u'http://www.estadao.com.br/rss/politica.xml')
]
conversion_options = {
'title' : title
,'comments' : description
,'publisher' : publisher
,'tags' : category
,'language' : LANGUAGE
,'linearize_tables': True
}
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
if not soup.find(attrs={'http-equiv':'Content-Language'}):
meta0 = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.LANGHTM)])
soup.head.insert(0,meta0)
if not soup.find(attrs={'http-equiv':'Content-Type'}):
meta1 = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset="+self.ENCHTM)])
soup.head.insert(0,meta1)
return soup
def postprocess_html(self, soup, first):
#process all the images. assumes that the new html has the correct path
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
iurl = tag['src']
img = Image()
img.open(iurl)
width, height = img.size
print 'img is: ', iurl, 'width is: ', width, 'height is: ', height
if img < 0:
raise RuntimeError('Out of memory')
pw = PixelWand()
if( width > height and width > 590) :
print 'Rotate image'
img.rotate(pw, -90)
img.save(iurl)
return soup
def get_cover_url(self):
if self.THUMBALIZR_API:
cover_url = self.CAPA
pedido = Request(self.CAPA)
pedido.add_header('User-agent','Mozilla/5.0 (Windows; U; Windows NT 5.1; '+self.LANGHTM+'; userid='+self.THUMBALIZR_API+') Calibre/0.8.47 (like Gecko)')
pedido.add_header('Accept-Charset',self.ENCHTM)
pedido.add_header('Referer',self.SCREENSHOT)
try:
resposta = urlopen(pedido)
soup = BeautifulSoup(resposta)
cover_item = soup.find('body')
if cover_item:
cover_url='http://api.thumbalizr.com/?api_key='+self.THUMBALIZR_API+'&url='+self.SCREENSHOT+'&width=600&quality=90'
return cover_url
except URLError:
cover_url='http://api.thumbalizr.com/?api_key='+self.THUMBALIZR_API+'&url='+self.SCREENSHOT+'&width=600&quality=90'
return cover_url

View File

@ -0,0 +1,29 @@
from calibre.web.feeds.news import BasicNewsRecipe
class fd(BasicNewsRecipe):
title = u'Het Financieele Dagblad'
__author__ = 'marvin_2'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
cover_url = 'http://www.fd.nl/static/gfx/logo-fd-164x78.gif'
language = 'nl'
keep_only_tags = (dict(name = 'div', attrs = {'class': ['headlinearticle']}))
remove_tags = [dict(name='span' , attrs={'class':['opties']})]
feeds = [
(u'Overzicht',u'http://www.fd.nl/nieuws/overzicht/?view=RSS&profiel=OPENBAAR')
]
extra_css = '''
h1 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:x-large;}
p{font-family:Arial,Helvetica,sans-serif;}
strong{font-weight:bold; margin-right:5pt;margin-top:20pt;}
.datum_ie {font-style:italic;font-size:small;}
img {align:left;}
'''

View File

@ -1,149 +1,151 @@
from calibre.web.feeds.news import BasicNewsRecipe
from datetime import datetime, timedelta
from calibre.ebooks.BeautifulSoup import Tag,BeautifulSoup
from calibre.utils.magick import Image, PixelWand
from urllib2 import Request, urlopen, URLError
class FolhaOnline(BasicNewsRecipe):
THUMBALIZR_API = "0123456789abcdef01234567890" # ---->Get your at http://www.thumbalizr.com/
LANGUAGE = 'pt_br'
language = 'pt'
LANGHTM = 'pt-br'
ENCODING = 'cp1252'
ENCHTM = 'iso-8859-1'
directionhtm = 'ltr'
requires_version = (0,8,47)
news = True
publication_type = 'newsportal'
title = u'Folha de S\xE3o Paulo'
__author__ = 'Euler Alves'
description = u'Brazilian news from Folha de S\xE3o Paulo'
publisher = u'Folha de S\xE3o Paulo'
category = 'news, rss'
oldest_article = 4
max_articles_per_feed = 100
summary_length = 1000
remove_javascript = True
no_stylesheets = True
use_embedded_content = False
remove_empty_feeds = True
timefmt = ' [%d %b %Y (%a)]'
html2lrf_options = [
'--comment', description
,'--category', category
,'--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
hoje = datetime.now()
pubdate = hoje.strftime('%a, %d %b')
if hoje.hour<6:
hoje = hoje-timedelta(days=1)
CAPA = 'http://www1.folha.uol.com.br/fsp/images/cp'+hoje.strftime('%d%m%Y')+'.jpg'
SCREENSHOT = 'http://www1.folha.uol.com.br/'
cover_margins = (0,0,'white')
masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif'
keep_only_tags = [dict(name='div', attrs={'id':'articleNew'})]
remove_tags = [
dict(name='div',
attrs={'id':[
'articleButton'
,'bookmarklets'
,'ad-180x150-1'
,'contextualAdsArticle'
,'articleEnd'
,'articleComments'
]})
,dict(name='div',
attrs={'class':[
'openBox adslibraryArticle'
]})
,dict(name='a')
,dict(name='iframe')
,dict(name='link')
,dict(name='script')
]
feeds = [
(u'Em cima da hora', u'http://feeds.folha.uol.com.br/emcimadahora/rss091.xml')
,(u'Ambiente', u'http://feeds.folha.uol.com.br/ambiente/rss091.xml')
,(u'Bichos', u'http://feeds.folha.uol.com.br/bichos/rss091.xml')
,(u'Ci\xEAncia', u'http://feeds.folha.uol.com.br/ciencia/rss091.xml')
,(u'Poder', u'http://feeds.folha.uol.com.br/poder/rss091.xml')
,(u'Equil\xEDbrio e Sa\xFAde', u'http://feeds.folha.uol.com.br/equilibrioesaude/rss091.xml')
,(u'Turismo', u'http://feeds.folha.uol.com.br/folha/turismo/rss091.xml')
,(u'Mundo', u'http://feeds.folha.uol.com.br/mundo/rss091.xml')
,(u'Pelo Mundo', u'http://feeds.folha.uol.com.br/pelomundo.folha.rssblog.uol.com.br/')
,(u'Circuito integrado', u'http://feeds.folha.uol.com.br/circuitointegrado.folha.rssblog.uol.com.br/')
,(u'Blog do Fred', u'http://feeds.folha.uol.com.br/blogdofred.folha.rssblog.uol.com.br/')
,(u'Maria In\xEAs Dolci', u'http://feeds.folha.uol.com.br/mariainesdolci.folha.blog.uol.com.br/')
,(u'Eduardo Ohata', u'http://feeds.folha.uol.com.br/folha/pensata/eduardoohata/rss091.xml')
,(u'Kennedy Alencar', u'http://feeds.folha.uol.com.br/folha/pensata/kennedyalencar/rss091.xml')
,(u'Eliane Catanh\xEAde', u'http://feeds.folha.uol.com.br/folha/pensata/elianecantanhede/rss091.xml')
,(u'Fernado Canzian', u'http://feeds.folha.uol.com.br/folha/pensata/fernandocanzian/rss091.xml')
,(u'Gilberto Dimenstein', u'http://feeds.folha.uol.com.br/folha/pensata/gilbertodimenstein/rss091.xml')
,(u'H\xE9lio Schwartsman', u'http://feeds.folha.uol.com.br/folha/pensata/helioschwartsman/rss091.xml')
,(u'Jo\xE3o Pereira Coutinho', u'http://http://feeds.folha.uol.com.br/folha/pensata/joaopereiracoutinho/rss091.xml')
,(u'Luiz Caversan', u'http://http://feeds.folha.uol.com.br/folha/pensata/luizcaversan/rss091.xml')
,(u'S\xE9rgio Malbergier', u'http://http://feeds.folha.uol.com.br/folha/pensata/sergiomalbergier/rss091.xml')
,(u'Valdo Cruz', u'http://http://feeds.folha.uol.com.br/folha/pensata/valdocruz/rss091.xml')
]
conversion_options = {
'title' : title
,'comments' : description
,'publisher' : publisher
,'tags' : category
,'language' : LANGUAGE
,'linearize_tables': True
}
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
if not soup.find(attrs={'http-equiv':'Content-Language'}):
meta0 = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.LANGHTM)])
soup.head.insert(0,meta0)
if not soup.find(attrs={'http-equiv':'Content-Type'}):
meta1 = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset="+self.ENCHTM)])
soup.head.insert(0,meta1)
return soup
def postprocess_html(self, soup, first):
#process all the images. assumes that the new html has the correct path
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
iurl = tag['src']
img = Image()
img.open(iurl)
width, height = img.size
print 'img is: ', iurl, 'width is: ', width, 'height is: ', height
pw = PixelWand()
if( width > height and width > 590) :
print 'Rotate image'
img.rotate(pw, -90)
img.save(iurl)
return soup
def get_cover_url(self):
cover_url = self.CAPA
pedido = Request(self.CAPA)
pedido.add_header('User-agent','Mozilla/5.0 (Windows; U; Windows NT 5.1; '+self.LANGHTM+'; userid='+self.THUMBALIZR_API+') Calibre/0.8.47 (like Gecko)')
pedido.add_header('Accept-Charset',self.ENCHTM)
pedido.add_header('Referer',self.SCREENSHOT)
try:
resposta = urlopen(pedido)
soup = BeautifulSoup(resposta)
cover_item = soup.find('body')
if cover_item:
cover_url='http://api.thumbalizr.com/?api_key='+self.THUMBALIZR_API+'&url='+self.SCREENSHOT+'&width=600&quality=90'
return cover_url
except URLError:
cover_url='http://api.thumbalizr.com/?api_key='+self.THUMBALIZR_API+'&url='+self.SCREENSHOT+'&width=600&quality=90'
return cover_url
from calibre.web.feeds.news import BasicNewsRecipe
from datetime import datetime, timedelta
from calibre.ebooks.BeautifulSoup import Tag,BeautifulSoup
from calibre.utils.magick import Image, PixelWand
from urllib2 import Request, urlopen, URLError
class FolhaOnline(BasicNewsRecipe):
THUMBALIZR_API = '' # ---->Get your at http://www.thumbalizr.com/ and put here
LANGUAGE = 'pt_br'
language = 'pt'
LANGHTM = 'pt-br'
ENCODING = 'cp1252'
ENCHTM = 'iso-8859-1'
directionhtm = 'ltr'
requires_version = (0,7,47)
news = True
title = u'Folha de S\xE3o Paulo'
__author__ = 'Euler Alves'
description = u'Brazilian news from Folha de S\xE3o Paulo'
publisher = u'Folha de S\xE3o Paulo'
category = 'news, rss'
oldest_article = 4
max_articles_per_feed = 100
summary_length = 1000
remove_javascript = True
no_stylesheets = True
use_embedded_content = False
remove_empty_feeds = True
timefmt = ' [%d %b %Y (%a)]'
html2lrf_options = [
'--comment', description
,'--category', category
,'--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
hoje = datetime.now()
pubdate = hoje.strftime('%a, %d %b')
if hoje.hour<6:
hoje = hoje-timedelta(days=1)
CAPA = 'http://www1.folha.uol.com.br/fsp/images/cp'+hoje.strftime('%d%m%Y')+'.jpg'
SCREENSHOT = 'http://www1.folha.uol.com.br/'
cover_margins = (0,0,'white')
masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif'
keep_only_tags = [dict(name='div', attrs={'id':'articleNew'})]
remove_tags = [
dict(name='div',
attrs={'id':[
'articleButton'
,'bookmarklets'
,'ad-180x150-1'
,'contextualAdsArticle'
,'articleEnd'
,'articleComments'
]})
,dict(name='div',
attrs={'class':[
'openBox adslibraryArticle'
]})
,dict(name='a')
,dict(name='iframe')
,dict(name='link')
,dict(name='script')
]
feeds = [
(u'Em cima da hora', u'http://feeds.folha.uol.com.br/emcimadahora/rss091.xml')
,(u'Ambiente', u'http://feeds.folha.uol.com.br/ambiente/rss091.xml')
,(u'Bichos', u'http://feeds.folha.uol.com.br/bichos/rss091.xml')
,(u'Ci\xEAncia', u'http://feeds.folha.uol.com.br/ciencia/rss091.xml')
,(u'Poder', u'http://feeds.folha.uol.com.br/poder/rss091.xml')
,(u'Equil\xEDbrio e Sa\xFAde', u'http://feeds.folha.uol.com.br/equilibrioesaude/rss091.xml')
,(u'Turismo', u'http://feeds.folha.uol.com.br/folha/turismo/rss091.xml')
,(u'Mundo', u'http://feeds.folha.uol.com.br/mundo/rss091.xml')
,(u'Pelo Mundo', u'http://feeds.folha.uol.com.br/pelomundo.folha.rssblog.uol.com.br/')
,(u'Circuito integrado', u'http://feeds.folha.uol.com.br/circuitointegrado.folha.rssblog.uol.com.br/')
,(u'Blog do Fred', u'http://feeds.folha.uol.com.br/blogdofred.folha.rssblog.uol.com.br/')
,(u'Maria In\xEAs Dolci', u'http://feeds.folha.uol.com.br/mariainesdolci.folha.blog.uol.com.br/')
,(u'Eduardo Ohata', u'http://feeds.folha.uol.com.br/folha/pensata/eduardoohata/rss091.xml')
,(u'Kennedy Alencar', u'http://feeds.folha.uol.com.br/folha/pensata/kennedyalencar/rss091.xml')
,(u'Eliane Catanh\xEAde', u'http://feeds.folha.uol.com.br/folha/pensata/elianecantanhede/rss091.xml')
,(u'Fernado Canzian', u'http://feeds.folha.uol.com.br/folha/pensata/fernandocanzian/rss091.xml')
,(u'Gilberto Dimenstein', u'http://feeds.folha.uol.com.br/folha/pensata/gilbertodimenstein/rss091.xml')
,(u'H\xE9lio Schwartsman', u'http://feeds.folha.uol.com.br/folha/pensata/helioschwartsman/rss091.xml')
,(u'Jo\xE3o Pereira Coutinho', u'http://http://feeds.folha.uol.com.br/folha/pensata/joaopereiracoutinho/rss091.xml')
,(u'Luiz Caversan', u'http://http://feeds.folha.uol.com.br/folha/pensata/luizcaversan/rss091.xml')
,(u'S\xE9rgio Malbergier', u'http://http://feeds.folha.uol.com.br/folha/pensata/sergiomalbergier/rss091.xml')
,(u'Valdo Cruz', u'http://http://feeds.folha.uol.com.br/folha/pensata/valdocruz/rss091.xml')
]
conversion_options = {
'title' : title
,'comments' : description
,'publisher' : publisher
,'tags' : category
,'language' : LANGUAGE
,'linearize_tables': True
}
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
if not soup.find(attrs={'http-equiv':'Content-Language'}):
meta0 = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.LANGHTM)])
soup.head.insert(0,meta0)
if not soup.find(attrs={'http-equiv':'Content-Type'}):
meta1 = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset="+self.ENCHTM)])
soup.head.insert(0,meta1)
return soup
def postprocess_html(self, soup, first):
#process all the images. assumes that the new html has the correct path
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
iurl = tag['src']
img = Image()
img.open(iurl)
width, height = img.size
print 'img is: ', iurl, 'width is: ', width, 'height is: ', height
if img < 0:
raise RuntimeError('Out of memory')
pw = PixelWand()
if( width > height and width > 590) :
print 'Rotate image'
img.rotate(pw, -90)
img.save(iurl)
return soup
def get_cover_url(self):
cover_url = self.CAPA
pedido = Request(self.CAPA)
pedido.add_header('User-agent','Mozilla/5.0 (Windows; U; Windows NT 5.1; '+self.LANGHTM+'; userid='+self.THUMBALIZR_API+') Calibre/0.8.47 (like Gecko)')
pedido.add_header('Accept-Charset',self.ENCHTM)
pedido.add_header('Referer',self.SCREENSHOT)
try:
resposta = urlopen(pedido)
soup = BeautifulSoup(resposta)
cover_item = soup.find('body')
if cover_item:
cover_url='http://api.thumbalizr.com/?api_key='+self.THUMBALIZR_API+'&url='+self.SCREENSHOT+'&width=600&quality=90'
return cover_url
except URLError:
cover_url='http://api.thumbalizr.com/?api_key='+self.THUMBALIZR_API+'&url='+self.SCREENSHOT+'&width=600&quality=90'
return cover_url

View File

@ -35,8 +35,8 @@ class AdvancedUserRecipe1287083651(BasicNewsRecipe):
(u'Arts', u'http://www.theglobeandmail.com/news/arts/?service=rss'),
(u'Life', u'http://www.theglobeandmail.com/life/?service=rss'),
(u'Real Estate', u'http://www.theglobeandmail.com/real-estate/?service=rss'),
(u'Auto', u'http://www.theglobeandmail.com/sports/?service=rss'),
(u'Sports', u'http://www.theglobeandmail.com/auto/?service=rss')
(u'Sports', u'http://www.theglobeandmail.com/sports/?service=rss'),
(u'Drive', u'http://www.theglobeandmail.com/auto/?service=rss')
]
preprocess_regexps = [

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.7 KiB

After

Width:  |  Height:  |  Size: 714 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 392 B

View File

@ -1,5 +1,5 @@
__license__ = 'GPL v3'
__copyright__ = "2008, Derry FitzGerald. 2009 Modified by Ray Kinsella and David O'Callaghan"
__copyright__ = "2008, Derry FitzGerald. 2009 Modified by Ray Kinsella and David O'Callaghan, 2011 Modified by Phil Burns"
'''
irishtimes.com
'''
@ -9,17 +9,20 @@ from calibre.web.feeds.news import BasicNewsRecipe
class IrishTimes(BasicNewsRecipe):
title = u'The Irish Times'
__author__ = "Derry FitzGerald, Ray Kinsella and David O'Callaghan"
encoding = 'ISO-8859-15'
__author__ = "Derry FitzGerald, Ray Kinsella, David O'Callaghan and Phil Burns"
language = 'en_IE'
timefmt = ' (%A, %B %d, %Y)'
oldest_article = 3
oldest_article = 1.0
max_articles_per_feed = 100
no_stylesheets = True
simultaneous_downloads= 1
simultaneous_downloads= 5
r = re.compile('.*(?P<url>http:\/\/(www.irishtimes.com)|(rss.feedsportal.com\/c)\/.*\.html?).*')
remove_tags = [dict(name='div', attrs={'class':'footer'})]
extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt }'
extra_css = 'p, div { margin: 0pt; border: 0pt; text-indent: 0.5em } .headline {font-size: large;} \n .fact { padding-top: 10pt }'
feeds = [
('Frontpage', 'http://www.irishtimes.com/feeds/rss/newspaper/index.rss'),
@ -30,15 +33,29 @@ class IrishTimes(BasicNewsRecipe):
('Sport', 'http://www.irishtimes.com/feeds/rss/newspaper/sport.rss'),
('Opinion', 'http://www.irishtimes.com/feeds/rss/newspaper/opinion.rss'),
('Letters', 'http://www.irishtimes.com/feeds/rss/newspaper/letters.rss'),
('Magazine', 'http://www.irishtimes.com/feeds/rss/newspaper/magazine.rss'),
('Health', 'http://www.irishtimes.com/feeds/rss/newspaper/health.rss'),
('Education & Parenting', 'http://www.irishtimes.com/feeds/rss/newspaper/education.rss'),
('Motors', 'http://www.irishtimes.com/feeds/rss/newspaper/motors.rss'),
('An Teanga Bheo', 'http://www.irishtimes.com/feeds/rss/newspaper/anteangabheo.rss'),
('Commercial Property', 'http://www.irishtimes.com/feeds/rss/newspaper/commercialproperty.rss'),
('Science Today', 'http://www.irishtimes.com/feeds/rss/newspaper/sciencetoday.rss'),
('Property', 'http://www.irishtimes.com/feeds/rss/newspaper/property.rss'),
('The Tickets', 'http://www.irishtimes.com/feeds/rss/newspaper/theticket.rss'),
('Weekend', 'http://www.irishtimes.com/feeds/rss/newspaper/weekend.rss'),
('News features', 'http://www.irishtimes.com/feeds/rss/newspaper/newsfeatures.rss'),
('Obituaries', 'http://www.irishtimes.com/feeds/rss/newspaper/obituaries.rss'),
]
def print_version(self, url):
if url.count('rss.feedsportal.com'):
u = 'http://www.irishtimes.com' + \
(((url[70:].replace('0C','/')).replace('0A','0'))).replace('0Bhtml/story01.htm','_pf.html')
else:
u = url.replace('.html','_pf.html')
return u
if url.count('rss.feedsportal.com'):
u = url.replace('0Bhtml/story01.htm','_pf0Bhtml/story01.htm')
else:
u = url.replace('.html','_pf.html')
return u
def get_article_url(self, article):
return article.link

View File

@ -1,37 +1,100 @@
__license__ = 'GPL v3'
__copyright__ = '2010, NA'
'''
lifehacker.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Lifehacker(BasicNewsRecipe):
title = 'Lifehacker'
__author__ = 'Kovid Goyal'
description = "Computers make us more productive. Yeah, right. Lifehacker recommends the software downloads and web sites that actually save time. Don't live to geek; geek to live."
publisher = 'lifehacker.com'
category = 'news, IT, Internet, gadgets, tips and tricks, howto, diy'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = True
language = 'en'
masthead_url = 'http://cache.gawkerassets.com/assets/lifehacker.com/img/logo.png'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
remove_tags = [
{'class': 'feedflare'},
]
feeds = [(u'Articles', u'http://feeds.gawker.com/lifehacker/vip?format=xml')]
def preprocess_html(self, soup):
return self.adeify_images(soup)
from calibre.web.feeds.news import BasicNewsRecipe
from datetime import datetime
from calibre.ebooks.BeautifulSoup import Tag
from calibre.utils.magick import Image, PixelWand
class LifeHacker(BasicNewsRecipe):
THUMBALIZR_API = '' # ---->Get your at http://www.thumbalizr.com/ and put here
LANGUAGE = 'en'
LANGHTM = 'en'
language = 'en'
ENCODING = 'utf'
ENCHTM = 'utf-8'
requires_version = (0,7,47)
news = True
title = u'LifeHacker'
__author__ = 'Euler Alves'
description = u'Tips, tricks, and downloads for getting things done.'
publisher = u'lifehacker.com'
author = u'Adam Pash & Kevin Purdy & Adam Dachis & Whitson Gordon & Gina Trapani'
category = 'news, rss'
oldest_article = 4
max_articles_per_feed = 20
summary_length = 1000
remove_javascript = True
no_stylesheets = True
use_embedded_content = True
remove_empty_feeds = True
timefmt = ' [%d %b %Y (%a)]'
hoje = datetime.now()
pubdate = hoje.strftime('%a, %d %b')
cover_url = 'http://api.thumbalizr.com/?api_key='+THUMBALIZR_API+'&url=http://lifehacker.com&width=600&quality=90'
cover_margins = (0,0,'white')
masthead_url = 'http://cache.gawkerassets.com/assets/lifehacker.com/img/logo.png'
remove_tags = [
{'class': 'feedflare'},
dict(name='div',
attrs={'class':[
'ad_container'
,'ad_300x250'
,'ad_interstitial'
,'share-wrap'
,'ad_300x600'
,'ad_perma-footer-adsense'
,'ad_perma-panorama'
,'ad panorama'
,'ad_container'
]})
,dict(name='div',
attrs={'id':[
'agegate_container'
,'agegate_container_rejected'
,'sharemenu-wrap'
]})
]
feeds = [(u'Articles', u'http://feeds.gawker.com/lifehacker/vip?format=xml')]
conversion_options = {
'title' : title
,'comments' : description
,'publisher' : publisher
,'tags' : category
,'language' : LANGUAGE
,'linearize_tables': True
}
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
if not soup.find(attrs={'http-equiv':'Content-Language'}):
meta0 = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.LANGHTM)])
soup.head.insert(0,meta0)
if not soup.find(attrs={'http-equiv':'Content-Type'}):
meta1 = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset="+self.ENCHTM)])
soup.head.insert(0,meta1)
return soup
def postprocess_html(self, soup, first):
#process all the images. assumes that the new html has the correct path
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
iurl = tag['src']
img = Image()
img.open(iurl)
width, height = img.size
print 'img is: ', iurl, 'width is: ', width, 'height is: ', height
if img < 0:
raise RuntimeError('Out of memory')
pw = PixelWand()
if( width > height and width > 590) :
print 'Rotate image'
img.rotate(pw, -90)
img.save(iurl)
return soup

View File

@ -45,7 +45,6 @@ class Stage3(Command):
sub_commands = ['upload_user_manual', 'upload_demo', 'sdist',
'upload_to_sourceforge', 'upload_to_google_code',
'tag_release', 'upload_to_server',
'upload_to_mobileread',
]
class Stage4(Command):

View File

@ -5,7 +5,8 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, re, cStringIO, base64, httplib, subprocess, hashlib, shutil, time, glob
import os, re, cStringIO, base64, httplib, subprocess, hashlib, shutil, time, \
glob, stat
from subprocess import check_call
from tempfile import NamedTemporaryFile, mkdtemp
from zipfile import ZipFile
@ -344,6 +345,8 @@ class UploadUserManual(Command): # {{{
def build_plugin_example(self, path):
from calibre import CurrentDir
with NamedTemporaryFile(suffix='.zip') as f:
os.fchmod(f.fileno(),
stat.S_IRUSR|stat.S_IRGRP|stat.S_IROTH|stat.S_IWRITE)
with CurrentDir(self.d(path)):
with ZipFile(f, 'w') as zf:
for x in os.listdir('.'):
@ -352,8 +355,8 @@ class UploadUserManual(Command): # {{{
for y in os.listdir(x):
zf.write(os.path.join(x, y))
bname = self.b(path) + '_plugin.zip'
subprocess.check_call(['scp', f.name, 'divok:%s/%s'%(DOWNLOADS,
bname)])
dest = '%s/%s'%(DOWNLOADS, bname)
subprocess.check_call(['scp', f.name, 'divok:'+dest])
def run(self, opts):
path = self.j(self.SRC, 'calibre', 'manual', 'plugin_examples')

View File

@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = 'calibre'
__version__ = '0.7.52'
__version__ = '0.7.53'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
import re, importlib

View File

@ -100,6 +100,12 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
try:
if encoding.lower().strip() == 'macintosh':
encoding = 'mac-roman'
if encoding.lower().replace('_', '-').strip() in (
'gb2312', 'chinese', 'csiso58gb231280', 'euc-cn', 'euccn',
'eucgb2312-cn', 'gb2312-1980', 'gb2312-80', 'iso-ir-58'):
# Microsoft Word exports to HTML with encoding incorrectly set to
# gb2312 instead of gbk. gbk is a superset of gb2312, anyway.
encoding = 'gbk'
raw = raw.decode(encoding, 'replace')
except LookupError:
encoding = 'utf-8'
@ -110,4 +116,6 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
if resolve_entities:
raw = substitute_entites(raw)
return raw, encoding

View File

@ -26,15 +26,15 @@ class OEB2HTML(object):
links and images can be retrieved after calling oeb2html to get the mapping
of OEB links and images to the new names used in the html returned by oeb2html.
Images will always be referenced as if they are in an images directory.
Use get_css to get the CSS classes for the OEB document as a string.
'''
def __init__(self, log=None):
self.log = default_log if log is None else log
self.links = {}
self.images = {}
def oeb2html(self, oeb_book, opts):
self.log.info('Converting OEB book to HTML...')
self.opts = opts
@ -52,7 +52,7 @@ class OEB2HTML(object):
output.append('\n\n')
output.append('</body></html>')
return ''.join(output)
def dump_text(self, elem, stylizer, page):
raise NotImplementedError
@ -78,7 +78,7 @@ class OEB2HTML(object):
href = '#%s' % self.links[href]
attribs['href'] = href
return attribs
def rewrite_images(self, tag, attribs, page):
if tag == 'img':
src = attribs.get('src', None)
@ -102,14 +102,14 @@ class OEB2HTML(object):
if item.media_type == 'text/css':
css = item.data.cssText
break
return css
return css
class OEB2HTMLNoCSSizer(OEB2HTML):
'''
This will remap a small number of CSS styles to equivalent HTML tags.
'''
def dump_text(self, elem, stylizer, page):
'''
@elem: The element in the etree that we are working on.
@ -218,7 +218,7 @@ class OEB2HTMLInlineCSSizer(OEB2HTML):
tags = []
tag = barename(elem.tag)
attribs = elem.attrib
style_a = '%s' % style
if tag == 'body':
tag = 'div'
@ -226,7 +226,7 @@ class OEB2HTMLInlineCSSizer(OEB2HTML):
if not style['page-break-before'] == 'always':
style_a = 'page-break-before: always;' + ' ' if style_a else '' + style_a
tags.append(tag)
# Remove attributes we won't want.
if 'class' in attribs:
del attribs['class']
@ -275,7 +275,7 @@ class OEB2HTMLClassCSSizer(OEB2HTML):
inline classes (style tag in the head) or reference an external
CSS file called style.css.
'''
def mlize_spine(self, oeb_book):
output = []
for item in oeb_book.spine:
@ -283,7 +283,7 @@ class OEB2HTMLClassCSSizer(OEB2HTML):
stylizer = Stylizer(item.data, item.href, oeb_book, self.opts)
output += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
output.append('\n\n')
if self.opts.class_style == 'external':
if self.opts.htmlz_class_style == 'external':
css = u'<link href="style.css" rel="stylesheet" type="text/css" />'
else:
css = u'<style type="text/css">' + self.get_css(oeb_book) + u'</style>'
@ -307,16 +307,16 @@ class OEB2HTMLClassCSSizer(OEB2HTML):
# Setup our variables.
text = ['']
style = stylizer.style(elem)
#style = stylizer.style(elem)
tags = []
tag = barename(elem.tag)
attribs = elem.attrib
if tag == 'body':
tag = 'div'
attribs['id'] = self.get_link_id(page.href, '')
tags.append(tag)
# Remove attributes we won't want.
if 'style' in attribs:
del attribs['style']

View File

@ -12,7 +12,6 @@ from lxml import etree
from calibre.customize.conversion import OutputFormatPlugin, \
OptionRecommendation
from calibre.ebooks.html import tostring
from calibre.ebooks.oeb.base import OEB_IMAGES
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.zipfile import ZipFile
@ -22,17 +21,17 @@ class HTMLZOutput(OutputFormatPlugin):
name = 'HTMLZ Output'
author = 'John Schember'
file_type = 'htmlz'
options = set([
OptionRecommendation(name='css_type', recommended_value='class',
OptionRecommendation(name='htmlz_css_type', recommended_value='class',
level=OptionRecommendation.LOW,
choices=['class', 'inline', 'tag'],
help=_('Specify the handling of CSS. Default is class.\n'
'class: Use CSS classes and have elements reference them.\n'
'inline: Write the CSS as an inline style attribute.\n'
'tag: Turn as many CSS styles into HTML tags.'
'tag: Turn as many CSS styles as possible into HTML tags.'
)),
OptionRecommendation(name='class_style', recommended_value='external',
OptionRecommendation(name='htmlz_class_style', recommended_value='external',
level=OptionRecommendation.LOW,
choices=['external', 'inline'],
help=_('How to handle the CSS when using css-type = \'class\'.\n'
@ -43,26 +42,26 @@ class HTMLZOutput(OutputFormatPlugin):
])
def convert(self, oeb_book, output_path, input_plugin, opts, log):
with TemporaryDirectory('_txtz_output') as tdir:
# HTML
if opts.css_type == 'inline':
from calibre.ebooks.htmlz.oeb2html import OEB2HTMLInlineCSSizer as OEB2HTMLizer
elif opts.css_type == 'tag':
from calibre.ebooks.htmlz.oeb2html import OEB2HTMLNoCSSizer as OEB2HTMLizer
else:
from calibre.ebooks.htmlz.oeb2html import OEB2HTMLClassCSSizer as OEB2HTMLizer
# HTML
if opts.htmlz_css_type == 'inline':
from calibre.ebooks.htmlz.oeb2html import OEB2HTMLInlineCSSizer
OEB2HTMLizer = OEB2HTMLInlineCSSizer
elif opts.htmlz_css_type == 'tag':
from calibre.ebooks.htmlz.oeb2html import OEB2HTMLNoCSSizer
OEB2HTMLizer = OEB2HTMLNoCSSizer
else:
from calibre.ebooks.htmlz.oeb2html import OEB2HTMLClassCSSizer as OEB2HTMLizer
with TemporaryDirectory('_txtz_output') as tdir:
htmlizer = OEB2HTMLizer(log)
html = htmlizer.oeb2html(oeb_book, opts)
html = etree.fromstring(html)
html = tostring(html, pretty_print=True)
with open(os.path.join(tdir, 'index.html'), 'wb') as tf:
tf.write(html)
# CSS
if opts.css_type == 'class' and opts.class_style == 'external':
if opts.htmlz_css_type == 'class' and opts.htmlz_class_style == 'external':
with open(os.path.join(tdir, 'style.css'), 'wb') as tf:
tf.write(htmlizer.get_css(oeb_book))
@ -78,8 +77,8 @@ class HTMLZOutput(OutputFormatPlugin):
img.write(item.data)
# Metadata
with open(os.path.join(tdir, 'metadata.opf'), 'wb') as mdataf:
with open(os.path.join(tdir, 'metadata.opf'), 'wb') as mdataf:
mdataf.write(etree.tostring(oeb_book.metadata.to_opf1()))
txtz = ZipFile(output_path, 'w')
txtz.add_dir(tdir)
htmlz = ZipFile(output_path, 'w')
htmlz.add_dir(tdir)

View File

@ -6,7 +6,7 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, textwrap
import os, textwrap, sys
from copy import deepcopy
from lxml import etree
@ -413,7 +413,12 @@ class LRFInput(InputFormatPlugin):
('calibre', 'image-block'): image_block,
}
transform = etree.XSLT(styledoc, extensions=extensions)
result = transform(doc)
try:
result = transform(doc)
except RuntimeError:
sys.setrecursionlimit(5000)
result = transform(doc)
with open('content.opf', 'wb') as f:
f.write(result)
styles.write()

View File

@ -198,8 +198,10 @@ class Metadata(object):
return copy.deepcopy(ans)
def _clean_identifier(self, typ, val):
typ = icu_lower(typ).strip().replace(':', '').replace(',', '')
val = val.strip().replace(',', '|').replace(':', '|')
if typ:
typ = icu_lower(typ).strip().replace(':', '').replace(',', '')
if val:
val = val.strip().replace(',', '|').replace(':', '|')
return typ, val
def set_identifiers(self, identifiers):

View File

@ -503,7 +503,7 @@ if __name__ == '__main__': # tests {{{
( # This isbn not on amazon
{'identifiers':{'isbn': '8324616489'}, 'title':'Learning Python',
'authors':['Lutz']},
[title_test('Learning Python: Powerful Object-Oriented Programming',
[title_test('Learning Python, 3rd Edition',
exact=True), authors_test(['Mark Lutz'])
]

View File

@ -15,6 +15,7 @@ from calibre.customize import Plugin
from calibre.utils.logging import ThreadSafeLog, FileStream
from calibre.utils.config import JSONConfig
from calibre.utils.titlecase import titlecase
from calibre.ebooks.metadata import check_isbn
msprefs = JSONConfig('metadata_sources.json')
@ -236,6 +237,7 @@ class Source(Plugin):
mi.title = fixcase(mi.title)
mi.authors = list(map(fixcase, mi.authors))
mi.tags = list(map(fixcase, mi.tags))
mi.isbn = check_isbn(mi.isbn)
# }}}

View File

@ -14,6 +14,7 @@ from io import BytesIO
from calibre.customize.ui import metadata_plugins
from calibre.ebooks.metadata.sources.base import create_log
from calibre.ebooks.metadata.xisbn import xisbn
# How long to wait for more results after first result is found
WAIT_AFTER_FIRST_RESULT = 30 # seconds
@ -120,7 +121,41 @@ def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
log('We have %d merged results, merging took: %.2f seconds' %
(len(merged_results), time.time() - start_time))
class ISBNMerge(object):
def __init__(self):
self.pools = {}
def isbn_in_pool(self, isbn):
if isbn:
for p in self.pools:
if isbn in p:
return p
return None
def pool_has_result_from_same_source(self, pool, result):
results = self.pools[pool][1]
for r in results:
if r.identify_plugin is result.identify_plugin:
return True
return False
def add_result(self, result, isbn):
pool = self.isbn_in_pool(isbn)
if pool is None:
isbns, min_year = xisbn.get_isbn_pool(isbn)
if not isbns:
isbns = frozenset([isbn])
self.pool[isbns] = pool = (min_year, [])
if not self.pool_has_result_from_same_source(pool, result):
pool[1].append(result)
def merge_identify_results(result_map, log):
pass
for plugin, results in result_map.iteritems():
for result in results:
isbn = result.isbn
if isbn:
isbns, min_year = xisbn.get_isbn_pool(isbn)

View File

@ -71,6 +71,20 @@ class xISBN(object):
ans.add(i)
return ans
def get_isbn_pool(self, isbn):
data = self.get_data(isbn)
isbns = frozenset([x.get('isbn') for x in data if 'isbn' in x])
min_year = 100000
for x in data:
try:
year = int(x['year'])
if year < min_year:
min_year = year
except:
continue
if min_year == 100000:
min_year = None
return isbns, min_year
xisbn = xISBN()

View File

@ -20,7 +20,8 @@ class RemoveAdobeMargins(object):
self.oeb, self.opts, self.log = oeb, opts, log
for item in self.oeb.manifest:
if item.media_type == 'application/vnd.adobe-page-template+xml':
if item.media_type in ('application/vnd.adobe-page-template+xml',
'application/vnd.adobe.page-template+xml'):
self.log('Removing page margins specified in the'
' Adobe page template')
for elem in item.data.xpath(
@ -35,7 +36,7 @@ class RemoveFakeMargins(object):
'''
Remove left and right margins from paragraph/divs if the same margin is specified
on almost all the elements of at that level.
on almost all the elements at that level.
Must be called only after CSS flattening
'''

View File

@ -34,7 +34,7 @@ class PDFInput(InputFormatPlugin):
from calibre.ebooks.pdf.reflow import PDFDocument
if pdfreflow_err:
raise RuntimeError('Failed to load pdfreflow: ' + pdfreflow_err)
pdfreflow.reflow(stream.read())
pdfreflow.reflow(stream.read(), 1, -1)
xml = open('index.xml', 'rb').read()
PDFDocument(xml, self.opts, self.log)
return os.path.join(os.getcwd(), 'metadata.opf')

View File

@ -24,13 +24,14 @@ extern "C" {
pdfreflow_reflow(PyObject *self, PyObject *args) {
char *pdfdata;
Py_ssize_t size;
int first_page, last_page, num = 0;
if (!PyArg_ParseTuple(args, "s#", &pdfdata, &size))
if (!PyArg_ParseTuple(args, "s#ii", &pdfdata, &size, &first_page, &last_page))
return NULL;
try {
Reflow reflow(pdfdata, static_cast<std::ifstream::pos_type>(size));
reflow.render();
num = reflow.render(first_page, last_page);
} catch (std::exception &e) {
PyErr_SetString(PyExc_RuntimeError, e.what()); return NULL;
} catch (...) {
@ -38,7 +39,7 @@ extern "C" {
"Unknown exception raised while rendering PDF"); return NULL;
}
Py_RETURN_NONE;
return Py_BuildValue("i", num);
}
static PyObject *
@ -166,8 +167,8 @@ extern "C" {
static
PyMethodDef pdfreflow_methods[] = {
{"reflow", pdfreflow_reflow, METH_VARARGS,
"reflow(pdf_data)\n\n"
"Reflow the specified PDF."
"reflow(pdf_data, first_page, last_page)\n\n"
"Reflow the specified PDF. Returns the number of pages in the PDF. If last_page is -1 renders to end of document."
},
{"get_metadata", pdfreflow_get_metadata, METH_VARARGS,
"get_metadata(pdf_data, cover)\n\n"

View File

@ -712,16 +712,18 @@ Reflow::Reflow(char *pdfdata, size_t sz) :
}
void
Reflow::render() {
int
Reflow::render(int first_page, int last_page) {
if (!this->doc->okToCopy())
cout << "Warning, this document has the copy protection flag set, ignoring." << endl;
globalParams->setTextEncoding(encoding);
int first_page = 1;
int last_page = doc->getNumPages();
int doc_pages = doc->getNumPages();
if (last_page < 1 || last_page > doc_pages) last_page = doc_pages;
if (first_page < 1) first_page = 1;
if (first_page > last_page) first_page = last_page;
XMLOutputDev *xml_out = new XMLOutputDev(this->doc);
doc->displayPages(xml_out, first_page, last_page,
@ -733,9 +735,12 @@ Reflow::render() {
false //Printing
);
this->dump_outline();
if (last_page - first_page == doc_pages - 1)
this->dump_outline();
delete xml_out;
return doc_pages;
}
void Reflow::dump_outline() {

View File

@ -66,7 +66,7 @@ class Reflow {
~Reflow();
/* Convert the PDF to XML. All files are output to the current directory */
void render();
int render(int first_page, int last_page);
/* Get the PDF Info Dictionary */
map<string, string> get_info();

View File

@ -147,7 +147,8 @@ class ConvertAction(InterfaceAction):
def book_auto_converted_mail(self, job):
temp_files, fmt, book_id, delete_from_library, to, fmts, subject = self.conversion_jobs[job]
self.book_converted(job)
self.gui.send_by_mail(to, fmts, delete_from_library, subject, specific_format=fmt, send_ids=[book_id], do_auto_convert=False)
self.gui.send_by_mail(to, fmts, delete_from_library, subject=subject,
specific_format=fmt, send_ids=[book_id], do_auto_convert=False)
def book_auto_converted_news(self, job):
temp_files, fmt, book_id = self.conversion_jobs[job]

View File

@ -270,6 +270,8 @@ class BookInfo(QWebView):
<style type="text/css">
body, td {background-color: transparent; font-size: %dpx; color: %s }
a { text-decoration: none; color: blue }
div.description { margin-top: 0; padding-top: 0; text-indent: 0 }
table { margin-bottom: 0; padding-bottom: 0; }
</style>
</head>
<body>
@ -278,9 +280,10 @@ class BookInfo(QWebView):
<html>
'''%(f, c)
if self.vertical:
extra = ''
if comments:
rows += u'<tr><td colspan="2">%s</td></tr>'%comments
self.setHtml(templ%(u'<table>%s</table>'%rows))
extra = u'<div class="description">%s</div>'%comments
self.setHtml(templ%(u'<table>%s</table>%s'%(rows, extra)))
else:
left_pane = u'<table>%s</table>'%rows
right_pane = u'<div>%s</div>'%comments

View File

@ -7,7 +7,6 @@ __docformat__ = 'restructuredtext en'
import shutil, functools, re, os, traceback
from contextlib import closing
from operator import attrgetter
from PyQt4.Qt import QAbstractTableModel, Qt, pyqtSignal, QIcon, QImage, \
QModelIndex, QVariant, QDate, QColor
@ -18,7 +17,7 @@ from calibre.ebooks.metadata import fmt_sidx, authors_to_string, string_to_autho
from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.config import tweaks, prefs
from calibre.utils.date import dt_factory, qt_to_dt, isoformat
from calibre.utils.icu import sort_key, strcmp as icu_strcmp
from calibre.utils.icu import sort_key
from calibre.ebooks.metadata.meta import set_metadata as _set_metadata
from calibre.utils.search_query_parser import SearchQueryParser
from calibre.library.caches import _match, CONTAINS_MATCH, EQUALS_MATCH, \
@ -984,6 +983,21 @@ class OnDeviceSearch(SearchQueryParser): # {{{
# }}}
class DeviceDBSortKeyGen(object): # {{{
def __init__(self, attr, keyfunc, db):
self.attr = attr
self.db = db
self.keyfunc = keyfunc
def __call__(self, x):
try:
ans = self.keyfunc(getattr(self.db[x], self.attr))
except:
ans = None
return ans
# }}}
class DeviceBooksModel(BooksModel): # {{{
booklist_dirtied = pyqtSignal()
@ -1089,59 +1103,40 @@ class DeviceBooksModel(BooksModel): # {{{
def sort(self, col, order, reset=True):
descending = order != Qt.AscendingOrder
def strcmp(attr):
ag = attrgetter(attr)
def _strcmp(x, y):
x = ag(self.db[x])
y = ag(self.db[y])
if x == None:
x = ''
if y == None:
y = ''
return icu_strcmp(x.strip(), y.strip())
return _strcmp
def datecmp(x, y):
x = self.db[x].datetime
y = self.db[y].datetime
return cmp(dt_factory(x, assume_utc=True), dt_factory(y,
assume_utc=True))
def sizecmp(x, y):
x, y = int(self.db[x].size), int(self.db[y].size)
return cmp(x, y)
def tagscmp(x, y):
x = ','.join(sorted(getattr(self.db[x], 'device_collections', []),key=sort_key))
y = ','.join(sorted(getattr(self.db[y], 'device_collections', []),key=sort_key))
return cmp(x, y)
def libcmp(x, y):
x, y = self.db[x].in_library, self.db[y].in_library
return cmp(x, y)
def authorcmp(x, y):
ax = getattr(self.db[x], 'author_sort', None)
ay = getattr(self.db[y], 'author_sort', None)
if ax and ay:
x = ax
y = ay
else:
x, y = authors_to_string(self.db[x].authors), \
authors_to_string(self.db[y].authors)
return cmp(x, y)
cname = self.column_map[col]
fcmp = {
'title': strcmp('title_sorter'),
'authors' : authorcmp,
'size' : sizecmp,
'timestamp': datecmp,
'collections': tagscmp,
'inlibrary': libcmp,
def author_key(x):
try:
ax = self.db[x].author_sort
if not ax:
raise Exception('')
except:
try:
ax = authors_to_string(self.db[x].authors)
except:
ax = ''
return ax
keygen = {
'title': ('title_sorter', lambda x: sort_key(x) if x else ''),
'authors' : author_key,
'size' : ('size', int),
'timestamp': ('datetime', functools.partial(dt_factory, assume_utc=True)),
'collections': ('device_collections', lambda x:sorted(x,
key=sort_key)),
'inlibrary': ('in_library', lambda x: x),
}[cname]
self.map.sort(cmp=fcmp, reverse=descending)
keygen = keygen if callable(keygen) else DeviceDBSortKeyGen(
keygen[0], keygen[1], self.db)
self.map.sort(key=keygen, reverse=descending)
if len(self.map) == len(self.db):
self.sorted_map = list(self.map)
else:
self.sorted_map = list(range(len(self.db)))
self.sorted_map.sort(cmp=fcmp, reverse=descending)
self.sorted_map.sort(key=keygen, reverse=descending)
self.sorted_on = (self.column_map[col], order)
self.sort_history.insert(0, self.sorted_on)
if hasattr(keygen, 'db'):
keygen.db = None
if reset:
self.reset()

View File

@ -5,6 +5,8 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import textwrap
from PyQt4.Qt import QAbstractTableModel, QVariant, QFont, Qt
@ -26,15 +28,15 @@ class EmailAccounts(QAbstractTableModel): # {{{
self.default_font = QFont()
self.default_font.setBold(True)
self.default_font = QVariant(self.default_font)
self.tooltips =[NONE] + map(QVariant,
self.tooltips =[NONE] + list(map(QVariant, map(textwrap.fill,
[_('Formats to email. The first matching format will be sent.'),
_('Subject of the email to use when sending. When left blank '
'the title will be used for the subject. Also, the same '
'the title will be used for the subject. Also, the same '
'templates used for "Save to disk" such as {title} and '
'{author_sort} can be used here.'),
'<p>'+_('If checked, downloaded news will be automatically '
'mailed <br>to this email address '
'(provided it is in one of the listed formats).')])
'(provided it is in one of the listed formats).')])))
def rowCount(self, *args):
return len(self.account_order)

View File

@ -109,7 +109,7 @@ class SearchBox2(QComboBox): # {{{
def normalize_state(self):
self.setToolTip(self.tool_tip_text)
self.line_edit.setStyleSheet(
'QLineEdit{color:black;background-color:%s;}' % self.normal_background)
'QLineEdit{color:none;background-color:%s;}' % self.normal_background)
def text(self):
return self.currentText()

View File

@ -136,17 +136,17 @@ class PostInstall:
self.icon_resources = []
self.menu_resources = []
self.mime_resources = []
if islinux:
if islinux or isfreebsd:
self.setup_completion()
self.install_man_pages()
if islinux:
if islinux or isfreebsd:
self.setup_desktop_integration()
self.create_uninstaller()
from calibre.utils.config import config_dir
if os.path.exists(config_dir):
os.chdir(config_dir)
if islinux:
if islinux or isfreebsd:
for f in os.listdir('.'):
if os.stat(f).st_uid == 0:
os.rmdir(f) if os.path.isdir(f) else os.unlink(f)

View File

@ -40,3 +40,84 @@ Sections
glossary
The main |app| user interface
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. toctree::
:maxdepth: 2
gui
Adding your favorite news website to |app|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. toctree::
:maxdepth: 2
news
The |app| e-book viewer
^^^^^^^^^^^^^^^^^^^^^^^^^^
.. toctree::
:maxdepth: 2
viewer
Customizing |app|'s e-book conversion
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. toctree::
:maxdepth: 2
viewer
Editing e-book metadata
^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. toctree::
:maxdepth: 2
viewer
Frequently Asked Questions
^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. toctree::
:maxdepth: 2
faq
Tutorials
^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. toctree::
:maxdepth: 2
tutorials
Customizing |app|
^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. toctree::
:maxdepth: 2
customize
The Command Line Interface
^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. toctree::
:maxdepth: 2
cli/cli-index
Setting up a |app| development environment
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. toctree::
:maxdepth: 2
develop

View File

@ -263,20 +263,18 @@ Tips for developing new recipes
The best way to develop new recipes is to use the command line interface. Create the recipe using your favorite python editor and save it to a file say :file:`myrecipe.recipe`. The `.recipe` extension is required. You can download content using this recipe with the command::
ebook-convert myrecipe.recipe output_dir --test -vv
ebook-convert myrecipe.recipe .epub --test -vv --debug-pipeline debug
The :command:`ebook-convert` will download all the webpages and save them to the directory :file:`output_dir`, creating it if necessary. The :option:`-vv` makes ebook-convert spit out a lot of information about what it is doing. The :option:`--test` makes it download only a couple of articles from at most two feeds.
The command :command:`ebook-convert` will download all the webpages and save them to the EPUB file :file:`myrecipe.epub`. The :option:`-vv` makes ebook-convert spit out a lot of information about what it is doing. The :option:`--test` makes it download only a couple of articles from at most two feeds. In addition, ebook-convert will put the downloaded HTML into the ``debug/input`` directory, where ``debug`` is the directory you specified in the :option:`--debug-pipeline` option.
Once the download is complete, you can look at the downloaded :term:`HTML` by opening the file :file:`index.html` in a browser. Once you're satisfied that the download and preprocessing is happening correctly, you can generate ebooks in different formats as shown below::
Once the download is complete, you can look at the downloaded :term:`HTML` by opening the file :file:`debug/input/index.html` in a browser. Once you're satisfied that the download and preprocessing is happening correctly, you can generate ebooks in different formats as shown below::
ebook-convert myrecipe.recipe myrecipe.epub
ebook-convert myrecipe.recipe myrecipe.mobi
...
If you're satisfied with your recipe, and you feel there is enough demand to justify its inclusion into the set of built-in recipes, add a comment to the ticket http://bugs.calibre-ebook.com/ticket/405
Alternatively, you could just post your recipe in the calibre forum at http://www.mobileread.com/forums/forumdisplay.php?f=166 to share it with other calibre users.
If you're satisfied with your recipe, and you feel there is enough demand to justify its inclusion into the set of built-in recipes, post your recipe in the `calibre recipes forum <http://www.mobileread.com/forums/forumdisplay.php?f=228>`_ to share it with other calibre users.
.. seealso::

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff