mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 10:44:09 -04:00
Sync to trunk.
This commit is contained in:
commit
5731cad1be
@ -19,6 +19,77 @@
|
||||
# new recipes:
|
||||
# - title:
|
||||
|
||||
- version: 0.7.53
|
||||
date: 2011-04-01
|
||||
|
||||
new features:
|
||||
- title: "Email delivery: You can now specify a subject that calibre will use when sending emails per email account, configured in Preferences->Sending by email. The subject is a template of the same kind used in Save to Disk, etc. So you can specify the title/authors/series/whatever in the template."
|
||||
tickets: [743535]
|
||||
|
||||
- title: "Apple driver: When an iDevice is detected, inform the user about the Connect to iTunes method instead of trying to connect directly to the device, as the latter can be buggy. See http://www.mobileread.com/forums/showthread.php?t=127883 for details"
|
||||
|
||||
- title: "SONY driver: Search for books on the device in all directories not just database/media/books. This can be turned off by customizing the SONY plugin in Preferences->Plugins"
|
||||
|
||||
- title: "EPUB Output: Remove any margins specified via an Adobe page template in the input document. This means that the margins specified in calibre are more likely to be the actual margins used."
|
||||
|
||||
- title: "When reading metadata from filenames, allow publisher and published date to be read from the filename"
|
||||
tickets: [744020]
|
||||
|
||||
- title: "Remove the option to show a second tool bar from Preferences->Look & Feel. Instead go to Preferences->Toolbars and add items to the second toolbar to control exactly what is visible there."
|
||||
tickets: [742686]
|
||||
|
||||
- title: "Add a tweak that can be used to have the calibre content server listen for IPv6 connections."
|
||||
tickets: [743486]
|
||||
|
||||
- title: "When clicking Next or Previous in the edit metadata dialog, the active book in the main book list is also changed"
|
||||
tickets: [743533]
|
||||
|
||||
- title: "Remember the previously used setting for Match all/Match any under the Tag Browser when calibre restarts"
|
||||
tickets: [743645]
|
||||
|
||||
- title: "FB2 Output: Option to set the FB2 genre explicitly."
|
||||
tickets: [743178]
|
||||
|
||||
- title: "Plugin developers: calibre now has a new plugin API, see http://calibre-ebook.com/user_manual/creating_plugins.html. Your existing plugins should continue to work, but it would be good to test them to make sure."
|
||||
|
||||
bug fixes:
|
||||
- title: "Fix text color in the search bar set to black instead of the system font color"
|
||||
tickets: [746846]
|
||||
|
||||
- title: "Workaround for Word bug where Word uses gb2312 as the encoding when exporting Chinese docs to HTML instead of gbk"
|
||||
tickets: [745428]
|
||||
|
||||
- title: "Make sorting on the device view faster and more robust."
|
||||
tickets: [742626]
|
||||
|
||||
- title: "E-book viewer: Fix viewer losing place in very long single file documents when window resized."
|
||||
tickets: [745001]
|
||||
|
||||
- title: "MOBI Output: Workaround for Amazon's MOBI renderer not rendering top margins on ul and ol tags."
|
||||
tickets: [744365]
|
||||
|
||||
- title: "EPUB Input: Workaround for invalid EPUBs produced by someone named 'ibooks, Inc.'."
|
||||
tickets: [744122]
|
||||
|
||||
- title: "RTF Input: Handle RTF files with too many levels of list nesting."
|
||||
tickets: [743243]
|
||||
|
||||
improved recipes:
|
||||
- Irish Times
|
||||
- LifeHacker
|
||||
- Estadao
|
||||
- Folha de Sao Paulo
|
||||
|
||||
new recipes:
|
||||
- title: Financieele Dagblad
|
||||
author: marvin_2
|
||||
|
||||
- title: "Prost Amerika, WV Hooligan and SB Nation"
|
||||
author: rylsfan
|
||||
|
||||
- title: "Cracked.com"
|
||||
author: Nudgenudge
|
||||
|
||||
- version: 0.7.52
|
||||
date: 2011-03-25
|
||||
|
||||
|
@ -1,134 +1,129 @@
|
||||
#!/usr/bin/env python
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from datetime import datetime, timedelta
|
||||
from calibre.ebooks.BeautifulSoup import Tag,BeautifulSoup
|
||||
from calibre.utils.magick import Image, PixelWand
|
||||
from urllib2 import Request, urlopen, URLError
|
||||
|
||||
class Estadao(BasicNewsRecipe):
|
||||
THUMBALIZR_API = "0123456789abcdef01234567890" # ---->Get your at http://www.thumbalizr.com/
|
||||
LANGUAGE = 'pt_br'
|
||||
language = 'pt'
|
||||
LANGHTM = 'pt-br'
|
||||
ENCODING = 'utf'
|
||||
ENCHTM = 'utf-8'
|
||||
directionhtm = 'ltr'
|
||||
requires_version = (0,8,47)
|
||||
news = True
|
||||
publication_type = 'newsportal'
|
||||
|
||||
title = u'Estadao'
|
||||
__author__ = 'Euler Alves'
|
||||
description = u'Brazilian news from Estad\xe3o'
|
||||
publisher = u'Estad\xe3o'
|
||||
category = 'news, rss'
|
||||
|
||||
oldest_article = 4
|
||||
max_articles_per_feed = 100
|
||||
summary_length = 1000
|
||||
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
remove_empty_feeds = True
|
||||
timefmt = ' [%d %b %Y (%a)]'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
,'--category', category
|
||||
,'--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
hoje = datetime.now()-timedelta(days=2)
|
||||
pubdate = hoje.strftime('%a, %d %b')
|
||||
if hoje.hour<10:
|
||||
hoje = hoje-timedelta(days=1)
|
||||
CAPA = 'http://www.estadao.com.br/estadaodehoje/'+hoje.strftime('%Y%m%d')+'/img/capadodia.jpg'
|
||||
SCREENSHOT = 'http://estadao.com.br/'
|
||||
cover_margins = (0,0,'white')
|
||||
masthead_url = 'http://www.estadao.com.br/estadao/novo/img/logo.png'
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':['bb-md-noticia','corpo']})]
|
||||
remove_tags = [
|
||||
dict(name='div',
|
||||
attrs={'id':[
|
||||
'bb-md-noticia-tabs'
|
||||
]})
|
||||
,dict(name='div',
|
||||
attrs={'class':[
|
||||
'tags'
|
||||
,'discussion'
|
||||
,'bb-gg adsense_container'
|
||||
]})
|
||||
|
||||
,dict(name='a')
|
||||
,dict(name='iframe')
|
||||
,dict(name='link')
|
||||
,dict(name='script')
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u'\xDAltimas Not\xEDcias', u'http://www.estadao.com.br/rss/ultimas.xml')
|
||||
,(u'Manchetes', u'http://www.estadao.com.br/rss/manchetes.xml')
|
||||
,(u'Brasil', u'http://www.estadao.com.br/rss/brasil.xml')
|
||||
,(u'Internacional', u'http://www.estadao.com.br/rss/internacional.xml')
|
||||
,(u'Cinema', u'http://blogs.estadao.com.br/cinema/feed/')
|
||||
,(u'Planeta', u'http://www.estadao.com.br/rss/planeta.xml')
|
||||
,(u'Ci\xEAncia', u'http://www.estadao.com.br/rss/ciencia.xml')
|
||||
,(u'Sa\xFAde', u'http://www.estadao.com.br/rss/saude.xml')
|
||||
,(u'Pol\xEDtica', u'http://www.estadao.com.br/rss/politica.xml')
|
||||
]
|
||||
|
||||
conversion_options = {
|
||||
'title' : title
|
||||
,'comments' : description
|
||||
,'publisher' : publisher
|
||||
,'tags' : category
|
||||
,'language' : LANGUAGE
|
||||
,'linearize_tables': True
|
||||
}
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
if not soup.find(attrs={'http-equiv':'Content-Language'}):
|
||||
meta0 = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.LANGHTM)])
|
||||
soup.head.insert(0,meta0)
|
||||
if not soup.find(attrs={'http-equiv':'Content-Type'}):
|
||||
meta1 = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset="+self.ENCHTM)])
|
||||
soup.head.insert(0,meta1)
|
||||
return soup
|
||||
|
||||
def postprocess_html(self, soup, first):
|
||||
#process all the images. assumes that the new html has the correct path
|
||||
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
|
||||
iurl = tag['src']
|
||||
img = Image()
|
||||
img.open(iurl)
|
||||
width, height = img.size
|
||||
print 'img is: ', iurl, 'width is: ', width, 'height is: ', height
|
||||
pw = PixelWand()
|
||||
if( width > height and width > 590) :
|
||||
print 'Rotate image'
|
||||
img.rotate(pw, -90)
|
||||
img.save(iurl)
|
||||
return soup
|
||||
|
||||
def get_cover_url(self):
|
||||
cover_url = self.CAPA
|
||||
pedido = Request(self.CAPA)
|
||||
pedido.add_header('User-agent','Mozilla/5.0 (Windows; U; Windows NT 5.1; '+self.LANGHTM+'; userid='+self.THUMBALIZR_API+') Calibre/0.8.47 (like Gecko)')
|
||||
pedido.add_header('Accept-Charset',self.ENCHTM)
|
||||
pedido.add_header('Referer',self.SCREENSHOT)
|
||||
try:
|
||||
resposta = urlopen(pedido)
|
||||
soup = BeautifulSoup(resposta)
|
||||
cover_item = soup.find('body')
|
||||
if cover_item:
|
||||
cover_url='http://api.thumbalizr.com/?api_key='+self.THUMBALIZR_API+'&url='+self.SCREENSHOT+'&width=600&quality=90'
|
||||
return cover_url
|
||||
except URLError:
|
||||
cover_url='http://api.thumbalizr.com/?api_key='+self.THUMBALIZR_API+'&url='+self.SCREENSHOT+'&width=600&quality=90'
|
||||
return cover_url
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from datetime import datetime, timedelta
|
||||
from calibre.ebooks.BeautifulSoup import Tag,BeautifulSoup
|
||||
from calibre.utils.magick import Image, PixelWand
|
||||
from urllib2 import Request, urlopen, URLError
|
||||
|
||||
class Estadao(BasicNewsRecipe):
|
||||
THUMBALIZR_API = '' # ---->Get your at http://www.thumbalizr.com/ and put here
|
||||
LANGUAGE = 'pt_br'
|
||||
language = 'pt'
|
||||
LANGHTM = 'pt-br'
|
||||
ENCODING = 'utf'
|
||||
ENCHTM = 'utf-8'
|
||||
directionhtm = 'ltr'
|
||||
requires_version = (0,7,47)
|
||||
news = True
|
||||
|
||||
title = u'Estad\xe3o'
|
||||
__author__ = 'Euler Alves'
|
||||
description = u'Brazilian news from Estad\xe3o'
|
||||
publisher = u'Estad\xe3o'
|
||||
category = 'news, rss'
|
||||
|
||||
oldest_article = 4
|
||||
max_articles_per_feed = 100
|
||||
summary_length = 1000
|
||||
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
remove_empty_feeds = True
|
||||
timefmt = ' [%d %b %Y (%a)]'
|
||||
|
||||
hoje = datetime.now()-timedelta(days=2)
|
||||
pubdate = hoje.strftime('%a, %d %b')
|
||||
if hoje.hour<10:
|
||||
hoje = hoje-timedelta(days=1)
|
||||
CAPA = 'http://www.estadao.com.br/estadaodehoje/'+hoje.strftime('%Y%m%d')+'/img/capadodia.jpg'
|
||||
SCREENSHOT = 'http://estadao.com.br/'
|
||||
cover_margins = (0,0,'white')
|
||||
masthead_url = 'http://www.estadao.com.br/estadao/novo/img/logo.png'
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':['bb-md-noticia','corpo']})]
|
||||
remove_tags = [
|
||||
dict(name='div',
|
||||
attrs={'id':[
|
||||
'bb-md-noticia-tabs'
|
||||
]})
|
||||
,dict(name='div',
|
||||
attrs={'class':[
|
||||
'tags'
|
||||
,'discussion'
|
||||
,'bb-gg adsense_container'
|
||||
]})
|
||||
|
||||
,dict(name='a')
|
||||
,dict(name='iframe')
|
||||
,dict(name='link')
|
||||
,dict(name='script')
|
||||
]
|
||||
|
||||
|
||||
feeds = [
|
||||
(u'\xDAltimas Not\xEDcias', u'http://www.estadao.com.br/rss/ultimas.xml')
|
||||
,(u'Manchetes', u'http://www.estadao.com.br/rss/manchetes.xml')
|
||||
,(u'Brasil', u'http://www.estadao.com.br/rss/brasil.xml')
|
||||
,(u'Internacional', u'http://www.estadao.com.br/rss/internacional.xml')
|
||||
,(u'Cinema', u'http://blogs.estadao.com.br/cinema/feed/')
|
||||
,(u'Planeta', u'http://www.estadao.com.br/rss/planeta.xml')
|
||||
,(u'Ci\xEAncia', u'http://www.estadao.com.br/rss/ciencia.xml')
|
||||
,(u'Sa\xFAde', u'http://www.estadao.com.br/rss/saude.xml')
|
||||
,(u'Pol\xEDtica', u'http://www.estadao.com.br/rss/politica.xml')
|
||||
]
|
||||
|
||||
conversion_options = {
|
||||
'title' : title
|
||||
,'comments' : description
|
||||
,'publisher' : publisher
|
||||
,'tags' : category
|
||||
,'language' : LANGUAGE
|
||||
,'linearize_tables': True
|
||||
}
|
||||
|
||||
def preprocess_html(self, soup):
    """Drop inline styles and make sure language/charset <meta> tags exist.

    Every element carrying a ``style`` attribute has that attribute
    removed.  A ``Content-Language`` meta (value ``self.LANGHTM``) and a
    ``Content-Type`` meta (charset ``self.ENCHTM``) are then inserted at
    the start of ``soup.head`` unless an element with the matching
    ``http-equiv`` attribute is already present.

    Returns the same ``soup`` object, modified in place.
    """
    for styled in soup.findAll(style=True):
        del styled['style']

    language_meta = soup.find(attrs={'http-equiv': 'Content-Language'})
    if not language_meta:
        attrs = [("http-equiv", "Content-Language"), ("content", self.LANGHTM)]
        soup.head.insert(0, Tag(soup, 'meta', attrs))

    charset_meta = soup.find(attrs={'http-equiv': 'Content-Type'})
    if not charset_meta:
        attrs = [("http-equiv", "Content-Type"),
                 ("content", "text/html; charset=" + self.ENCHTM)]
        # Inserted at index 0 as well, so it ends up before the language meta.
        soup.head.insert(0, Tag(soup, 'meta', attrs))

    return soup
|
||||
|
||||
def postprocess_html(self, soup, first):
    """Rotate wide landscape images by -90 degrees and save them in place.

    Every ``<img>`` tag that has a ``src`` attribute is opened through
    ``Image``.  When the image is wider than it is tall and wider than
    590 pixels it is rotated and written back to the same ``src`` path.

    ``first`` is accepted for interface compatibility and is not used.
    Returns ``soup`` unchanged (only the image files are modified).
    """
    # Assumes each src already points at a path Image.open() can read
    # (the original comment says "the new html has the correct path").
    for tag in soup.findAll(lambda tag: tag.name.lower() == 'img' and tag.has_key('src')):
        iurl = tag['src']
        img = Image()
        img.open(iurl)
        width, height = img.size
        # Same text the former print statement emitted (note the doubled
        # spaces), written so that it parses on Python 2 and Python 3.
        print('img is:  %s width is:  %s height is:  %s' % (iurl, width, height))
        # The former "if img < 0: raise RuntimeError('Out of memory')" was
        # removed: it compared an Image object with an int, which can never
        # signal an allocation failure.
        if width > height and width > 590:
            print('Rotate image')
            # The wand is only needed for the rotation, so build it here.
            img.rotate(PixelWand(), -90)
            img.save(iurl)
    return soup
|
||||
|
||||
def get_cover_url(self):
    """Return the URL to use as the cover, or None without an API key.

    With no ``THUMBALIZR_API`` key configured, nothing is fetched and
    None is returned.  Otherwise ``self.CAPA`` (today's front-page image
    URL) is requested:

    * if the request fails with ``URLError``, or the response parses as a
      document containing a ``<body>`` (presumably an HTML error page
      rather than the JPEG -- confirm), a thumbalizr screenshot URL of
      ``self.SCREENSHOT`` is returned;
    * otherwise ``self.CAPA`` itself is returned.
    """
    if not self.THUMBALIZR_API:
        return None

    # Built once; used by both fallback paths below.
    fallback = ('http://api.thumbalizr.com/?api_key=' + self.THUMBALIZR_API
                + '&url=' + self.SCREENSHOT + '&width=600&quality=90')

    pedido = Request(self.CAPA)
    # NOTE(review): this User-agent embeds the thumbalizr API key and is
    # sent to the newspaper site, not to thumbalizr -- confirm intended.
    pedido.add_header('User-agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; ' + self.LANGHTM + '; userid=' + self.THUMBALIZR_API + ') Calibre/0.8.47 (like Gecko)')
    pedido.add_header('Accept-Charset', self.ENCHTM)
    pedido.add_header('Referer', self.SCREENSHOT)

    try:
        resposta = urlopen(pedido)
        try:
            soup = BeautifulSoup(resposta)
        finally:
            # The response was previously left open.
            resposta.close()
    except URLError:
        return fallback

    if soup.find('body'):
        return fallback
    return self.CAPA
|
||||
|
||||
|
29
recipes/financieele_dagblad.recipe
Normal file
29
recipes/financieele_dagblad.recipe
Normal file
@ -0,0 +1,29 @@
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class fd(BasicNewsRecipe):
    """Recipe for the Dutch newspaper Het Financieele Dagblad (fd.nl)."""

    title = u'Het Financieele Dagblad'
    __author__ = 'marvin_2'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url = 'http://www.fd.nl/static/gfx/logo-fd-164x78.gif'
    language = 'nl'

    # Must be a list of tag specs.  The previous value, (dict(...)), was a
    # bare dict because parentheses without a trailing comma do not make a
    # tuple, so iterating it yielded the keys 'name' and 'attrs'.
    keep_only_tags = [dict(name='div', attrs={'class': ['headlinearticle']})]
    remove_tags = [dict(name='span', attrs={'class': ['opties']})]

    feeds = [
        (u'Overzicht', u'http://www.fd.nl/nieuws/overzicht/?view=RSS&profiel=OPENBAAR'),
    ]

    extra_css = '''
    h1 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:x-large;}
    p{font-family:Arial,Helvetica,sans-serif;}
    strong{font-weight:bold; margin-right:5pt;margin-top:20pt;}
    .datum_ie {font-style:italic;font-size:small;}
    img {align:left;}
    '''
|
||||
|
||||
|
@ -1,149 +1,151 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from datetime import datetime, timedelta
|
||||
from calibre.ebooks.BeautifulSoup import Tag,BeautifulSoup
|
||||
from calibre.utils.magick import Image, PixelWand
|
||||
from urllib2 import Request, urlopen, URLError
|
||||
|
||||
class FolhaOnline(BasicNewsRecipe):
|
||||
THUMBALIZR_API = "0123456789abcdef01234567890" # ---->Get your at http://www.thumbalizr.com/
|
||||
LANGUAGE = 'pt_br'
|
||||
language = 'pt'
|
||||
LANGHTM = 'pt-br'
|
||||
ENCODING = 'cp1252'
|
||||
ENCHTM = 'iso-8859-1'
|
||||
directionhtm = 'ltr'
|
||||
requires_version = (0,8,47)
|
||||
news = True
|
||||
publication_type = 'newsportal'
|
||||
|
||||
title = u'Folha de S\xE3o Paulo'
|
||||
__author__ = 'Euler Alves'
|
||||
description = u'Brazilian news from Folha de S\xE3o Paulo'
|
||||
publisher = u'Folha de S\xE3o Paulo'
|
||||
category = 'news, rss'
|
||||
|
||||
oldest_article = 4
|
||||
max_articles_per_feed = 100
|
||||
summary_length = 1000
|
||||
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
remove_empty_feeds = True
|
||||
timefmt = ' [%d %b %Y (%a)]'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
,'--category', category
|
||||
,'--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
hoje = datetime.now()
|
||||
pubdate = hoje.strftime('%a, %d %b')
|
||||
if hoje.hour<6:
|
||||
hoje = hoje-timedelta(days=1)
|
||||
CAPA = 'http://www1.folha.uol.com.br/fsp/images/cp'+hoje.strftime('%d%m%Y')+'.jpg'
|
||||
SCREENSHOT = 'http://www1.folha.uol.com.br/'
|
||||
cover_margins = (0,0,'white')
|
||||
masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif'
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'articleNew'})]
|
||||
remove_tags = [
|
||||
dict(name='div',
|
||||
attrs={'id':[
|
||||
'articleButton'
|
||||
,'bookmarklets'
|
||||
,'ad-180x150-1'
|
||||
,'contextualAdsArticle'
|
||||
,'articleEnd'
|
||||
,'articleComments'
|
||||
]})
|
||||
,dict(name='div',
|
||||
attrs={'class':[
|
||||
'openBox adslibraryArticle'
|
||||
]})
|
||||
|
||||
,dict(name='a')
|
||||
,dict(name='iframe')
|
||||
,dict(name='link')
|
||||
,dict(name='script')
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u'Em cima da hora', u'http://feeds.folha.uol.com.br/emcimadahora/rss091.xml')
|
||||
,(u'Ambiente', u'http://feeds.folha.uol.com.br/ambiente/rss091.xml')
|
||||
,(u'Bichos', u'http://feeds.folha.uol.com.br/bichos/rss091.xml')
|
||||
,(u'Ci\xEAncia', u'http://feeds.folha.uol.com.br/ciencia/rss091.xml')
|
||||
,(u'Poder', u'http://feeds.folha.uol.com.br/poder/rss091.xml')
|
||||
,(u'Equil\xEDbrio e Sa\xFAde', u'http://feeds.folha.uol.com.br/equilibrioesaude/rss091.xml')
|
||||
,(u'Turismo', u'http://feeds.folha.uol.com.br/folha/turismo/rss091.xml')
|
||||
,(u'Mundo', u'http://feeds.folha.uol.com.br/mundo/rss091.xml')
|
||||
,(u'Pelo Mundo', u'http://feeds.folha.uol.com.br/pelomundo.folha.rssblog.uol.com.br/')
|
||||
,(u'Circuito integrado', u'http://feeds.folha.uol.com.br/circuitointegrado.folha.rssblog.uol.com.br/')
|
||||
,(u'Blog do Fred', u'http://feeds.folha.uol.com.br/blogdofred.folha.rssblog.uol.com.br/')
|
||||
,(u'Maria In\xEAs Dolci', u'http://feeds.folha.uol.com.br/mariainesdolci.folha.blog.uol.com.br/')
|
||||
,(u'Eduardo Ohata', u'http://feeds.folha.uol.com.br/folha/pensata/eduardoohata/rss091.xml')
|
||||
,(u'Kennedy Alencar', u'http://feeds.folha.uol.com.br/folha/pensata/kennedyalencar/rss091.xml')
|
||||
,(u'Eliane Catanh\xEAde', u'http://feeds.folha.uol.com.br/folha/pensata/elianecantanhede/rss091.xml')
|
||||
,(u'Fernado Canzian', u'http://feeds.folha.uol.com.br/folha/pensata/fernandocanzian/rss091.xml')
|
||||
,(u'Gilberto Dimenstein', u'http://feeds.folha.uol.com.br/folha/pensata/gilbertodimenstein/rss091.xml')
|
||||
,(u'H\xE9lio Schwartsman', u'http://feeds.folha.uol.com.br/folha/pensata/helioschwartsman/rss091.xml')
|
||||
,(u'Jo\xE3o Pereira Coutinho', u'http://http://feeds.folha.uol.com.br/folha/pensata/joaopereiracoutinho/rss091.xml')
|
||||
,(u'Luiz Caversan', u'http://http://feeds.folha.uol.com.br/folha/pensata/luizcaversan/rss091.xml')
|
||||
,(u'S\xE9rgio Malbergier', u'http://http://feeds.folha.uol.com.br/folha/pensata/sergiomalbergier/rss091.xml')
|
||||
,(u'Valdo Cruz', u'http://http://feeds.folha.uol.com.br/folha/pensata/valdocruz/rss091.xml')
|
||||
]
|
||||
|
||||
conversion_options = {
|
||||
'title' : title
|
||||
,'comments' : description
|
||||
,'publisher' : publisher
|
||||
,'tags' : category
|
||||
,'language' : LANGUAGE
|
||||
,'linearize_tables': True
|
||||
}
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
if not soup.find(attrs={'http-equiv':'Content-Language'}):
|
||||
meta0 = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.LANGHTM)])
|
||||
soup.head.insert(0,meta0)
|
||||
if not soup.find(attrs={'http-equiv':'Content-Type'}):
|
||||
meta1 = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset="+self.ENCHTM)])
|
||||
soup.head.insert(0,meta1)
|
||||
return soup
|
||||
|
||||
def postprocess_html(self, soup, first):
|
||||
#process all the images. assumes that the new html has the correct path
|
||||
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
|
||||
iurl = tag['src']
|
||||
img = Image()
|
||||
img.open(iurl)
|
||||
width, height = img.size
|
||||
print 'img is: ', iurl, 'width is: ', width, 'height is: ', height
|
||||
pw = PixelWand()
|
||||
if( width > height and width > 590) :
|
||||
print 'Rotate image'
|
||||
img.rotate(pw, -90)
|
||||
img.save(iurl)
|
||||
return soup
|
||||
|
||||
def get_cover_url(self):
|
||||
cover_url = self.CAPA
|
||||
pedido = Request(self.CAPA)
|
||||
pedido.add_header('User-agent','Mozilla/5.0 (Windows; U; Windows NT 5.1; '+self.LANGHTM+'; userid='+self.THUMBALIZR_API+') Calibre/0.8.47 (like Gecko)')
|
||||
pedido.add_header('Accept-Charset',self.ENCHTM)
|
||||
pedido.add_header('Referer',self.SCREENSHOT)
|
||||
try:
|
||||
resposta = urlopen(pedido)
|
||||
soup = BeautifulSoup(resposta)
|
||||
cover_item = soup.find('body')
|
||||
if cover_item:
|
||||
cover_url='http://api.thumbalizr.com/?api_key='+self.THUMBALIZR_API+'&url='+self.SCREENSHOT+'&width=600&quality=90'
|
||||
return cover_url
|
||||
except URLError:
|
||||
cover_url='http://api.thumbalizr.com/?api_key='+self.THUMBALIZR_API+'&url='+self.SCREENSHOT+'&width=600&quality=90'
|
||||
return cover_url
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from datetime import datetime, timedelta
|
||||
from calibre.ebooks.BeautifulSoup import Tag,BeautifulSoup
|
||||
from calibre.utils.magick import Image, PixelWand
|
||||
from urllib2 import Request, urlopen, URLError
|
||||
|
||||
class FolhaOnline(BasicNewsRecipe):
|
||||
THUMBALIZR_API = '' # ---->Get your at http://www.thumbalizr.com/ and put here
|
||||
LANGUAGE = 'pt_br'
|
||||
language = 'pt'
|
||||
LANGHTM = 'pt-br'
|
||||
ENCODING = 'cp1252'
|
||||
ENCHTM = 'iso-8859-1'
|
||||
directionhtm = 'ltr'
|
||||
requires_version = (0,7,47)
|
||||
news = True
|
||||
|
||||
title = u'Folha de S\xE3o Paulo'
|
||||
__author__ = 'Euler Alves'
|
||||
description = u'Brazilian news from Folha de S\xE3o Paulo'
|
||||
publisher = u'Folha de S\xE3o Paulo'
|
||||
category = 'news, rss'
|
||||
|
||||
oldest_article = 4
|
||||
max_articles_per_feed = 100
|
||||
summary_length = 1000
|
||||
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
remove_empty_feeds = True
|
||||
timefmt = ' [%d %b %Y (%a)]'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
,'--category', category
|
||||
,'--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
hoje = datetime.now()
|
||||
pubdate = hoje.strftime('%a, %d %b')
|
||||
if hoje.hour<6:
|
||||
hoje = hoje-timedelta(days=1)
|
||||
CAPA = 'http://www1.folha.uol.com.br/fsp/images/cp'+hoje.strftime('%d%m%Y')+'.jpg'
|
||||
SCREENSHOT = 'http://www1.folha.uol.com.br/'
|
||||
cover_margins = (0,0,'white')
|
||||
masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif'
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'articleNew'})]
|
||||
remove_tags = [
|
||||
dict(name='div',
|
||||
attrs={'id':[
|
||||
'articleButton'
|
||||
,'bookmarklets'
|
||||
,'ad-180x150-1'
|
||||
,'contextualAdsArticle'
|
||||
,'articleEnd'
|
||||
,'articleComments'
|
||||
]})
|
||||
,dict(name='div',
|
||||
attrs={'class':[
|
||||
'openBox adslibraryArticle'
|
||||
]})
|
||||
|
||||
,dict(name='a')
|
||||
,dict(name='iframe')
|
||||
,dict(name='link')
|
||||
,dict(name='script')
|
||||
]
|
||||
|
||||
# (section title, RSS URL) pairs.  Fixed here: the last four URLs used a
# doubled "http://http://" scheme, and two columnist names were misspelled
# ("Fernado" -> "Fernando", "Catanhêde" -> "Cantanhêde").
feeds = [
    (u'Em cima da hora', u'http://feeds.folha.uol.com.br/emcimadahora/rss091.xml'),
    (u'Ambiente', u'http://feeds.folha.uol.com.br/ambiente/rss091.xml'),
    (u'Bichos', u'http://feeds.folha.uol.com.br/bichos/rss091.xml'),
    (u'Ci\xEAncia', u'http://feeds.folha.uol.com.br/ciencia/rss091.xml'),
    (u'Poder', u'http://feeds.folha.uol.com.br/poder/rss091.xml'),
    (u'Equil\xEDbrio e Sa\xFAde', u'http://feeds.folha.uol.com.br/equilibrioesaude/rss091.xml'),
    (u'Turismo', u'http://feeds.folha.uol.com.br/folha/turismo/rss091.xml'),
    (u'Mundo', u'http://feeds.folha.uol.com.br/mundo/rss091.xml'),
    (u'Pelo Mundo', u'http://feeds.folha.uol.com.br/pelomundo.folha.rssblog.uol.com.br/'),
    (u'Circuito integrado', u'http://feeds.folha.uol.com.br/circuitointegrado.folha.rssblog.uol.com.br/'),
    (u'Blog do Fred', u'http://feeds.folha.uol.com.br/blogdofred.folha.rssblog.uol.com.br/'),
    (u'Maria In\xEAs Dolci', u'http://feeds.folha.uol.com.br/mariainesdolci.folha.blog.uol.com.br/'),
    (u'Eduardo Ohata', u'http://feeds.folha.uol.com.br/folha/pensata/eduardoohata/rss091.xml'),
    (u'Kennedy Alencar', u'http://feeds.folha.uol.com.br/folha/pensata/kennedyalencar/rss091.xml'),
    (u'Eliane Cantanh\xEAde', u'http://feeds.folha.uol.com.br/folha/pensata/elianecantanhede/rss091.xml'),
    (u'Fernando Canzian', u'http://feeds.folha.uol.com.br/folha/pensata/fernandocanzian/rss091.xml'),
    (u'Gilberto Dimenstein', u'http://feeds.folha.uol.com.br/folha/pensata/gilbertodimenstein/rss091.xml'),
    (u'H\xE9lio Schwartsman', u'http://feeds.folha.uol.com.br/folha/pensata/helioschwartsman/rss091.xml'),
    (u'Jo\xE3o Pereira Coutinho', u'http://feeds.folha.uol.com.br/folha/pensata/joaopereiracoutinho/rss091.xml'),
    (u'Luiz Caversan', u'http://feeds.folha.uol.com.br/folha/pensata/luizcaversan/rss091.xml'),
    (u'S\xE9rgio Malbergier', u'http://feeds.folha.uol.com.br/folha/pensata/sergiomalbergier/rss091.xml'),
    (u'Valdo Cruz', u'http://feeds.folha.uol.com.br/folha/pensata/valdocruz/rss091.xml'),
]
|
||||
|
||||
|
||||
conversion_options = {
|
||||
'title' : title
|
||||
,'comments' : description
|
||||
,'publisher' : publisher
|
||||
,'tags' : category
|
||||
,'language' : LANGUAGE
|
||||
,'linearize_tables': True
|
||||
}
|
||||
|
||||
def preprocess_html(self, soup):
    """Drop inline styles and make sure language/charset <meta> tags exist.

    Every element carrying a ``style`` attribute has that attribute
    removed.  A ``Content-Language`` meta (value ``self.LANGHTM``) and a
    ``Content-Type`` meta (charset ``self.ENCHTM``) are then inserted at
    the start of ``soup.head`` unless an element with the matching
    ``http-equiv`` attribute is already present.

    Returns the same ``soup`` object, modified in place.
    """
    for styled in soup.findAll(style=True):
        del styled['style']

    language_meta = soup.find(attrs={'http-equiv': 'Content-Language'})
    if not language_meta:
        attrs = [("http-equiv", "Content-Language"), ("content", self.LANGHTM)]
        soup.head.insert(0, Tag(soup, 'meta', attrs))

    charset_meta = soup.find(attrs={'http-equiv': 'Content-Type'})
    if not charset_meta:
        attrs = [("http-equiv", "Content-Type"),
                 ("content", "text/html; charset=" + self.ENCHTM)]
        # Inserted at index 0 as well, so it ends up before the language meta.
        soup.head.insert(0, Tag(soup, 'meta', attrs))

    return soup
|
||||
|
||||
def postprocess_html(self, soup, first):
    """Rotate wide landscape images by -90 degrees and save them in place.

    Every ``<img>`` tag that has a ``src`` attribute is opened through
    ``Image``.  When the image is wider than it is tall and wider than
    590 pixels it is rotated and written back to the same ``src`` path.

    ``first`` is accepted for interface compatibility and is not used.
    Returns ``soup`` unchanged (only the image files are modified).
    """
    # Assumes each src already points at a path Image.open() can read
    # (the original comment says "the new html has the correct path").
    for tag in soup.findAll(lambda tag: tag.name.lower() == 'img' and tag.has_key('src')):
        iurl = tag['src']
        img = Image()
        img.open(iurl)
        width, height = img.size
        # Same text the former print statement emitted (note the doubled
        # spaces), written so that it parses on Python 2 and Python 3.
        print('img is:  %s width is:  %s height is:  %s' % (iurl, width, height))
        # The former "if img < 0: raise RuntimeError('Out of memory')" was
        # removed: it compared an Image object with an int, which can never
        # signal an allocation failure.
        if width > height and width > 590:
            print('Rotate image')
            # The wand is only needed for the rotation, so build it here.
            img.rotate(PixelWand(), -90)
            img.save(iurl)
    return soup
|
||||
|
||||
def get_cover_url(self):
    """Return the URL to use as the cover, or None when none is usable.

    ``self.CAPA`` (today's front-page image URL) is requested:

    * if the response does not parse as a document containing a
      ``<body>``, ``self.CAPA`` itself is returned;
    * if it does (presumably an HTML error page rather than the JPEG --
      confirm), or the request fails with ``URLError``, the thumbalizr
      screenshot URL of ``self.SCREENSHOT`` is returned.

    The screenshot fallback is only available when ``THUMBALIZR_API`` is
    set.  With the default empty key the fallback is None; previously a
    thumbalizr URL with an empty ``api_key`` was returned.
    """
    if self.THUMBALIZR_API:
        fallback = ('http://api.thumbalizr.com/?api_key=' + self.THUMBALIZR_API
                    + '&url=' + self.SCREENSHOT + '&width=600&quality=90')
    else:
        fallback = None

    pedido = Request(self.CAPA)
    # NOTE(review): this User-agent embeds the thumbalizr API key and is
    # sent to the newspaper site, not to thumbalizr -- confirm intended.
    pedido.add_header('User-agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; ' + self.LANGHTM + '; userid=' + self.THUMBALIZR_API + ') Calibre/0.8.47 (like Gecko)')
    pedido.add_header('Accept-Charset', self.ENCHTM)
    pedido.add_header('Referer', self.SCREENSHOT)

    try:
        resposta = urlopen(pedido)
        try:
            soup = BeautifulSoup(resposta)
        finally:
            # The response was previously left open.
            resposta.close()
    except URLError:
        return fallback

    if soup.find('body'):
        return fallback
    return self.CAPA
|
||||
|
@ -35,8 +35,8 @@ class AdvancedUserRecipe1287083651(BasicNewsRecipe):
|
||||
(u'Arts', u'http://www.theglobeandmail.com/news/arts/?service=rss'),
|
||||
(u'Life', u'http://www.theglobeandmail.com/life/?service=rss'),
|
||||
(u'Real Estate', u'http://www.theglobeandmail.com/real-estate/?service=rss'),
|
||||
(u'Auto', u'http://www.theglobeandmail.com/sports/?service=rss'),
|
||||
(u'Sports', u'http://www.theglobeandmail.com/auto/?service=rss')
|
||||
(u'Sports', u'http://www.theglobeandmail.com/sports/?service=rss'),
|
||||
(u'Drive', u'http://www.theglobeandmail.com/auto/?service=rss')
|
||||
]
|
||||
|
||||
preprocess_regexps = [
|
||||
|
Binary file not shown.
Before Width: | Height: | Size: 1.7 KiB After Width: | Height: | Size: 714 B |
BIN
recipes/icons/folhadesaopaulo.png
Normal file
BIN
recipes/icons/folhadesaopaulo.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 392 B |
@ -1,5 +1,5 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = "2008, Derry FitzGerald. 2009 Modified by Ray Kinsella and David O'Callaghan"
|
||||
__copyright__ = "2008, Derry FitzGerald. 2009 Modified by Ray Kinsella and David O'Callaghan, 2011 Modified by Phil Burns"
|
||||
'''
|
||||
irishtimes.com
|
||||
'''
|
||||
@ -9,17 +9,20 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class IrishTimes(BasicNewsRecipe):
|
||||
title = u'The Irish Times'
|
||||
__author__ = "Derry FitzGerald, Ray Kinsella and David O'Callaghan"
|
||||
encoding = 'ISO-8859-15'
|
||||
__author__ = "Derry FitzGerald, Ray Kinsella, David O'Callaghan and Phil Burns"
|
||||
language = 'en_IE'
|
||||
timefmt = ' (%A, %B %d, %Y)'
|
||||
|
||||
oldest_article = 3
|
||||
|
||||
oldest_article = 1.0
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
simultaneous_downloads= 1
|
||||
simultaneous_downloads= 5
|
||||
|
||||
r = re.compile('.*(?P<url>http:\/\/(www.irishtimes.com)|(rss.feedsportal.com\/c)\/.*\.html?).*')
|
||||
remove_tags = [dict(name='div', attrs={'class':'footer'})]
|
||||
extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt }'
|
||||
extra_css = 'p, div { margin: 0pt; border: 0pt; text-indent: 0.5em } .headline {font-size: large;} \n .fact { padding-top: 10pt }'
|
||||
|
||||
feeds = [
|
||||
('Frontpage', 'http://www.irishtimes.com/feeds/rss/newspaper/index.rss'),
|
||||
@ -30,15 +33,29 @@ class IrishTimes(BasicNewsRecipe):
|
||||
('Sport', 'http://www.irishtimes.com/feeds/rss/newspaper/sport.rss'),
|
||||
('Opinion', 'http://www.irishtimes.com/feeds/rss/newspaper/opinion.rss'),
|
||||
('Letters', 'http://www.irishtimes.com/feeds/rss/newspaper/letters.rss'),
|
||||
('Magazine', 'http://www.irishtimes.com/feeds/rss/newspaper/magazine.rss'),
|
||||
('Health', 'http://www.irishtimes.com/feeds/rss/newspaper/health.rss'),
|
||||
('Education & Parenting', 'http://www.irishtimes.com/feeds/rss/newspaper/education.rss'),
|
||||
('Motors', 'http://www.irishtimes.com/feeds/rss/newspaper/motors.rss'),
|
||||
('An Teanga Bheo', 'http://www.irishtimes.com/feeds/rss/newspaper/anteangabheo.rss'),
|
||||
('Commercial Property', 'http://www.irishtimes.com/feeds/rss/newspaper/commercialproperty.rss'),
|
||||
('Science Today', 'http://www.irishtimes.com/feeds/rss/newspaper/sciencetoday.rss'),
|
||||
('Property', 'http://www.irishtimes.com/feeds/rss/newspaper/property.rss'),
|
||||
('The Tickets', 'http://www.irishtimes.com/feeds/rss/newspaper/theticket.rss'),
|
||||
('Weekend', 'http://www.irishtimes.com/feeds/rss/newspaper/weekend.rss'),
|
||||
('News features', 'http://www.irishtimes.com/feeds/rss/newspaper/newsfeatures.rss'),
|
||||
('Obituaries', 'http://www.irishtimes.com/feeds/rss/newspaper/obituaries.rss'),
|
||||
]
|
||||
|
||||
|
||||
def print_version(self, url):
|
||||
if url.count('rss.feedsportal.com'):
|
||||
u = 'http://www.irishtimes.com' + \
|
||||
(((url[70:].replace('0C','/')).replace('0A','0'))).replace('0Bhtml/story01.htm','_pf.html')
|
||||
else:
|
||||
u = url.replace('.html','_pf.html')
|
||||
return u
|
||||
if url.count('rss.feedsportal.com'):
|
||||
u = url.replace('0Bhtml/story01.htm','_pf0Bhtml/story01.htm')
|
||||
else:
|
||||
u = url.replace('.html','_pf.html')
|
||||
return u
|
||||
|
||||
def get_article_url(self, article):
|
||||
return article.link
|
||||
|
||||
|
||||
|
@ -1,37 +1,100 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, NA'
|
||||
'''
|
||||
lifehacker.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Lifehacker(BasicNewsRecipe):
|
||||
title = 'Lifehacker'
|
||||
__author__ = 'Kovid Goyal'
|
||||
description = "Computers make us more productive. Yeah, right. Lifehacker recommends the software downloads and web sites that actually save time. Don't live to geek; geek to live."
|
||||
publisher = 'lifehacker.com'
|
||||
category = 'news, IT, Internet, gadgets, tips and tricks, howto, diy'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
encoding = 'utf-8'
|
||||
use_embedded_content = True
|
||||
language = 'en'
|
||||
masthead_url = 'http://cache.gawkerassets.com/assets/lifehacker.com/img/logo.png'
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
remove_tags = [
|
||||
{'class': 'feedflare'},
|
||||
]
|
||||
|
||||
feeds = [(u'Articles', u'http://feeds.gawker.com/lifehacker/vip?format=xml')]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
return self.adeify_images(soup)
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from datetime import datetime
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
from calibre.utils.magick import Image, PixelWand
|
||||
|
||||
class LifeHacker(BasicNewsRecipe):
|
||||
THUMBALIZR_API = '' # ---->Get your at http://www.thumbalizr.com/ and put here
|
||||
LANGUAGE = 'en'
|
||||
LANGHTM = 'en'
|
||||
language = 'en'
|
||||
ENCODING = 'utf'
|
||||
ENCHTM = 'utf-8'
|
||||
requires_version = (0,7,47)
|
||||
news = True
|
||||
|
||||
title = u'LifeHacker'
|
||||
__author__ = 'Euler Alves'
|
||||
description = u'Tips, tricks, and downloads for getting things done.'
|
||||
publisher = u'lifehacker.com'
|
||||
author = u'Adam Pash & Kevin Purdy & Adam Dachis & Whitson Gordon & Gina Trapani'
|
||||
category = 'news, rss'
|
||||
|
||||
oldest_article = 4
|
||||
max_articles_per_feed = 20
|
||||
summary_length = 1000
|
||||
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
use_embedded_content = True
|
||||
remove_empty_feeds = True
|
||||
timefmt = ' [%d %b %Y (%a)]'
|
||||
|
||||
hoje = datetime.now()
|
||||
pubdate = hoje.strftime('%a, %d %b')
|
||||
cover_url = 'http://api.thumbalizr.com/?api_key='+THUMBALIZR_API+'&url=http://lifehacker.com&width=600&quality=90'
|
||||
cover_margins = (0,0,'white')
|
||||
masthead_url = 'http://cache.gawkerassets.com/assets/lifehacker.com/img/logo.png'
|
||||
|
||||
remove_tags = [
|
||||
{'class': 'feedflare'},
|
||||
dict(name='div',
|
||||
attrs={'class':[
|
||||
'ad_container'
|
||||
,'ad_300x250'
|
||||
,'ad_interstitial'
|
||||
,'share-wrap'
|
||||
,'ad_300x600'
|
||||
,'ad_perma-footer-adsense'
|
||||
,'ad_perma-panorama'
|
||||
,'ad panorama'
|
||||
,'ad_container'
|
||||
]})
|
||||
,dict(name='div',
|
||||
attrs={'id':[
|
||||
'agegate_container'
|
||||
,'agegate_container_rejected'
|
||||
,'sharemenu-wrap'
|
||||
]})
|
||||
]
|
||||
|
||||
feeds = [(u'Articles', u'http://feeds.gawker.com/lifehacker/vip?format=xml')]
|
||||
|
||||
conversion_options = {
|
||||
'title' : title
|
||||
,'comments' : description
|
||||
,'publisher' : publisher
|
||||
,'tags' : category
|
||||
,'language' : LANGUAGE
|
||||
,'linearize_tables': True
|
||||
}
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
if not soup.find(attrs={'http-equiv':'Content-Language'}):
|
||||
meta0 = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.LANGHTM)])
|
||||
soup.head.insert(0,meta0)
|
||||
if not soup.find(attrs={'http-equiv':'Content-Type'}):
|
||||
meta1 = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset="+self.ENCHTM)])
|
||||
soup.head.insert(0,meta1)
|
||||
return soup
|
||||
|
||||
def postprocess_html(self, soup, first):
|
||||
#process all the images. assumes that the new html has the correct path
|
||||
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
|
||||
iurl = tag['src']
|
||||
img = Image()
|
||||
img.open(iurl)
|
||||
width, height = img.size
|
||||
print 'img is: ', iurl, 'width is: ', width, 'height is: ', height
|
||||
if img < 0:
|
||||
raise RuntimeError('Out of memory')
|
||||
pw = PixelWand()
|
||||
if( width > height and width > 590) :
|
||||
print 'Rotate image'
|
||||
img.rotate(pw, -90)
|
||||
img.save(iurl)
|
||||
return soup
|
||||
|
||||
|
||||
|
@ -45,7 +45,6 @@ class Stage3(Command):
|
||||
sub_commands = ['upload_user_manual', 'upload_demo', 'sdist',
|
||||
'upload_to_sourceforge', 'upload_to_google_code',
|
||||
'tag_release', 'upload_to_server',
|
||||
'upload_to_mobileread',
|
||||
]
|
||||
|
||||
class Stage4(Command):
|
||||
|
@ -5,7 +5,8 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, re, cStringIO, base64, httplib, subprocess, hashlib, shutil, time, glob
|
||||
import os, re, cStringIO, base64, httplib, subprocess, hashlib, shutil, time, \
|
||||
glob, stat
|
||||
from subprocess import check_call
|
||||
from tempfile import NamedTemporaryFile, mkdtemp
|
||||
from zipfile import ZipFile
|
||||
@ -344,6 +345,8 @@ class UploadUserManual(Command): # {{{
|
||||
def build_plugin_example(self, path):
|
||||
from calibre import CurrentDir
|
||||
with NamedTemporaryFile(suffix='.zip') as f:
|
||||
os.fchmod(f.fileno(),
|
||||
stat.S_IRUSR|stat.S_IRGRP|stat.S_IROTH|stat.S_IWRITE)
|
||||
with CurrentDir(self.d(path)):
|
||||
with ZipFile(f, 'w') as zf:
|
||||
for x in os.listdir('.'):
|
||||
@ -352,8 +355,8 @@ class UploadUserManual(Command): # {{{
|
||||
for y in os.listdir(x):
|
||||
zf.write(os.path.join(x, y))
|
||||
bname = self.b(path) + '_plugin.zip'
|
||||
subprocess.check_call(['scp', f.name, 'divok:%s/%s'%(DOWNLOADS,
|
||||
bname)])
|
||||
dest = '%s/%s'%(DOWNLOADS, bname)
|
||||
subprocess.check_call(['scp', f.name, 'divok:'+dest])
|
||||
|
||||
def run(self, opts):
|
||||
path = self.j(self.SRC, 'calibre', 'manual', 'plugin_examples')
|
||||
|
@ -2,7 +2,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
__appname__ = 'calibre'
|
||||
__version__ = '0.7.52'
|
||||
__version__ = '0.7.53'
|
||||
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
|
||||
|
||||
import re, importlib
|
||||
|
@ -100,6 +100,12 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
|
||||
try:
|
||||
if encoding.lower().strip() == 'macintosh':
|
||||
encoding = 'mac-roman'
|
||||
if encoding.lower().replace('_', '-').strip() in (
|
||||
'gb2312', 'chinese', 'csiso58gb231280', 'euc-cn', 'euccn',
|
||||
'eucgb2312-cn', 'gb2312-1980', 'gb2312-80', 'iso-ir-58'):
|
||||
# Microsoft Word exports to HTML with encoding incorrectly set to
|
||||
# gb2312 instead of gbk. gbk is a superset of gb2312, anyway.
|
||||
encoding = 'gbk'
|
||||
raw = raw.decode(encoding, 'replace')
|
||||
except LookupError:
|
||||
encoding = 'utf-8'
|
||||
@ -110,4 +116,6 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
|
||||
if resolve_entities:
|
||||
raw = substitute_entites(raw)
|
||||
|
||||
|
||||
|
||||
return raw, encoding
|
||||
|
@ -26,15 +26,15 @@ class OEB2HTML(object):
|
||||
links and images can be retrieved after calling oeb2html to get the mapping
|
||||
of OEB links and images to the new names used in the html returned by oeb2html.
|
||||
Images will always be referenced as if they are in an images directory.
|
||||
|
||||
|
||||
Use get_css to get the CSS classes for the OEB document as a string.
|
||||
'''
|
||||
|
||||
|
||||
def __init__(self, log=None):
|
||||
self.log = default_log if log is None else log
|
||||
self.links = {}
|
||||
self.images = {}
|
||||
|
||||
|
||||
def oeb2html(self, oeb_book, opts):
|
||||
self.log.info('Converting OEB book to HTML...')
|
||||
self.opts = opts
|
||||
@ -52,7 +52,7 @@ class OEB2HTML(object):
|
||||
output.append('\n\n')
|
||||
output.append('</body></html>')
|
||||
return ''.join(output)
|
||||
|
||||
|
||||
def dump_text(self, elem, stylizer, page):
|
||||
raise NotImplementedError
|
||||
|
||||
@ -78,7 +78,7 @@ class OEB2HTML(object):
|
||||
href = '#%s' % self.links[href]
|
||||
attribs['href'] = href
|
||||
return attribs
|
||||
|
||||
|
||||
def rewrite_images(self, tag, attribs, page):
|
||||
if tag == 'img':
|
||||
src = attribs.get('src', None)
|
||||
@ -102,14 +102,14 @@ class OEB2HTML(object):
|
||||
if item.media_type == 'text/css':
|
||||
css = item.data.cssText
|
||||
break
|
||||
return css
|
||||
return css
|
||||
|
||||
|
||||
class OEB2HTMLNoCSSizer(OEB2HTML):
|
||||
'''
|
||||
This will remap a small number of CSS styles to equivalent HTML tags.
|
||||
'''
|
||||
|
||||
|
||||
def dump_text(self, elem, stylizer, page):
|
||||
'''
|
||||
@elem: The element in the etree that we are working on.
|
||||
@ -218,7 +218,7 @@ class OEB2HTMLInlineCSSizer(OEB2HTML):
|
||||
tags = []
|
||||
tag = barename(elem.tag)
|
||||
attribs = elem.attrib
|
||||
|
||||
|
||||
style_a = '%s' % style
|
||||
if tag == 'body':
|
||||
tag = 'div'
|
||||
@ -226,7 +226,7 @@ class OEB2HTMLInlineCSSizer(OEB2HTML):
|
||||
if not style['page-break-before'] == 'always':
|
||||
style_a = 'page-break-before: always;' + ' ' if style_a else '' + style_a
|
||||
tags.append(tag)
|
||||
|
||||
|
||||
# Remove attributes we won't want.
|
||||
if 'class' in attribs:
|
||||
del attribs['class']
|
||||
@ -275,7 +275,7 @@ class OEB2HTMLClassCSSizer(OEB2HTML):
|
||||
inline classes (style tag in the head) or reference an external
|
||||
CSS file called style.css.
|
||||
'''
|
||||
|
||||
|
||||
def mlize_spine(self, oeb_book):
|
||||
output = []
|
||||
for item in oeb_book.spine:
|
||||
@ -283,7 +283,7 @@ class OEB2HTMLClassCSSizer(OEB2HTML):
|
||||
stylizer = Stylizer(item.data, item.href, oeb_book, self.opts)
|
||||
output += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
|
||||
output.append('\n\n')
|
||||
if self.opts.class_style == 'external':
|
||||
if self.opts.htmlz_class_style == 'external':
|
||||
css = u'<link href="style.css" rel="stylesheet" type="text/css" />'
|
||||
else:
|
||||
css = u'<style type="text/css">' + self.get_css(oeb_book) + u'</style>'
|
||||
@ -307,16 +307,16 @@ class OEB2HTMLClassCSSizer(OEB2HTML):
|
||||
|
||||
# Setup our variables.
|
||||
text = ['']
|
||||
style = stylizer.style(elem)
|
||||
#style = stylizer.style(elem)
|
||||
tags = []
|
||||
tag = barename(elem.tag)
|
||||
attribs = elem.attrib
|
||||
|
||||
|
||||
if tag == 'body':
|
||||
tag = 'div'
|
||||
attribs['id'] = self.get_link_id(page.href, '')
|
||||
tags.append(tag)
|
||||
|
||||
|
||||
# Remove attributes we won't want.
|
||||
if 'style' in attribs:
|
||||
del attribs['style']
|
||||
|
@ -12,7 +12,6 @@ from lxml import etree
|
||||
|
||||
from calibre.customize.conversion import OutputFormatPlugin, \
|
||||
OptionRecommendation
|
||||
from calibre.ebooks.html import tostring
|
||||
from calibre.ebooks.oeb.base import OEB_IMAGES
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
from calibre.utils.zipfile import ZipFile
|
||||
@ -22,17 +21,17 @@ class HTMLZOutput(OutputFormatPlugin):
|
||||
name = 'HTMLZ Output'
|
||||
author = 'John Schember'
|
||||
file_type = 'htmlz'
|
||||
|
||||
|
||||
options = set([
|
||||
OptionRecommendation(name='css_type', recommended_value='class',
|
||||
OptionRecommendation(name='htmlz_css_type', recommended_value='class',
|
||||
level=OptionRecommendation.LOW,
|
||||
choices=['class', 'inline', 'tag'],
|
||||
help=_('Specify the handling of CSS. Default is class.\n'
|
||||
'class: Use CSS classes and have elements reference them.\n'
|
||||
'inline: Write the CSS as an inline style attribute.\n'
|
||||
'tag: Turn as many CSS styles into HTML tags.'
|
||||
'tag: Turn as many CSS styles as possible into HTML tags.'
|
||||
)),
|
||||
OptionRecommendation(name='class_style', recommended_value='external',
|
||||
OptionRecommendation(name='htmlz_class_style', recommended_value='external',
|
||||
level=OptionRecommendation.LOW,
|
||||
choices=['external', 'inline'],
|
||||
help=_('How to handle the CSS when using css-type = \'class\'.\n'
|
||||
@ -43,26 +42,26 @@ class HTMLZOutput(OutputFormatPlugin):
|
||||
])
|
||||
|
||||
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
||||
with TemporaryDirectory('_txtz_output') as tdir:
|
||||
# HTML
|
||||
if opts.css_type == 'inline':
|
||||
from calibre.ebooks.htmlz.oeb2html import OEB2HTMLInlineCSSizer as OEB2HTMLizer
|
||||
elif opts.css_type == 'tag':
|
||||
from calibre.ebooks.htmlz.oeb2html import OEB2HTMLNoCSSizer as OEB2HTMLizer
|
||||
else:
|
||||
from calibre.ebooks.htmlz.oeb2html import OEB2HTMLClassCSSizer as OEB2HTMLizer
|
||||
|
||||
# HTML
|
||||
if opts.htmlz_css_type == 'inline':
|
||||
from calibre.ebooks.htmlz.oeb2html import OEB2HTMLInlineCSSizer
|
||||
OEB2HTMLizer = OEB2HTMLInlineCSSizer
|
||||
elif opts.htmlz_css_type == 'tag':
|
||||
from calibre.ebooks.htmlz.oeb2html import OEB2HTMLNoCSSizer
|
||||
OEB2HTMLizer = OEB2HTMLNoCSSizer
|
||||
else:
|
||||
from calibre.ebooks.htmlz.oeb2html import OEB2HTMLClassCSSizer as OEB2HTMLizer
|
||||
|
||||
|
||||
with TemporaryDirectory('_txtz_output') as tdir:
|
||||
htmlizer = OEB2HTMLizer(log)
|
||||
html = htmlizer.oeb2html(oeb_book, opts)
|
||||
|
||||
html = etree.fromstring(html)
|
||||
html = tostring(html, pretty_print=True)
|
||||
|
||||
|
||||
with open(os.path.join(tdir, 'index.html'), 'wb') as tf:
|
||||
tf.write(html)
|
||||
|
||||
|
||||
# CSS
|
||||
if opts.css_type == 'class' and opts.class_style == 'external':
|
||||
if opts.htmlz_css_type == 'class' and opts.htmlz_class_style == 'external':
|
||||
with open(os.path.join(tdir, 'style.css'), 'wb') as tf:
|
||||
tf.write(htmlizer.get_css(oeb_book))
|
||||
|
||||
@ -78,8 +77,8 @@ class HTMLZOutput(OutputFormatPlugin):
|
||||
img.write(item.data)
|
||||
|
||||
# Metadata
|
||||
with open(os.path.join(tdir, 'metadata.opf'), 'wb') as mdataf:
|
||||
with open(os.path.join(tdir, 'metadata.opf'), 'wb') as mdataf:
|
||||
mdataf.write(etree.tostring(oeb_book.metadata.to_opf1()))
|
||||
|
||||
txtz = ZipFile(output_path, 'w')
|
||||
txtz.add_dir(tdir)
|
||||
htmlz = ZipFile(output_path, 'w')
|
||||
htmlz.add_dir(tdir)
|
||||
|
@ -6,7 +6,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, textwrap
|
||||
import os, textwrap, sys
|
||||
from copy import deepcopy
|
||||
|
||||
from lxml import etree
|
||||
@ -413,7 +413,12 @@ class LRFInput(InputFormatPlugin):
|
||||
('calibre', 'image-block'): image_block,
|
||||
}
|
||||
transform = etree.XSLT(styledoc, extensions=extensions)
|
||||
result = transform(doc)
|
||||
try:
|
||||
result = transform(doc)
|
||||
except RuntimeError:
|
||||
sys.setrecursionlimit(5000)
|
||||
result = transform(doc)
|
||||
|
||||
with open('content.opf', 'wb') as f:
|
||||
f.write(result)
|
||||
styles.write()
|
||||
|
@ -198,8 +198,10 @@ class Metadata(object):
|
||||
return copy.deepcopy(ans)
|
||||
|
||||
def _clean_identifier(self, typ, val):
|
||||
typ = icu_lower(typ).strip().replace(':', '').replace(',', '')
|
||||
val = val.strip().replace(',', '|').replace(':', '|')
|
||||
if typ:
|
||||
typ = icu_lower(typ).strip().replace(':', '').replace(',', '')
|
||||
if val:
|
||||
val = val.strip().replace(',', '|').replace(':', '|')
|
||||
return typ, val
|
||||
|
||||
def set_identifiers(self, identifiers):
|
||||
|
@ -503,7 +503,7 @@ if __name__ == '__main__': # tests {{{
|
||||
( # This isbn not on amazon
|
||||
{'identifiers':{'isbn': '8324616489'}, 'title':'Learning Python',
|
||||
'authors':['Lutz']},
|
||||
[title_test('Learning Python: Powerful Object-Oriented Programming',
|
||||
[title_test('Learning Python, 3rd Edition',
|
||||
exact=True), authors_test(['Mark Lutz'])
|
||||
]
|
||||
|
||||
|
@ -15,6 +15,7 @@ from calibre.customize import Plugin
|
||||
from calibre.utils.logging import ThreadSafeLog, FileStream
|
||||
from calibre.utils.config import JSONConfig
|
||||
from calibre.utils.titlecase import titlecase
|
||||
from calibre.ebooks.metadata import check_isbn
|
||||
|
||||
msprefs = JSONConfig('metadata_sources.json')
|
||||
|
||||
@ -236,6 +237,7 @@ class Source(Plugin):
|
||||
mi.title = fixcase(mi.title)
|
||||
mi.authors = list(map(fixcase, mi.authors))
|
||||
mi.tags = list(map(fixcase, mi.tags))
|
||||
mi.isbn = check_isbn(mi.isbn)
|
||||
|
||||
# }}}
|
||||
|
||||
|
@ -14,6 +14,7 @@ from io import BytesIO
|
||||
|
||||
from calibre.customize.ui import metadata_plugins
|
||||
from calibre.ebooks.metadata.sources.base import create_log
|
||||
from calibre.ebooks.metadata.xisbn import xisbn
|
||||
|
||||
# How long to wait for more results after first result is found
|
||||
WAIT_AFTER_FIRST_RESULT = 30 # seconds
|
||||
@ -120,7 +121,41 @@ def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
|
||||
log('We have %d merged results, merging took: %.2f seconds' %
|
||||
(len(merged_results), time.time() - start_time))
|
||||
|
||||
class ISBNMerge(object):
    '''
    Group identify results into pools of related ISBNs.

    ``self.pools`` maps a frozenset of ISBNs (the pool key) to a
    ``(min_year, results)`` tuple, where ``results`` is the list of results
    that have been added to that pool.
    '''

    def __init__(self):
        self.pools = {}

    def isbn_in_pool(self, isbn):
        '''
        Return the key (a frozenset of ISBNs) of the pool containing `isbn`,
        or None if `isbn` is empty or does not belong to any known pool.
        '''
        if isbn:
            for p in self.pools:
                if isbn in p:
                    return p
        return None

    def pool_has_result_from_same_source(self, pool, result):
        '''
        Return True if the pool identified by the key `pool` already holds a
        result whose ``identify_plugin`` is the same object as that of
        `result`.
        '''
        results = self.pools[pool][1]
        for r in results:
            if r.identify_plugin is result.identify_plugin:
                return True
        return False

    def add_result(self, result, isbn):
        '''
        Add `result` to the pool that `isbn` belongs to, creating the pool
        first if necessary. At most one result per source plugin is kept in
        each pool.
        '''
        pool = self.isbn_in_pool(isbn)
        if pool is None:
            isbns, min_year = xisbn.get_isbn_pool(isbn)
            if not isbns:
                # No related ISBNs are known: the ISBN forms its own pool
                isbns = frozenset([isbn])
            self.pools[isbns] = (min_year, [])
            # Always refer to a pool by its key, so that the lookups below
            # work the same way for new and pre-existing pools
            pool = isbns

        if not self.pool_has_result_from_same_source(pool, result):
            self.pools[pool][1].append(result)
|
||||
|
||||
def merge_identify_results(result_map, log):
|
||||
pass
|
||||
for plugin, results in result_map.iteritems():
|
||||
for result in results:
|
||||
isbn = result.isbn
|
||||
if isbn:
|
||||
isbns, min_year = xisbn.get_isbn_pool(isbn)
|
||||
|
||||
|
||||
|
@ -71,6 +71,20 @@ class xISBN(object):
|
||||
ans.add(i)
|
||||
return ans
|
||||
|
||||
def get_isbn_pool(self, isbn):
    '''
    Return ``(isbns, min_year)`` for `isbn`.

    ``isbns`` is a frozenset of the ``isbn`` values of all records returned
    by ``self.get_data(isbn)``. ``min_year`` is the smallest ``year`` found
    in those records, or None if no record has a usable year.
    '''
    records = self.get_data(isbn)
    pool = frozenset([rec.get('isbn') for rec in records if 'isbn' in rec])
    earliest = None
    for rec in records:
        try:
            published = int(rec['year'])
        except:
            # Missing or unparseable year, ignore this record
            continue
        # Years of 100000 or more are never accepted
        if published < 100000 and (earliest is None or published < earliest):
            earliest = published
    return pool, earliest
|
||||
|
||||
|
||||
xisbn = xISBN()
|
||||
|
@ -20,7 +20,8 @@ class RemoveAdobeMargins(object):
|
||||
self.oeb, self.opts, self.log = oeb, opts, log
|
||||
|
||||
for item in self.oeb.manifest:
|
||||
if item.media_type == 'application/vnd.adobe-page-template+xml':
|
||||
if item.media_type in ('application/vnd.adobe-page-template+xml',
|
||||
'application/vnd.adobe.page-template+xml'):
|
||||
self.log('Removing page margins specified in the'
|
||||
' Adobe page template')
|
||||
for elem in item.data.xpath(
|
||||
@ -35,7 +36,7 @@ class RemoveFakeMargins(object):
|
||||
|
||||
'''
|
||||
Remove left and right margins from paragraph/divs if the same margin is specified
|
||||
on almost all the elements of at that level.
|
||||
on almost all the elements at that level.
|
||||
|
||||
Must be called only after CSS flattening
|
||||
'''
|
||||
|
@ -34,7 +34,7 @@ class PDFInput(InputFormatPlugin):
|
||||
from calibre.ebooks.pdf.reflow import PDFDocument
|
||||
if pdfreflow_err:
|
||||
raise RuntimeError('Failed to load pdfreflow: ' + pdfreflow_err)
|
||||
pdfreflow.reflow(stream.read())
|
||||
pdfreflow.reflow(stream.read(), 1, -1)
|
||||
xml = open('index.xml', 'rb').read()
|
||||
PDFDocument(xml, self.opts, self.log)
|
||||
return os.path.join(os.getcwd(), 'metadata.opf')
|
||||
|
@ -24,13 +24,14 @@ extern "C" {
|
||||
pdfreflow_reflow(PyObject *self, PyObject *args) {
|
||||
char *pdfdata;
|
||||
Py_ssize_t size;
|
||||
int first_page, last_page, num = 0;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "s#", &pdfdata, &size))
|
||||
if (!PyArg_ParseTuple(args, "s#ii", &pdfdata, &size, &first_page, &last_page))
|
||||
return NULL;
|
||||
|
||||
try {
|
||||
Reflow reflow(pdfdata, static_cast<std::ifstream::pos_type>(size));
|
||||
reflow.render();
|
||||
num = reflow.render(first_page, last_page);
|
||||
} catch (std::exception &e) {
|
||||
PyErr_SetString(PyExc_RuntimeError, e.what()); return NULL;
|
||||
} catch (...) {
|
||||
@ -38,7 +39,7 @@ extern "C" {
|
||||
"Unknown exception raised while rendering PDF"); return NULL;
|
||||
}
|
||||
|
||||
Py_RETURN_NONE;
|
||||
return Py_BuildValue("i", num);
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
@ -166,8 +167,8 @@ extern "C" {
|
||||
static
|
||||
PyMethodDef pdfreflow_methods[] = {
|
||||
{"reflow", pdfreflow_reflow, METH_VARARGS,
|
||||
"reflow(pdf_data)\n\n"
|
||||
"Reflow the specified PDF."
|
||||
"reflow(pdf_data, first_page, last_page)\n\n"
|
||||
"Reflow the specified PDF. Returns the number of pages in the PDF. If last_page is -1 renders to end of document."
|
||||
},
|
||||
{"get_metadata", pdfreflow_get_metadata, METH_VARARGS,
|
||||
"get_metadata(pdf_data, cover)\n\n"
|
||||
|
@ -712,16 +712,18 @@ Reflow::Reflow(char *pdfdata, size_t sz) :
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
Reflow::render() {
|
||||
int
|
||||
Reflow::render(int first_page, int last_page) {
|
||||
|
||||
if (!this->doc->okToCopy())
|
||||
cout << "Warning, this document has the copy protection flag set, ignoring." << endl;
|
||||
|
||||
globalParams->setTextEncoding(encoding);
|
||||
|
||||
int first_page = 1;
|
||||
int last_page = doc->getNumPages();
|
||||
int doc_pages = doc->getNumPages();
|
||||
if (last_page < 1 || last_page > doc_pages) last_page = doc_pages;
|
||||
if (first_page < 1) first_page = 1;
|
||||
if (first_page > last_page) first_page = last_page;
|
||||
|
||||
XMLOutputDev *xml_out = new XMLOutputDev(this->doc);
|
||||
doc->displayPages(xml_out, first_page, last_page,
|
||||
@ -733,9 +735,12 @@ Reflow::render() {
|
||||
false //Printing
|
||||
);
|
||||
|
||||
this->dump_outline();
|
||||
if (last_page - first_page == doc_pages - 1)
|
||||
this->dump_outline();
|
||||
|
||||
delete xml_out;
|
||||
|
||||
return doc_pages;
|
||||
}
|
||||
|
||||
void Reflow::dump_outline() {
|
||||
|
@ -66,7 +66,7 @@ class Reflow {
|
||||
~Reflow();
|
||||
|
||||
/* Convert the PDF to XML. All files are output to the current directory */
|
||||
void render();
|
||||
int render(int first_page, int last_page);
|
||||
|
||||
/* Get the PDF Info Dictionary */
|
||||
map<string, string> get_info();
|
||||
|
@ -147,7 +147,8 @@ class ConvertAction(InterfaceAction):
|
||||
def book_auto_converted_mail(self, job):
|
||||
temp_files, fmt, book_id, delete_from_library, to, fmts, subject = self.conversion_jobs[job]
|
||||
self.book_converted(job)
|
||||
self.gui.send_by_mail(to, fmts, delete_from_library, subject, specific_format=fmt, send_ids=[book_id], do_auto_convert=False)
|
||||
self.gui.send_by_mail(to, fmts, delete_from_library, subject=subject,
|
||||
specific_format=fmt, send_ids=[book_id], do_auto_convert=False)
|
||||
|
||||
def book_auto_converted_news(self, job):
|
||||
temp_files, fmt, book_id = self.conversion_jobs[job]
|
||||
|
@ -270,6 +270,8 @@ class BookInfo(QWebView):
|
||||
<style type="text/css">
|
||||
body, td {background-color: transparent; font-size: %dpx; color: %s }
|
||||
a { text-decoration: none; color: blue }
|
||||
div.description { margin-top: 0; padding-top: 0; text-indent: 0 }
|
||||
table { margin-bottom: 0; padding-bottom: 0; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
@ -278,9 +280,10 @@ class BookInfo(QWebView):
|
||||
<html>
|
||||
'''%(f, c)
|
||||
if self.vertical:
|
||||
extra = ''
|
||||
if comments:
|
||||
rows += u'<tr><td colspan="2">%s</td></tr>'%comments
|
||||
self.setHtml(templ%(u'<table>%s</table>'%rows))
|
||||
extra = u'<div class="description">%s</div>'%comments
|
||||
self.setHtml(templ%(u'<table>%s</table>%s'%(rows, extra)))
|
||||
else:
|
||||
left_pane = u'<table>%s</table>'%rows
|
||||
right_pane = u'<div>%s</div>'%comments
|
||||
|
@ -7,7 +7,6 @@ __docformat__ = 'restructuredtext en'
|
||||
|
||||
import shutil, functools, re, os, traceback
|
||||
from contextlib import closing
|
||||
from operator import attrgetter
|
||||
|
||||
from PyQt4.Qt import QAbstractTableModel, Qt, pyqtSignal, QIcon, QImage, \
|
||||
QModelIndex, QVariant, QDate, QColor
|
||||
@ -18,7 +17,7 @@ from calibre.ebooks.metadata import fmt_sidx, authors_to_string, string_to_autho
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
from calibre.utils.config import tweaks, prefs
|
||||
from calibre.utils.date import dt_factory, qt_to_dt, isoformat
|
||||
from calibre.utils.icu import sort_key, strcmp as icu_strcmp
|
||||
from calibre.utils.icu import sort_key
|
||||
from calibre.ebooks.metadata.meta import set_metadata as _set_metadata
|
||||
from calibre.utils.search_query_parser import SearchQueryParser
|
||||
from calibre.library.caches import _match, CONTAINS_MATCH, EQUALS_MATCH, \
|
||||
@ -984,6 +983,21 @@ class OnDeviceSearch(SearchQueryParser): # {{{
|
||||
|
||||
# }}}
|
||||
|
||||
class DeviceDBSortKeyGen(object): # {{{
    '''
    Callable that computes a sort key for an index into a device database.

    Calling an instance with `x` looks up ``db[x]``, reads the attribute
    named `attr` from it and passes that value through `keyfunc`. If any of
    these steps fails, the key is None.
    '''

    def __init__(self, attr, keyfunc, db):
        self.attr, self.db, self.keyfunc = attr, db, keyfunc

    def __call__(self, x):
        try:
            return self.keyfunc(getattr(self.db[x], self.attr))
        except:
            # Best effort: missing entries/attributes and unconvertible
            # values all sort with the key None
            return None
|
||||
# }}}
|
||||
|
||||
class DeviceBooksModel(BooksModel): # {{{
|
||||
|
||||
booklist_dirtied = pyqtSignal()
|
||||
@ -1089,59 +1103,40 @@ class DeviceBooksModel(BooksModel): # {{{
|
||||
|
||||
def sort(self, col, order, reset=True):
|
||||
descending = order != Qt.AscendingOrder
|
||||
def strcmp(attr):
|
||||
ag = attrgetter(attr)
|
||||
def _strcmp(x, y):
|
||||
x = ag(self.db[x])
|
||||
y = ag(self.db[y])
|
||||
if x == None:
|
||||
x = ''
|
||||
if y == None:
|
||||
y = ''
|
||||
return icu_strcmp(x.strip(), y.strip())
|
||||
return _strcmp
|
||||
def datecmp(x, y):
|
||||
x = self.db[x].datetime
|
||||
y = self.db[y].datetime
|
||||
return cmp(dt_factory(x, assume_utc=True), dt_factory(y,
|
||||
assume_utc=True))
|
||||
def sizecmp(x, y):
|
||||
x, y = int(self.db[x].size), int(self.db[y].size)
|
||||
return cmp(x, y)
|
||||
def tagscmp(x, y):
|
||||
x = ','.join(sorted(getattr(self.db[x], 'device_collections', []),key=sort_key))
|
||||
y = ','.join(sorted(getattr(self.db[y], 'device_collections', []),key=sort_key))
|
||||
return cmp(x, y)
|
||||
def libcmp(x, y):
|
||||
x, y = self.db[x].in_library, self.db[y].in_library
|
||||
return cmp(x, y)
|
||||
def authorcmp(x, y):
|
||||
ax = getattr(self.db[x], 'author_sort', None)
|
||||
ay = getattr(self.db[y], 'author_sort', None)
|
||||
if ax and ay:
|
||||
x = ax
|
||||
y = ay
|
||||
else:
|
||||
x, y = authors_to_string(self.db[x].authors), \
|
||||
authors_to_string(self.db[y].authors)
|
||||
return cmp(x, y)
|
||||
cname = self.column_map[col]
|
||||
fcmp = {
|
||||
'title': strcmp('title_sorter'),
|
||||
'authors' : authorcmp,
|
||||
'size' : sizecmp,
|
||||
'timestamp': datecmp,
|
||||
'collections': tagscmp,
|
||||
'inlibrary': libcmp,
|
||||
def author_key(x):
|
||||
try:
|
||||
ax = self.db[x].author_sort
|
||||
if not ax:
|
||||
raise Exception('')
|
||||
except:
|
||||
try:
|
||||
ax = authors_to_string(self.db[x].authors)
|
||||
except:
|
||||
ax = ''
|
||||
return ax
|
||||
|
||||
keygen = {
|
||||
'title': ('title_sorter', lambda x: sort_key(x) if x else ''),
|
||||
'authors' : author_key,
|
||||
'size' : ('size', int),
|
||||
'timestamp': ('datetime', functools.partial(dt_factory, assume_utc=True)),
|
||||
'collections': ('device_collections', lambda x:sorted(x,
|
||||
key=sort_key)),
|
||||
'inlibrary': ('in_library', lambda x: x),
|
||||
}[cname]
|
||||
self.map.sort(cmp=fcmp, reverse=descending)
|
||||
keygen = keygen if callable(keygen) else DeviceDBSortKeyGen(
|
||||
keygen[0], keygen[1], self.db)
|
||||
self.map.sort(key=keygen, reverse=descending)
|
||||
if len(self.map) == len(self.db):
|
||||
self.sorted_map = list(self.map)
|
||||
else:
|
||||
self.sorted_map = list(range(len(self.db)))
|
||||
self.sorted_map.sort(cmp=fcmp, reverse=descending)
|
||||
self.sorted_map.sort(key=keygen, reverse=descending)
|
||||
self.sorted_on = (self.column_map[col], order)
|
||||
self.sort_history.insert(0, self.sorted_on)
|
||||
if hasattr(keygen, 'db'):
|
||||
keygen.db = None
|
||||
if reset:
|
||||
self.reset()
|
||||
|
||||
|
@ -5,6 +5,8 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import textwrap
|
||||
|
||||
from PyQt4.Qt import QAbstractTableModel, QVariant, QFont, Qt
|
||||
|
||||
|
||||
@ -26,15 +28,15 @@ class EmailAccounts(QAbstractTableModel): # {{{
|
||||
self.default_font = QFont()
|
||||
self.default_font.setBold(True)
|
||||
self.default_font = QVariant(self.default_font)
|
||||
self.tooltips =[NONE] + map(QVariant,
|
||||
self.tooltips =[NONE] + list(map(QVariant, map(textwrap.fill,
|
||||
[_('Formats to email. The first matching format will be sent.'),
|
||||
_('Subject of the email to use when sending. When left blank '
|
||||
'the title will be used for the subject. Also, the same '
|
||||
'the title will be used for the subject. Also, the same '
|
||||
'templates used for "Save to disk" such as {title} and '
|
||||
'{author_sort} can be used here.'),
|
||||
'<p>'+_('If checked, downloaded news will be automatically '
|
||||
'mailed <br>to this email address '
|
||||
'(provided it is in one of the listed formats).')])
|
||||
'(provided it is in one of the listed formats).')])))
|
||||
|
||||
def rowCount(self, *args):
|
||||
return len(self.account_order)
|
||||
|
@ -109,7 +109,7 @@ class SearchBox2(QComboBox): # {{{
|
||||
def normalize_state(self):
|
||||
self.setToolTip(self.tool_tip_text)
|
||||
self.line_edit.setStyleSheet(
|
||||
'QLineEdit{color:black;background-color:%s;}' % self.normal_background)
|
||||
'QLineEdit{color:none;background-color:%s;}' % self.normal_background)
|
||||
|
||||
def text(self):
|
||||
return self.currentText()
|
||||
|
@ -136,17 +136,17 @@ class PostInstall:
|
||||
self.icon_resources = []
|
||||
self.menu_resources = []
|
||||
self.mime_resources = []
|
||||
if islinux:
|
||||
if islinux or isfreebsd:
|
||||
self.setup_completion()
|
||||
self.install_man_pages()
|
||||
if islinux:
|
||||
if islinux or isfreebsd:
|
||||
self.setup_desktop_integration()
|
||||
self.create_uninstaller()
|
||||
|
||||
from calibre.utils.config import config_dir
|
||||
if os.path.exists(config_dir):
|
||||
os.chdir(config_dir)
|
||||
if islinux:
|
||||
if islinux or isfreebsd:
|
||||
for f in os.listdir('.'):
|
||||
if os.stat(f).st_uid == 0:
|
||||
os.rmdir(f) if os.path.isdir(f) else os.unlink(f)
|
||||
|
@ -40,3 +40,84 @@ Sections
|
||||
glossary
|
||||
|
||||
|
||||
The main |app| user interface
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
gui
|
||||
|
||||
Adding your favorite news website to |app|
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
news
|
||||
|
||||
The |app| e-book viewer
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
viewer
|
||||
|
||||
Customizing |app|'s e-book conversion
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
conversion
|
||||
|
||||
Editing e-book metadata
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
metadata
|
||||
|
||||
Frequently Asked Questions
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
faq
|
||||
|
||||
Tutorials
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
tutorials
|
||||
|
||||
Customizing |app|
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
customize
|
||||
|
||||
The Command Line Interface
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
cli/cli-index
|
||||
|
||||
Setting up a |app| development environment
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
develop
|
||||
|
||||
|
||||
|
@ -263,20 +263,18 @@ Tips for developing new recipes
|
||||
|
||||
The best way to develop new recipes is to use the command line interface. Create the recipe using your favorite Python editor and save it to a file, say :file:`myrecipe.recipe`. The `.recipe` extension is required. You can download content using this recipe with the command::
|
||||
|
||||
ebook-convert myrecipe.recipe output_dir --test -vv
|
||||
ebook-convert myrecipe.recipe .epub --test -vv --debug-pipeline debug
|
||||
|
||||
The :command:`ebook-convert` will download all the webpages and save them to the directory :file:`output_dir`, creating it if necessary. The :option:`-vv` makes ebook-convert spit out a lot of information about what it is doing. The :option:`--test` makes it download only a couple of articles from at most two feeds.
|
||||
The command :command:`ebook-convert` will download all the webpages and save them to the EPUB file :file:`myrecipe.epub`. The :option:`-vv` makes ebook-convert spit out a lot of information about what it is doing. The :option:`--test` makes it download only a couple of articles from at most two feeds. In addition, ebook-convert will put the downloaded HTML into the ``debug/input`` directory, where ``debug`` is the directory you specified in the :option:`--debug-pipeline` option.
|
||||
|
||||
Once the download is complete, you can look at the downloaded :term:`HTML` by opening the file :file:`index.html` in a browser. Once you're satisfied that the download and preprocessing is happening correctly, you can generate ebooks in different formats as shown below::
|
||||
Once the download is complete, you can look at the downloaded :term:`HTML` by opening the file :file:`debug/input/index.html` in a browser. Once you're satisfied that the download and preprocessing are happening correctly, you can generate ebooks in different formats as shown below::
|
||||
|
||||
ebook-convert myrecipe.recipe myrecipe.epub
|
||||
ebook-convert myrecipe.recipe myrecipe.mobi
|
||||
...
|
||||
|
||||
|
||||
If you're satisfied with your recipe, and you feel there is enough demand to justify its inclusion into the set of built-in recipes, add a comment to the ticket http://bugs.calibre-ebook.com/ticket/405
|
||||
|
||||
Alternatively, you could just post your recipe in the calibre forum at http://www.mobileread.com/forums/forumdisplay.php?f=166 to share it with other calibre users.
|
||||
If you're satisfied with your recipe, and you feel there is enough demand to justify its inclusion into the set of built-in recipes, post your recipe in the `calibre recipes forum <http://www.mobileread.com/forums/forumdisplay.php?f=228>`_ to share it with other calibre users.
|
||||
|
||||
|
||||
.. seealso::
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user