This commit is contained in:
Sengian 2012-05-12 12:16:42 +02:00
commit 16a9c42edc
126 changed files with 47367 additions and 36095 deletions

View File

@ -2,6 +2,7 @@
.check-cache.pickle
src/calibre/plugins
resources/images.qrc
resources/compiled_coffeescript.zip
src/calibre/ebooks/oeb/display/test/*.js
src/calibre/manual/.build/
src/calibre/manual/cli/
@ -16,7 +17,6 @@ resources/ebook-convert-complete.pickle
resources/builtin_recipes.xml
resources/builtin_recipes.zip
resources/template-functions.json
resources/display/*.js
setup/installer/windows/calibre/build.log
src/calibre/translations/.errors
src/cssutils/.svn/

View File

@ -19,6 +19,67 @@
# new recipes:
# - title:
- version: 0.8.51
date: 2012-05-11
new features:
- title: "When switching libraries preserve the position and selected books if you switch back to a previously opened library."
tickets: [994514]
- title: "Conversion pipeline: Filter out the useless font-face rules inserted by Microsoft Word for every font on the system"
- title: "Driver for Motorola XT875 and Pandigital SuperNova"
tickets: [996890]
- title: "Add a colour swatch to the dialog for creating column coloring rules, to ease selection of colors"
tickets: [994811]
- title: "EPUB Output: Consolidate internal CSS generated by calibre into external stylesheets for ease of editing the EPUB"
- title: "List EPUB and MOBI at the top of the dropdown list of formats to convert to, as they are the most common choices"
tickets: [994838]
bug fixes:
- title: "E-book viewer: Improve performance when switching between normal and fullscreen views."
tickets: [996102]
- title: "Edit metadata dialog: When running download metadata do not insert duplicate tags into the list of tags"
- title: "KF8 Input: Do not error out if the file has a few invalidly encoded bytes."
tickets: [997034]
- title: "Fix download of news in AZW3 format not working"
tickets: [996439]
- title: "Pocketbook driver: Update for new PB 611 firmware."
tickets: [903079]
- title: "ebook-convert: Error out if the user provides extra command line args instead of silently ignoring them"
tickets: [994939]
- title: "EPUB Output: Do not self close any container tags to prevent artifacts when EPUBs are viewed using buggy browser based viewers."
tickets: [994861]
- title: "Fix regression in 0.8.50 that broke the conversion of HTML files that contained non-ascii font-face declarations, typically produced by Microsoft Word"
improved recipes:
- Mainichi news
- derStandard
- Endgadget Japan
new recipes:
- title: Mainichi English
author: Hiroshi Miura
- title: The Grid TO
author: Yusuf W
- title: National Geographic (Italy)
author: faber1971
- title: Rebelion
author: Marc Busque
- version: 0.8.50
date: 2012-05-04

View File

@ -7,10 +7,11 @@ __copyright__ = '2009, Gerhard Aigner <gerhard.aigner at gmail.com>'
''' http://www.derstandard.at - Austrian Newspaper '''
import re
from calibre.web.feeds.news import BasicNewsRecipe
from time import strftime
class DerStandardRecipe(BasicNewsRecipe):
title = u'derStandard'
__author__ = 'Gerhard Aigner and Sujata Raman and Marcel Jira'
__author__ = 'Gerhard Aigner and Sujata Raman and Marcel Jira and Peter Reschenhofer'
description = u'Nachrichten aus Österreich'
publisher ='derStandard.at'
category = 'news, politics, nachrichten, Austria'
@ -88,3 +89,41 @@ class DerStandardRecipe(BasicNewsRecipe):
for t in soup.findAll(['ul', 'li']):
t.name = 'div'
return soup
def get_cover_url(self):
highResolution = True
date = strftime("%Y/%Y%m%d")
# it is also possible for the past
#date = '2012/20120503'
urlP1 = 'http://epaper.derstandarddigital.at/'
urlP2 = 'data_ep/STAN/' + date
urlP3 = '/V.B1/'
urlP4 = 'paper.htm'
urlHTML = urlP1 + urlP2 + urlP3 + urlP4
br = self.clone_browser(self.browser)
htmlF = br.open_novisit(urlHTML)
htmlC = htmlF.read()
# URL EXAMPLE: data_ep/STAN/2012/20120504/V.B1/pages/A3B6798F-2751-4D8D-A103-C5EF22F7ACBE.htm
# consists of part2 + part3 + 'pages/' + code
# 'pages/' has length 6, code has lenght 36
index = htmlC.find(urlP2) + len(urlP2 + urlP3) + 6
code = htmlC[index:index + 36]
# URL EXAMPLE HIGH RESOLUTION: http://epaper.derstandarddigital.at/data_ep/STAN/2012/20120504/pagejpg/A3B6798F-2751-4D8D-A103-C5EF22F7ACBE_b.png
# URL EXAMPLE LOW RESOLUTION: http://epaper.derstandarddigital.at/data_ep/STAN/2012/20120504/pagejpg/2AB52F71-11C1-4859-9114-CDCD79BEFDCB.png
urlPic = urlP1 + urlP2 + '/pagejpg/' + code
if highResolution:
urlPic = urlPic + '_b'
urlPic = urlPic + '.png'
return urlPic

View File

@ -17,7 +17,25 @@ class EndgadgetJapan(BasicNewsRecipe):
no_stylesheets = True
language = 'ja'
encoding = 'utf-8'
feeds = [(u'engadget', u'http://japanese.engadget.com/rss.xml')]
index = 'http://japanese.engadget.com/'
remove_javascript = True
remove_tags_before = dict(name="h1", attrs={'class':"post_title"})
remove_tags_after = dict(name='div', attrs={'class':'post_body'})
def parse_index(self):
feeds = []
newsarticles = []
soup = self.index_to_soup(self.index)
for topstories in soup.findAll('div',attrs={'class':'post_content'}):
itt = topstories.find('h4')
itema = itt.find('a',href=True)
newsarticles.append({
'title' :itema.string
,'date' :''
,'url' :itema['href']
,'description':''
})
feeds.append(('Latest Posts', newsarticles))
return feeds
remove_tags_before = dict(name="div", attrs={'id':"content_wrap"})
remove_tags_after = dict(name='h3', attrs={'id':'addcomments'})

82
recipes/folha.recipe Normal file
View File

@ -0,0 +1,82 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
'''
www.folha.uol.com.br
'''
import urllib
from calibre.web.feeds.news import BasicNewsRecipe
class Folha_de_s_paulo(BasicNewsRecipe):
title = u'Folha de São Paulo - portal'
__author__ = 'Darko Miletic'
description = 'Um Jornala a servicao do Brasil'
publisher = 'Folhapress'
category = 'news, politics, Brasil'
oldest_article = 2
max_articles_per_feed = 200
no_stylesheets = True
encoding = 'cp1252'
use_embedded_content = False
language = 'pt_BR'
remove_empty_feeds = True
publication_type = 'newspaper'
masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif'
extra_css = """
body{font-family: Arial,Helvetica,sans-serif }
img{margin-bottom: 0.4em; display:block}
"""
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
remove_tags = [dict(name=['meta','link','base','iframe','embed','object'])]
keep_only_tags = [dict(attrs={'id':'articleNew'})]
feeds = [
(u'Poder' , u'http://feeds.folha.uol.com.br/poder/rss091.xml' )
,(u'Mundo' , u'http://feeds.folha.uol.com.br/mundo/rss091.xml' )
,(u'Mercado' , u'http://feeds.folha.uol.com.br/mercado/rss091.xml' )
,(u'Cotidiano' , u'http://feeds.folha.uol.com.br/cotidiano/rss091.xml' )
,(u'Esporte' , u'http://feeds.folha.uol.com.br/esporte/rss091.xml' )
,(u'Ilustrada' , u'http://feeds.folha.uol.com.br/ilustrada/rss091.xml' )
,(u'F5' , u'http://feeds.folha.uol.com.br/f5/rss091.xml' )
,(u'Ciência' , u'http://feeds.folha.uol.com.br/ciencia/rss091.xml' )
,(u'Tec' , u'http://feeds.folha.uol.com.br/tec/rss091.xml' )
,(u'Ambiente' , u'http://feeds.folha.uol.com.br/ambiente/rss091.xml' )
,(u'Bichos' , u'http://feeds.folha.uol.com.br/bichos/rss091.xml' )
,(u'Celebridades' , u'http://feeds.folha.uol.com.br/celebridades/rss091.xml' )
,(u'Comida' , u'http://feeds.folha.uol.com.br/comida/rss091.xml' )
,(u'Equilibrio' , u'http://feeds.folha.uol.com.br/equilibrioesaude/rss091.xml' )
,(u'Folhateen' , u'http://feeds.folha.uol.com.br/folhateen/rss091.xml' )
,(u'Folhinha' , u'http://feeds.folha.uol.com.br/folhinha/rss091.xml' )
,(u'Ilustrissima' , u'http://feeds.folha.uol.com.br/ilustrissima/rss091.xml' )
,(u'Saber' , u'http://feeds.folha.uol.com.br/saber/rss091.xml' )
,(u'Turismo' , u'http://feeds.folha.uol.com.br/turismo/rss091.xml' )
,(u'Panel do Leitor', u'http://feeds.folha.uol.com.br/folha/paineldoleitor/rss091.xml')
,(u'Publifolha' , u'http://feeds.folha.uol.com.br/folha/publifolha/rss091.xml' )
,(u'Em cima da hora', u'http://feeds.folha.uol.com.br/emcimadahora/rss091.xml' )
]
def get_article_url(self, article):
url = BasicNewsRecipe.get_article_url(self, article)
curl = url.partition('/*')[2]
return curl
def print_version(self, url):
return 'http://tools.folha.com.br/print?site=emcimadahora&url=' + urllib.quote_plus(url)
def get_cover_url(self):
soup = self.index_to_soup('http://www.folha.uol.com.br/')
cont = soup.find('div', attrs={'id':'newspaper'})
if cont:
ai = cont.find('a', href='http://www1.folha.uol.com.br/fsp/')
if ai:
return ai.img['src']
return None

View File

@ -8,7 +8,7 @@ from urllib2 import Request, urlopen, URLError
class FolhaOnline(BasicNewsRecipe):
THUMBALIZR_API = '' # ---->Get your at http://www.thumbalizr.com/ and put here
LANGUAGE = 'pt_br'
language = 'pt'
language = 'pt_BR'
LANGHTM = 'pt-br'
ENCODING = 'cp1252'
ENCHTM = 'iso-8859-1'

View File

@ -14,7 +14,7 @@ class FSP(BasicNewsRecipe):
HOMEPAGE = 'http://www1.folha.uol.com.br/fsp/'
masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif'
language = 'pt'
language = 'pt_BR'
no_stylesheets = True
max_articles_per_feed = 40
remove_javascript = True

79
recipes/grid_to.recipe Normal file
View File

@ -0,0 +1,79 @@
from calibre.web.feeds.news import BasicNewsRecipe
class TheGridTO(BasicNewsRecipe):
#: The title to use for the ebook
title = u'The Grid TO'
#: A couple of lines that describe the content this recipe downloads.
#: This will be used primarily in a GUI that presents a list of recipes.
description = (u'The Grid is a weekly city magazine and daily website providing a fresh, '
'accessible voice for Toronto.')
#: The author of this recipe
__author__ = u'Yusuf W'
#: The language that the news is in. Must be an ISO-639 code either
#: two or three characters long
language = 'en_CA'
#: Publication type
#: Set to newspaper, magazine or blog
publication_type = 'newspaper'
#: Convenient flag to disable loading of stylesheets for websites
#: that have overly complex stylesheets unsuitable for conversion
#: to ebooks formats
#: If True stylesheets are not downloaded and processed
no_stylesheets = True
#: List of tags to be removed. Specified tags are removed from downloaded HTML.
remove_tags_before = dict(name='div', id='content')
remove_tags_after = dict(name='div', id='content')
remove_tags = [
dict(name='div', attrs={'class':'right-content pull-right'}),
dict(name='div', attrs={'class':'right-content'}),
dict(name='div', attrs={'class':'ftr-line'}),
dict(name='div', attrs={'class':'pull-right'}),
dict(name='div', id='comments'),
dict(name='div', id='tags')
]
#: Keep only the specified tags and their children.
#keep_only_tags = [dict(name='div', id='content')]
cover_margins = (0, 0, '#ffffff')
INDEX = 'http://www.thegridto.com'
def get_cover_url(self):
soup = self.index_to_soup(self.INDEX)
cover_url = soup.find(attrs={'class':'article-block latest-issue'}).find('img')['src']
return cover_url
def parse_index(self):
# Get the latest issue
soup = self.index_to_soup(self.INDEX)
a = soup.find('div', attrs={'class': 'full-content stuff-ftr'}).findAll('a')[2]
# Parse the index of the latest issue
self.INDEX = self.INDEX + a['href']
soup = self.index_to_soup(self.INDEX)
feeds = []
for section in ['city', 'life', 'culture']:
section_class = 'left-content article-listing ' + section + ' pull-left'
div = soup.find(attrs={'class': section_class})
articles = []
for tag in div.findAllNext(attrs={'class':'search-block'}):
a = tag.findAll('a', href=True)[1]
title = self.tag_to_string(a)
url = a['href']
articles.append({'title': title, 'url': url, 'description':'', 'date':''})
feeds.append((section, articles))
return feeds

BIN
recipes/icons/folha.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

View File

@ -20,6 +20,8 @@ class JijiDotCom(BasicNewsRecipe):
top_url = 'http://www.jiji.com/'
feeds = [(u'\u30cb\u30e5\u30fc\u30b9', u'http://www.jiji.com/rss/ranking.rdf')]
remove_tags_before = dict(id="article-area")
remove_tags_after = dict(id="ad_google")
def get_cover_url(self):

View File

@ -1,7 +1,7 @@
__license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini, based on Darko Miletic, Gabriele Marini'
__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>'
description = 'Italian daily newspaper - v1.01 (04, January 2010); 16.05.2010 new version; 17.10.2011 new version; 14.12.2011 new version'
__author__ = 'Lorenzo Vigentini, based on Darko Miletic, Gabriele Marini; minor fixes by faber1971'
__copyright__ = '2009-2012, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>, faber1971'
description = 'Italian daily newspaper - v1.02 (04, January 2010); 16.05.2010 new version; 17.10.2011 new version; 14.12.2011 new version; 11.05.2012 new version'
'''
http://www.repubblica.it/
@ -12,14 +12,14 @@ from calibre.web.feeds.news import BasicNewsRecipe
class LaRepubblica(BasicNewsRecipe):
title = 'La Repubblica'
__author__ = 'Lorenzo Vigentini, Gabriele Marini, Darko Miletic'
__author__ = 'Lorenzo Vigentini, Gabriele Marini, Darko Miletic, faber1971'
description = 'il quotidiano online con tutte le notizie in tempo reale. News e ultime notizie. Tutti i settori: politica, cronaca, economia, sport, esteri, scienza, tecnologia, internet, spettacoli, musica, cultura, arte, mostre, libri, dvd, vhs, concerti, cinema, attori, attrici, recensioni, chat, cucina, mappe. Le citta di Repubblica: Roma, Milano, Bologna, Firenze, Palermo, Napoli, Bari, Torino.'
masthead_url = 'http://www.repubblica.it/static/images/homepage/2010/la-repubblica-logo-home-payoff.png'
publisher = 'Gruppo editoriale L\'Espresso'
category = 'News, politics, culture, economy, general interest'
language = 'it'
timefmt = '[%a, %d %b, %Y]'
oldest_article = 5
oldest_article = 1
encoding = 'utf8'
use_embedded_content = False
no_stylesheets = True
@ -59,6 +59,7 @@ class LaRepubblica(BasicNewsRecipe):
dict(attrs={'class':'articolo'}),
dict(attrs={'class':'body-text'}),
dict(name='p', attrs={'class':'disclaimer clearfix'}),
dict(name='div', attrs={'id':'main'}),
dict(attrs={'id':'contA'})
]
@ -67,7 +68,7 @@ class LaRepubblica(BasicNewsRecipe):
dict(name=['object','link','meta','iframe','embed']),
dict(name='span',attrs={'class':'linkindice'}),
dict(name='div', attrs={'class':['bottom-mobile','adv adv-middle-inline']}),
dict(name='div', attrs={'id':['rssdiv','blocco','fb-like-head']}),
dict(name='div', attrs={'id':['rssdiv','blocco','fb-like-head', 'sidebar']}),
dict(name='div', attrs={'class':['utility','fb-like-button','archive-button']}),
dict(name='div', attrs={'class':'generalbox'}),
dict(name='ul', attrs={'id':'hystory'})
@ -88,11 +89,12 @@ class LaRepubblica(BasicNewsRecipe):
(u'Sport', u'http://www.repubblica.it/rss/sport/rss2.0.xml'),
(u'Calcio', u'http://www.repubblica.it/rss/sport/calcio/rss2.0.xml'),
(u'Motori', u'http://www.repubblica.it/rss/motori/rss2.0.xml'),
(u'Edizione Roma', u'http://roma.repubblica.it/rss/rss2.0.xml'),
(u'Edizione Torino', u'http://torino.repubblica.it/rss/rss2.0.xml'),
(u'Edizione Milano', u'feed://milano.repubblica.it/rss/rss2.0.xml'),
(u'Edizione Napoli', u'feed://napoli.repubblica.it/rss/rss2.0.xml'),
(u'Edizione Palermo', u'feed://palermo.repubblica.it/rss/rss2.0.xml')
(u'Roma', u'http://roma.repubblica.it/rss/rss2.0.xml'),
(u'Torino', u'http://torino.repubblica.it/rss/rss2.0.xml'),
(u'Milano', u'feed://milano.repubblica.it/rss/rss2.0.xml'),
(u'Napoli', u'feed://napoli.repubblica.it/rss/rss2.0.xml'),
(u'Bari', u'http://bari.repubblica.it/rss/rss2.0.xml'),
(u'Palermo', u'feed://palermo.repubblica.it/rss/rss2.0.xml')
]
def preprocess_html(self, soup):

View File

@ -16,12 +16,12 @@ class MainichiDailyNews(BasicNewsRecipe):
publisher = 'Mainichi Daily News'
category = 'news, japan'
language = 'ja'
feeds = [(u'daily news', u'http://mainichi.jp/rss/etc/flash.rss')]
index = 'http://mainichi.jp/select/'
remove_javascript = True
masthead_title = u'MAINICHI DAILY NEWS'
remove_tags_before = {'class':"NewsTitle"}
remove_tags = [{'class':"RelatedArticle"}]
remove_tags_after = {'class':"Credit"}
remove_tags_after = {'class':"NewsBody clr"}
def parse_feeds(self):
@ -32,9 +32,30 @@ class MainichiDailyNews(BasicNewsRecipe):
for a,curarticle in enumerate(curfeed.articles):
if re.search(r'pheedo.jp', curarticle.url):
delList.append(curarticle)
if re.search(r'rssad.jp', curarticle.url):
delList.append(curarticle)
if len(delList)>0:
for d in delList:
index = curfeed.articles.index(d)
curfeed.articles[index:index+1] = []
return feeds
def parse_index(self):
feeds = []
soup = self.index_to_soup(self.index)
topstories = soup.find('ul',attrs={'class':'MaiLink'})
if topstories:
newsarticles = []
for itt in topstories.findAll('li'):
itema = itt.find('a',href=True)
if itema:
newsarticles.append({
'title' :itema.string
,'date' :''
,'url' :itema['href']
,'description':''
})
feeds.append(('latest', newsarticles))
return feeds

View File

@ -0,0 +1,67 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
www.mainichi.jp
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class MainichiEnglishNews(BasicNewsRecipe):
title = u'The Mainichi'
__author__ = 'Hiroshi Miura'
oldest_article = 2
max_articles_per_feed = 40
description = 'Japanese traditional newspaper Mainichi news in English'
publisher = 'Mainichi News'
category = 'news, japan'
language = 'en_JP'
index = 'http://mainichi.jp/english/english/index.html'
remove_javascript = True
masthead_url = 'http://mainichi.jp/english/images/themainichi.png'
remove_tags_before = {'class':"NewsTitle"}
remove_tags_after = {'class':"NewsBody clr"}
def parse_feeds(self):
feeds = BasicNewsRecipe.parse_feeds(self)
for curfeed in feeds:
delList = []
for a,curarticle in enumerate(curfeed.articles):
if re.search(r'pheedo.jp', curarticle.url):
delList.append(curarticle)
if re.search(r'rssad.jp', curarticle.url):
delList.append(curarticle)
if len(delList)>0:
for d in delList:
index = curfeed.articles.index(d)
curfeed.articles[index:index+1] = []
return feeds
def parse_index(self):
feeds = []
soup = self.index_to_soup(self.index)
for section in soup.findAll('section'):
newsarticles = []
section_name = 'news'
hds = section.find('div', attrs={'class':'CategoryHead clr'})
if hds:
section_item = hds.find('h1')
if section_item:
section_name = section_item.find('a').string
items = section.find('ul', attrs={'class':'MaiLink'})
for item in items.findAll('li'):
if item:
itema = item.find('a')
newsarticles.append({
'title' :itema.string
,'date' :''
,'url' :itema['href']
,'description':''
})
feeds.append((section_name, newsarticles))
return feeds

View File

@ -1,34 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class MainichiDailyITNews(BasicNewsRecipe):
title = u'\u6bce\u65e5\u65b0\u805e(IT&\u5bb6\u96fb)'
__author__ = 'Hiroshi Miura'
oldest_article = 2
max_articles_per_feed = 100
description = 'Japanese traditional newspaper Mainichi Daily News - IT and electronics'
publisher = 'Mainichi Daily News'
category = 'news, Japan, IT, Electronics'
language = 'ja'
feeds = [(u'IT News', u'http://mainichi.pheedo.jp/f/mainichijp_electronics')]
remove_tags_before = {'class':"NewsTitle"}
remove_tags = [{'class':"RelatedArticle"}]
remove_tags_after = {'class':"Credit"}
def parse_feeds(self):
feeds = BasicNewsRecipe.parse_feeds(self)
for curfeed in feeds:
delList = []
for a,curarticle in enumerate(curfeed.articles):
if re.search(r'pheedo.jp', curarticle.url):
delList.append(curarticle)
if len(delList)>0:
for d in delList:
index = curfeed.articles.index(d)
curfeed.articles[index:index+1] = []
return feeds

View File

@ -0,0 +1,59 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
www.mainichi.jp
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class MainichiDailyScienceNews(BasicNewsRecipe):
title = u'\u6bce\u65e5\u65b0\u805e(Science)'
__author__ = 'Hiroshi Miura'
oldest_article = 2
max_articles_per_feed = 20
description = 'Japanese traditional newspaper Mainichi Daily News - science'
publisher = 'Mainichi Daily News'
category = 'news, japan'
language = 'ja'
index = 'http://mainichi.jp/select/science'
remove_javascript = True
masthead_title = u'MAINICHI DAILY NEWS'
remove_tags_before = {'class':"NewsTitle"}
remove_tags_after = {'class':"NewsBody clr"}
def parse_feeds(self):
feeds = BasicNewsRecipe.parse_feeds(self)
for curfeed in feeds:
delList = []
for a,curarticle in enumerate(curfeed.articles):
if re.search(r'rssad.jp', curarticle.url):
delList.append(curarticle)
if len(delList)>0:
for d in delList:
index = curfeed.articles.index(d)
curfeed.articles[index:index+1] = []
return feeds
def parse_index(self):
feeds = []
soup = self.index_to_soup(self.index)
topstories = soup.find('ul',attrs={'class':'MaiLink'})
if topstories:
newsarticles = []
for itt in topstories.findAll('li'):
itema = itt.find('a',href=True)
if itema:
newsarticles.append({
'title' :itema.string
,'date' :''
,'url' :itema['href']
,'description':''
})
feeds.append(('Science', newsarticles))
return feeds

View File

@ -0,0 +1,16 @@
__version__ = 'v1.0'
__date__ = '5, May 2012'
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1336226255(BasicNewsRecipe):
title = u'National Geographic'
__author__ = 'faber1971'
description = 'Science magazine'
language = 'it'
oldest_article = 15
max_articles_per_feed = 100
auto_cleanup = True
remove_tags = [dict(name='div',attrs={'class':'banner-abbonamenti'})]
feeds = [(u'National Geographic', u'http://www.nationalgeographic.it/rss/all/rss2.0.xml')]

View File

@ -1,5 +1,5 @@
"""
Pocket Calibre Recipe v1.0
Pocket Calibre Recipe v1.1
"""
__license__ = 'GPL v3'
__copyright__ = '''
@ -73,6 +73,9 @@ class Pocket(BasicNewsRecipe):
articles = []
soup = self.index_to_soup(feedurl)
ritem = soup.find('ul', attrs={'id':'list'})
if ritem is None:
self.log.exception("Page %s skipped: invalid HTML" % (feedtitle if feedtitle else feedurl))
continue
for item in reversed(ritem.findAll('li')):
if articlesToGrab < 1:
break
@ -94,7 +97,7 @@ class Pocket(BasicNewsRecipe):
self.readList.append(readLink)
totalfeeds.append((feedtitle, articles))
if len(self.readList) < self.minimum_articles:
raise Exception("Not enough articles in RIL! Change minimum_articles or add more.")
raise Exception("Not enough articles in Pocket! Change minimum_articles or add more articles.")
return totalfeeds
def mark_as_read(self, markList):

View File

@ -12,14 +12,14 @@ msgstr ""
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
"devel@lists.alioth.debian.org>\n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
"PO-Revision-Date: 2012-04-28 10:42+0000\n"
"Last-Translator: Ferran Rius <frius64@hotmail.com>\n"
"PO-Revision-Date: 2012-05-03 16:09+0000\n"
"Last-Translator: Dídac Rios <didac@niorcs.com>\n"
"Language-Team: Catalan <linux@softcatala.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2012-04-29 04:45+0000\n"
"X-Generator: Launchpad (build 15149)\n"
"X-Launchpad-Export-Date: 2012-05-04 04:47+0000\n"
"X-Generator: Launchpad (build 15195)\n"
"Language: ca\n"
#. name for aaa
@ -9936,11 +9936,11 @@ msgstr "Ibani"
#. name for ica
msgid "Ede Ica"
msgstr ""
msgstr "Ede Ica"
#. name for ich
msgid "Etkywan"
msgstr ""
msgstr "Etkywan"
#. name for icl
msgid "Icelandic Sign Language"
@ -9952,7 +9952,7 @@ msgstr "Anglès crioll; Islander"
#. name for ida
msgid "Idakho-Isukha-Tiriki"
msgstr ""
msgstr "Idakho-Isukha-Tiriki"
#. name for idb
msgid "Indo-Portuguese"
@ -9960,15 +9960,15 @@ msgstr "Indo-portuguès"
#. name for idc
msgid "Idon"
msgstr ""
msgstr "Idon"
#. name for idd
msgid "Ede Idaca"
msgstr ""
msgstr "Ede Idaca"
#. name for ide
msgid "Idere"
msgstr ""
msgstr "Idere"
#. name for idi
msgid "Idi"
@ -9976,43 +9976,43 @@ msgstr ""
#. name for ido
msgid "Ido"
msgstr ""
msgstr "ido"
#. name for idr
msgid "Indri"
msgstr ""
msgstr "Indri"
#. name for ids
msgid "Idesa"
msgstr ""
msgstr "Idesa"
#. name for idt
msgid "Idaté"
msgstr ""
msgstr "Idaté"
#. name for idu
msgid "Idoma"
msgstr ""
msgstr "Idoma"
#. name for ifa
msgid "Ifugao; Amganad"
msgstr ""
msgstr "Ifugao; Amganad"
#. name for ifb
msgid "Ifugao; Batad"
msgstr ""
msgstr "Ifugao; Batad"
#. name for ife
msgid "Ifè"
msgstr ""
msgstr "Ifè"
#. name for iff
msgid "Ifo"
msgstr ""
msgstr "Ifo"
#. name for ifk
msgid "Ifugao; Tuwali"
msgstr ""
msgstr "Ifugao; Tuwali"
#. name for ifm
msgid "Teke-Fuumu"
@ -10020,15 +10020,15 @@ msgstr "Teke; Fuumu"
#. name for ifu
msgid "Ifugao; Mayoyao"
msgstr ""
msgstr "Ifugao; Mayoyao"
#. name for ify
msgid "Kallahan; Keley-I"
msgstr ""
msgstr "Kallahan; Keley-I"
#. name for igb
msgid "Ebira"
msgstr ""
msgstr "Ebira"
#. name for ige
msgid "Igede"

View File

@ -8,14 +8,14 @@ msgstr ""
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
"devel@lists.alioth.debian.org>\n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
"PO-Revision-Date: 2012-03-25 12:19+0000\n"
"Last-Translator: Radan Putnik <srastral@gmail.com>\n"
"PO-Revision-Date: 2012-05-03 14:49+0000\n"
"Last-Translator: Иван Старчевић <ivanstar61@gmail.com>\n"
"Language-Team: Serbian <gnu@prevod.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2012-03-26 04:37+0000\n"
"X-Generator: Launchpad (build 15008)\n"
"X-Launchpad-Export-Date: 2012-05-04 04:47+0000\n"
"X-Generator: Launchpad (build 15195)\n"
"Language: sr\n"
#. name for aaa
@ -6152,7 +6152,7 @@ msgstr ""
#. name for deu
msgid "German"
msgstr "немачки"
msgstr "Немачки"
#. name for dev
msgid "Domung"
@ -8416,7 +8416,7 @@ msgstr "ирски"
#. name for glg
msgid "Galician"
msgstr ""
msgstr "Галицијски"
#. name for glh
msgid "Pashayi; Northwest"
@ -8472,11 +8472,11 @@ msgstr ""
#. name for gmh
msgid "German; Middle High (ca. 1050-1500)"
msgstr ""
msgstr "Немачки; средње високи (ca. 1050-1500)"
#. name for gml
msgid "German; Middle Low"
msgstr ""
msgstr "Немачки; средње низак"
#. name for gmm
msgid "Gbaya-Mbodomo"
@ -8792,7 +8792,7 @@ msgstr ""
#. name for gsg
msgid "German Sign Language"
msgstr ""
msgstr "Немачки језик"
#. name for gsl
msgid "Gusilay"
@ -8820,7 +8820,7 @@ msgstr ""
#. name for gsw
msgid "German; Swiss"
msgstr ""
msgstr "Немачки ; Швајцарска"
#. name for gta
msgid "Guató"
@ -17954,7 +17954,7 @@ msgstr ""
#. name for nds
msgid "German; Low"
msgstr ""
msgstr "Немачки; низак"
#. name for ndt
msgid "Ndunga"
@ -18778,7 +18778,7 @@ msgstr ""
#. name for nno
msgid "Norwegian Nynorsk"
msgstr "норвешки модерни"
msgstr "Норвешки модерни"
#. name for nnp
msgid "Naga; Wancho"
@ -18830,7 +18830,7 @@ msgstr ""
#. name for nob
msgid "Norwegian Bokmål"
msgstr ""
msgstr "Норвешки (књижевни)"
#. name for noc
msgid "Nuk"
@ -18886,7 +18886,7 @@ msgstr ""
#. name for nor
msgid "Norwegian"
msgstr "норвешки"
msgstr "Норвешки"
#. name for nos
msgid "Nisu; Eastern"
@ -19066,7 +19066,7 @@ msgstr ""
#. name for nsl
msgid "Norwegian Sign Language"
msgstr ""
msgstr "Норвешки језик"
#. name for nsm
msgid "Naga; Sumi"
@ -20406,7 +20406,7 @@ msgstr ""
#. name for pdc
msgid "German; Pennsylvania"
msgstr ""
msgstr "Немачки ; Пенсилванија"
#. name for pdi
msgid "Pa Di"
@ -22086,7 +22086,7 @@ msgstr ""
#. name for rmg
msgid "Norwegian; Traveller"
msgstr ""
msgstr "Норвешки; путнички"
#. name for rmh
msgid "Murkim"
@ -22871,7 +22871,7 @@ msgstr ""
#. name for sgg
msgid "Swiss-German Sign Language"
msgstr ""
msgstr "Швајцарско-Немачки језик"
#. name for sgh
msgid "Shughni"

View File

@ -26,7 +26,7 @@ def get_opts_from_parser(parser):
class Coffee(Command): # {{{
description = 'Compile coffeescript files into javascript'
COFFEE_DIRS = {'ebooks/oeb/display': 'display'}
COFFEE_DIRS = ('ebooks/oeb/display',)
def add_options(self, parser):
parser.add_option('--watch', '-w', action='store_true', default=False,
@ -47,47 +47,67 @@ class Coffee(Command): # {{{
except KeyboardInterrupt:
pass
def show_js(self, jsfile):
def show_js(self, raw):
from pygments.lexers import JavascriptLexer
from pygments.formatters import TerminalFormatter
from pygments import highlight
with open(jsfile, 'rb') as f:
raw = f.read()
print highlight(raw, JavascriptLexer(), TerminalFormatter())
def do_coffee_compile(self, opts, timestamp=False, ignore_errors=False):
for toplevel, dest in self.COFFEE_DIRS.iteritems():
dest = self.j(self.RESOURCES, dest)
for x in glob.glob(self.j(self.SRC, __appname__, toplevel, '*.coffee')):
js = self.j(dest, os.path.basename(x.rpartition('.')[0]+'.js'))
if self.newer(js, x):
src_files = {}
for src in self.COFFEE_DIRS:
for f in glob.glob(self.j(self.SRC, __appname__, src,
'*.coffee')):
bn = os.path.basename(f).rpartition('.')[0]
arcname = src.replace('/', '.') + '.' + bn + '.js'
src_files[arcname] = (f, os.stat(f).st_mtime)
existing = {}
dest = self.j(self.RESOURCES, 'compiled_coffeescript.zip')
if os.path.exists(dest):
with zipfile.ZipFile(dest, 'r') as zf:
for info in zf.infolist():
mtime = time.mktime(info.date_time + (0, 0, -1))
arcname = info.filename
if (arcname in src_files and src_files[arcname][1] <
mtime):
existing[arcname] = (zf.read(info), info)
todo = set(src_files) - set(existing)
updated = {}
for arcname in todo:
name = arcname.rpartition('.')[0]
print ('\t%sCompiling %s'%(time.strftime('[%H:%M:%S] ') if
timestamp else '', os.path.basename(x)))
timestamp else '', name))
src = src_files[arcname][0]
try:
cs = subprocess.check_output(self.compiler +
[x]).decode('utf-8')
js = subprocess.check_output(self.compiler +
[src]).decode('utf-8')
except Exception as e:
print ('\n\tCompilation of %s failed'%os.path.basename(x))
print ('\n\tCompilation of %s failed'%name)
print (e)
if ignore_errors:
with open(js, 'wb') as f:
f.write('# Compilation from coffeescript failed')
js = u'# Compilation from coffeescript failed'
else:
raise SystemExit(1)
else:
with open(js, 'wb') as f:
f.write(cs.encode('utf-8'))
if opts.show_js:
self.show_js(js)
print ('#'*80)
print ('#'*80)
zi = zipfile.ZipInfo()
zi.filename = arcname
zi.date_time = time.localtime()[:6]
updated[arcname] = (js.encode('utf-8'), zi)
if updated:
with zipfile.ZipFile(dest, 'w', zipfile.ZIP_STORED) as zf:
for raw, zi in updated.itervalues():
zf.writestr(zi, raw)
for raw, zi in existing.itervalues():
zf.writestr(zi, raw)
def clean(self):
for toplevel, dest in self.COFFEE_DIRS.iteritems():
dest = self.j(self.RESOURCES, dest)
for x in glob.glob(self.j(self.SRC, __appname__, toplevel, '*.coffee')):
x = x.rpartition('.')[0] + '.js'
x = self.j(dest, os.path.basename(x))
x = self.j(self.RESOURCES, 'compiled_coffeescript.zip')
if os.path.exists(x):
os.remove(x)
# }}}

View File

@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = u'calibre'
numeric_version = (0, 8, 50)
numeric_version = (0, 8, 51)
__version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"

View File

@ -57,6 +57,7 @@ class ANDROID(USBMS):
0x4316 : [0x216],
0x42d6 : [0x216],
0x42d7 : [0x216],
0x42f7 : [0x216],
},
# Freescale
0x15a2 : {
@ -193,7 +194,7 @@ class ANDROID(USBMS):
'GT-I9003_CARD', 'XT912', 'FILE-CD_GADGET', 'RK29_SDK', 'MB855',
'XT910', 'BOOK_A10', 'USB_2.0_DRIVER', 'I9100T', 'P999DW',
'KTABLET_PC', 'INGENIC', 'GT-I9001_CARD', 'USB_2.0_DRIVER',
'GT-S5830L_CARD', 'UNIVERSE']
'GT-S5830L_CARD', 'UNIVERSE', 'XT875']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
@ -201,7 +202,7 @@ class ANDROID(USBMS):
'ANDROID_MID', 'P990_SD_CARD', '.K080', 'LTE_CARD', 'MB853',
'A1-07___C0541A4F', 'XT912', 'MB855', 'XT910', 'BOOK_A10_CARD',
'USB_2.0_DRIVER', 'I9100T', 'P999DW_SD_CARD', 'KTABLET_PC',
'FILE-CD_GADGET', 'GT-I9001_CARD', 'USB_2.0_DRIVER']
'FILE-CD_GADGET', 'GT-I9001_CARD', 'USB_2.0_DRIVER', 'XT875']
OSX_MAIN_MEM = 'Android Device Main Memory'

View File

@ -92,6 +92,10 @@ class POCKETBOOK360(EB600):
name = 'PocketBook 360 Device Interface'
gui_name = 'PocketBook 360'
VENDOR_ID = [0x1f85, 0x525]
PRODUCT_ID = [0x1688, 0xa4a5]
BCD = [0x110]
FORMATS = ['epub', 'fb2', 'prc', 'mobi', 'pdf', 'djvu', 'rtf', 'chm', 'txt']

View File

@ -12,7 +12,7 @@ class MOBIInput(InputFormatPlugin):
name = 'MOBI Input'
author = 'Kovid Goyal'
description = 'Convert MOBI files (.mobi, .prc, .azw) to HTML'
file_types = set(['mobi', 'prc', 'azw', 'azw3'])
file_types = set(['mobi', 'prc', 'azw', 'azw3', 'pobi'])
def convert(self, stream, options, file_ext, log,
accelerators):

View File

@ -306,6 +306,11 @@ class MOBIHeader(object): # {{{
self.extra_data_flags = 0
if self.has_extra_data_flags:
self.unknown4 = self.raw[184:192]
if self.file_version < 8:
self.first_text_record, self.last_text_record = \
struct.unpack_from(b'>HH', self.raw, 192)
self.fdst_count = struct.unpack_from(b'>L', self.raw, 196)
else:
self.fdst_idx, self.fdst_count = struct.unpack_from(b'>LL',
self.raw, 192)
if self.fdst_count <= 1:
@ -409,6 +414,10 @@ class MOBIHeader(object): # {{{
a('DRM Flags: %r'%self.drm_flags)
if self.has_extra_data_flags:
a('Unknown4: %r'%self.unknown4)
if hasattr(self, 'first_text_record'):
a('First content record: %d'%self.first_text_record)
a('Last content record: %d'%self.last_text_record)
else:
r('FDST Index', 'fdst_idx')
a('FDST Count: %d'% self.fdst_count)
r('FCIS number', 'fcis_number')

View File

@ -111,7 +111,11 @@ def update_flow_links(mobi8_reader, resource_map, log):
continue
if not isinstance(flow, unicode):
try:
flow = flow.decode(mr.header.codec)
except UnicodeDecodeError:
log.error('Flow part has invalid %s encoded bytes'%mr.header.codec)
flow = flow.decode(mr.header.codec, 'replace')
# links to raster image files from image tags
# image_pattern

View File

@ -207,9 +207,9 @@ class Mobi8Reader(object):
fname = 'svgimg' + nstr + '.svg'
else:
# search for CDATA and if exists inline it
if flowpart.find('[CDATA[') >= 0:
if flowpart.find(b'[CDATA[') >= 0:
typ = 'css'
flowpart = '<style type="text/css">\n' + flowpart + '\n</style>\n'
flowpart = b'<style type="text/css">\n' + flowpart + b'\n</style>\n'
format = 'inline'
dir = None
fname = None

View File

@ -382,6 +382,7 @@ class MobiWriter(object):
first_image_record = len(self.records)
self.resources.serialize(self.records, used_images)
resource_record_count = len(self.records) - old
last_content_record = len(self.records) - 1
# FCIS/FLIS (Seems to serve no purpose)
flis_number = len(self.records)
@ -406,7 +407,7 @@ class MobiWriter(object):
# header
header_fields['first_resource_record'] = first_image_record
header_fields['exth_flags'] = 0b100001010000 # Kinglegen uses this
header_fields['fdst_record'] = NULL_INDEX
header_fields['fdst_record'] = pack(b'>HH', 1, last_content_record)
header_fields['fdst_count'] = 1 # Why not 0? Kindlegen uses 1
header_fields['flis_record'] = flis_number
header_fields['fcis_record'] = fcis_number

View File

@ -314,9 +314,9 @@ class KF8Writer(object):
return
# Flatten the ToC into a depth first list
fl = toc.iter() if is_periodical else toc.iterdescendants()
fl = toc.iterdescendants()
for i, item in enumerate(fl):
entry = {'id': id(item), 'index': i, 'href':item.href,
entry = {'id': id(item), 'index': i, 'href':item.href or '',
'label':(item.title or _('Unknown')),
'children':[]}
entry['depth'] = getattr(item, 'ncx_hlvl', 0)

View File

@ -138,6 +138,8 @@ class MOBIHeader(Header): # {{{
unknown2 = zeroes(8)
# 192: FDST
# In MOBI 6 the fdst record is instead two two byte fields storing the
# index of the first and last content records
fdst_record = DYN
fdst_count = DYN

View File

@ -389,8 +389,17 @@ class CanonicalFragmentIdentifier
# Drill down into iframes, etc.
while true
target = cdoc.elementFromPoint x, y
if not target or target.localName == 'html'
log("No element at (#{ x }, #{ y })")
if not target or target.localName in ['html', 'body']
# We ignore both html and body even though body could
# have text nodes under it as performance is very poor if body
# has large margins/padding (for e.g. in fullscreen mode)
# A possible solution for this is to wrap all text node
# children of body in <span> but that is seriously ugly and
# might have side effects. Lets do this only if there are lots of
# books in the wild that actually have text children of body,
# and even in this case it might be better to change the input
# plugin to prevent this from happening.
# log("No element at (#{ x }, #{ y })")
return null
name = target.localName

View File

@ -0,0 +1,76 @@
#!/usr/bin/env coffee
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
###
Copyright 2012, Kovid Goyal <kovid@kovidgoyal.net>
Released under the GPLv3 License
###
# Return the total scrollable height of the document. Browsers disagree on
# which of the height properties reflects the true document height, so take
# the maximum of all of them; return 0 if the document is not ready yet.
body_height = () ->
    db = document.body
    dde = document.documentElement
    if db? and dde?
        return Math.max(db.scrollHeight, dde.scrollHeight, db.offsetHeight,
            dde.offsetHeight, db.clientHeight, dde.clientHeight)
    return 0
# Absolute (document co-ordinate system) vertical position of elem, computed
# by summing offsetTop while walking up the offsetParent chain.
abstop = (elem) ->
    ans = elem.offsetTop
    while elem.offsetParent
        elem = elem.offsetParent
        ans += elem.offsetTop
    return ans
class BookIndexing
    ###
    This class is a namespace to expose indexing functions via the
    window.book_indexing object. The most important functions are:

    anchor_positions(): Get the absolute (document co-ordinate system) position
    for elements with the specified id/name attributes.
    ###

    constructor: () ->
        # Map of anchor name -> absolute position from the last computation,
        # plus the body height at that time (positions are invalidated when
        # the document reflows and its height changes).
        this.cache = {}
        this.body_height_at_last_check = null

    cache_valid: (anchors) ->
        # The cache is valid only if it contains exactly the requested set of
        # anchors — no more, no less.
        for a in anchors
            if not Object.prototype.hasOwnProperty.call(this.cache, a)
                return false
        for p of this.cache
            if Object.prototype.hasOwnProperty.call(this.cache, p) and p not in anchors
                return false
        return true

    anchor_positions: (anchors, use_cache=false) ->
        # Return a map of anchor name -> absolute document position. When
        # use_cache is true, reuse the previous answer as long as the document
        # height is unchanged and the same anchors are being asked for.
        if use_cache and body_height() == this.body_height_at_last_check and this.cache_valid(anchors)
            return this.cache

        ans = {}
        for anchor in anchors
            elem = document.getElementById(anchor)
            if elem == null
                # Look for an <a name="anchor"> element
                try
                    result = document.evaluate(
                        ".//*[local-name() = 'a' and @name='#{ anchor }']",
                        document.body, null,
                        XPathResult.FIRST_ORDERED_NODE_TYPE, null)
                    elem = result.singleNodeValue
                catch error
                    # The anchor had a ' or other invalid char
                    elem = null
            if elem == null
                # Anchor not found: report a position past the end of the
                # document so it sorts after every real anchor
                pos = body_height() + 10000
            else
                pos = abstop(elem)
            ans[anchor] = pos
        this.cache = ans
        this.body_height_at_last_check = body_height()
        return ans

if window?
    window.book_indexing = new BookIndexing()

View File

@ -1,383 +0,0 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008 Kovid Goyal <kovid at kovidgoyal.net>'
'''
Iterate over the HTML files in an ebook. Useful for writing viewers.
'''
import re, os, math
from cStringIO import StringIO
from PyQt4.Qt import QFontDatabase
from calibre.customize.ui import available_input_formats
from calibre.ebooks.metadata.opf2 import OPF
from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.zipfile import safe_replace
from calibre.utils.config import DynamicConfig
from calibre.utils.logging import Log
from calibre import (guess_type, prints, prepare_string_for_xml,
xml_replace_entities)
from calibre.ebooks.oeb.transforms.cover import CoverManager
from calibre.constants import filesystem_encoding
TITLEPAGE = CoverManager.SVG_TEMPLATE.decode('utf-8').replace(\
'__ar__', 'none').replace('__viewbox__', '0 0 600 800'
).replace('__width__', '600').replace('__height__', '800')
BM_FIELD_SEP = u'*|!|?|*'
BM_LEGACY_ESC = u'esc-text-%&*#%(){}ads19-end-esc'
def character_count(html):
'''
Return the number of "significant" text characters in a HTML string.
'''
count = 0
strip_space = re.compile(r'\s+')
for match in re.finditer(r'>[^<]+<', html):
count += len(strip_space.sub(' ', match.group()))-2
return count
class UnsupportedFormatError(Exception):
def __init__(self, fmt):
Exception.__init__(self, _('%s format books are not supported')%fmt.upper())
class SpineItem(unicode):
def __new__(cls, path, mime_type=None):
ppath = path.partition('#')[0]
if not os.path.exists(path) and os.path.exists(ppath):
path = ppath
obj = super(SpineItem, cls).__new__(cls, path)
raw = open(path, 'rb').read()
raw, obj.encoding = xml_to_unicode(raw)
obj.character_count = character_count(raw)
obj.start_page = -1
obj.pages = -1
obj.max_page = -1
if mime_type is None:
mime_type = guess_type(obj)[0]
obj.mime_type = mime_type
return obj
class FakeOpts(object):
verbose = 0
breadth_first = False
max_levels = 5
input_encoding = None
def is_supported(path):
ext = os.path.splitext(path)[1].replace('.', '').lower()
ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
return ext in available_input_formats()
def write_oebbook(oeb, path):
from calibre.ebooks.oeb.writer import OEBWriter
from calibre import walk
w = OEBWriter()
w(oeb, path)
for f in walk(path):
if f.endswith('.opf'):
return f
class EbookIterator(object):
CHARACTERS_PER_PAGE = 1000
def __init__(self, pathtoebook, log=None):
self.log = log
if log is None:
self.log = Log()
pathtoebook = pathtoebook.strip()
self.pathtoebook = os.path.abspath(pathtoebook)
self.config = DynamicConfig(name='iterator')
ext = os.path.splitext(pathtoebook)[1].replace('.', '').lower()
ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
self.ebook_ext = ext.replace('original_', '')
def search(self, text, index, backwards=False):
text = prepare_string_for_xml(text.lower())
pmap = [(i, path) for i, path in enumerate(self.spine)]
if backwards:
pmap.reverse()
for i, path in pmap:
if (backwards and i < index) or (not backwards and i > index):
with open(path, 'rb') as f:
raw = f.read().decode(path.encoding)
try:
raw = xml_replace_entities(raw)
except:
pass
if text in raw.lower():
return i
def find_missing_css_files(self):
for x in os.walk(os.path.dirname(self.pathtoopf)):
for f in x[-1]:
if f.endswith('.css'):
yield os.path.join(x[0], f)
def find_declared_css_files(self):
for item in self.opf.manifest:
if item.mime_type and 'css' in item.mime_type.lower():
yield item.path
def find_embedded_fonts(self):
'''
This will become unnecessary once Qt WebKit supports the @font-face rule.
'''
css_files = set(self.find_declared_css_files())
if not css_files:
css_files = set(self.find_missing_css_files())
bad_map = {}
font_family_pat = re.compile(r'font-family\s*:\s*([^;]+)')
for csspath in css_files:
try:
css = open(csspath, 'rb').read().decode('utf-8', 'replace')
except:
continue
for match in re.compile(r'@font-face\s*{([^}]+)}').finditer(css):
block = match.group(1)
family = font_family_pat.search(block)
url = re.compile(r'url\s*\([\'"]*(.+?)[\'"]*\)', re.DOTALL).search(block)
if url:
path = url.group(1).split('/')
path = os.path.join(os.path.dirname(csspath), *path)
if not os.access(path, os.R_OK):
continue
id = QFontDatabase.addApplicationFont(path)
if id != -1:
families = [unicode(f) for f in QFontDatabase.applicationFontFamilies(id)]
if family:
family = family.group(1)
specified_families = [x.strip().replace('"',
'').replace("'", '') for x in family.split(',')]
aliasing_ok = False
for f in specified_families:
bad_map[f] = families[0]
if not aliasing_ok and f in families:
aliasing_ok = True
if not aliasing_ok:
prints('WARNING: Family aliasing not fully supported.')
prints('\tDeclared family: %r not in actual families: %r'
% (family, families))
else:
prints('Loaded embedded font:', repr(family))
if bad_map:
def prepend_embedded_font(match):
for bad, good in bad_map.items():
if bad in match.group(1):
prints('Substituting font family: %s -> %s'%(bad, good))
return match.group().replace(bad, '"%s"'%good)
from calibre.ebooks.chardet import force_encoding
for csspath in css_files:
with open(csspath, 'r+b') as f:
css = f.read()
enc = force_encoding(css, False)
css = css.decode(enc, 'replace')
ncss = font_family_pat.sub(prepend_embedded_font, css)
if ncss != css:
f.seek(0)
f.truncate()
f.write(ncss.encode(enc))
def __enter__(self, processed=False, only_input_plugin=False):
self.delete_on_exit = []
self._tdir = TemporaryDirectory('_ebook_iter')
self.base = self._tdir.__enter__()
if not isinstance(self.base, unicode):
self.base = self.base.decode(filesystem_encoding)
from calibre.ebooks.conversion.plumber import Plumber, create_oebbook
plumber = Plumber(self.pathtoebook, self.base, self.log)
plumber.setup_options()
if self.pathtoebook.lower().endswith('.opf'):
plumber.opts.dont_package = True
if hasattr(plumber.opts, 'no_process'):
plumber.opts.no_process = True
plumber.input_plugin.for_viewer = True
with plumber.input_plugin:
self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'),
plumber.opts, plumber.input_fmt, self.log,
{}, self.base)
if not only_input_plugin:
if processed or plumber.input_fmt.lower() in ('pdb', 'pdf', 'rb') and \
not hasattr(self.pathtoopf, 'manifest'):
if hasattr(self.pathtoopf, 'manifest'):
self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
self.pathtoopf = create_oebbook(self.log, self.pathtoopf,
plumber.opts)
if hasattr(self.pathtoopf, 'manifest'):
self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
self.book_format = os.path.splitext(self.pathtoebook)[1][1:].upper()
if getattr(plumber.input_plugin, 'is_kf8', False):
self.book_format = 'KF8'
self.opf = getattr(plumber.input_plugin, 'optimize_opf_parsing', None)
if self.opf is None:
self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
self.language = self.opf.language
if self.language:
self.language = self.language.lower()
ordered = [i for i in self.opf.spine if i.is_linear] + \
[i for i in self.opf.spine if not i.is_linear]
self.spine = []
for i in ordered:
spath = i.path
mt = None
if i.idref is not None:
mt = self.opf.manifest.type_for_id(i.idref)
if mt is None:
mt = guess_type(spath)[0]
try:
self.spine.append(SpineItem(spath, mime_type=mt))
except:
self.log.warn('Missing spine item:', repr(spath))
cover = self.opf.cover
if self.ebook_ext in ('lit', 'mobi', 'prc', 'opf', 'fb2') and cover:
cfile = os.path.join(self.base, 'calibre_iterator_cover.html')
rcpath = os.path.relpath(cover, self.base).replace(os.sep, '/')
chtml = (TITLEPAGE%prepare_string_for_xml(rcpath, True)).encode('utf-8')
open(cfile, 'wb').write(chtml)
self.spine[0:0] = [SpineItem(cfile,
mime_type='application/xhtml+xml')]
self.delete_on_exit.append(cfile)
if self.opf.path_to_html_toc is not None and \
self.opf.path_to_html_toc not in self.spine:
try:
self.spine.append(SpineItem(self.opf.path_to_html_toc))
except:
import traceback
traceback.print_exc()
sizes = [i.character_count for i in self.spine]
self.pages = [math.ceil(i/float(self.CHARACTERS_PER_PAGE)) for i in sizes]
for p, s in zip(self.pages, self.spine):
s.pages = p
start = 1
for s in self.spine:
s.start_page = start
start += s.pages
s.max_page = s.start_page + s.pages - 1
self.toc = self.opf.toc
self.read_bookmarks()
return self
def parse_bookmarks(self, raw):
for line in raw.splitlines():
bm = None
if line.count('^') > 0:
tokens = line.rpartition('^')
title, ref = tokens[0], tokens[2]
try:
spine, _, pos = ref.partition('#')
spine = int(spine.strip())
except:
continue
bm = {'type':'legacy', 'title':title, 'spine':spine, 'pos':pos}
elif BM_FIELD_SEP in line:
try:
title, spine, pos = line.strip().split(BM_FIELD_SEP)
spine = int(spine)
except:
continue
# Unescape from serialization
pos = pos.replace(BM_LEGACY_ESC, u'^')
# Check for pos being a scroll fraction
try:
pos = float(pos)
except:
pass
bm = {'type':'cfi', 'title':title, 'pos':pos, 'spine':spine}
if bm:
self.bookmarks.append(bm)
def serialize_bookmarks(self, bookmarks):
dat = []
for bm in bookmarks:
if bm['type'] == 'legacy':
rec = u'%s^%d#%s'%(bm['title'], bm['spine'], bm['pos'])
else:
pos = bm['pos']
if isinstance(pos, (int, float)):
pos = unicode(pos)
else:
pos = pos.replace(u'^', BM_LEGACY_ESC)
rec = BM_FIELD_SEP.join([bm['title'], unicode(bm['spine']), pos])
dat.append(rec)
return (u'\n'.join(dat) +u'\n')
def read_bookmarks(self):
self.bookmarks = []
bmfile = os.path.join(self.base, 'META-INF', 'calibre_bookmarks.txt')
raw = ''
if os.path.exists(bmfile):
with open(bmfile, 'rb') as f:
raw = f.read()
else:
saved = self.config['bookmarks_'+self.pathtoebook]
if saved:
raw = saved
if not isinstance(raw, unicode):
raw = raw.decode('utf-8')
self.parse_bookmarks(raw)
def save_bookmarks(self, bookmarks=None):
if bookmarks is None:
bookmarks = self.bookmarks
dat = self.serialize_bookmarks(bookmarks)
if os.path.splitext(self.pathtoebook)[1].lower() == '.epub' and \
os.access(self.pathtoebook, os.R_OK):
try:
zf = open(self.pathtoebook, 'r+b')
except IOError:
return
safe_replace(zf, 'META-INF/calibre_bookmarks.txt',
StringIO(dat.encode('utf-8')),
add_missing=True)
else:
self.config['bookmarks_'+self.pathtoebook] = dat
def add_bookmark(self, bm):
self.bookmarks = [x for x in self.bookmarks if x['title'] !=
bm['title']]
self.bookmarks.append(bm)
self.save_bookmarks()
def set_bookmarks(self, bookmarks):
self.bookmarks = bookmarks
def __exit__(self, *args):
self._tdir.__exit__(*args)
for x in self.delete_on_exit:
if os.path.exists(x):
os.remove(x)
def get_preprocess_html(path_to_ebook, output):
from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
iterator = EbookIterator(path_to_ebook)
iterator.__enter__(only_input_plugin=True)
preprocessor = HTMLPreProcessor(None, False)
with open(output, 'wb') as out:
for path in iterator.spine:
with open(path, 'rb') as f:
html = f.read().decode('utf-8', 'replace')
html = preprocessor(html, get_preprocess_html=True)
out.write(html.encode('utf-8'))
out.write(b'\n\n' + b'-'*80 + b'\n\n')

View File

@ -0,0 +1,42 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, re
from calibre.customize.ui import available_input_formats
def is_supported(path):
    '''Return True if the file at *path* has an extension for which an input
    format plugin is available.'''
    extension = os.path.splitext(path)[1].replace('.', '').lower()
    # Normalize the htm/xhtm/xhtml/html family to plain 'html'
    normalized = re.sub(r'(x{0,1})htm(l{0,1})', 'html', extension)
    return normalized in available_input_formats()
class UnsupportedFormatError(Exception):

    '''Raised when an ebook is in a format that no input plugin supports.'''

    def __init__(self, fmt):
        # *fmt* is the format/extension name; it is upper-cased for display
        Exception.__init__(self, _('%s format books are not supported')%fmt.upper())
def EbookIterator(*args, **kwargs):
    '''Factory preserved for backwards compatibility.

    The implementation now lives in calibre.ebooks.oeb.iterator.book; this
    wrapper just instantiates it, importing lazily at call time. The alias
    avoids shadowing the real class name inside this function.
    '''
    from calibre.ebooks.oeb.iterator.book import EbookIterator as _RealIterator
    return _RealIterator(*args, **kwargs)
def get_preprocess_html(path_to_ebook, output):
    '''Write the preprocessed HTML of every spine item of the book at
    *path_to_ebook* into the file *output* (UTF-8 encoded), separating the
    items with a line of dashes. Useful for debugging the preprocess step.'''
    from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
    iterator = EbookIterator(path_to_ebook)
    # Only the input plugin is run; character counts and anchor maps are not
    # needed for preprocessing output.
    # NOTE(review): __exit__ is never called, so the iterator's temporary
    # directory is not cleaned up here — confirm this is intentional.
    iterator.__enter__(only_input_plugin=True, run_char_count=False,
            read_anchor_map=False)
    preprocessor = HTMLPreProcessor(None, False)
    with open(output, 'wb') as out:
        for path in iterator.spine:
            with open(path, 'rb') as f:
                html = f.read().decode('utf-8', 'replace')
                html = preprocessor(html, get_preprocess_html=True)
                out.write(html.encode('utf-8'))
                # Separator between spine items
                out.write(b'\n\n' + b'-'*80 + b'\n\n')

View File

@ -0,0 +1,187 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
'''
Iterate over the HTML files in an ebook. Useful for writing viewers.
'''
import re, os, math
from functools import partial
from calibre.ebooks.metadata.opf2 import OPF
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.config import DynamicConfig
from calibre.utils.logging import default_log
from calibre import (guess_type, prepare_string_for_xml,
xml_replace_entities)
from calibre.ebooks.oeb.transforms.cover import CoverManager
from calibre.ebooks.oeb.iterator.spine import (SpineItem, create_indexing_data)
from calibre.ebooks.oeb.iterator.bookmarks import BookmarksMixin
TITLEPAGE = CoverManager.SVG_TEMPLATE.decode('utf-8').replace(\
'__ar__', 'none').replace('__viewbox__', '0 0 600 800'
).replace('__width__', '600').replace('__height__', '800')
class FakeOpts(object):
    '''Minimal stand-in for a parsed conversion-options object, for code
    paths where real command line options are not available.'''
    verbose = 0            # no verbose logging
    breadth_first = False  # traversal order flag
    max_levels = 5         # maximum recursion depth
    input_encoding = None  # autodetect input encoding
def write_oebbook(oeb, path):
    '''Serialize the OEB book *oeb* into the directory *path* and return the
    path of the OPF file that was written, or None if no OPF is found.'''
    from calibre.ebooks.oeb.writer import OEBWriter
    from calibre import walk
    writer = OEBWriter()
    writer(oeb, path)
    # The writer produces an OPF somewhere under path; locate it
    return next((name for name in walk(path) if name.endswith('.opf')), None)
class EbookIterator(BookmarksMixin):

    '''Convert an ebook into an exploded OEB book in a temporary directory
    and iterate over its spine. Adds page-count estimates and ToC indexing
    data to each spine item; bookmark handling comes from BookmarksMixin.
    Use as a context manager: the conversion runs in __enter__.'''

    # Nominal number of characters per "page", used for page estimates
    CHARACTERS_PER_PAGE = 1000

    def __init__(self, pathtoebook, log=None):
        self.log = log or default_log
        pathtoebook = pathtoebook.strip()
        self.pathtoebook = os.path.abspath(pathtoebook)
        self.config = DynamicConfig(name='iterator')
        ext = os.path.splitext(pathtoebook)[1].replace('.', '').lower()
        # Normalize the htm/xhtm/xhtml/html family to plain 'html'
        ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
        self.ebook_ext = ext.replace('original_', '')

    def search(self, text, index, backwards=False):
        '''Return the index of the first spine item after (before, if
        *backwards*) position *index* whose text contains *text*, or None.'''
        text = prepare_string_for_xml(text.lower())
        pmap = [(i, path) for i, path in enumerate(self.spine)]
        if backwards:
            pmap.reverse()
        for i, path in pmap:
            if (backwards and i < index) or (not backwards and i > index):
                with open(path, 'rb') as f:
                    raw = f.read().decode(path.encoding)
                try:
                    raw = xml_replace_entities(raw)
                except:
                    pass
                if text in raw.lower():
                    return i

    def __enter__(self, processed=False, only_input_plugin=False,
            run_char_count=True, read_anchor_map=True):
        ''' Convert an ebook file into an exploded OEB book suitable for
        display in viewers/preprocessing etc. '''
        from calibre.ebooks.conversion.plumber import Plumber, create_oebbook
        self.delete_on_exit = []
        self._tdir = TemporaryDirectory('_ebook_iter')
        self.base = self._tdir.__enter__()
        plumber = Plumber(self.pathtoebook, self.base, self.log)
        plumber.setup_options()
        if self.pathtoebook.lower().endswith('.opf'):
            plumber.opts.dont_package = True
        if hasattr(plumber.opts, 'no_process'):
            plumber.opts.no_process = True
        plumber.input_plugin.for_viewer = True
        with plumber.input_plugin, open(plumber.input, 'rb') as inf:
            self.pathtoopf = plumber.input_plugin(inf,
                plumber.opts, plumber.input_fmt, self.log,
                {}, self.base)

        if not only_input_plugin:
            # Run the HTML preprocess/parsing from the conversion pipeline as
            # well
            # NOTE(review): `and` binds tighter than `or` here, so the
            # manifest check only applies to the pdb/pdf/rb branch — confirm
            # that is the intended grouping.
            if (processed or plumber.input_fmt.lower() in {'pdb', 'pdf', 'rb'}
                    and not hasattr(self.pathtoopf, 'manifest')):
                if hasattr(self.pathtoopf, 'manifest'):
                    self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
                self.pathtoopf = create_oebbook(self.log, self.pathtoopf,
                        plumber.opts)

        if hasattr(self.pathtoopf, 'manifest'):
            # Still an in-memory OEB book: write it out and keep the OPF path
            self.pathtoopf = write_oebbook(self.pathtoopf, self.base)

        self.book_format = os.path.splitext(self.pathtoebook)[1][1:].upper()
        if getattr(plumber.input_plugin, 'is_kf8', False):
            self.book_format = 'KF8'

        self.opf = getattr(plumber.input_plugin, 'optimize_opf_parsing', None)
        if self.opf is None:
            self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
        self.language = self.opf.language
        if self.language:
            self.language = self.language.lower()
        # Linear spine items first, non-linear ones at the end
        ordered = [i for i in self.opf.spine if i.is_linear] + \
            [i for i in self.opf.spine if not i.is_linear]
        self.spine = []
        Spiny = partial(SpineItem, read_anchor_map=read_anchor_map,
                run_char_count=run_char_count)
        for i in ordered:
            spath = i.path
            mt = None
            if i.idref is not None:
                mt = self.opf.manifest.type_for_id(i.idref)
            if mt is None:
                mt = guess_type(spath)[0]
            try:
                self.spine.append(Spiny(spath, mime_type=mt))
            except:
                self.log.warn('Missing spine item:', repr(spath))

        cover = self.opf.cover
        if cover and self.ebook_ext in {'lit', 'mobi', 'prc', 'opf', 'fb2',
                'azw', 'azw3'}:
            # These formats have no titlepage in the spine; synthesize one
            # from the cover image and prepend it
            cfile = os.path.join(self.base, 'calibre_iterator_cover.html')
            rcpath = os.path.relpath(cover, self.base).replace(os.sep, '/')
            chtml = (TITLEPAGE%prepare_string_for_xml(rcpath, True)).encode('utf-8')
            with open(cfile, 'wb') as f:
                f.write(chtml)
            self.spine[0:0] = [Spiny(cfile,
                mime_type='application/xhtml+xml')]
            self.delete_on_exit.append(cfile)

        if self.opf.path_to_html_toc is not None and \
                self.opf.path_to_html_toc not in self.spine:
            try:
                self.spine.append(Spiny(self.opf.path_to_html_toc))
            except:
                import traceback
                traceback.print_exc()

        # Estimate page numbers from character counts and assign each spine
        # item its start/max page
        sizes = [i.character_count for i in self.spine]
        self.pages = [math.ceil(i/float(self.CHARACTERS_PER_PAGE)) for i in sizes]
        for p, s in zip(self.pages, self.spine):
            s.pages = p
        start = 1
        for s in self.spine:
            s.start_page = start
            start += s.pages
            s.max_page = s.start_page + s.pages - 1
        self.toc = self.opf.toc
        if read_anchor_map:
            create_indexing_data(self.spine, self.toc)

        self.read_bookmarks()
        return self

    def __exit__(self, *args):
        # Clean up the temporary directory and any synthesized files
        self._tdir.__exit__(*args)
        for x in self.delete_on_exit:
            try:
                os.remove(x)
            except:
                pass

View File

@ -0,0 +1,105 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os
from io import BytesIO
from calibre.utils.zipfile import safe_replace
BM_FIELD_SEP = u'*|!|?|*'
BM_LEGACY_ESC = u'esc-text-%&*#%(){}ads19-end-esc'
class BookmarksMixin(object):

    '''Mixin that loads, saves and (de)serializes reading-position bookmarks.

    For EPUB files bookmarks are stored inside the file itself, in
    META-INF/calibre_bookmarks.txt; for other formats they go into the
    dynamic config keyed by the book's path. The host class must provide
    self.base, self.pathtoebook and self.config.
    '''

    def parse_bookmarks(self, raw):
        '''Parse serialized bookmark data in *raw* and append the resulting
        bookmark dicts to self.bookmarks. Malformed lines are skipped.'''
        for line in raw.splitlines():
            bm = None
            if line.count('^') > 0:
                # Legacy format: title^spine#pos
                tokens = line.rpartition('^')
                title, ref = tokens[0], tokens[2]
                try:
                    spine, _, pos = ref.partition('#')
                    spine = int(spine.strip())
                except:
                    continue
                bm = {'type':'legacy', 'title':title, 'spine':spine, 'pos':pos}
            elif BM_FIELD_SEP in line:
                # Current format: title, spine index and position joined by
                # the BM_FIELD_SEP marker
                try:
                    title, spine, pos = line.strip().split(BM_FIELD_SEP)
                    spine = int(spine)
                except:
                    continue
                # Unescape from serialization
                pos = pos.replace(BM_LEGACY_ESC, u'^')
                # Check for pos being a scroll fraction
                try:
                    pos = float(pos)
                except:
                    pass
                bm = {'type':'cfi', 'title':title, 'pos':pos, 'spine':spine}

            if bm:
                self.bookmarks.append(bm)

    def serialize_bookmarks(self, bookmarks):
        '''Return *bookmarks* (a list of bookmark dicts) serialized as a
        unicode string, one bookmark per line.'''
        dat = []
        for bm in bookmarks:
            if bm['type'] == 'legacy':
                rec = u'%s^%d#%s'%(bm['title'], bm['spine'], bm['pos'])
            else:
                pos = bm['pos']
                if isinstance(pos, (int, float)):
                    pos = unicode(pos)
                else:
                    # Escape the legacy '^' separator before serializing
                    pos = pos.replace(u'^', BM_LEGACY_ESC)
                rec = BM_FIELD_SEP.join([bm['title'], unicode(bm['spine']), pos])
            dat.append(rec)
        return (u'\n'.join(dat) +u'\n')

    def read_bookmarks(self):
        '''Populate self.bookmarks, preferring bookmarks stored inside the
        book (EPUB) over those saved in the config.'''
        self.bookmarks = []
        bmfile = os.path.join(self.base, 'META-INF', 'calibre_bookmarks.txt')
        raw = ''
        if os.path.exists(bmfile):
            with open(bmfile, 'rb') as f:
                raw = f.read()
        else:
            saved = self.config['bookmarks_'+self.pathtoebook]
            if saved:
                raw = saved
        if not isinstance(raw, unicode):
            raw = raw.decode('utf-8')
        self.parse_bookmarks(raw)

    def save_bookmarks(self, bookmarks=None):
        '''Persist *bookmarks* (defaults to self.bookmarks): into the EPUB
        file itself when possible, otherwise into the config.'''
        if bookmarks is None:
            bookmarks = self.bookmarks
        dat = self.serialize_bookmarks(bookmarks)
        if os.path.splitext(self.pathtoebook)[1].lower() == '.epub' and \
                os.access(self.pathtoebook, os.R_OK):
            try:
                zf = open(self.pathtoebook, 'r+b')
            except IOError:
                # Book file not writable; silently give up
                return
            safe_replace(zf, 'META-INF/calibre_bookmarks.txt',
                    BytesIO(dat.encode('utf-8')),
                    add_missing=True)
        else:
            self.config['bookmarks_'+self.pathtoebook] = dat

    def add_bookmark(self, bm):
        '''Add *bm*, replacing any existing bookmark with the same title,
        and persist the result.'''
        self.bookmarks = [x for x in self.bookmarks if x['title'] !=
                bm['title']]
        self.bookmarks.append(bm)
        self.save_bookmarks()

    def set_bookmarks(self, bookmarks):
        # Replace the in-memory bookmark list without persisting it
        self.bookmarks = bookmarks

View File

@ -0,0 +1,120 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
from future_builtins import map
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re, os
from functools import partial
from operator import attrgetter
from collections import namedtuple
from calibre import guess_type
from calibre.ebooks.chardet import xml_to_unicode
def character_count(html):
    '''Return the number of "significant" text characters in a HTML string.

    Each text run between tags (a ``>...<`` span) is counted with internal
    whitespace runs collapsed to single spaces; the enclosing ``>`` and
    ``<`` are excluded from the count.
    '''
    collapse_ws = re.compile(r'\s+')
    return sum(len(collapse_ws.sub(' ', run.group())) - 2
               for run in re.finditer(r'>[^<]+<', html))
def anchor_map(html):
    '''Return a map of all anchor names (id/name attribute values) to their
    offsets in *html*. For duplicate names, the first occurrence wins.'''
    ans = {}
    for match in re.finditer(
            r'''(?:id|name)\s*=\s*['"]([^'"]+)['"]''', html):
        # The anchor name is the captured group; group(0) would be the whole
        # attribute text (e.g. 'id="foo"') and could never match a real
        # anchor lookup.
        anchor = match.group(1)
        # Keep the offset of the first occurrence of this anchor
        ans[anchor] = ans.get(anchor, match.start())
    return ans
class SpineItem(unicode):

    '''A spine entry: a unicode subclass whose value is the path to the HTML
    file, with indexing/paging metadata attached as attributes.'''

    def __new__(cls, path, mime_type=None, read_anchor_map=True,
            run_char_count=True):
        # If path carries a #fragment and the bare file exists, drop the
        # fragment
        ppath = path.partition('#')[0]
        if not os.path.exists(path) and os.path.exists(ppath):
            path = ppath
        obj = super(SpineItem, cls).__new__(cls, path)
        with open(path, 'rb') as f:
            raw = f.read()
        raw, obj.encoding = xml_to_unicode(raw)
        # Both scans are optional as they are relatively expensive; 10000 is
        # a placeholder count used when accurate page estimates are not needed
        obj.character_count = character_count(raw) if run_char_count else 10000
        obj.anchor_map = anchor_map(raw) if read_anchor_map else {}
        # Page numbers are filled in later by the iterator
        obj.start_page = -1
        obj.pages = -1
        obj.max_page = -1
        obj.index_entries = []
        if mime_type is None:
            mime_type = guess_type(obj)[0]
        obj.mime_type = mime_type
        return obj
class IndexEntry(object):

    '''A flattened Table of Contents entry, locating where the entry starts
    (and, after find_end() is called, where it ends) within the spine.'''

    def __init__(self, spine, toc_entry, num):
        self.num = num
        self.text = toc_entry.text or _('Unknown')
        self.key = toc_entry.abspath
        self.anchor = self.start_anchor = toc_entry.fragment or None
        try:
            self.spine_pos = spine.index(self.key)
        except ValueError:
            # The ToC entry points at a file that is not in the spine
            self.spine_pos = -1
        self.anchor_pos = 0
        if self.spine_pos > -1:
            self.anchor_pos = spine[self.spine_pos].anchor_map.get(self.anchor,
                    0)
        # depth = number of ancestors of this entry in the ToC tree
        self.depth = 0
        p = toc_entry.parent
        while p is not None:
            self.depth += 1
            p = p.parent
        self.sort_key = (self.spine_pos, self.anchor_pos)
        self.spine_count = len(spine)

    def find_end(self, all_entries):
        '''Determine where this entry's content ends: at the first entry of
        equal or lesser depth that starts after this one, or at the end of
        the spine if there is none. *all_entries* must be sorted by
        sort_key.'''
        potential_enders = [i for i in all_entries if
                i.depth <= self.depth and
                (
                    (i.spine_pos == self.spine_pos and i.anchor_pos >
                        self.anchor_pos)
                    or
                    i.spine_pos > self.spine_pos
                )]
        if potential_enders:
            # potential_enders is sorted by (spine_pos, anchor_pos)
            end = potential_enders[0]
            self.end_spine_pos = end.spine_pos
            self.end_anchor = end.anchor
        else:
            # Nothing follows: this entry runs to the end of the book
            self.end_spine_pos = self.spine_count - 1
            self.end_anchor = None
def create_indexing_data(spine, toc):
    '''Flatten *toc* into IndexEntry objects and attach to every SpineItem in
    *spine* the entries (with their start/end anchors) that touch it, as
    (entry, start_anchor, end_anchor) namedtuples in its index_entries.
    Does nothing if *toc* is empty.'''
    if not toc: return
    f = partial(IndexEntry, spine)
    # Number entries 0-based, excluding the root node itself (flat() yields
    # the root first, hence the i-1)
    index_entries = list(map(f,
        (t for t in toc.flat() if t is not toc),
        (i-1 for i, t in enumerate(toc.flat()) if t is not toc)
        ))
    index_entries.sort(key=attrgetter('sort_key'))
    # find_end mutates each entry in place; use a plain loop rather than a
    # throwaway list comprehension
    for entry in index_entries:
        entry.find_end(index_entries)

    ie = namedtuple('IndexEntry', 'entry start_anchor end_anchor')

    for spine_pos, spine_item in enumerate(spine):
        for i in index_entries:
            if i.end_spine_pos < spine_pos or i.spine_pos > spine_pos:
                continue # Does not touch this file
            start = i.anchor if i.spine_pos == spine_pos else None
            end = i.end_anchor if i.spine_pos == spine_pos else None
            spine_item.index_entries.append(ie(i, start, end))

View File

@ -361,9 +361,11 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
# Remove any encoding-specifying <meta/> elements
for meta in META_XP(data):
meta.getparent().remove(meta)
etree.SubElement(head, XHTML('meta'),
attrib={'http-equiv': 'Content-Type',
'content': '%s; charset=utf-8' % XHTML_NS})
meta = etree.SubElement(head, XHTML('meta'),
attrib={'http-equiv': 'Content-Type'})
meta.set('content', 'text/html; charset=utf-8') # Ensure content is second
# attribute
# Ensure has a <body/>
if not xpath(data, '/h:html/h:body'):
body = xpath(data, '//h:body')

View File

@ -347,6 +347,10 @@ class Stylizer(object):
style = self.flatten_style(rule.style)
self.page_rule.update(style)
elif isinstance(rule, CSSFontFaceRule):
if rule.style.length > 1:
# Ignore the meaningless font face rules generated by the
# benighted MS Word that contain only a font-family declaration
# and nothing else
self.font_face_rules.append(rule)
return results

View File

@ -137,8 +137,9 @@ def _config(): # {{{
c.add_opt('LRF_ebook_viewer_options', default=None,
help=_('Options for the LRF ebook viewer'))
c.add_opt('internally_viewed_formats', default=['LRF', 'EPUB', 'LIT',
'MOBI', 'PRC', 'AZW', 'HTML', 'FB2', 'PDB', 'RB', 'SNB', 'HTMLZ'],
help=_('Formats that are viewed using the internal viewer'))
'MOBI', 'PRC', 'POBI', 'AZW', 'AZW3', 'HTML', 'FB2', 'PDB', 'RB',
'SNB', 'HTMLZ'], help=_(
'Formats that are viewed using the internal viewer'))
c.add_opt('column_map', default=ALL_COLUMNS,
help=_('Columns to be displayed in the book list'))
c.add_opt('autolaunch_server', default=False, help=_('Automatically launch content server on application startup'))

View File

@ -357,7 +357,9 @@ class MetadataSingleDialogBase(ResizableDialog):
old_tags = self.tags.current_val
tags = mi.tags if mi.tags else []
if old_tags and merge_tags:
tags += old_tags
ltags, lotags = {t.lower() for t in tags}, {t.lower() for t in
old_tags}
tags = [t for t in tags if t.lower() in ltags-lotags] + old_tags
self.tags.current_val = tags
if not mi.is_null('identifiers'):
current = self.identifiers.current_val
@ -463,7 +465,12 @@ class MetadataSingleDialogBase(ResizableDialog):
ResizableDialog.reject(self)
def save_state(self):
try:
gprefs['metasingle_window_geometry3'] = bytearray(self.saveGeometry())
except:
# Weird failure, see https://bugs.launchpad.net/bugs/995271
import traceback
traceback.print_exc()
# Dialog use methods {{{
def start(self, row_list, current_row, view_slot=None,

View File

@ -955,7 +955,7 @@ class FullFetch(QDialog): # {{{
# QWebView. Seems to only happen on windows, but keep it for all
# platforms just in case.
self.identify_widget.comments_view.setMaximumHeight(500)
self.resize(850, 550)
self.resize(850, 600)
self.finished.connect(self.cleanup)
@ -1034,7 +1034,7 @@ class CoverFetch(QDialog): # {{{
self.covers_widget.chosen.connect(self.accept)
l.addWidget(self.covers_widget)
self.resize(850, 550)
self.resize(850, 600)
self.finished.connect(self.cleanup)

View File

@ -11,6 +11,7 @@ import os, zipfile
import calibre
from calibre.utils.localization import lang_as_iso639_1
from calibre.utils.resources import compiled_coffeescript
class JavaScriptLoader(object):
@ -27,7 +28,7 @@ class JavaScriptLoader(object):
}.iteritems()}
CS = {
'cfi':('ebooks/oeb/display/cfi.coffee', 'display/cfi.js'),
'cfi':'ebooks.oeb.display.cfi',
}
ORDER = ('jquery', 'jquery_scrollTo', 'bookmarks', 'referencing', 'images',
@ -59,21 +60,9 @@ class JavaScriptLoader(object):
ans = P(src, data=True,
allow_user_override=False).decode('utf-8')
else:
f = getattr(calibre, '__file__', None)
if self._dynamic_coffeescript and f and os.path.exists(f):
src = src[0]
src = os.path.join(os.path.dirname(f), *(src.split('/')))
from calibre.utils.serve_coffee import compile_coffeescript
with open(src, 'rb') as f:
cs, errors = compile_coffeescript(f.read(), src)
if errors:
for line in errors:
print (line)
raise Exception('Failed to compile coffeescript'
': %s'%src)
ans = cs
else:
ans = P(src[1], data=True, allow_user_override=False)
dynamic = (self._dynamic_coffeescript and
os.path.exists(calibre.__file__))
ans = compiled_coffeescript(src, dynamic=dynamic).decode('utf-8')
self._cache[name] = ans
return ans

View File

@ -1,24 +1,24 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import traceback, os, sys, functools, collections, re
import traceback, os, sys, functools, collections, textwrap
from functools import partial
from threading import Thread
from PyQt4.Qt import (QApplication, Qt, QIcon, QTimer, SIGNAL, QByteArray,
QSize, QDoubleSpinBox, QLabel, QTextBrowser, QPropertyAnimation,
QPainter, QBrush, QColor, QStandardItemModel, QPalette, QStandardItem,
QUrl, QRegExpValidator, QRegExp, QLineEdit, QToolButton, QMenu,
QInputDialog, QAction, QKeySequence)
from PyQt4.Qt import (QApplication, Qt, QIcon, QTimer, QByteArray, QSize,
QDoubleSpinBox, QLabel, QTextBrowser, QPropertyAnimation, QPainter,
QBrush, QColor, pyqtSignal, QUrl, QRegExpValidator, QRegExp, QLineEdit,
QToolButton, QMenu, QInputDialog, QAction, QKeySequence, QModelIndex)
from calibre.gui2.viewer.main_ui import Ui_EbookViewer
from calibre.gui2.viewer.printing import Printing
from calibre.gui2.viewer.bookmarkmanager import BookmarkManager
from calibre.gui2.viewer.toc import TOC
from calibre.gui2.widgets import ProgressIndicator
from calibre.gui2.main_window import MainWindow
from calibre.gui2 import Application, ORG_NAME, APP_UID, choose_files, \
info_dialog, error_dialog, open_url, available_height
from calibre.ebooks.oeb.iterator import EbookIterator
from calibre.gui2 import (Application, ORG_NAME, APP_UID, choose_files,
info_dialog, error_dialog, open_url, available_height)
from calibre.ebooks.oeb.iterator.book import EbookIterator
from calibre.ebooks import DRMError
from calibre.constants import islinux, isbsd, isosx, filesystem_encoding
from calibre.utils.config import Config, StringConfig, JSONConfig
@ -31,31 +31,6 @@ from calibre.ptempfile import reset_base_dir
vprefs = JSONConfig('viewer')
class TOCItem(QStandardItem):
def __init__(self, toc):
text = toc.text
if text:
text = re.sub(r'\s', ' ', text)
QStandardItem.__init__(self, text if text else '')
self.abspath = toc.abspath
self.fragment = toc.fragment
for t in toc:
self.appendRow(TOCItem(t))
self.setFlags(Qt.ItemIsEnabled|Qt.ItemIsSelectable)
@classmethod
def type(cls):
return QStandardItem.UserType+10
class TOC(QStandardItemModel):
def __init__(self, toc):
QStandardItemModel.__init__(self)
for t in toc:
self.appendRow(TOCItem(t))
self.setHorizontalHeaderItem(0, QStandardItem(_('Table of Contents')))
class Worker(Thread):
def run(self):
@ -142,31 +117,22 @@ class DoubleSpinBox(QDoubleSpinBox):
' [{0:.0%}]'.format(float(val)/self.maximum()))
self.blockSignals(False)
class HelpfulLineEdit(QLineEdit):
class Reference(QLineEdit):
HELP_TEXT = _('Go to...')
goto = pyqtSignal(object)
def __init__(self, *args):
QLineEdit.__init__(self, *args)
self.default_palette = QApplication.palette(self)
self.gray = QPalette(self.default_palette)
self.gray.setBrush(QPalette.Text, QBrush(QColor('gray')))
self.connect(self, SIGNAL('editingFinished()'),
lambda : self.emit(SIGNAL('goto(PyQt_PyObject)'), unicode(self.text())))
self.clear_to_help_mode()
self.setValidator(QRegExpValidator(QRegExp(r'\d+\.\d+'), self))
self.setToolTip(textwrap.fill('<p>'+_(
'Go to a reference. To get reference numbers, use the <i>reference '
'mode</i>, by clicking the reference mode button in the toolbar.')))
if hasattr(self, 'setPlaceholderText'):
self.setPlaceholderText(_('Go to...'))
self.editingFinished.connect(self.editing_finished)
def focusInEvent(self, ev):
self.setPalette(QApplication.palette(self))
if self.in_help_mode():
self.setText('')
return QLineEdit.focusInEvent(self, ev)
def in_help_mode(self):
return unicode(self.text()) == self.HELP_TEXT
def clear_to_help_mode(self):
self.setPalette(self.gray)
self.setText(self.HELP_TEXT)
def editing_finished(self):
self.goto.emit(unicode(self.text()))
class RecentAction(QAction):
@ -207,9 +173,7 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
self.pos.setMinimum(1.)
self.pos.setMinimumWidth(150)
self.tool_bar2.insertWidget(self.action_find_next, self.pos)
self.reference = HelpfulLineEdit()
self.reference.setValidator(QRegExpValidator(QRegExp(r'\d+\.\d+'), self.reference))
self.reference.setToolTip(_('Go to a reference. To get reference numbers, use the reference mode.'))
self.reference = Reference()
self.tool_bar2.insertSeparator(self.action_find_next)
self.tool_bar2.insertWidget(self.action_find_next, self.reference)
self.tool_bar2.insertSeparator(self.action_find_next)
@ -233,8 +197,7 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
if isosx:
qs += [Qt.CTRL+Qt.Key_W]
self.action_quit.setShortcuts(qs)
self.connect(self.action_quit, SIGNAL('triggered(bool)'),
lambda x:QApplication.instance().quit())
self.action_quit.triggered.connect(self.quit)
self.action_focus_search = QAction(self)
self.addAction(self.action_focus_search)
self.action_focus_search.setShortcuts([Qt.Key_Slash,
@ -247,42 +210,34 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
self.action_table_of_contents.setCheckable(True)
self.toc.setMinimumWidth(80)
self.action_reference_mode.setCheckable(True)
self.connect(self.action_reference_mode, SIGNAL('triggered(bool)'),
lambda x: self.view.reference_mode(x))
self.connect(self.action_metadata, SIGNAL('triggered(bool)'), lambda x:self.metadata.setVisible(x))
self.action_reference_mode.triggered[bool].connect(self.view.reference_mode)
self.action_metadata.triggered[bool].connect(self.metadata.setVisible)
self.action_table_of_contents.toggled[bool].connect(self.set_toc_visible)
self.connect(self.action_copy, SIGNAL('triggered(bool)'), self.copy)
self.action_copy.triggered[bool].connect(self.copy)
self.action_font_size_larger.triggered.connect(self.font_size_larger)
self.action_font_size_smaller.triggered.connect(self.font_size_smaller)
self.connect(self.action_open_ebook, SIGNAL('triggered(bool)'),
self.open_ebook)
self.connect(self.action_next_page, SIGNAL('triggered(bool)'),
lambda x:self.view.next_page())
self.connect(self.action_previous_page, SIGNAL('triggered(bool)'),
lambda x:self.view.previous_page())
self.connect(self.action_find_next, SIGNAL('triggered(bool)'),
lambda x:self.find(unicode(self.search.text()), repeat=True))
self.connect(self.action_find_previous, SIGNAL('triggered(bool)'),
lambda x:self.find(unicode(self.search.text()),
repeat=True, backwards=True))
self.connect(self.action_full_screen, SIGNAL('triggered(bool)'),
self.toggle_fullscreen)
self.action_open_ebook.triggered[bool].connect(self.open_ebook)
self.action_next_page.triggered.connect(self.view.next_page)
self.action_previous_page.triggered.connect(self.view.previous_page)
self.action_find_next.triggered.connect(self.find_next)
self.action_find_previous.triggered.connect(self.find_previous)
self.action_full_screen.triggered[bool].connect(self.toggle_fullscreen)
self.action_full_screen.setShortcuts([Qt.Key_F11, Qt.CTRL+Qt.SHIFT+Qt.Key_F])
self.action_full_screen.setToolTip(_('Toggle full screen (%s)') %
_(' or ').join([unicode(x.toString(x.NativeText)) for x in
self.action_full_screen.shortcuts()]))
self.connect(self.action_back, SIGNAL('triggered(bool)'), self.back)
self.connect(self.action_bookmark, SIGNAL('triggered(bool)'), self.bookmark)
self.connect(self.action_forward, SIGNAL('triggered(bool)'), self.forward)
self.connect(self.action_preferences, SIGNAL('triggered(bool)'), lambda x: self.view.config(self))
self.action_back.triggered[bool].connect(self.back)
self.action_forward.triggered[bool].connect(self.forward)
self.action_bookmark.triggered[bool].connect(self.bookmark)
self.action_preferences.triggered.connect(lambda :
self.view.config(self))
self.pos.editingFinished.connect(self.goto_page_num)
self.connect(self.vertical_scrollbar, SIGNAL('valueChanged(int)'),
lambda x: self.goto_page(x/100.))
self.vertical_scrollbar.valueChanged[int].connect(lambda
x:self.goto_page(x/100.))
self.search.search.connect(self.find)
self.search.focus_to_library.connect(lambda: self.view.setFocus(Qt.OtherFocusReason))
self.connect(self.toc, SIGNAL('clicked(QModelIndex)'), self.toc_clicked)
self.connect(self.reference, SIGNAL('goto(PyQt_PyObject)'), self.goto)
self.toc.clicked[QModelIndex].connect(self.toc_clicked)
self.reference.goto.connect(self.goto)
self.bookmarks_menu = QMenu()
self.action_bookmark.setMenu(self.bookmarks_menu)
@ -335,8 +290,8 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
self.print_menu.addAction(QIcon(I('print-preview.png')), _('Print Preview'))
self.action_print.setMenu(self.print_menu)
self.tool_bar.widgetForAction(self.action_print).setPopupMode(QToolButton.MenuButtonPopup)
self.connect(self.action_print, SIGNAL("triggered(bool)"), partial(self.print_book, preview=False))
self.connect(self.print_menu.actions()[0], SIGNAL("triggered(bool)"), partial(self.print_book, preview=True))
self.action_print.triggered.connect(self.print_book)
self.print_menu.actions()[0].triggered.connect(self.print_preview)
ca = self.view.copy_action
ca.setShortcut(QKeySequence.Copy)
self.addAction(ca)
@ -381,13 +336,22 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
m.addAction(RecentAction(path, m))
count += 1
def closeEvent(self, e):
def shutdown(self):
if self.isFullScreen():
self.action_full_screen.trigger()
e.ignore()
return
return False
self.save_state()
return True
def quit(self):
if self.shutdown():
QApplication.instance().quit()
def closeEvent(self, e):
if self.shutdown():
return MainWindow.closeEvent(self, e)
else:
e.ignore()
def toggle_toolbars(self):
for x in ('tool_bar', 'tool_bar2'):
@ -440,8 +404,11 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
c = config().parse()
return c.remember_current_page
def print_book(self, preview):
Printing(self.iterator.spine, preview)
def print_book(self):
Printing(self.iterator.spine, False)
def print_preview(self):
Printing(self.iterator.spine, True)
def toggle_fullscreen(self, x):
if self.isFullScreen():
@ -629,6 +596,12 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
self.pending_search_dir = 'backwards' if backwards else 'forwards'
self.load_path(self.iterator.spine[index])
def find_next(self):
self.find(unicode(self.search.text()), repeat=True)
def find_previous(self):
self.find(unicode(self.search.text()), repeat=True, backwards=True)
def do_search(self, text, backwards):
self.pending_search = None
self.pending_search_dir = None
@ -829,11 +802,13 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
if not title:
title = os.path.splitext(os.path.basename(pathtoebook))[0]
if self.iterator.toc:
self.toc_model = TOC(self.iterator.toc)
self.toc_model = TOC(self.iterator.spine, self.iterator.toc)
self.toc.setModel(self.toc_model)
if self.show_toc_on_open:
self.action_table_of_contents.setChecked(True)
else:
self.toc_model = TOC(self.iterator.spine)
self.toc.setModel(self.toc_model)
self.action_table_of_contents.setChecked(False)
if isbytestring(pathtoebook):
pathtoebook = force_unicode(pathtoebook, filesystem_encoding)

View File

@ -0,0 +1,42 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re
from PyQt4.Qt import QStandardItem, QStandardItemModel, Qt
from calibre.ebooks.metadata.toc import TOC as MTOC
class TOCItem(QStandardItem):

    """A single entry in the Table of Contents tree, wrapping one node of a
    calibre TOC object. Child TOC nodes become child rows recursively."""

    def __init__(self, toc):
        # Collapse any run of whitespace in the label to single spaces so
        # multi-line titles display on one line in the tree view.
        label = toc.text
        if label:
            label = re.sub(r'\s', ' ', label)
        QStandardItem.__init__(self, label or '')
        # Location of the target document and the anchor within it
        self.abspath = toc.abspath
        self.fragment = toc.fragment
        # Recursively attach child entries
        for child in toc:
            self.appendRow(TOCItem(child))
        self.setFlags(Qt.ItemIsEnabled|Qt.ItemIsSelectable)

    @classmethod
    def type(cls):
        # Distinct item type id so these items can be told apart from
        # plain QStandardItems.
        return QStandardItem.UserType+10
class TOC(QStandardItemModel):

    """Qt item model exposing a book's Table of Contents to the viewer.

    ``spine`` is accepted for interface compatibility with callers;
    ``toc`` may be None, in which case an empty TOC is shown.
    """

    def __init__(self, spine, toc=None):
        QStandardItemModel.__init__(self)
        # Treat a missing TOC as an empty one so the model is always valid
        toc = MTOC() if toc is None else toc
        for entry in toc:
            self.appendRow(TOCItem(entry))
        self.setHorizontalHeaderItem(0, QStandardItem(_('Table of Contents')))

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More