Merge branch 'kovidgoyal/master'

This commit is contained in:
Charles Haley 2013-07-04 08:22:08 +02:00
commit cfa952b1c1
56 changed files with 40776 additions and 480 deletions

View File

@ -20,6 +20,50 @@
# new recipes:
# - title:
- version: 0.9.37
date: 2013-06-28
new features:
- title: "Conversion: Add option to embed all referenced fonts"
type: major
description: "Add an option to embed all fonts that are referenced in the input document but are not already embedded. This will search your system for the referenced font, and if found, the font will be embedded. Only works if the output format supports font embedding (for example: EPUB or AZW3). The option is under the Look & Feel section of the conversion dialog."
- title: "ToC Editor: When generating a ToC from files, if the file has no text, do not skip it. Instead create an entry using the filename of the file."
- title: "AZW3 Input: Add support for the page-progression-direction that is used to indicate page turns should happen from right to left. The attribute is passed into EPUB when converting."
tickets: [1194766]
- title: "ebook-convert: Add a --from-opf option to read metadata from OPF files directly, instead of having to run ebook-meta --from-opf after conversion"
bug fixes:
- title: "PDF Output: Fix Table of Contents being added to the end of the PDF even without the Add Table of Contents option being enabled."
tickets: [1194836]
- title: "When auto-merging books on add, also merge identifiers."
- title: "Fix an error when using the Template Editor to create a template that uses custom columns."
tickets: [1193763]
- title: "LRF Output: Fix &quot; entities in attribute values causing problems"
- title: "News download: Apply the default page margin conversion settings. Also, when converting to PDF, apply the pdf conversion defaults."
tickets: [1193912]
- title: "Fix a regression that broke scanning for books on all devices that used the Aluratek Color driver."
tickets: [1192940]
- title: "fetch-ebook-metadata: Fix --opf argument erroneously requiring a value"
- title: "When waiting before sending email, log the wait."
tickets: [1195173]
improved recipes:
- taz.de (RSS)
- Miradas al sur
- Frontline
- La Nacion (Costa Rica)
- version: 0.9.36
date: 2013-06-21

View File

@ -0,0 +1,45 @@
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe
class DemocracyNowRecipe(BasicNewsRecipe):
    """News recipe for the Democracy Now! daily RSS feed.

    The class is purely declarative: it only sets class attributes that
    the BasicNewsRecipe base class reads. No methods are overridden.
    """

    # --- Publication metadata -------------------------------------------
    title = u'Democracy now!'
    __author__ = u'Antoine Beaupré'
    description = 'A daily TV/radio news program, hosted by Amy Goodman and Juan Gonzalez, airing on over 1,100 stations, pioneering the largest community media collaboration in the United States.'  # noqa
    language = 'en'
    publication_type = 'magazine'
    cover_url = 'http://www.democracynow.org/images/dn-logo-for-podcast.png'

    # --- Download limits ------------------------------------------------
    oldest_article = 1
    max_articles_per_feed = 10

    # --- Content handling switches --------------------------------------
    auto_cleanup = False
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True

    # --- Feed list: (section title, feed URL) ---------------------------
    feeds = [
        (u'Daily news', u'http://www.democracynow.org/democracynow.rss'),
    ]

    # --- Page filtering -------------------------------------------------
    # Only the element with id "page" is kept ...
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': 'page'}},
    ]

    # ... and these elements are then removed (navigation, sharing and
    # donation widgets, judging by their ids/classes).
    remove_tags = [
        # matched by id
        {'name': 'div', 'attrs': {'id': 'topics_list'}},
        {'name': 'div', 'attrs': {'id': 'header'}},
        {'name': 'div', 'attrs': {'id': 'footer'}},
        {'name': 'div', 'attrs': {'id': 'right'}},
        {'name': 'div', 'attrs': {'id': 'left-panel'}},
        {'name': 'div', 'attrs': {'id': 'top-video-content'}},
        {'name': 'div', 'attrs': {'id': 'google-news-date'}},
        {'name': 'div', 'attrs': {'id': 'story-donate'}},
        {'name': 'div', 'attrs': {'id': 'transcript-expand-collapse'}},
        # matched by class
        {'name': 'span', 'attrs': {'class': 'show-links'}},
        {'name': 'span', 'attrs': {'class': 'storyNav'}},
        {'name': 'div', 'attrs': {'class': 'headline_share'}},
        {'name': 'div', 'attrs': {'class': 'mediaBar'}},
        {'name': 'div', 'attrs': {'class': 'shareAndPrinterBar'}},
        {'name': 'div', 'attrs': {'class': 'utility-navigation'}},
        {'name': 'div', 'attrs': {'class': 'bottomContentNav'}},
        {'name': 'div', 'attrs': {'class': 'recentShows'}},
        {'name': 'div', 'attrs': {'class': 'printer-and-transcript-links'}},
    ]

View File

@ -47,13 +47,7 @@ class GN(BasicNewsRecipe):
return feeds
def find_articles(self, main_block):
for a in main_block.findAll('div', attrs={'class':'prev_doc2'}):
art = a.find('a')
yield {
'title' : self.tag_to_string(art),
'url' : 'http://www.gosc.pl' + art['href']
}
for a in main_block.findAll('div', attrs={'class':'sr-document'}):
for a in main_block.findAll('div', attrs={'class':['prev_doc2', 'sr-document']}):
art = a.find('a')
yield {
'title' : self.tag_to_string(art),

View File

@ -1,5 +1,4 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2011-2013, Darko Miletic <darko.miletic at gmail.com>'
'''
www.iprofesional.com
'''
@ -19,13 +18,15 @@ class iProfesional(BasicNewsRecipe):
use_embedded_content = False
language = 'es_AR'
remove_empty_feeds = True
publication_type = 'nesportal'
masthead_url = 'http://www.iprofesional.com/img/logo-iprofesional.png'
publication_type = 'newsportal'
masthead_url = 'http://www.iprofesional.com/img/header/logoiprofesional.png'
extra_css = """
body{font-family: Arial,Helvetica,sans-serif }
body{font-family: 'Droid Sans',Arial,sans-serif }
img{margin-bottom: 0.4em; display:block}
.titulo-interior{font-family: Georgia,"Times New Roman",Times,serif}
.autor-nota{font-size: small; font-weight: bold; font-style: italic; color: gray}
.titulo{font-family: WhitneyBoldWhitneyBold,Arial,Helvetica,sans-serif; color: blue}
.fecha-archivo{font-weight: bold; color: rgb(205, 150, 24)}
.description{font-weight: bold; color: gray }
.firma{font-size: small}
"""
conversion_options = {
@ -35,14 +36,8 @@ class iProfesional(BasicNewsRecipe):
, 'language' : language
}
keep_only_tags = [dict(attrs={'class':['fecha','interior-nota']})]
remove_tags = [
dict(name=['meta','link','base','embed','object','iframe'])
,dict(attrs={'class':['menu-imprimir','guardarNota','IN-widget','fin','permalink']})
]
remove_attributes=['lang','xmlns:og','xmlns:fb']
keep_only_tags = [dict(attrs={'class':'desarrollo'})]
remove_tags = [dict(name=['meta','link','base','embed','object','iframe'])]
feeds = [
(u'Ultimas noticias' , u'http://feeds.feedburner.com/iprofesional-principales-noticias')
@ -74,6 +69,7 @@ class iProfesional(BasicNewsRecipe):
str = self.tag_to_string(item)
item.replaceWith(str)
for item in soup.findAll('img'):
if not item.has_key('alt'):
if 'alt' not in item:
item['alt'] = 'image'
return soup

View File

@ -4,9 +4,7 @@ sur.infonews.com
'''
import datetime
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
class MiradasAlSur(BasicNewsRecipe):
title = 'Miradas al Sur'
@ -61,7 +59,7 @@ class MiradasAlSur(BasicNewsRecipe):
todayweekday = cdate.isoweekday()
if (todayweekday != 7):
cdate -= datetime.timedelta(days=todayweekday)
cover_page_url = cdate.strftime('http://sur.infonews.com/ediciones/%Y-%m-%d/tapa');
cover_page_url = cdate.strftime('http://sur.infonews.com/ediciones/%Y-%m-%d/tapa')
soup = self.index_to_soup(cover_page_url)
cover_item = soup.find('img', attrs={'class':lambda x: x and 'imagecache-tapa_edicion_full' in x.split()})
if cover_item:

View File

@ -1,23 +1,43 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Alexander Schremmer <alex@alexanderweb.de>'
__copyright__ = '2013, Alexander Schremmer <alex@alexanderweb.de>, Robert Riemann <robert@riemann.cc>'
import re
from calibre.web.feeds.news import BasicNewsRecipe
class TazRSSRecipe(BasicNewsRecipe):
title = u'Taz.de (die tageszeitung) RSS Feed - German'
__author__ = 'Alexander Schremmer'
title = u'Taz - die Tageszeitung'
description = u'Taz.de - die tageszeitung'
__author__ = 'Alexander Schremmer, Robert Riemann'
language = 'de'
lang = 'de-DE'
oldest_article = 7
max_articles_per_feed = 100
publisher = 'taz Entwicklungs GmbH & Co. Medien KG'
# masthead_url = u'http://galeria-autonomica.de/wp-content/uploads/a_taz-logo.gif'
masthead_url = u'http://upload.wikimedia.org/wikipedia/de/thumb/1/15/Die-Tageszeitung-Logo.svg/500px-Die-Tageszeitung-Logo.svg.png'
conversion_options = {'publisher': publisher,
'language': lang,
}
feeds = [(u'TAZ main feed', u'http://www.taz.de/rss.xml')]
feeds = [
(u'Schlagzeilen', u'http://www.taz.de/!p3270;rss/'),
(u'Politik', u'http://www.taz.de/Politik/!p2;rss/'),
(u'Zukunft', u'http://www.taz.de/Zukunft/!p4;rss/'),
(u'Netz', u'http://www.taz.de/Netz/!p5;rss/'),
(u'Debatte', u'http://www.taz.de/Debatte/!p9;rss/'),
(u'Leben', u'http://www.taz.de/Leben/!p10;rss/'),
(u'Sport', u'http://www.taz.de/Sport/!p12;rss/'),
(u'Wahrheit', u'http://www.taz.de/Wahrheit/!p13;rss/'),
(u'Berlin', u'http://www.taz.de/Berlin/!p14;rss/'),
(u'Nord', u'http://www.taz.de/Nord/!p11;rss/')
]
keep_only_tags = [dict(name='div', attrs={'class': 'sect sect_article'})]
remove_tags = [
dict(name=['div'], attrs={'class': 'artikelwerbung'}),
dict(name=['ul'], attrs={'class': 'toolbar'}),]
dict(name=['ul'], attrs={'class': 'toolbar'}),
# remove: taz paywall
dict(name=['div'], attrs={'id': 'tzi_paywall'}),
# remove: Artikel zum Thema (not working on Kindle)
dict(name=['div'], attrs={'class': re.compile(r".*\bsect_seealso\b.*")}),
dict(name=['div'], attrs={'class': 'sectfoot'})
]

Binary file not shown.

View File

@ -0,0 +1,51 @@
Provisioning a file hosting server
====================================
Create the ssh authorized keys file.
Edit /etc/ssh/sshd_config and change PermitRootLogin to without-password.
Restart sshd.
hostname whatever
Edit /etc/hosts and put in FQDN in the appropriate places, for example::
127.0.1.1 download.calibre-ebook.com download
46.28.49.116 download.calibre-ebook.com download
dpkg-reconfigure tzdata
set timezone to Asia/Kolkata
service cron restart
apt-get install vim nginx zsh python-lxml python-mechanize iotop htop smartmontools mosh
chsh -s /bin/zsh
mkdir -p /root/staging /root/work/vim /srv/download /srv/manual
scp .zshrc .vimrc server:
scp -r ~/work/vim/zsh-syntax-highlighting server:work/vim
If the server has a backup hard-disk, mount it at /mnt/backup and edit /etc/fstab so that it is auto-mounted.
Then, add the following to crontab::
@daily /usr/bin/rsync -ha /srv /mnt/backup
@daily /usr/bin/rsync -ha /etc /mnt/backup
Nginx
------
Copy over /etc/nginx/sites-available/default from another file server. When
copying, remember to use cat instead of cp to preserve hardlinks (the file is a
hardlink to /etc/nginx/sites-enabled/default)
rsync /srv from another file server
service nginx start
Services
---------
SSH into sourceforge and downloadbestsoftware so that their host keys are
stored.
ssh -oStrictHostKeyChecking=no kovid@www.downloadbestsoft-mirror1.com
ssh -oStrictHostKeyChecking=no kovidgoyal,calibre@frs.sourceforge.net
ssh -oStrictHostKeyChecking=no files.calibre-ebook.com (and whatever other mirrors are present)

View File

@ -7,16 +7,14 @@ __license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, time, sys, traceback, subprocess, urllib2, re, base64, httplib
import os, time, sys, traceback, subprocess, urllib2, re, base64, httplib, shutil
from argparse import ArgumentParser, FileType
from subprocess import check_call
from tempfile import NamedTemporaryFile
from collections import OrderedDict
import mechanize
from lxml import html
def login_to_google(username, password): # {{{
import mechanize
br = mechanize.Browser()
br.addheaders = [('User-agent',
'Mozilla/5.0 (X11; Linux x86_64; rv:9.0) Gecko/20100101 Firefox/9.0')]
@ -246,6 +244,7 @@ class GoogleCode(Base): # {{{
return login_to_google(self.username, self.gmail_password)
def get_files_hosted_by_google_code(self):
from lxml import html
self.info('Getting existing files in google code:', self.gc_project)
raw = urllib2.urlopen(self.files_list).read()
root = html.fromstring(raw)
@ -380,11 +379,115 @@ class SourceForge(Base): # {{{
# }}}
# Build static HTML index pages for the release tree under /srv/download:
# a top-level index.html listing release series, one <series>.html per
# series listing its releases, and an index.html inside release directories
# listing the downloadable files found there.
# Takes no arguments and returns nothing. It changes the process working
# directory to /srv/download and does not restore the previous one.
def generate_index(): # {{{
os.chdir('/srv/download')
# Collect release directories: any directory whose name contains a '.' is
# parsed as a tuple of ints (int() raises ValueError on a non-numeric part).
releases = set()
for x in os.listdir('.'):
if os.path.isdir(x) and '.' in x:
releases.add(tuple((int(y) for y in x.split('.'))))
# Group releases by series, newest first. The series key is the first two
# components when the major number is 0 (e.g. (0, 9)), otherwise only the
# first component.
rmap = OrderedDict()
for rnum in sorted(releases, reverse=True):
series = rnum[:2] if rnum[0] == 0 else rnum[:1]
if series not in rmap:
rmap[series] = []
rmap[series].append(rnum)
# Page skeleton shared by every generated page; filled in with str.format().
template = '''<!DOCTYPE html>\n<html lang="en"> <head> <meta charset="utf-8"> <title>{title}</title> <style type="text/css"> {style} </style> </head> <body> <h1>{title}</h1> <p>{msg}</p> {body} </body> </html> ''' # noqa
style = '''
body { font-family: sans-serif; background-color: #eee; }
a { text-decoration: none; }
a:visited { color: blue }
a:hover { color: red }
ul { list-style-type: none }
li { padding-bottom: 1ex }
dd li { text-indent: 0; margin: 0 }
dd ul { padding: 0; margin: 0 }
dt { font-weight: bold }
dd { margin-bottom: 2ex }
'''
# Top-level page: one entry per series with its release count.
body = []
for series in rmap:
body.append('<li><a href="{0}.html" title="Releases in the {0}.x series">{0}.x</a>\xa0\xa0\xa0<span style="font-size:smaller">[{1} releases]</span></li>'.format( # noqa
'.'.join(map(type(''), series)), len(rmap[series])))
body = '<ul>{0}</ul>'.format(' '.join(body))
index = template.format(title='Previous calibre releases', style=style, msg='Choose a series of calibre releases', body=body)
with open('index.html', 'wb') as f:
f.write(index.encode('utf-8'))
# Per-series pages: <series>.html links to each release directory.
# Note that the loop variable rebinds the name `releases` used above.
for series, releases in rmap.iteritems():
sname = '.'.join(map(type(''), series))
body = [
'<li><a href="{0}/" title="Release {0}">{0}</a></li>'.format('.'.join(map(type(''), r)))
for r in releases]
body = '<ul class="release-list">{0}</ul>'.format(' '.join(body))
index = template.format(title='Previous calibre releases (%s.x)' % sname, style=style,
msg='Choose a calibre release', body=body)
with open('%s.html' % sname, 'wb') as f:
f.write(index.encode('utf-8'))
# Per-release pages: enter each release directory and write its index.html.
# NOTE(review): indentation is not visible in this view -- confirm that this
# loop is nested inside the per-series loop above. If it is not, `releases`
# is whatever the last iteration left bound, and only that series would get
# per-release pages.
for r in releases:
rname = '.'.join(map(type(''), r))
os.chdir(rname)
try:
body = []
files = os.listdir('.')
# Windows installers: every .msi file, labelled by whether '64bit' is in the name.
windows = [x for x in files if x.endswith('.msi')]
if windows:
windows = ['<li><a href="{0}" title="{1}">{1}</a></li>'.format(
x, 'Windows 64-bit Installer' if '64bit' in x else 'Windows 32-bit Installer')
for x in windows]
body.append('<dt>Windows</dt><dd><ul>{0}</ul></dd>'.format(' '.join(windows)))
# Portable build: only the first matching file is linked.
portable = [x for x in files if '-portable-' in x]
if portable:
body.append('<dt>Calibre Portable</dt><dd><a href="{0}" title="{1}">{1}</a></dd>'.format(
portable[0], 'Calibre Portable Installer'))
# OS X disk image: only the first .dmg is linked.
osx = [x for x in files if x.endswith('.dmg')]
if osx:
body.append('<dt>Apple Mac</dt><dd><a href="{0}" title="{1}">{1}</a></dd>'.format(
osx[0], 'OS X Disk Image (.dmg)'))
# Linux binaries: every .bz2 file, labelled by whether 'x86_64' is in the name.
linux = [x for x in files if x.endswith('.bz2')]
if linux:
linux = ['<li><a href="{0}" title="{1}">{1}</a></li>'.format(
x, 'Linux 64-bit binary' if 'x86_64' in x else 'Linux 32-bit binary')
for x in linux]
body.append('<dt>Linux</dt><dd><ul>{0}</ul></dd>'.format(' '.join(linux)))
# Source tarball: the first file ending in .xz or .gz is linked.
source = [x for x in files if x.endswith('.xz') or x.endswith('.gz')]
if source:
body.append('<dt>Source Code</dt><dd><a href="{0}" title="{1}">{1}</a></dd>'.format(
source[0], 'Source code (all platforms)'))
body = '<dl>{0}</dl>'.format(''.join(body))
index = template.format(title='calibre release (%s)' % rname, style=style,
msg='', body=body)
with open('index.html', 'wb') as f:
f.write(index.encode('utf-8'))
finally:
# Always step back out of the release directory, even if page generation failed.
os.chdir('..')
# }}}
def upload_to_servers(files, version): # {{{
for server, rdir in {'files':'/usr/share/nginx/html'}.iteritems():
base = '/srv/download/'
dest = os.path.join(base, version)
if not os.path.exists(dest):
os.mkdir(dest)
for src in files:
shutil.copyfile(src, os.path.join(dest, os.path.basename(src)))
cwd = os.getcwd()
try:
generate_index()
finally:
os.chdir(cwd)
for server, rdir in {'files':'/srv/download/'}.iteritems():
print('Uploading to server:', server)
server = '%s.calibre-ebook.com' % server
rdir = '%s/%s/' % (rdir, version)
# Copy the generated index files
print ('Copying generated index')
check_call(['rsync', '-hza', '-e', 'ssh -x', '--include', '*.html',
'--filter', '-! */', base, 'root@%s:%s' % (server, rdir)])
# Copy the release files
rdir = '%s%s/' % (rdir, version)
for x in files:
start = time.time()
print ('Uploading', x)
@ -400,6 +503,7 @@ def upload_to_servers(files, version): # {{{
else:
break
print ('Uploaded in', int(time.time() - start), 'seconds\n\n')
# }}}
def upload_to_dbs(files, version): # {{{
@ -530,3 +634,4 @@ if __name__ == '__main__':
# }}}

39178
setup/iso_639_3.xml Normal file

File diff suppressed because it is too large Load Diff

View File

@ -113,6 +113,6 @@ class TagRelease(Command):
def run(self, opts):
self.info('Tagging release')
subprocess.check_call('git tag -a {0} -m "version-{0}"'.format(__version__).split())
subprocess.check_call('git push origin {0}'.format(__version__).split())
subprocess.check_call('git tag -a v{0} -m "version-{0}"'.format(__version__).split())
subprocess.check_call('git push origin v{0}'.format(__version__).split())

View File

@ -21,6 +21,11 @@ def qt_sources():
class POT(Command): # {{{
description = 'Update the .pot translation template and upload it'
LP_BASE = os.path.join(os.path.dirname(Command.SRC))
if not os.path.exists(os.path.join(LP_BASE, 'setup', 'iso_639')):
# We are in a git checkout, translations are assumed to be in a
# directory called calibre-translations at the same level as the
# calibre directory.
LP_BASE = os.path.join(os.path.dirname(os.path.dirname(Command.SRC)), 'calibre-translations')
LP_SRC = os.path.join(LP_BASE, 'src')
LP_PATH = os.path.join(LP_SRC, os.path.join(__appname__, 'translations'))
@ -317,21 +322,24 @@ class GetTranslations(Translations): # {{{
class ISO639(Command): # {{{
description = 'Compile translations for ISO 639 codes'
description = 'Compile language code maps for performance'
DEST = os.path.join(os.path.dirname(POT.SRC), 'resources', 'localization',
'iso639.pickle')
def run(self, opts):
src = POT.LP_ISO_PATH
src = self.j(self.d(self.SRC), 'setup', 'iso_639_3.xml')
if not os.path.exists(src):
raise Exception(src + ' does not exist')
dest = self.DEST
base = self.d(dest)
if not os.path.exists(base):
os.makedirs(base)
if not self.newer(dest, [src, __file__]):
self.info('Pickled code is up to date')
return
self.info('Pickling ISO-639 codes to', dest)
from lxml import etree
root = etree.fromstring(open(self.j(src, 'iso_639_3.xml'), 'rb').read())
root = etree.fromstring(open(src, 'rb').read())
by_2 = {}
by_3b = {}
by_3t = {}
@ -345,7 +353,7 @@ class ISO639(Command): # {{{
threet = x.get('id')
threeb = x.get('part2_code', None)
if threeb is None:
# Only recognize langauges in ISO-639-2
# Only recognize languages in ISO-639-2
continue
name = x.get('name')

View File

@ -19,10 +19,9 @@ from setup import Command, __version__, installer_name, __appname__
PREFIX = "/var/www/calibre-ebook.com"
DOWNLOADS = PREFIX+"/htdocs/downloads"
BETAS = DOWNLOADS +'/betas'
USER_MANUAL = '/var/www/localhost/htdocs/'
HTML2LRF = "calibre/ebooks/lrf/html/demo"
TXT2LRF = "src/calibre/ebooks/lrf/txt/demo"
STAGING_HOST = '67.207.135.179'
STAGING_HOST = 'download.calibre-ebook.com'
STAGING_USER = 'root'
STAGING_DIR = '/root/staging'
@ -135,19 +134,26 @@ class UploadInstallers(Command): # {{{
available = set(glob.glob('dist/*'))
files = {x:installer_description(x) for x in
all_possible.intersection(available)}
sizes = {os.path.basename(x):os.path.getsize(x) for x in files}
self.record_sizes(sizes)
tdir = mkdtemp()
backup = os.path.join('/mnt/external/calibre/%s' % __version__)
if not os.path.exists(backup):
os.mkdir(backup)
try:
self.upload_to_staging(tdir, backup, files)
self.upload_to_sourceforge()
self.upload_to_calibre()
self.upload_to_sourceforge()
self.upload_to_dbs()
# self.upload_to_google(opts.replace)
finally:
shutil.rmtree(tdir, ignore_errors=True)
def record_sizes(self, sizes):
    '''
    Send the installer sizes to the ``dist_sizes`` command on host ``divok``.

    :param sizes: mapping of file name to size; each entry is passed as a
        ``version:filename:size`` argument over ssh.

    Raises whatever check_call raises if the ssh command fails.
    '''
    print ('\nRecording dist sizes')
    command = ['ssh', 'divok', 'dist_sizes']
    for fname, size in sizes.iteritems():
        command.append('%s:%s:%s' % (__version__, fname, size))
    check_call(command)
def upload_to_staging(self, tdir, backup, files):
os.mkdir(tdir+'/dist')
hosting = os.path.join(os.path.dirname(os.path.abspath(__file__)),
@ -155,9 +161,9 @@ class UploadInstallers(Command): # {{{
shutil.copyfile(hosting, os.path.join(tdir, 'hosting.py'))
for f in files:
for x in (tdir, backup):
dest = os.path.join(x, f)
shutil.copyfile(f, dest)
for x in (tdir+'/dist', backup):
dest = os.path.join(x, os.path.basename(f))
shutil.copy2(f, x)
os.chmod(dest, stat.S_IREAD|stat.S_IWRITE|stat.S_IRGRP|stat.S_IROTH)
with open(os.path.join(tdir, 'fmap'), 'wb') as fo:
@ -219,9 +225,9 @@ class UploadUserManual(Command): # {{{
for x in glob.glob(self.j(path, '*')):
self.build_plugin_example(x)
for host in ('download', 'files'):
check_call(' '.join(['rsync', '-z', '-r', '--progress',
'manual/.build/html/',
'bugs:%s'%USER_MANUAL]), shell=True)
'manual/.build/html/', '%s:/srv/manual/' % host]), shell=True)
# }}}
class UploadDemo(Command): # {{{
@ -249,8 +255,6 @@ class UploadToServer(Command): # {{{
description = 'Upload miscellaneous data to calibre server'
def run(self, opts):
check_call('ssh divok rm -f %s/calibre-\*.tar.xz'%DOWNLOADS, shell=True)
# check_call('scp dist/calibre-*.tar.xz divok:%s/'%DOWNLOADS, shell=True)
check_call('gpg --armor --detach-sign dist/calibre-*.tar.xz',
shell=True)
check_call('scp dist/calibre-*.tar.xz.asc divok:%s/signatures/'%DOWNLOADS,

View File

@ -310,9 +310,9 @@ def get_parsed_proxy(typ='http', debug=True):
proxy = proxies.get(typ, None)
if proxy:
pattern = re.compile((
'(?:ptype://)?' \
'(?:(?P<user>\w+):(?P<pass>.*)@)?' \
'(?P<host>[\w\-\.]+)' \
'(?:ptype://)?'
'(?:(?P<user>\w+):(?P<pass>.*)@)?'
'(?P<host>[\w\-\.]+)'
'(?::(?P<port>\d+))?').replace('ptype', typ)
)
@ -670,8 +670,8 @@ def human_readable(size, sep=' '):
""" Convert a size in bytes into a human readable form """
divisor, suffix = 1, "B"
for i, candidate in enumerate(('B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB')):
if size < 1024**(i+1):
divisor, suffix = 1024**(i), candidate
if size < (1 << ((i + 1) * 10)):
divisor, suffix = (1 << (i * 10)), candidate
break
size = str(float(size)/divisor)
if size.find(".") > -1:

View File

@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = u'calibre'
numeric_version = (0, 9, 36)
numeric_version = (0, 9, 37)
__version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"

View File

@ -77,7 +77,7 @@ class OptionRecommendation(object):
self.option.choices:
raise ValueError('OpRec: %s: Recommended value not in choices'%
self.option.name)
if not (isinstance(self.recommended_value, (int, float, str, unicode))\
if not (isinstance(self.recommended_value, (int, float, str, unicode))
or self.recommended_value is None):
raise ValueError('OpRec: %s:'%self.option.name +
repr(self.recommended_value) +
@ -139,8 +139,10 @@ class InputFormatPlugin(Plugin):
file_types = set([])
#: If True, this input plugin generates a collection of images,
#: one per HTML file. You can obtain access to the images via
#: convenience method, :meth:`get_image_collection`.
#: one per HTML file. This can be set dynamically, in the convert method
#: if the input files can be both image collections and non-image collections.
#: If you set this to True, you must implement the get_images() method that returns
#: a list of images.
is_image_collection = False
#: Number of CPU cores used by this plugin
@ -238,7 +240,6 @@ class InputFormatPlugin(Plugin):
ret = self.convert(stream, options, file_ext,
log, accelerators)
return ret
def postprocess_book(self, oeb, opts, log):
@ -313,7 +314,6 @@ class OutputFormatPlugin(Plugin):
Plugin.__init__(self, *args)
self.report_progress = DummyReporter()
def convert(self, oeb_book, output, input_plugin, opts, log):
'''
Render the contents of `oeb_book` (which is an instance of
@ -363,3 +363,4 @@ class OutputFormatPlugin(Plugin):

View File

@ -96,7 +96,7 @@ class ANDROID(USBMS):
# Google
0x18d1 : {
0x0001 : [0x0223, 0x230, 0x9999],
0x0001 : [0x0222, 0x0223, 0x230, 0x9999],
0x0002 : [0x9999],
0x0003 : [0x0230, 0x9999],
0x4e11 : [0x0100, 0x226, 0x227],
@ -219,7 +219,7 @@ class ANDROID(USBMS):
'POCKET', 'ONDA_MID', 'ZENITHIN', 'INGENIC', 'PMID701C', 'PD',
'PMP5097C', 'MASS', 'NOVO7', 'ZEKI', 'COBY', 'SXZ', 'USB_2.0',
'COBY_MID', 'VS', 'AINOL', 'TOPWISE', 'PAD703', 'NEXT8D12',
'MEDIATEK', 'KEENHI', 'TECLAST', 'SURFTAB', 'XENTA',]
'MEDIATEK', 'KEENHI', 'TECLAST', 'SURFTAB', 'XENTA', 'OBREEY_S']
WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'A953', 'INC.NEXUS_ONE',
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID',
@ -241,7 +241,7 @@ class ANDROID(USBMS):
'S5830I_CARD', 'MID7042', 'LINK-CREATE', '7035', 'VIEWPAD_7E',
'NOVO7', 'MB526', '_USB#WYK7MSF8KE', 'TABLET_PC', 'F', 'MT65XX_MS',
'ICS', 'E400', '__FILE-STOR_GADG', 'ST80208-1', 'GT-S5660M_CARD', 'XT894', '_USB',
'PROD_TAB13-201',
'PROD_TAB13-201', 'URFPAD2',
]
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
@ -254,7 +254,7 @@ class ANDROID(USBMS):
'UMS_COMPOSITE', 'PRO', '.KOBO_VOX', 'SGH-T989_CARD', 'SGH-I727',
'USB_FLASH_DRIVER', 'ANDROID', 'MID7042', '7035', 'VIEWPAD_7E',
'NOVO7', 'ADVANCED', 'TABLET_PC', 'F', 'E400_SD_CARD', 'ST80208-1', 'XT894',
'_USB', 'PROD_TAB13-201',
'_USB', 'PROD_TAB13-201', 'URFPAD2'
]
OSX_MAIN_MEM = 'Android Device Main Memory'

View File

@ -418,6 +418,14 @@ class libiMobileDevice():
if False:
self._idevice_set_debug_level(DEBUG)
def mkdir(self, path):
    '''
    Create a directory at path on the device, in the manner of mkdir().

    Intermediate folders are not created. The value returned by
    _afc_make_directory() is passed straight back to the caller.
    '''
    quoted_path = "'%s'" % path
    self._log_location(quoted_path)
    status = self._afc_make_directory(path)
    return status
def mount_ios_app(self, app_name=None, app_id=None):
'''
Convenience method to get iDevice ready to talk to app_name or app_id
@ -1007,6 +1015,27 @@ class libiMobileDevice():
self.log(" %s: %s" % (key, file_stats[key]))
return file_stats
def _afc_make_directory(self, path):
    '''
    Creates a directory on the device. Does not create intermediate dirs.

    Args:
     path: The directory's fully-qualified path; it is passed to the
           library's afc_make_directory() as str(path), together with a
           reference to self.afc

    Result:
     error: the low 16 bits of the library's return value; AFC_E_SUCCESS
            on success or an AFC_E_* error value
    '''
    self._log_location("%s" % repr(path))

    raw_status = self.lib.afc_make_directory(byref(self.afc), str(path))
    error = raw_status & 0xFFFF

    # Failures are only reported in verbose mode; the code is returned either way
    if error and self.verbose:
        self.log(" ERROR: %s" % self._afc_error(error))
    return error
def _afc_read_directory(self, directory=''):
'''
Gets a directory listing of the directory requested

View File

@ -227,16 +227,17 @@ class TREKSTOR(USBMS):
VENDOR_ID = [0x1e68]
PRODUCT_ID = [0x0041, 0x0042, 0x0052, 0x004e, 0x0056,
0x0067, # This is for the Pyrus Mini
0x006f, # This is for the Pyrus Maxi
0x003e, # This is for the EBOOK_PLAYER_5M https://bugs.launchpad.net/bugs/792091
0x5cL, # This is for the 4ink http://www.mobileread.com/forums/showthread.php?t=191318
]
BCD = [0x0002, 0x100]
BCD = [0x0002, 0x100, 0x0222]
EBOOK_DIR_MAIN = 'Ebooks'
VENDOR_NAME = 'TREKSTOR'
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['EBOOK_PLAYER_7',
'EBOOK_PLAYER_5M', 'EBOOK-READER_3.0', 'EREADER_PYRUS', 'PYRUS_MINI']
'EBOOK_PLAYER_5M', 'EBOOK-READER_3.0', 'EREADER_PYRUS', 'PYRUS_MINI', 'PYRUS_MAXI']
SUPPORTS_SUB_DIRS = True
SUPPORTS_SUB_DIRS_DEFAULT = False

View File

@ -27,7 +27,7 @@ specified as the first two arguments to the command.
The output ebook format is guessed from the file extension of \
output_file. output_file can also be of the special format .EXT where \
EXT is the output file extension. In this case, the name of the output \
file is derived the name of the input file. Note that the filenames must \
file is derived from the name of the input file. Note that the filenames must \
not start with a hyphen. Finally, if output_file has no extension, then \
it is treated as a directory and an "open ebook" (OEB) consisting of HTML \
files is written to that directory. These files are the files that would \
@ -94,6 +94,8 @@ def option_recommendation_to_cli_option(add_option, rec):
if opt.long_switch == 'verbose':
attrs['action'] = 'count'
attrs.pop('type', '')
if opt.name == 'read_metadata_from_opf':
switches.append('--from-opf')
if opt.name in DEFAULT_TRUE_OPTIONS and rec.recommended_value is True:
switches = ['--disable-'+opt.long_switch]
add_option(Option(*switches, **attrs))
@ -136,7 +138,7 @@ def add_pipeline_options(parser, plumber):
[
'base_font_size', 'disable_font_rescaling',
'font_size_mapping', 'embed_font_family',
'subset_embedded_fonts',
'subset_embedded_fonts', 'embed_all_fonts',
'line_height', 'minimum_line_height',
'linearize_tables',
'extra_css', 'filter_css',
@ -190,7 +192,7 @@ def add_pipeline_options(parser, plumber):
),
'METADATA' : (_('Options to set metadata in the output'),
plumber.metadata_option_names,
plumber.metadata_option_names + ['read_metadata_from_opf'],
),
'DEBUG': (_('Options to help with debugging the conversion'),
[
@ -320,7 +322,7 @@ def main(args=sys.argv):
opts.search_replace = read_sr_patterns(opts.search_replace, log)
recommendations = [(n.dest, getattr(opts, n.dest),
OptionRecommendation.HIGH) \
OptionRecommendation.HIGH)
for n in parser.options_iter()
if n.dest]
plumber.merge_ui_recommendations(recommendations)
@ -342,3 +344,4 @@ def main(args=sys.argv):
if __name__ == '__main__':
sys.exit(main())

View File

@ -134,8 +134,7 @@ OptionRecommendation(name='output_profile',
help=_('Specify the output profile. The output profile '
'tells the conversion system how to optimize the '
'created document for the specified device. In some cases, '
'an output profile is required to produce documents that '
'will work on a device. For example EPUB on the SONY reader. '
'an output profile can be used to optimize the output for a particular device, but this is rarely necessary. '
'Choices are:') +
', '.join([x.short_name for x in output_profiles()])
),
@ -205,6 +204,17 @@ OptionRecommendation(name='embed_font_family',
'with some output formats, principally EPUB and AZW3.')
),
OptionRecommendation(name='embed_all_fonts',
recommended_value=False, level=OptionRecommendation.LOW,
help=_(
'Embed every font that is referenced in the input document '
'but not already embedded. This will search your system for the '
'fonts, and if found, they will be embedded. Embedding will only work '
'if the format you are converting to supports embedded fonts, such as '
'EPUB, AZW3 or PDF. Please ensure that you have the proper license for embedding '
'the fonts used in this document.'
)),
OptionRecommendation(name='subset_embedded_fonts',
recommended_value=False, level=OptionRecommendation.LOW,
help=_(
@ -965,6 +975,9 @@ OptionRecommendation(name='search_replace',
if self.for_regex_wizard and hasattr(self.opts, 'no_process'):
self.opts.no_process = True
self.flush()
if self.opts.embed_all_fonts or self.opts.embed_font_family:
# Start the threaded font scanner now, for performance
from calibre.utils.fonts.scanner import font_scanner # noqa
import cssutils, logging
cssutils.log.setLevel(logging.WARN)
get_types_map() # Ensure the mimetypes module is intialized
@ -1129,6 +1142,10 @@ OptionRecommendation(name='search_replace',
RemoveFakeMargins()(self.oeb, self.log, self.opts)
RemoveAdobeMargins()(self.oeb, self.log, self.opts)
if self.opts.embed_all_fonts:
from calibre.ebooks.oeb.transforms.embed_fonts import EmbedFonts
EmbedFonts()(self.oeb, self.log, self.opts)
if self.opts.subset_embedded_fonts and self.output_plugin.file_type != 'pdf':
from calibre.ebooks.oeb.transforms.subset import SubsetFonts
SubsetFonts()(self.oeb, self.log, self.opts)

View File

@ -8,6 +8,8 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
import os
from calibre.ebooks.docx.names import ancestor
def mergeable(previous, current):
if previous.tail or current.tail:
return False
@ -97,6 +99,16 @@ def before_count(root, tag, limit=10):
return limit
def cleanup_markup(log, root, styles, dest_dir, detect_cover):
# Move <hr>s outside paragraphs, if possible.
for hr in root.xpath('//span/hr'):
p = ancestor(hr, 'p')
descendants = tuple(p.iterdescendants())
if descendants[-1] is hr:
parent = p.getparent()
idx = parent.index(p)
parent.insert(idx+1, hr)
hr.tail = '\n\t'
# Merge consecutive spans that have the same styling
current_run = []
for span in root.xpath('//span'):
@ -165,3 +177,4 @@ def cleanup_markup(log, root, styles, dest_dir, detect_cover):
return path

View File

@ -183,7 +183,7 @@ class DOCX(object):
root = fromstring(raw)
for item in root.xpath('//*[local-name()="Relationships"]/*[local-name()="Relationship" and @Type and @Target]'):
target = item.get('Target')
if item.get('TargetMode', None) != 'External':
if item.get('TargetMode', None) != 'External' and not target.startswith('#'):
target = '/'.join((base, target.lstrip('/')))
typ = item.get('Type')
Id = item.get('Id')

View File

@ -0,0 +1,108 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
import re
from calibre.ebooks.docx.names import XPath, get
class Field(object):
    '''
    Accumulates the pieces of a single field: the element that started it,
    the element that ended it, the elements seen in between and the parsed
    instruction text.
    '''

    def __init__(self, start):
        # The element that opened this field, stored as-is
        self.start = start
        # Filled in by the code that finds the matching end element
        self.end = None
        # Elements collected while this field was open
        self.contents = []
        # List of (name, arguments) tuples, one per instruction text element
        self.instructions = []

    def add_instr(self, elem):
        '''
        Record the instruction contained in ``elem.text``.

        The text is split at the first space into the instruction name and
        its (stripped) arguments, and the pair is appended to
        ``self.instructions``. Elements with no text, or with only
        whitespace, are ignored so that they do not create an empty
        ``('', '')`` instruction.
        '''
        raw = (elem.text or '').strip()
        if not raw:
            return
        name, rest = raw.partition(' ')[0::2]
        self.instructions.append((name, rest.strip()))
# Token types produced by the scanner below
WORD, FLAG = 0, 1

scanner = re.Scanner([
    (r'\\\S{1}', lambda s, t: (t, FLAG)),  # A flag of the form \x
    (r'"[^"]*"', lambda s, t: (t[1:-1], WORD)),  # Quoted word
    (r'[^\s\\"]\S*', lambda s, t: (t, WORD)),  # A non-quoted word, must not start with a backslash or a space or a quote
    (r'\s+', None),
], flags=re.DOTALL)

# Maps the letter of a flag to the key under which its value is stored
_HYPERLINK_OPTIONS = {'l':'anchor', 'm':'image-map', 'n':'target', 'o':'title', 't':'target'}


def parse_hyperlink(raw, log):
    '''
    Parse the argument string of a HYPERLINK instruction into a dict.

    The first token must be a word and is stored under the key ``url``.
    Every recognised flag that follows creates its key with the value
    ``None``; the next word, if any, becomes the value of that flag. A word
    that does not directly follow a recognised flag is ignored.

    :param raw: The instruction text after the HYPERLINK keyword
    :param log: Callable that is passed a message when the first token is
                not a word
    :return: The dict of parsed values, empty if ``raw`` has no tokens or
             does not start with a word
    '''
    ans = {}
    last_option = None
    # Hide escaped backslashes and escaped quotes from the scanner, they are
    # restored in each token below
    escaped = raw.replace('\\\\', '\x01').replace('\\"', '\x02')
    for token, token_type in scanner.scan(escaped)[0]:
        token = token.replace('\x01', '\\').replace('\x02', '"')
        if not ans:
            if token_type != WORD:
                # Report the text as it was passed in, not the version with
                # placeholder control characters
                log('Invalid hyperlink, first token is not a URL (%s)' % raw)
                return ans
            ans['url'] = token
            continue
        if token_type == FLAG:
            last_option = _HYPERLINK_OPTIONS.get(token[1], None)
            if last_option is not None:
                ans[last_option] = None
        elif last_option is not None:
            ans[last_option] = token
            # A flag takes at most one value, do not let later stray words
            # overwrite it
            last_option = None
    return ans
class Fields(object):
    '''
    Collects the fields present in a document and, from them, the runs that
    belong to HYPERLINK fields.
    '''

    def __init__(self):
        self.fields = []

    def __call__(self, doc, log):
        '''
        Scan ``doc`` for fields, storing them in ``self.fields``, then build
        ``self.hyperlink_fields``, a list of ``(hyperlink, runs)`` tuples
        with one entry per group of runs in a HYPERLINK field.
        '''
        open_fields = []
        nodes = XPath(
            '//*[name()="w:p" or name()="w:r" or name()="w:instrText" or (name()="w:fldChar" and (@w:fldCharType="begin" or @w:fldCharType="end"))]')(doc)
        for node in nodes:
            if node.tag.endswith('}fldChar'):
                if get(node, 'w:fldCharType') == 'begin':
                    field = Field(node)
                    open_fields.append(field)
                    self.fields.append(field)
                elif open_fields:
                    # An end marker with no open field is silently ignored
                    open_fields.pop().end = node
            elif open_fields:
                innermost = open_fields[-1]
                if node.tag.endswith('}instrText'):
                    innermost.add_instr(node)
                else:
                    innermost.contents.append(node)

        # Parse hyperlink fields
        self.hyperlink_fields = []
        for field in self.fields:
            instructions = field.instructions
            if len(instructions) != 1 or instructions[0][0] != 'HYPERLINK':
                continue
            hl = parse_hyperlink(instructions[0][1], log)
            if not hl:
                continue
            if 'target' in hl and hl['target'] is None:
                hl['target'] = '_blank'
            # We only handle spans in a single paragraph being wrapped in
            # <a>, so the runs are grouped, starting a new group at every
            # paragraph element
            groups, pending = [], []
            for x in field.contents:
                if x.tag.endswith('}p'):
                    if pending:
                        groups.append(pending)
                    pending = []
                elif x.tag.endswith('}r'):
                    pending.append(x)
            if pending:
                groups.append(pending)
            self.hyperlink_fields.extend((hl, runs) for runs in groups)

View File

@ -8,7 +8,7 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
import os
from lxml.html.builder import IMG
from lxml.html.builder import IMG, HR
from calibre.ebooks.docx.names import XPath, get, barename
from calibre.utils.filenames import ascii_filename
@ -96,6 +96,7 @@ class Images(object):
self.used = {}
self.names = set()
self.all_images = set()
self.links = []
def __call__(self, relationships_by_id):
self.rid_map = relationships_by_id
@ -125,8 +126,18 @@ class Images(object):
self.all_images.add('images/' + name)
return name
def pic_to_img(self, pic, alt=None):
def pic_to_img(self, pic, alt, parent):
name = None
link = None
for hl in XPath('descendant::a:hlinkClick[@r:id]')(parent):
link = {'id':get(hl, 'r:id')}
tgt = hl.get('tgtFrame', None)
if tgt:
link['target'] = tgt
title = hl.get('tooltip', None)
if title:
link['title'] = title
for pr in XPath('descendant::pic:cNvPr')(pic):
name = pr.get('name', None)
if name:
@ -138,6 +149,8 @@ class Images(object):
src = self.generate_filename(rid, name)
img = IMG(src='images/%s' % src)
img.set('alt', alt or 'Image')
if link is not None:
self.links.append((img, link))
return img
def drawing_to_html(self, drawing, page):
@ -145,7 +158,7 @@ class Images(object):
for inline in XPath('./wp:inline')(drawing):
style, alt = get_image_properties(inline)
for pic in XPath('descendant::pic:pic')(inline):
ans = self.pic_to_img(pic, alt)
ans = self.pic_to_img(pic, alt, inline)
if ans is not None:
if style:
ans.set('style', '; '.join('%s: %s' % (k, v) for k, v in style.iteritems()))
@ -156,13 +169,33 @@ class Images(object):
style, alt = get_image_properties(anchor)
self.get_float_properties(anchor, style, page)
for pic in XPath('descendant::pic:pic')(anchor):
ans = self.pic_to_img(pic, alt)
ans = self.pic_to_img(pic, alt, anchor)
if ans is not None:
if style:
ans.set('style', '; '.join('%s: %s' % (k, v) for k, v in style.iteritems()))
yield ans
def pict_to_html(self, pict, page):
# First see if we have an <hr>
is_hr = len(pict) == 1 and get(pict[0], 'o:hr') in {'t', 'true'}
if is_hr:
style = {}
hr = HR()
try:
pct = float(get(pict[0], 'o:hrpct'))
except (ValueError, TypeError, AttributeError):
pass
else:
if pct > 0:
style['width'] = '%.3g%%' % pct
align = get(pict[0], 'o:hralign', 'center')
if align in {'left', 'right'}:
style['margin-left'] = '0' if align == 'left' else 'auto'
style['margin-right'] = 'auto' if align == 'left' else '0'
if style:
hr.set('style', '; '.join(('%s:%s' % (k, v) for k, v in style.iteritems())))
yield hr
for imagedata in XPath('descendant::v:imagedata[@r:id]')(pict):
rid = get(imagedata, 'r:id')
if rid in self.rid_map:

View File

@ -403,6 +403,11 @@ class Styles(object):
ps.margin_top = 0
last_para = p
def apply_section_page_breaks(self, paras):
for p in paras:
ps = self.resolve_paragraph(p)
ps.pageBreakBefore = True
def register(self, css, prefix):
h = hash(frozenset(css.iteritems()))
ans, _ = self.classes.get(h, (None, None))

View File

@ -26,6 +26,7 @@ from calibre.ebooks.docx.footnotes import Footnotes
from calibre.ebooks.docx.cleanup import cleanup_markup
from calibre.ebooks.docx.theme import Theme
from calibre.ebooks.docx.toc import create_toc
from calibre.ebooks.docx.fields import Fields
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
@ -52,6 +53,7 @@ class Convert(object):
self.body = BODY()
self.theme = Theme()
self.tables = Tables()
self.fields = Fields()
self.styles = Styles(self.tables)
self.images = Images()
self.object_map = OrderedDict()
@ -79,6 +81,7 @@ class Convert(object):
def __call__(self):
doc = self.docx.document
relationships_by_id, relationships_by_type = self.docx.document_relationships
self.fields(doc, self.log)
self.read_styles(relationships_by_type)
self.images(relationships_by_id)
self.layers = OrderedDict()
@ -96,7 +99,11 @@ class Convert(object):
p = self.convert_p(wp)
self.body.append(p)
paras.append(wp)
self.read_block_anchors(doc)
self.styles.apply_contextual_spacing(paras)
# Apply page breaks at the start of every section, except the first
# section (since that will be the start of the file)
self.styles.apply_section_page_breaks(self.section_starts[1:])
notes_header = None
if self.footnotes.has_notes:
@ -177,6 +184,7 @@ class Convert(object):
def read_page_properties(self, doc):
current = []
self.page_map = OrderedDict()
self.section_starts = []
for p in descendants(doc, 'w:p', 'w:tbl'):
if p.tag.endswith('}tbl'):
@ -186,8 +194,10 @@ class Convert(object):
sect = tuple(descendants(p, 'w:sectPr'))
if sect:
pr = PageProperties(sect)
for x in current + [p]:
paras = current + [p]
for x in paras:
self.page_map[x] = pr
self.section_starts.append(paras[0])
current = []
else:
current.append(p)
@ -287,6 +297,22 @@ class Convert(object):
opf.render(of, ncx, 'toc.ncx')
return os.path.join(self.dest_dir, 'metadata.opf')
def read_block_anchors(self, doc):
doc_anchors = frozenset(XPath('./w:body/w:bookmarkStart[@w:name]')(doc))
if doc_anchors:
current_bm = None
rmap = {v:k for k, v in self.object_map.iteritems()}
for p in descendants(doc, 'w:p', 'w:bookmarkStart[@w:name]'):
if p.tag.endswith('}p'):
if current_bm and p in rmap:
para = rmap[p]
if 'id' not in para.attrib:
para.set('id', generate_anchor(current_bm, frozenset(self.anchor_map.itervalues())))
self.anchor_map[current_bm] = para.get('id')
current_bm = None
elif p in doc_anchors:
current_bm = get(p, 'w:name')
def convert_p(self, p):
dest = P()
self.object_map[dest] = p
@ -316,7 +342,13 @@ class Convert(object):
elif x.tag.endswith('}bookmarkStart'):
anchor = get(x, 'w:name')
if anchor and anchor not in self.anchor_map:
old_anchor = current_anchor
self.anchor_map[anchor] = current_anchor = generate_anchor(anchor, frozenset(self.anchor_map.itervalues()))
if old_anchor is not None:
# The previous anchor was not applied to any element
for a, t in tuple(self.anchor_map.iteritems()):
if t == old_anchor:
self.anchor_map[a] = current_anchor
elif x.tag.endswith('}hyperlink'):
current_hyperlink = x
@ -396,6 +428,46 @@ class Convert(object):
# hrefs that point nowhere give epubcheck a hernia. The element
# should be styled explicitly by Word anyway.
# span.set('href', '#')
rmap = {v:k for k, v in self.object_map.iteritems()}
for hyperlink, runs in self.fields.hyperlink_fields:
spans = [rmap[r] for r in runs if r in rmap]
if not spans:
continue
if len(spans) > 1:
span = self.wrap_elems(spans, SPAN())
span.tag = 'a'
tgt = hyperlink.get('target', None)
if tgt:
span.set('target', tgt)
tt = hyperlink.get('title', None)
if tt:
span.set('title', tt)
url = hyperlink['url']
if url in self.anchor_map:
span.set('href', '#' + self.anchor_map[url])
continue
span.set('href', url)
for img, link in self.images.links:
parent = img.getparent()
idx = parent.index(img)
a = A(img)
a.tail, img.tail = img.tail, None
parent.insert(idx, a)
tgt = link.get('target', None)
if tgt:
a.set('target', tgt)
tt = link.get('title', None)
if tt:
a.set('title', tt)
rid = link['id']
if rid in relationships_by_id:
dest = relationships_by_id[rid]
if dest.startswith('#'):
if dest[1:] in self.anchor_map:
a.set('href', '#' + self.anchor_map[dest[1:]])
else:
a.set('href', dest)
def convert_run(self, run):
ans = SPAN()

View File

@ -1047,6 +1047,14 @@ class OPF(object): # {{{
if raw:
return raw.rpartition(':')[-1]
@property
def page_progression_direction(self):
spine = self.XPath('descendant::*[re:match(name(), "spine", "i")][1]')(self.root)
if spine:
for k, v in spine[0].attrib.iteritems():
if k == 'page-progression-direction' or k.endswith('}page-progression-direction'):
return v
def guess_cover(self):
'''
Try to guess a cover. Needed for some old/badly formed OPF files.
@ -1185,6 +1193,7 @@ class OPFCreator(Metadata):
'''
Metadata.__init__(self, title='', other=other)
self.base_path = os.path.abspath(base_path)
self.page_progression_direction = None
if self.application_id is None:
self.application_id = str(uuid.uuid4())
if not isinstance(self.toc, TOC):
@ -1356,6 +1365,8 @@ class OPFCreator(Metadata):
spine = E.spine()
if self.toc is not None:
spine.set('toc', 'ncx')
if self.page_progression_direction is not None:
spine.set('page-progression-direction', self.page_progression_direction)
if self.spine is not None:
for ref in self.spine:
if ref.id is not None:

View File

@ -20,7 +20,7 @@ from calibre.ebooks.mobi.reader.ncx import read_ncx, build_toc
from calibre.ebooks.mobi.reader.markup import expand_mobi8_markup
from calibre.ebooks.metadata.opf2 import Guide, OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.mobi.utils import read_font_record
from calibre.ebooks.mobi.utils import read_font_record, read_resc_record
from calibre.ebooks.oeb.parse_utils import parse_html
from calibre.ebooks.oeb.base import XPath, XHTML, xml2text
from calibre.utils.imghdr import what
@ -65,6 +65,7 @@ class Mobi8Reader(object):
self.mobi6_reader, self.log = mobi6_reader, log
self.header = mobi6_reader.book_header
self.encrypted_fonts = []
self.resc_data = {}
def __call__(self):
self.mobi6_reader.check_for_drm()
@ -389,9 +390,11 @@ class Mobi8Reader(object):
data = sec[0]
typ = data[:4]
href = None
if typ in {b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n',
b'RESC', b'BOUN', b'FDST', b'DATP', b'AUDI', b'VIDE'}:
if typ in {b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n', b'BOUN',
b'FDST', b'DATP', b'AUDI', b'VIDE'}:
pass # Ignore these records
elif typ == b'RESC':
self.resc_data = read_resc_record(data)
elif typ == b'FONT':
font = read_font_record(data)
href = "fonts/%05d.%s" % (fname_idx, font['ext'])
@ -452,6 +455,9 @@ class Mobi8Reader(object):
opf.create_manifest_from_files_in([os.getcwdu()], exclude=exclude)
opf.create_spine(spine)
opf.set_toc(toc)
ppd = self.resc_data.get('page-progression-direction', None)
if ppd:
opf.page_progression_direction = ppd
with open('metadata.opf', 'wb') as of, open('toc.ncx', 'wb') as ncx:
opf.render(of, ncx, 'toc.ncx')

View File

@ -7,7 +7,7 @@ __license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import struct, string, zlib, os
import struct, string, zlib, os, re
from collections import OrderedDict
from io import BytesIO
@ -393,6 +393,15 @@ def mobify_image(data):
data = im.export('gif')
return data
def read_resc_record(data):
    '''
    Extract information from the raw bytes of a RESC record.

    Currently only the page-progression-direction attribute of the <spine>
    tag is looked for. It is returned, lower-cased and decoded, under the
    key ``page-progression-direction`` if its value is ``ltr`` or ``rtl``.

    :param data: The record as a bytestring
    :return: A dict, empty if the attribute is missing or has another value
    '''
    ans = {}
    # XML allows any whitespace after the tag name and around the = sign,
    # not just a single space
    match = re.search(
        br'''<spine\s[^>]*page-progression-direction\s*=\s*['"](.+?)['"]''', data)
    if match is not None:
        ppd = match.group(1).lower()
        if ppd in {b'ltr', b'rtl'}:
            ans['page-progression-direction'] = ppd.decode('ascii')
    return ans
# Font records {{{
def read_font_record(data, extent=1040):
'''

View File

@ -1210,6 +1210,7 @@ class Spine(object):
def __init__(self, oeb):
self.oeb = oeb
self.items = []
self.page_progression_direction = None
def _linear(self, linear):
if isinstance(linear, basestring):
@ -1896,4 +1897,6 @@ class OEBBook(object):
attrib={'media-type': PAGE_MAP_MIME})
spine.attrib['page-map'] = id
results[PAGE_MAP_MIME] = (href, self.pages.to_page_map())
if self.spine.page_progression_direction in {'ltr', 'rtl'}:
spine.attrib['page-progression-direction'] = self.spine.page_progression_direction
return results

View File

@ -0,0 +1,158 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import sys
from lxml import etree
from calibre import prints
from calibre.ebooks.oeb.base import XHTML
from calibre.ebooks.oeb.polish.stats import normalize_font_properties
from calibre.utils.filenames import ascii_filename
props = {'font-family':None, 'font-weight':'normal', 'font-style':'normal', 'font-stretch':'normal'}
def matching_rule(font, rules):
    '''
    Return the first rule in ``rules`` whose font-style, font-stretch and
    font-weight are equal to those of ``font`` and whose family name matches
    case-insensitively, or None if there is no such rule.

    A font-family that is not a string is treated as a collection, of which
    only the first item is compared.
    '''
    def first_family(value):
        return value if isinstance(value, basestring) else tuple(value)[0]

    family = icu_lower(first_family(font['font-family']))
    weight = font['font-weight']
    style = font['font-style']
    stretch = font['font-stretch']
    for candidate in rules:
        if candidate['font-style'] != style or candidate['font-stretch'] != stretch or candidate['font-weight'] != weight:
            continue
        if icu_lower(first_family(candidate['font-family'])) == family:
            return candidate
    return None
def embed_font(container, font, all_font_rules, report, warned):
    '''
    Produce an @font-face rule (as a dict) for ``font``.

    If one of ``all_font_rules`` already matches the font, the rule is built
    from it. Otherwise the font scanner is asked for the fonts of the family
    and the first one with the same weight, style and stretch is written
    into the container. Returns None if no font could be found; the failure
    is passed to ``report`` and remembered in ``warned`` so that it is
    reported only once.
    '''
    existing = matching_rule(font, all_font_rules)
    family = font['font-family']
    if not isinstance(family, basestring):
        family = family[0]

    if existing is not None:
        # Re-use the font file the existing rule points to
        name = existing['src']
        href = container.name_to_href(name)
        rule = {k:family if k == 'font-family' else existing.get(k, v) for k, v in props.iteritems()}
        rule['src'] = 'url(%s)' % href
        rule['name'] = name
        return rule

    from calibre.utils.fonts.scanner import font_scanner, NoFonts
    if family in warned:
        return None
    try:
        candidates = font_scanner.fonts_for_family(family)
    except NoFonts:
        report(_('Failed to find fonts for family: %s, not embedding') % family)
        warned.add(family)
        return None

    weight = int(font.get('font-weight', '400'))
    style = font.get('font-style', 'normal')
    stretch = font.get('font-stretch', 'normal')
    for face in candidates:
        if face['weight'] != weight or face['font-style'] != style or face['font-stretch'] != stretch:
            continue
        report('Embedding font %s from %s' % (face['full_name'], face['path']))
        data = font_scanner.get_font_data(face)
        ext = 'otf' if face['is_otf'] else 'ttf'
        fname = ascii_filename(face['full_name']).replace(' ', '-').replace('(', '').replace(')', '')
        item = container.generate_item('fonts/%s.%s'%(fname, ext), id_prefix='font')
        name = container.href_to_name(item.get('href'), container.opf_name)
        with container.open(name, 'wb') as out:
            out.write(data)
        href = container.name_to_href(name)
        rule = {k:face.get(k, v) for k, v in props.iteritems()}
        rule['src'] = 'url(%s)' % href
        rule['name'] = name
        return rule

    # The family exists, but not in the needed weight/style/stretch
    msg = _('Failed to find font matching: family: %s; weight: %s; style: %s; stretch: %s') % (
        family, font['font-weight'], font['font-style'], font['font-stretch'])
    if msg not in warned:
        warned.add(msg)
        report(msg)
    return None
def embed_all_fonts(container, stats, report):
    '''
    Embed the fonts used by the spine items of ``container`` that do not
    already have a matching @font-face rule.

    The new rules are written to a generated ``fonts.css`` which is linked
    from every spine item that uses one of them, and ``stats.font_stats`` is
    updated with the text each newly embedded font is used for. Messages are
    passed to the ``report`` callable.
    '''
    all_font_rules = tuple(stats.all_font_rules.itervalues())
    warned = set()
    new_rules, normalized_rules = [], []
    modified = set()

    for path in container.spine_items:
        name = container.abspath_to_name(path)
        usage = stats.font_usage_map.get(name, None)
        specified = stats.font_spec_map.get(name, None)
        file_rules = stats.font_rule_map.get(name, None)
        if specified is None or usage is None or file_rules is None:
            continue
        specified = {icu_lower(x) for x in specified}
        for font in usage.itervalues():
            if icu_lower(font['font-family']) not in specified:
                continue
            if matching_rule(font, file_rules) is not None:
                # Already embedded in this HTML file before processing
                # started
                continue
            rule = matching_rule(font, normalized_rules)
            if rule is not None:
                # This font was previously embedded by this code, update its stats
                stats.font_stats[rule['name']] |= font['text']
                modified.add(name)
                continue
            rule = embed_font(container, font, all_font_rules, report, warned)
            if rule is not None:
                new_rules.append(rule)
                normalized_rules.append(normalize_font_properties(rule.copy()))
                modified.add(name)
                stats.font_stats[rule['name']] = font['text']

    if not new_rules:
        report(_('No embeddable fonts found'))
        return

    # Write out CSS. Only src and the properties that differ from their
    # defaults are emitted
    blocks = []
    for rule in new_rules:
        declarations = []
        for k, v in rule.iteritems():
            if k == 'src' or (k in props and props[k] != v and v != '400'):
                declarations.append('%s: %s' % (k, '"%s"' % v if k == 'font-family' else v))
        blocks.append('@font-face {\n\t%s\n}' % ';\n\t'.join(declarations))
    css = '\n\n'.join(blocks)
    item = container.generate_item('fonts.css', id_prefix='font_embed')
    css_name = container.href_to_name(item.get('href'), container.opf_name)
    with container.open(css_name, 'wb') as out:
        out.write(css.encode('utf-8'))

    # Add link to CSS in all files that need it
    for spine_name in modified:
        root = container.parsed(spine_name)
        head = root.xpath('//*[local-name()="head"][1]')[0]
        href = container.name_to_href(css_name, spine_name)
        link = etree.SubElement(head, XHTML('link'), rel='stylesheet', type='text/css', href=href)
        link.tail = '\n'
        container.dirty(spine_name)
if __name__ == '__main__':
    # Manual test: embed fonts into the book given as the last command line
    # argument and write the result next to it
    from calibre.ebooks.oeb.polish.container import get_container
    from calibre.ebooks.oeb.polish.stats import StatsCollector
    from calibre.utils.logging import default_log
    default_log.filter_level = default_log.DEBUG
    inbook = sys.argv[-1]
    ebook = get_container(inbook, default_log)
    report = []
    stats = StatsCollector(ebook, do_embed=True)
    embed_all_fonts(ebook, stats, report.append)
    stem, sep, ext = inbook.rpartition('.')
    outbook = stem + '_subset.' + ext
    ebook.commit(outbook)
    prints('\nReport:')
    for msg in report:
        prints(msg)
    print()
    prints('Output written to:', outbook)

View File

@ -67,6 +67,18 @@ class FontStats
ans.push(usage)
py_bridge.value = ans
get_font_families: () ->
ans = {}
for node in document.getElementsByTagName('*')
rules = document.defaultView.getMatchedCSSRules(node, '')
if rules
for rule in rules
style = rule.style
family = style.getPropertyValue('font-family')
if family
ans[family] = true
py_bridge.value = ans
if window?
window.font_stats = new FontStats()

View File

@ -14,6 +14,7 @@ from functools import partial
from calibre.ebooks.oeb.polish.container import get_container
from calibre.ebooks.oeb.polish.stats import StatsCollector
from calibre.ebooks.oeb.polish.subset import subset_all_fonts
from calibre.ebooks.oeb.polish.embed import embed_all_fonts
from calibre.ebooks.oeb.polish.cover import set_cover
from calibre.ebooks.oeb.polish.replace import smarten_punctuation
from calibre.ebooks.oeb.polish.jacket import (
@ -21,6 +22,7 @@ from calibre.ebooks.oeb.polish.jacket import (
from calibre.utils.logging import Log
ALL_OPTS = {
'embed': False,
'subset': False,
'opf': None,
'cover': None,
@ -47,6 +49,13 @@ changes needed for the desired effect.</p>
<p>Note that polishing only works on files in the %s formats.</p>\
''')%_(' or ').join('<b>%s</b>'%x for x in SUPPORTED),
'embed': _('''\
<p>Embed all fonts that are referenced in the document and are not already embedded.
This will scan your computer for the fonts, and if they are found, they will be
embedded into the document.</p>
<p>Please ensure that you have the proper license for embedding the fonts used in this document.</p>
'''),
'subset': _('''\
<p>Subsetting fonts means reducing an embedded font to contain
only the characters used from that font in the book. This
@ -118,8 +127,8 @@ def polish(file_map, opts, log, report):
ebook = get_container(inbook, log)
jacket = None
if opts.subset:
stats = StatsCollector(ebook)
if opts.subset or opts.embed:
stats = StatsCollector(ebook, do_embed=opts.embed)
if opts.opf:
rt(_('Updating metadata'))
@ -159,6 +168,11 @@ def polish(file_map, opts, log, report):
smarten_punctuation(ebook, report)
report('')
if opts.embed:
rt(_('Embedding referenced fonts'))
embed_all_fonts(ebook, stats, report)
report('')
if opts.subset:
rt(_('Subsetting embedded fonts'))
subset_all_fonts(ebook, stats.font_stats, report)
@ -197,6 +211,7 @@ def option_parser():
parser = OptionParser(usage=USAGE)
a = parser.add_option
o = partial(a, default=False, action='store_true')
o('--embed-fonts', '-e', dest='embed', help=CLI_HELP['embed'])
o('--subset-fonts', '-f', dest='subset', help=CLI_HELP['subset'])
a('--cover', '-c', help=_(
'Path to a cover image. Changes the cover specified in the ebook. '

View File

@ -7,10 +7,11 @@ __license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import json, sys, os
import json, sys, os, logging
from urllib import unquote
from collections import defaultdict
from cssutils import parseStyle
from cssutils import CSSParser
from PyQt4.Qt import (pyqtProperty, QString, QEventLoop, Qt, QSize, QTimer,
pyqtSlot)
from PyQt4.QtWebKit import QWebPage, QWebView
@ -41,6 +42,7 @@ def normalize_font_properties(font):
'extra-expanded', 'ultra-expanded'}:
val = 'normal'
font['font-stretch'] = val
return font
widths = {x:i for i, x in enumerate(('ultra-condensed',
'extra-condensed', 'condensed', 'semi-condensed', 'normal',
@ -48,7 +50,6 @@ widths = {x:i for i, x in enumerate(( 'ultra-condensed',
))}
def get_matching_rules(rules, font):
normalize_font_properties(font)
matches = []
# Filter on family
@ -157,10 +158,12 @@ class Page(QWebPage): # {{{
class StatsCollector(object):
def __init__(self, container):
def __init__(self, container, do_embed=False):
self.container = container
self.log = self.logger = container.log
self.do_embed = do_embed
must_use_qt()
self.parser = CSSParser(loglevel=logging.CRITICAL, log=logging.getLogger('calibre.css'))
self.loop = QEventLoop()
self.view = QWebView()
@ -173,6 +176,10 @@ class StatsCollector(object):
self.render_queue = list(container.spine_items)
self.font_stats = {}
self.font_usage_map = {}
self.font_spec_map = {}
self.font_rule_map = {}
self.all_font_rules = {}
QTimer.singleShot(0, self.render_book)
@ -235,27 +242,35 @@ class StatsCollector(object):
rules = []
for rule in font_face_rules:
ff = rule.get('font-family', None)
if not ff: continue
style = parseStyle('font-family:%s'%ff, validate=False)
if not ff:
continue
style = self.parser.parseStyle('font-family:%s'%ff, validate=False)
ff = [x.value for x in
style.getProperty('font-family').propertyValue]
if not ff or ff[0] == 'inherit':
continue
rule['font-family'] = frozenset(icu_lower(f) for f in ff)
src = rule.get('src', None)
if not src: continue
style = parseStyle('background-image:%s'%src, validate=False)
if not src:
continue
style = self.parser.parseStyle('background-image:%s'%src, validate=False)
src = style.getProperty('background-image').propertyValue[0].uri
name = self.href_to_name(src, '@font-face rule')
if name is None:
continue
rule['src'] = name
normalize_font_properties(rule)
rule['width'] = widths[rule['font-stretch']]
rule['weight'] = int(rule['font-weight'])
rules.append(rule)
if not rules:
if not rules and not self.do_embed:
return
self.font_rule_map[self.container.abspath_to_name(self.current_item)] = rules
for rule in rules:
self.all_font_rules[rule['src']] = rule
for rule in rules:
if rule['src'] not in self.font_stats:
self.font_stats[rule['src']] = set()
@ -265,19 +280,48 @@ class StatsCollector(object):
if not isinstance(font_usage, list):
raise Exception('Unknown error occurred while reading font usage')
exclude = {'\n', '\r', '\t'}
self.font_usage_map[self.container.abspath_to_name(self.current_item)] = fu = defaultdict(dict)
bad_fonts = {'serif', 'sans-serif', 'monospace', 'cursive', 'fantasy', 'sansserif', 'inherit'}
for font in font_usage:
text = set()
for t in font['text']:
text |= frozenset(t)
text.difference_update(exclude)
if not text: continue
if not text:
continue
normalize_font_properties(font)
for rule in get_matching_rules(rules, font):
self.font_stats[rule['src']] |= text
if self.do_embed:
ff = [icu_lower(x) for x in font.get('font-family', [])]
if ff and ff[0] not in bad_fonts:
keys = {'font-weight', 'font-style', 'font-stretch', 'font-family'}
key = frozenset(((k, ff[0] if k == 'font-family' else v) for k, v in font.iteritems() if k in keys))
val = fu[key]
if not val:
val.update({k:(font[k][0] if k == 'font-family' else font[k]) for k in keys})
val['text'] = set()
val['text'] |= text
self.font_usage_map[self.container.abspath_to_name(self.current_item)] = dict(fu)
if self.do_embed:
self.page.evaljs('window.font_stats.get_font_families()')
font_families = self.page.bridge_value
if not isinstance(font_families, dict):
raise Exception('Unknown error occurred while reading font families')
self.font_spec_map[self.container.abspath_to_name(self.current_item)] = fs = set()
for raw in font_families.iterkeys():
style = self.parser.parseStyle('font-family:' + raw, validate=False).getProperty('font-family')
for x in style.propertyValue:
x = x.value
if x and x.lower() not in bad_fonts:
fs.add(x)
if __name__ == '__main__':
from calibre.ebooks.oeb.polish.container import get_container
from calibre.utils.logging import default_log
default_log.filter_level = default_log.DEBUG
ebook = get_container(sys.argv[-1], default_log)
print (StatsCollector(ebook).font_stats)
print (StatsCollector(ebook, do_embed=True).font_stats)

View File

@ -281,14 +281,17 @@ def find_text(node):
def from_files(container):
toc = TOC()
for spinepath in container.spine_items:
for i, spinepath in enumerate(container.spine_items):
name = container.abspath_to_name(spinepath)
root = container.parsed(name)
body = XPath('//h:body')(root)
if not body:
continue
text = find_text(body[0])
if text:
if not text:
text = name.rpartition('/')[-1]
if i == 0 and text.rpartition('.')[0].lower() in {'titlepage', 'cover'}:
text = _('Cover')
toc.add(text, name)
return toc

View File

@ -330,6 +330,9 @@ class OEBReader(object):
if len(spine) == 0:
raise OEBError("Spine is empty")
self._spine_add_extra()
for val in xpath(opf, '/o2:package/o2:spine/@page-progression-direction'):
if val in {'ltr', 'rtl'}:
spine.page_progression_direction = val
def _guide_from_opf(self, opf):
guide = self.oeb.guide

View File

@ -0,0 +1,233 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
import logging
from collections import defaultdict
import cssutils
from lxml import etree
from calibre import guess_type
from calibre.ebooks.oeb.base import XPath, CSS_MIME, XHTML
from calibre.ebooks.oeb.transforms.subset import get_font_properties, find_font_face_rules, elem_style
from calibre.utils.filenames import ascii_filename
from calibre.utils.fonts.scanner import font_scanner, NoFonts
def used_font(style, embedded_fonts):
    '''
    Decide whether ``style`` uses a real (non-generic) font family and, if
    so, whether one of ``embedded_fonts`` provides it.

    :param style: Mapping of font properties; font-family is a list of names
    :param embedded_fonts: Iterable of dicts describing @font-face rules.
        As a side effect the key ``width`` is set on every one whose family
        matches.
    :return: ``(has_font, match)``. ``has_font`` is False if the style names
        only generic families. ``match`` is the first embedded font with a
        matching family and identical stretch, style and weight, or None.
    '''
    generic = {'serif', 'sansserif', 'sans-serif', 'fantasy', 'cursive', 'monospace'}
    ff = [unicode(f) for f in style.get('font-family', []) if unicode(f).lower() not in generic]
    if not ff:
        return False, None
    lnames = {x.lower() for x in ff}

    # Filter on font-family
    matching_set = [
        ef for ef in embedded_fonts
        if lnames.intersection(x.lower() for x in ef.get('font-family', []))]
    if not matching_set:
        return True, None

    # Filter on font-stretch
    widths = {x:i for i, x in enumerate(('ultra-condensed',
            'extra-condensed', 'condensed', 'semi-condensed', 'normal',
            'semi-expanded', 'expanded', 'extra-expanded', 'ultra-expanded'
            ))}

    width = widths[style.get('font-stretch', 'normal')]
    for f in matching_set:
        # The width of a candidate comes from its own font-stretch, not from
        # the style it is being compared against. Unknown values are treated
        # as normal.
        f['width'] = widths.get(f.get('font-stretch', 'normal'), widths['normal'])
    # Only an exact width match is accepted
    matching_set = [f for f in matching_set if f['width'] == width]
    if not matching_set:
        return True, None

    # Filter on font-style
    fs = style.get('font-style', 'normal')
    matching_set = [f for f in matching_set if f.get('font-style', 'normal') == fs]

    # Filter on font weight
    fw = int(style.get('font-weight', '400'))
    matching_set = [f for f in matching_set if f.get('weight', 400) == fw]

    if not matching_set:
        return True, None
    return True, matching_set[0]
class EmbedFonts(object):
'''
Embed all referenced fonts, if found on system. Must be called after CSS flattening.
'''
def __call__(self, oeb, log, opts):
    '''
    Run the transform on ``oeb``: gather the style rules and the already
    embedded fonts, then process every spine item that links to at least
    one stylesheet present in the manifest.
    '''
    self.oeb = oeb
    self.log = log
    self.opts = opts
    self.sheet_cache = {}
    self.find_style_rules()
    self.find_embedded_fonts()
    self.parser = cssutils.CSSParser(loglevel=logging.CRITICAL, log=logging.getLogger('calibre.css'))
    self.warned = set()
    self.warned2 = set()
    for item in oeb.spine:
        if not hasattr(item.data, 'xpath'):
            # Not a parsed document
            continue
        linked = (
            self.oeb.manifest.hrefs.get(item.abshref(href), None)
            for href in XPath('//h:link[@href and @type="text/css"]/@href')(item.data))
        sheets = [sheet for sheet in linked if sheet is not None]
        if sheets:
            self.process_item(item, sheets)
def find_embedded_fonts(self):
    '''
    Collect the info extracted from the @font-face rules of every
    stylesheet in the manifest into self.embedded_fonts.
    '''
    self.embedded_fonts = found = []
    for item in self.oeb.manifest:
        if hasattr(item.data, 'cssRules'):
            found.extend(find_font_face_rules(item, self.oeb))
def find_style_rules(self):
'''
Extract all font related style information from all stylesheets into a
dict mapping classes to font properties specified by that class. All
the heavy lifting has already been done by the CSS flattening code.
'''
rules = defaultdict(dict)
for item in self.oeb.manifest:
if not hasattr(item.data, 'cssRules'):
continue
for i, rule in enumerate(item.data.cssRules):
if rule.type != rule.STYLE_RULE:
continue
props = {k:v for k,v in
get_font_properties(rule).iteritems() if v}
if not props:
continue
for sel in rule.selectorList:
sel = sel.selectorText
if sel and sel.startswith('.'):
# We dont care about pseudo-selectors as the worst that
# can happen is some extra characters will remain in
# the font
sel = sel.partition(':')[0]
rules[sel[1:]].update(props)
self.style_rules = dict(rules)
def get_page_sheet(self):
if self.page_sheet is None:
manifest = self.oeb.manifest
id_, href = manifest.generate('page_css', 'page_styles.css')
self.page_sheet = manifest.add(id_, href, CSS_MIME, data=self.parser.parseString('', validate=False))
head = self.current_item.xpath('//*[local-name()="head"][1]')
if head:
href = self.current_item.relhref(href)
l = etree.SubElement(head[0], XHTML('link'),
rel='stylesheet', type=CSS_MIME, href=href)
l.tail = '\n'
else:
self.log.warn('No <head> cannot embed font rules')
return self.page_sheet
def process_item(self, item, sheets):
ff_rules = []
self.current_item = item
self.page_sheet = None
for sheet in sheets:
if 'page_css' in sheet.id:
ff_rules.extend(find_font_face_rules(sheet, self.oeb))
self.page_sheet = sheet
base = {'font-family':['serif'], 'font-weight': '400',
'font-style':'normal', 'font-stretch':'normal'}
for body in item.data.xpath('//*[local-name()="body"]'):
self.find_usage_in(body, base, ff_rules)
def find_usage_in(self, elem, inherited_style, ff_rules):
style = elem_style(self.style_rules, elem.get('class', '') or '', inherited_style)
for child in elem:
self.find_usage_in(child, style, ff_rules)
has_font, existing = used_font(style, ff_rules)
if not has_font:
return
if existing is None:
in_book = used_font(style, self.embedded_fonts)[1]
if in_book is None:
# Try to find the font in the system
added = self.embed_font(style)
if added is not None:
ff_rules.append(added)
self.embedded_fonts.append(added)
else:
# TODO: Create a page rule from the book rule (cannot use it
# directly as paths might be different)
item = in_book['item']
sheet = self.parser.parseString(in_book['rule'].cssText, validate=False)
rule = sheet.cssRules[0]
page_sheet = self.get_page_sheet()
href = page_sheet.abshref(item.href)
rule.style.setProperty('src', 'url(%s)' % href)
ff_rules.append(find_font_face_rules(sheet, self.oeb)[0])
page_sheet.data.insertRule(rule, len(page_sheet.data.cssRules))
def embed_font(self, style):
ff = [unicode(f) for f in style.get('font-family', []) if unicode(f).lower() not in {
'serif', 'sansserif', 'sans-serif', 'fantasy', 'cursive', 'monospace'}]
if not ff:
return
ff = ff[0]
if ff in self.warned or ff == 'inherit':
return
try:
fonts = font_scanner.fonts_for_family(ff)
except NoFonts:
self.log.warn('Failed to find fonts for family:', ff, 'not embedding')
self.warned.add(ff)
return
try:
weight = int(style.get('font-weight', '400'))
except (ValueError, TypeError, AttributeError):
w = style['font-weight']
if w not in self.warned2:
self.log.warn('Invalid weight in font style: %r' % w)
self.warned2.add(w)
return
for f in fonts:
if f['weight'] == weight and f['font-style'] == style.get('font-style', 'normal') and f['font-stretch'] == style.get('font-stretch', 'normal'):
self.log('Embedding font %s from %s' % (f['full_name'], f['path']))
data = font_scanner.get_font_data(f)
name = f['full_name']
ext = 'otf' if f['is_otf'] else 'ttf'
name = ascii_filename(name).replace(' ', '-').replace('(', '').replace(')', '')
fid, href = self.oeb.manifest.generate(id=u'font', href=u'fonts/%s.%s'%(name, ext))
item = self.oeb.manifest.add(fid, href, guess_type('dummy.'+ext)[0], data=data)
item.unload_data_from_memory()
page_sheet = self.get_page_sheet()
href = page_sheet.relhref(item.href)
css = '''@font-face { font-family: "%s"; font-weight: %s; font-style: %s; font-stretch: %s; src: url(%s) }''' % (
f['font-family'], f['font-weight'], f['font-style'], f['font-stretch'], href)
sheet = self.parser.parseString(css, validate=False)
page_sheet.data.insertRule(sheet.cssRules[0], len(page_sheet.data.cssRules))
return find_font_face_rules(sheet, self.oeb)[0]

View File

@ -194,7 +194,7 @@ class CSSFlattener(object):
for i, font in enumerate(faces):
ext = 'otf' if font['is_otf'] else 'ttf'
fid, href = self.oeb.manifest.generate(id=u'font',
href=u'%s.%s'%(ascii_filename(font['full_name']).replace(u' ', u'-'), ext))
href=u'fonts/%s.%s'%(ascii_filename(font['full_name']).replace(u' ', u'-'), ext))
item = self.oeb.manifest.add(fid, href,
guess_type('dummy.'+ext)[0],
data=font_scanner.get_font_data(font))

View File

@ -339,6 +339,8 @@ class FlowSplitter(object):
# We want to keep the descendants of the split point in
# Tree 1
keep_descendants = True
# We want the split point element, but not its tail
elem.tail = '\n'
continue
if hit_split_point:
@ -357,6 +359,18 @@ class FlowSplitter(object):
for elem in tuple(body2.iterdescendants()):
if elem is split_point2:
if not before:
# Keep the split point element's tail, if it contains non-whitespace
# text
tail = elem.tail
if tail and not tail.isspace():
parent = elem.getparent()
idx = parent.index(elem)
if idx == 0:
parent.text = (parent.text or '') + tail
else:
sib = parent[idx-1]
sib.tail = (sib.tail or '') + tail
# Remove the element itself
nix_element(elem)
break
if elem in ancestors:

View File

@ -12,6 +12,111 @@ from collections import defaultdict
from calibre.ebooks.oeb.base import urlnormalize
from calibre.utils.fonts.sfnt.subset import subset, NoGlyphs, UnsupportedFont
def get_font_properties(rule, default=None):
    '''
    Read the font related declarations of a CSS rule into a dict.

    The returned dict always has the keys font-family, src, font-weight,
    font-stretch and font-style. font-family is a list of family names (or
    None), src is the uri of the first src value (or None) and the remaining
    three are lower cased keywords. A keyword that is missing, is ``inherit``
    or is not one of the recognised values is replaced by ``default``. The
    weights ``normal`` and ``bold`` are converted to ``400`` and ``700``.

    Note that the shorthand font property should already have been expanded by
    the CSS flattening code.
    '''
    # Recognised values for each of the keyword valued properties
    keyword_props = {
        'font-weight': {'100', '200', '300', '400', '500', '600', '700',
                        '800', '900', 'bolder', 'lighter'},
        'font-style': {'normal', 'italic', 'oblique'},
        'font-stretch': {'normal', 'ultra-condensed', 'extra-condensed',
                         'condensed', 'semi-condensed', 'semi-expanded',
                         'expanded', 'extra-expanded', 'ultra-expanded'},
    }
    named_weights = {'normal':'400', 'bold':'700'}
    declarations = rule.style
    ans = {}
    for name in ('font-family', 'src', 'font-weight', 'font-stretch',
                 'font-style'):
        try:
            values = declarations.getProperty(name).propertyValue
            val = getattr(values[0], 'uri' if name == 'src' else 'value')
            if name == 'font-family':
                val = [x.value for x in values]
                if val and val[0] == 'inherit':
                    val = None
        except (IndexError, KeyError, AttributeError, TypeError, ValueError):
            # The property is absent or has an unexpected shape
            val = default if name in keyword_props else None
        if name in keyword_props:
            if val or val == 0:
                val = unicode(val).lower()
            if val == 'inherit':
                val = default
            if name == 'font-weight':
                val = named_weights.get(val, val)
            if val not in keyword_props[name]:
                val = default
            # The default itself may be the keyword normal
            if name == 'font-weight' and val == 'normal':
                val = '400'
        ans[name] = val
    return ans
def find_font_face_rules(sheet, oeb):
    '''
    Collect information about the @font-face rules in a stylesheet.

    ``sheet`` can be either a ManifestItem (the rules are read from
    ``sheet.data``) or a CSSStyleSheet. Rules that have no font-family or no
    src, or whose src does not resolve to an entry in ``oeb.manifest.hrefs``,
    are skipped.

    Returns a list of dicts, one per usable rule, containing the properties
    from get_font_properties() plus ``item`` (the font's manifest entry),
    ``weight`` (font-weight as an int), ``rule`` (the CSS rule itself) and
    ``chars`` (an empty set).
    '''
    try:
        css_rules = sheet.data.cssRules
    except AttributeError:
        css_rules = sheet.cssRules
    found = []
    for rule in css_rules:
        if rule.type != rule.FONT_FACE_RULE:
            continue
        face = get_font_properties(rule, default='normal')
        if not (face['font-family'] and face['src']):
            continue
        try:
            location = sheet.abshref(face['src'])
        except AttributeError:
            # Not a manifest item, use the src as is
            location = face['src']
        font_item = oeb.manifest.hrefs.get(urlnormalize(location), None)
        if not font_item:
            continue
        face['item'] = font_item
        # Relative weights are meaningless in an @font-face rule
        if face['font-weight'] in {'bolder', 'lighter'}:
            face['font-weight'] = '400'
        face['weight'] = int(face['font-weight'])
        face['rule'] = rule
        face['chars'] = set()
        found.append(face)
    return found
def elem_style(style_rules, cls, inherited_style):
    '''
    Compute the effective font style of an element.

    Starts from a copy of ``inherited_style`` and applies, in order, the
    properties that ``style_rules`` defines for each whitespace separated class
    name in ``cls``. A resulting font-weight of ``bolder`` or ``lighter`` is
    resolved to a number relative to the inherited weight.
    '''
    effective = inherited_style.copy()
    for name in cls.split():
        effective.update(style_rules.get(name, {}))
    own_weight = effective.get('font-weight', None)
    parent_weight = inherited_style.get('font-weight', '400')
    if own_weight == 'bolder':
        # Anything heavier than 500 goes to 900
        table, fallback = {
            '100':'400', '200':'400', '300':'400',
            '400':'700', '500':'700'}, '900'
    elif own_weight == 'lighter':
        # Anything lighter than 600 goes to 100
        table, fallback = {
            '600':'400', '700':'400',
            '800':'700', '900':'700'}, '100'
    else:
        return effective
    effective['font-weight'] = table.get(parent_weight, fallback)
    return effective
class SubsetFonts(object):
'''
@ -76,72 +181,15 @@ class SubsetFonts(object):
self.log('Reduced total font size to %.1f%% of original'%
(totals[0]/totals[1] * 100))
def get_font_properties(self, rule, default=None):
'''
Given a CSS rule, extract normalized font properties from
it. Note that shorthand font property should already have been expanded
by the CSS flattening code.
'''
props = {}
s = rule.style
for q in ('font-family', 'src', 'font-weight', 'font-stretch',
'font-style'):
g = 'uri' if q == 'src' else 'value'
try:
val = s.getProperty(q).propertyValue[0]
val = getattr(val, g)
if q == 'font-family':
val = [x.value for x in s.getProperty(q).propertyValue]
if val and val[0] == 'inherit':
val = None
except (IndexError, KeyError, AttributeError, TypeError, ValueError):
val = None if q in {'src', 'font-family'} else default
if q in {'font-weight', 'font-stretch', 'font-style'}:
val = unicode(val).lower() if (val or val == 0) else val
if val == 'inherit':
val = default
if q == 'font-weight':
val = {'normal':'400', 'bold':'700'}.get(val, val)
if val not in {'100', '200', '300', '400', '500', '600', '700',
'800', '900', 'bolder', 'lighter'}:
val = default
if val == 'normal': val = '400'
elif q == 'font-style':
if val not in {'normal', 'italic', 'oblique'}:
val = default
elif q == 'font-stretch':
if val not in { 'normal', 'ultra-condensed', 'extra-condensed',
'condensed', 'semi-condensed', 'semi-expanded',
'expanded', 'extra-expanded', 'ultra-expanded'}:
val = default
props[q] = val
return props
def find_embedded_fonts(self):
'''
Find all @font-face rules and extract the relevant info from them.
'''
self.embedded_fonts = []
for item in self.oeb.manifest:
if not hasattr(item.data, 'cssRules'): continue
for i, rule in enumerate(item.data.cssRules):
if rule.type != rule.FONT_FACE_RULE:
if not hasattr(item.data, 'cssRules'):
continue
props = self.get_font_properties(rule, default='normal')
if not props['font-family'] or not props['src']:
continue
path = item.abshref(props['src'])
ff = self.oeb.manifest.hrefs.get(urlnormalize(path), None)
if not ff:
continue
props['item'] = ff
if props['font-weight'] in {'bolder', 'lighter'}:
props['font-weight'] = '400'
props['weight'] = int(props['font-weight'])
props['chars'] = set()
props['rule'] = rule
self.embedded_fonts.append(props)
self.embedded_fonts.extend(find_font_face_rules(item, self.oeb))
def find_style_rules(self):
'''
@ -151,12 +199,13 @@ class SubsetFonts(object):
'''
rules = defaultdict(dict)
for item in self.oeb.manifest:
if not hasattr(item.data, 'cssRules'): continue
if not hasattr(item.data, 'cssRules'):
continue
for i, rule in enumerate(item.data.cssRules):
if rule.type != rule.STYLE_RULE:
continue
props = {k:v for k,v in
self.get_font_properties(rule).iteritems() if v}
get_font_properties(rule).iteritems() if v}
if not props:
continue
for sel in rule.selectorList:
@ -172,41 +221,17 @@ class SubsetFonts(object):
def find_font_usage(self):
for item in self.oeb.manifest:
if not hasattr(item.data, 'xpath'): continue
if not hasattr(item.data, 'xpath'):
continue
for body in item.data.xpath('//*[local-name()="body"]'):
base = {'font-family':['serif'], 'font-weight': '400',
'font-style':'normal', 'font-stretch':'normal'}
self.find_usage_in(body, base)
def elem_style(self, cls, inherited_style):
'''
Find the effective style for the given element.
'''
classes = cls.split()
style = inherited_style.copy()
for cls in classes:
style.update(self.style_rules.get(cls, {}))
wt = style.get('font-weight', None)
pwt = inherited_style.get('font-weight', '400')
if wt == 'bolder':
style['font-weight'] = {
'100':'400',
'200':'400',
'300':'400',
'400':'700',
'500':'700',
}.get(pwt, '900')
elif wt == 'lighter':
style['font-weight'] = {
'600':'400', '700':'400',
'800':'700', '900':'700'}.get(pwt, '100')
return style
def used_font(self, style):
'''
Given a style find the embedded font that matches it. Returns None if
no match is found ( can happen if not family matches).
no match is found (can happen if no family matches).
'''
ff = style.get('font-family', [])
lnames = {unicode(x).lower() for x in ff}
@ -280,7 +305,7 @@ class SubsetFonts(object):
return ans
def find_usage_in(self, elem, inherited_style):
style = self.elem_style(elem.get('class', '') or '', inherited_style)
style = elem_style(self.style_rules, elem.get('class', '') or '', inherited_style)
for child in elem:
self.find_usage_in(child, style)
font = self.used_font(style)
@ -290,3 +315,4 @@ class SubsetFonts(object):
font['chars'] |= chars

View File

@ -253,7 +253,7 @@ class PDFWriter(QObject):
return self.loop.exit(1)
try:
if not self.render_queue:
if self.toc is not None and len(self.toc) > 0 and not hasattr(self, 'rendered_inline_toc'):
if self.opts.pdf_add_toc and self.toc is not None and len(self.toc) > 0 and not hasattr(self, 'rendered_inline_toc'):
return self.render_inline_toc()
self.loop.exit()
else:

View File

@ -8,9 +8,8 @@ __copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os
from future_builtins import map
from urlparse import urlparse, urlunparse
from urllib2 import quote, unquote
from urlparse import urlparse
from urllib2 import unquote
from calibre.ebooks.pdf.render.common import Array, Name, Dictionary, String
@ -84,10 +83,8 @@ class Links(object):
action = Dictionary({
'Type':Name('Action'), 'S':Name('URI'),
})
parts = (x.encode('utf-8') if isinstance(x, type(u'')) else
x for x in purl)
url = urlunparse(map(quote, map(unquote,
parts))).decode('ascii')
# Do not try to normalize/quote/unquote this URL as if it
# has a query part, it will get corrupted
action['URI'] = String(url)
annot['A'] = action
if 'A' in annot or 'Dest' in annot:

View File

@ -399,8 +399,7 @@ class EditMetadataAction(InterfaceAction):
if safe_merge:
if not confirm('<p>'+_(
'Book formats and metadata from the selected books '
'will be added to the <b>first selected book</b> (%s). '
'ISBN will <i>not</i> be merged.<br><br> '
'will be added to the <b>first selected book</b> (%s).<br> '
'The second and subsequently selected books will not '
'be deleted or changed.<br><br>'
'Please confirm you want to proceed.')%title
@ -413,7 +412,7 @@ class EditMetadataAction(InterfaceAction):
'Book formats from the selected books will be merged '
'into the <b>first selected book</b> (%s). '
'Metadata in the first selected book will not be changed. '
'Author, Title, ISBN and all other metadata will <i>not</i> be merged.<br><br>'
'Author, Title and all other metadata will <i>not</i> be merged.<br><br>'
'After merger the second and subsequently '
'selected books, with any metadata they have will be <b>deleted</b>. <br><br>'
'All book formats of the first selected book will be kept '
@ -427,8 +426,7 @@ class EditMetadataAction(InterfaceAction):
else:
if not confirm('<p>'+_(
'Book formats and metadata from the selected books will be merged '
'into the <b>first selected book</b> (%s). '
'ISBN will <i>not</i> be merged.<br><br>'
'into the <b>first selected book</b> (%s).<br><br>'
'After merger the second and '
'subsequently selected books will be <b>deleted</b>. <br><br>'
'All book formats of the first selected book will be kept '
@ -490,11 +488,13 @@ class EditMetadataAction(InterfaceAction):
def merge_metadata(self, dest_id, src_ids):
db = self.gui.library_view.model().db
dest_mi = db.get_metadata(dest_id, index_is_id=True)
merged_identifiers = db.get_identifiers(dest_id, index_is_id=True)
orig_dest_comments = dest_mi.comments
dest_cover = db.cover(dest_id, index_is_id=True)
had_orig_cover = bool(dest_cover)
for src_id in src_ids:
src_mi = db.get_metadata(src_id, index_is_id=True)
if src_mi.comments and orig_dest_comments != src_mi.comments:
if not dest_mi.comments:
dest_mi.comments = src_mi.comments
@ -523,7 +523,15 @@ class EditMetadataAction(InterfaceAction):
if not dest_mi.series:
dest_mi.series = src_mi.series
dest_mi.series_index = src_mi.series_index
src_identifiers = db.get_identifiers(src_id, index_is_id=True)
src_identifiers.update(merged_identifiers)
merged_identifiers = src_identifiers.copy()
if merged_identifiers:
dest_mi.set_identifiers(merged_identifiers)
db.set_metadata(dest_id, dest_mi, ignore_errors=False)
if not had_orig_cover and dest_cover:
db.set_cover(dest_id, dest_cover)

View File

@ -45,6 +45,7 @@ class Polish(QDialog): # {{{
ORIGINAL_* format before running it.</p>''')
),
'embed':_('<h3>Embed referenced fonts</h3>%s')%HELP['embed'],
'subset':_('<h3>Subsetting fonts</h3>%s')%HELP['subset'],
'smarten_punctuation':
@ -75,6 +76,7 @@ class Polish(QDialog): # {{{
count = 0
self.all_actions = OrderedDict([
('embed', _('&Embed all referenced fonts')),
('subset', _('&Subset all embedded fonts')),
('smarten_punctuation', _('Smarten &punctuation')),
('metadata', _('Update &metadata in the book files')),

View File

@ -32,7 +32,7 @@ class LookAndFeelWidget(Widget, Ui_Form):
Widget.__init__(self, parent,
['change_justification', 'extra_css', 'base_font_size',
'font_size_mapping', 'line_height', 'minimum_line_height',
'embed_font_family', 'subset_embedded_fonts',
'embed_font_family', 'embed_all_fonts', 'subset_embedded_fonts',
'smarten_punctuation', 'unsmarten_punctuation',
'disable_font_rescaling', 'insert_blank_line',
'remove_paragraph_spacing',

View File

@ -14,6 +14,70 @@
<string>Form</string>
</property>
<layout class="QGridLayout" name="gridLayout">
<item row="12" column="1" colspan="2">
<widget class="QCheckBox" name="opt_keep_ligatures">
<property name="text">
<string>Keep &amp;ligatures</string>
</property>
</widget>
</item>
<item row="12" column="3">
<widget class="QCheckBox" name="opt_linearize_tables">
<property name="text">
<string>&amp;Linearize tables</string>
</property>
</widget>
</item>
<item row="1" column="0">
<widget class="QLabel" name="label_18">
<property name="text">
<string>Base &amp;font size:</string>
</property>
<property name="buddy">
<cstring>opt_base_font_size</cstring>
</property>
</widget>
</item>
<item row="9" column="3">
<widget class="QLabel" name="label_7">
<property name="text">
<string>&amp;Line size:</string>
</property>
<property name="alignment">
<set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
</property>
<property name="buddy">
<cstring>opt_insert_blank_line_size</cstring>
</property>
</widget>
</item>
<item row="5" column="1" colspan="2">
<widget class="EncodingComboBox" name="opt_input_encoding">
<property name="editable">
<bool>true</bool>
</property>
</widget>
</item>
<item row="8" column="0" colspan="2">
<widget class="QCheckBox" name="opt_remove_paragraph_spacing">
<property name="text">
<string>Remove &amp;spacing between paragraphs</string>
</property>
</widget>
</item>
<item row="8" column="3">
<widget class="QLabel" name="label_4">
<property name="text">
<string>&amp;Indent size:</string>
</property>
<property name="alignment">
<set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
</property>
<property name="buddy">
<cstring>opt_remove_paragraph_spacing_indent_size</cstring>
</property>
</widget>
</item>
<item row="3" column="4">
<widget class="QDoubleSpinBox" name="opt_line_height">
<property name="suffix">
@ -24,6 +88,57 @@
</property>
</widget>
</item>
<item row="9" column="0" colspan="2">
<widget class="QCheckBox" name="opt_insert_blank_line">
<property name="text">
<string>Insert &amp;blank line between paragraphs</string>
</property>
</widget>
</item>
<item row="9" column="4">
<widget class="QDoubleSpinBox" name="opt_insert_blank_line_size">
<property name="suffix">
<string> em</string>
</property>
<property name="decimals">
<number>1</number>
</property>
</widget>
</item>
<item row="10" column="0">
<widget class="QLabel" name="label_5">
<property name="text">
<string>Text &amp;justification:</string>
</property>
<property name="buddy">
<cstring>opt_change_justification</cstring>
</property>
</widget>
</item>
<item row="10" column="2" colspan="3">
<widget class="QComboBox" name="opt_change_justification"/>
</item>
<item row="11" column="0">
<widget class="QCheckBox" name="opt_smarten_punctuation">
<property name="text">
<string>Smarten &amp;punctuation</string>
</property>
</widget>
</item>
<item row="11" column="1" colspan="4">
<widget class="QCheckBox" name="opt_asciiize">
<property name="text">
<string>&amp;Transliterate unicode characters to ASCII</string>
</property>
</widget>
</item>
<item row="12" column="0">
<widget class="QCheckBox" name="opt_unsmarten_punctuation">
<property name="text">
<string>&amp;UnSmarten punctuation</string>
</property>
</widget>
</item>
<item row="3" column="3">
<widget class="QLabel" name="label">
<property name="text">
@ -44,51 +159,6 @@
</property>
</widget>
</item>
<item row="3" column="1">
<widget class="QDoubleSpinBox" name="opt_minimum_line_height">
<property name="suffix">
<string> %</string>
</property>
<property name="decimals">
<number>1</number>
</property>
<property name="maximum">
<double>900.000000000000000</double>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QDoubleSpinBox" name="opt_base_font_size">
<property name="suffix">
<string> pt</string>
</property>
<property name="decimals">
<number>1</number>
</property>
<property name="minimum">
<double>0.000000000000000</double>
</property>
<property name="maximum">
<double>50.000000000000000</double>
</property>
<property name="singleStep">
<double>1.000000000000000</double>
</property>
<property name="value">
<double>15.000000000000000</double>
</property>
</widget>
</item>
<item row="2" column="0">
<widget class="QLabel" name="label_2">
<property name="text">
<string>Font size &amp;key:</string>
</property>
<property name="buddy">
<cstring>opt_font_size_mapping</cstring>
</property>
</widget>
</item>
<item row="2" column="1" colspan="3">
<layout class="QHBoxLayout" name="horizontalLayout">
<item>
@ -133,56 +203,72 @@
</property>
</widget>
</item>
<item row="5" column="1" colspan="2">
<widget class="EncodingComboBox" name="opt_input_encoding">
<property name="editable">
<bool>true</bool>
</property>
</widget>
</item>
<item row="7" column="0" colspan="2">
<widget class="QCheckBox" name="opt_remove_paragraph_spacing">
<property name="text">
<string>Remove &amp;spacing between paragraphs</string>
</property>
</widget>
</item>
<item row="7" column="3">
<widget class="QLabel" name="label_4">
<property name="text">
<string>&amp;Indent size:</string>
</property>
<property name="alignment">
<set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
</property>
<property name="buddy">
<cstring>opt_remove_paragraph_spacing_indent_size</cstring>
</property>
</widget>
</item>
<item row="7" column="4">
<widget class="QDoubleSpinBox" name="opt_remove_paragraph_spacing_indent_size">
<property name="toolTip">
<string>&lt;p&gt;When calibre removes inter paragraph spacing, it automatically sets a paragraph indent, to ensure that paragraphs can be easily distinguished. This option controls the width of that indent.</string>
</property>
<property name="specialValueText">
<string>No change</string>
</property>
<item row="3" column="1">
<widget class="QDoubleSpinBox" name="opt_minimum_line_height">
<property name="suffix">
<string> em</string>
<string> %</string>
</property>
<property name="decimals">
<number>1</number>
</property>
<property name="maximum">
<double>900.000000000000000</double>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QDoubleSpinBox" name="opt_base_font_size">
<property name="suffix">
<string> pt</string>
</property>
<property name="decimals">
<number>1</number>
</property>
<property name="minimum">
<double>-0.100000000000000</double>
<double>0.000000000000000</double>
</property>
<property name="maximum">
<double>50.000000000000000</double>
</property>
<property name="singleStep">
<double>0.100000000000000</double>
<double>1.000000000000000</double>
</property>
<property name="value">
<double>15.000000000000000</double>
</property>
</widget>
</item>
<item row="12" column="0" colspan="5">
<item row="0" column="0" colspan="5">
<widget class="QCheckBox" name="opt_disable_font_rescaling">
<property name="text">
<string>&amp;Disable font size rescaling</string>
</property>
</widget>
</item>
<item row="6" column="1" colspan="2">
<widget class="FontFamilyChooser" name="opt_embed_font_family" native="true"/>
</item>
<item row="2" column="0">
<widget class="QLabel" name="label_2">
<property name="text">
<string>Font size &amp;key:</string>
</property>
<property name="buddy">
<cstring>opt_font_size_mapping</cstring>
</property>
</widget>
</item>
<item row="6" column="0">
<widget class="QLabel" name="label_10">
<property name="text">
<string>&amp;Embed font family:</string>
</property>
<property name="buddy">
<cstring>opt_embed_font_family</cstring>
</property>
</widget>
</item>
<item row="13" column="0" colspan="5">
<widget class="QTabWidget" name="tabWidget">
<property name="currentIndex">
<number>0</number>
@ -300,121 +386,42 @@
</widget>
</widget>
</item>
<item row="8" column="0" colspan="2">
<widget class="QCheckBox" name="opt_insert_blank_line">
<property name="text">
<string>Insert &amp;blank line between paragraphs</string>
</property>
</widget>
</item>
<item row="8" column="4">
<widget class="QDoubleSpinBox" name="opt_insert_blank_line_size">
<widget class="QDoubleSpinBox" name="opt_remove_paragraph_spacing_indent_size">
<property name="toolTip">
<string>&lt;p&gt;When calibre removes inter paragraph spacing, it automatically sets a paragraph indent, to ensure that paragraphs can be easily distinguished. This option controls the width of that indent.</string>
</property>
<property name="specialValueText">
<string>No change</string>
</property>
<property name="suffix">
<string> em</string>
</property>
<property name="decimals">
<number>1</number>
</property>
</widget>
</item>
<item row="9" column="0">
<widget class="QLabel" name="label_5">
<property name="text">
<string>Text &amp;justification:</string>
<property name="minimum">
<double>-0.100000000000000</double>
</property>
<property name="buddy">
<cstring>opt_change_justification</cstring>
<property name="singleStep">
<double>0.100000000000000</double>
</property>
</widget>
</item>
<item row="9" column="2" colspan="3">
<widget class="QComboBox" name="opt_change_justification"/>
</item>
<item row="10" column="0">
<widget class="QCheckBox" name="opt_smarten_punctuation">
<property name="text">
<string>Smarten &amp;punctuation</string>
</property>
</widget>
</item>
<item row="10" column="1" colspan="4">
<widget class="QCheckBox" name="opt_asciiize">
<property name="text">
<string>&amp;Transliterate unicode characters to ASCII</string>
</property>
</widget>
</item>
<item row="11" column="0">
<widget class="QCheckBox" name="opt_unsmarten_punctuation">
<property name="text">
<string>&amp;UnSmarten punctuation</string>
</property>
</widget>
</item>
<item row="11" column="1" colspan="2">
<widget class="QCheckBox" name="opt_keep_ligatures">
<property name="text">
<string>Keep &amp;ligatures</string>
</property>
</widget>
</item>
<item row="11" column="3">
<widget class="QCheckBox" name="opt_linearize_tables">
<property name="text">
<string>&amp;Linearize tables</string>
</property>
</widget>
</item>
<item row="1" column="0">
<widget class="QLabel" name="label_18">
<property name="text">
<string>Base &amp;font size:</string>
</property>
<property name="buddy">
<cstring>opt_base_font_size</cstring>
</property>
</widget>
</item>
<item row="8" column="3">
<widget class="QLabel" name="label_7">
<property name="text">
<string>&amp;Line size:</string>
</property>
<property name="alignment">
<set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
</property>
<property name="buddy">
<cstring>opt_insert_blank_line_size</cstring>
</property>
</widget>
</item>
<item row="6" column="0">
<widget class="QLabel" name="label_10">
<property name="text">
<string>&amp;Embed font family:</string>
</property>
<property name="buddy">
<cstring>opt_embed_font_family</cstring>
</property>
</widget>
</item>
<item row="0" column="0" colspan="5">
<widget class="QCheckBox" name="opt_disable_font_rescaling">
<property name="text">
<string>&amp;Disable font size rescaling</string>
</property>
</widget>
</item>
<item row="6" column="1" colspan="2">
<widget class="FontFamilyChooser" name="opt_embed_font_family" native="true"/>
</item>
<item row="6" column="3" colspan="2">
<item row="7" column="3">
<widget class="QCheckBox" name="opt_subset_embedded_fonts">
<property name="text">
<string>&amp;Subset all embedded fonts</string>
</property>
</widget>
</item>
<item row="7" column="0" colspan="3">
<widget class="QCheckBox" name="opt_embed_all_fonts">
<property name="text">
<string>&amp;Embed referenced fonts</string>
</property>
</widget>
</item>
</layout>
</widget>
<customwidgets>

View File

@ -92,7 +92,11 @@ class Sendmail(object):
raise worker.exception
def sendmail(self, attachment, aname, to, subject, text, log):
logged = False
while time.time() - self.last_send_time <= self.rate_limit:
if not logged and self.rate_limit > 0:
log('Waiting %s seconds before sending, to avoid being marked as spam.\nYou can control this delay via Preferences->Tweaks' % self.rate_limit)
logged = True
time.sleep(1)
try:
opts = email_config().parse()
@ -204,10 +208,10 @@ class EmailMixin(object): # {{{
if not components:
components = [mi.title]
subjects.append(os.path.join(*components))
a = authors_to_string(mi.authors if mi.authors else \
a = authors_to_string(mi.authors if mi.authors else
[_('Unknown')])
texts.append(_('Attached, you will find the e-book') + \
'\n\n' + t + '\n\t' + _('by') + ' ' + a + '\n\n' + \
texts.append(_('Attached, you will find the e-book') +
'\n\n' + t + '\n\t' + _('by') + ' ' + a + '\n\n' +
_('in the %s format.') %
os.path.splitext(f)[1][1:].upper())
prefix = ascii_filename(t+' - '+a)
@ -227,7 +231,7 @@ class EmailMixin(object): # {{{
auto = []
if _auto_ids != []:
for id in _auto_ids:
if specific_format == None:
if specific_format is None:
dbfmts = self.library_view.model().db.formats(id, index_is_id=True)
formats = [f.lower() for f in (dbfmts.split(',') if dbfmts else
[])]
@ -298,8 +302,9 @@ class EmailMixin(object): # {{{
sent_mails = email_news(mi, remove,
get_fmts, self.email_sent, self.job_manager)
if sent_mails:
self.status_bar.show_message(_('Sent news to')+' '+\
self.status_bar.show_message(_('Sent news to')+' '+
', '.join(sent_mails), 3000)
# }}}

View File

@ -1006,7 +1006,7 @@ class FullFetch(QDialog): # {{{
l.addWidget(self.bb)
self.bb.rejected.connect(self.reject)
self.bb.accepted.connect(self.accept)
self.next_button = self.bb.addButton(_('Next'), self.bb.AcceptRole)
self.next_button = self.bb.addButton(_('Next'), self.bb.ActionRole)
self.next_button.setDefault(True)
self.next_button.setEnabled(False)
self.next_button.setIcon(QIcon(I('ok.png')))
@ -1019,7 +1019,7 @@ class FullFetch(QDialog): # {{{
self.log_button = self.bb.addButton(_('View log'), self.bb.ActionRole)
self.log_button.clicked.connect(self.view_log)
self.log_button.setIcon(QIcon(I('debug.png')))
self.ok_button.setVisible(False)
self.ok_button.setEnabled(False)
self.prev_button.setVisible(False)
self.identify_widget = IdentifyWidget(self.log, self)
@ -1044,7 +1044,7 @@ class FullFetch(QDialog): # {{{
def book_selected(self, book, caches):
self.next_button.setVisible(False)
self.ok_button.setVisible(True)
self.ok_button.setEnabled(True)
self.prev_button.setVisible(True)
self.book = book
self.stack.setCurrentIndex(1)
@ -1055,8 +1055,9 @@ class FullFetch(QDialog): # {{{
def back_clicked(self):
self.next_button.setVisible(True)
self.ok_button.setVisible(False)
self.ok_button.setEnabled(False)
self.prev_button.setVisible(False)
self.next_button.setFocus()
self.stack.setCurrentIndex(0)
self.covers_widget.cancel()
self.covers_widget.reset_covers()
@ -1081,6 +1082,7 @@ class FullFetch(QDialog): # {{{
self.next_button.setEnabled(True)
def next_clicked(self, *args):
gprefs['metadata_single_gui_geom'] = bytearray(self.saveGeometry())
self.identify_widget.get_result()
def ok_clicked(self, *args):

View File

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
store_version = 4 # Needed for dynamic plugin loading
store_version = 5 # Needed for dynamic plugin loading
__license__ = 'GPL 3'
__copyright__ = '2011-2013, Tomasz Długosz <tomek3d@gmail.com>'
@ -56,20 +56,20 @@ class WoblinkStore(BasicStoreConfig, StorePlugin):
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read())
for data in doc.xpath('//div[@class="book-item backgroundmix"]'):
for data in doc.xpath('//div[@class="nw_katalog_lista_ksiazka"]'):
if counter <= 0:
break
id = ''.join(data.xpath('.//td[@class="w10 va-t mYHaveItYes"]/a[1]/@href'))
id = ''.join(data.xpath('.//div[@class="nw_katalog_lista_ksiazka_okladka nw_okladka"]/a[1]/@href'))
if not id:
continue
cover_url = ''.join(data.xpath('.//td[@class="w10 va-t mYHaveItYes"]/a[1]/img/@src'))
title = ''.join(data.xpath('.//h2[@class="title"]/a[1]/text()'))
author = ', '.join(data.xpath('.//td[@class="va-t"]/h3/a/text()'))
price = ''.join(data.xpath('.//div[@class="prices"]/span[1]/strong/span/text()'))
cover_url = ''.join(data.xpath('.//div[@class="nw_katalog_lista_ksiazka_okladka nw_okladka"]/a[1]/img/@src'))
title = ''.join(data.xpath('.//h2[@class="nw_katalog_lista_ksiazka_detale_tytul"]/a[1]/text()'))
author = ', '.join(data.xpath('.//h3[@class="nw_katalog_lista_ksiazka_detale_autor"]/a/text()'))
price = ''.join(data.xpath('.//div[@class="nw_katalog_lista_ksiazka_opcjezakupu_cena"]/span/text()'))
price = re.sub('\.', ',', price)
formats = [ form[8:-4].split('.')[0] for form in data.xpath('.//p[3]/img/@src')]
formats = ', '.join(data.xpath('.//p[@class="nw_katalog_lista_ksiazka_detale_formaty"]/span/text()'))
s = SearchResult()
s.cover_url = 'http://woblink.com' + cover_url
@ -77,25 +77,16 @@ class WoblinkStore(BasicStoreConfig, StorePlugin):
s.author = author.strip()
s.price = price + ''
s.detail_item = id.strip()
s.formats = formats
if 'epub_drm' in formats:
if 'EPUB DRM' in formats:
s.drm = SearchResult.DRM_LOCKED
s.formats = 'EPUB'
counter -= 1
yield s
elif 'pdf' in formats:
s.drm = SearchResult.DRM_LOCKED
s.formats = 'PDF'
counter -= 1
yield s
else:
s.drm = SearchResult.DRM_UNLOCKED
if 'MOBI_nieb' in formats:
formats.remove('MOBI_nieb')
formats.append('MOBI')
s.formats = ', '.join(formats).upper()
counter -= 1
yield s

View File

@ -334,7 +334,7 @@ class TagBrowserWidget(QWidget): # {{{
search_layout = QHBoxLayout()
self._layout.addLayout(search_layout)
self.item_search = HistoryLineEdit(parent)
self.item_search.setMinimumContentsLength(10)
self.item_search.setMinimumContentsLength(5)
self.item_search.setSizeAdjustPolicy(self.item_search.AdjustToMinimumContentsLengthWithIcon)
try:
self.item_search.lineEdit().setPlaceholderText(

View File

@ -1500,8 +1500,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
format = os.path.splitext(npath)[-1].lower().replace('.', '').upper()
stream = lopen(npath, 'rb')
format = check_ebook_format(stream, format)
retval = self.add_format(index, format, stream, replace=replace,
index_is_id=index_is_id, path=path, notify=notify)
id = index if index_is_id else self.id(index)
retval = self.add_format(id, format, stream, replace=replace,
index_is_id=True, path=path, notify=notify)
run_plugins_on_postimport(self, id, format)
return retval

View File

@ -70,9 +70,11 @@ def shorten_components_to(length, components, more_to_take=0):
else:
if x is components[-1]:
b, e = os.path.splitext(x)
if e == '.': e = ''
if e == '.':
e = ''
r = shorten_component(b, delta)+e
if r.startswith('.'): r = x[0]+r
if r.startswith('.'):
r = x[0]+r
else:
r = shorten_component(x, delta)
r = r.strip()
@ -115,7 +117,7 @@ def is_case_sensitive(path):
os.remove(f1)
return is_case_sensitive
def case_preserving_open_file(path, mode='wb', mkdir_mode=0777):
def case_preserving_open_file(path, mode='wb', mkdir_mode=0o777):
'''
Open the file pointed to by path with the specified mode. If any
directories in path do not exist, they are created. Returns the
@ -211,7 +213,8 @@ def samefile_windows(src, dst):
handles = []
def get_fileid(x):
if isbytestring(x): x = x.decode(filesystem_encoding)
if isbytestring(x):
x = x.decode(filesystem_encoding)
try:
h = win32file.CreateFile(x, 0, 0, None, win32file.OPEN_EXISTING,
win32file.FILE_FLAG_BACKUP_SEMANTICS, 0)
@ -254,6 +257,24 @@ def samefile(src, dst):
os.path.normcase(os.path.abspath(dst)))
return samestring
def windows_hardlink(src, dest):
    '''
    Create a hardlink at ``dest`` that points to the existing file ``src``
    using the win32 API, then check that the size read back from ``dest``
    equals the size of ``src``.

    :param src: Path of the existing file to link to.
    :param dest: Path of the hardlink to create.
    :raises Exception: If ``CreateHardLink`` fails, or if the size reported
        for ``dest`` differs from the size of ``src``.
    '''
    import win32file, pywintypes

    def failure(reason):
        # Build the message in a single formatting pass. Interpolating the
        # paths first and the reason afterwards breaks when a path contains
        # a literal '%' character.
        return Exception(
            u'Creating hardlink from %s to %s failed: %s' % (src, dest, reason))

    try:
        win32file.CreateHardLink(dest, src)
    except pywintypes.error as e:
        raise failure(e)
    # We open and close dest, to ensure its directory entry is updated
    # see http://blogs.msdn.com/b/oldnewthing/archive/2011/12/26/10251026.aspx
    h = win32file.CreateFile(
        dest, 0, win32file.FILE_SHARE_READ | win32file.FILE_SHARE_WRITE | win32file.FILE_SHARE_DELETE,
        None, win32file.OPEN_EXISTING, 0, None)
    try:
        sz = win32file.GetFileSize(h)
    finally:
        # Release the handle even when querying the size fails
        win32file.CloseHandle(h)
    if sz != os.path.getsize(src):
        raise failure('hardlink size: %d not the same as source size' % sz)
class WindowsAtomicFolderMove(object):
'''
@ -270,14 +291,16 @@ class WindowsAtomicFolderMove(object):
import win32file, winerror
from pywintypes import error
if isbytestring(path): path = path.decode(filesystem_encoding)
if isbytestring(path):
path = path.decode(filesystem_encoding)
if not os.path.exists(path):
return
for x in os.listdir(path):
f = os.path.normcase(os.path.abspath(os.path.join(path, x)))
if not os.path.isfile(f): continue
if not os.path.isfile(f):
continue
try:
# Ensure the file is not read-only
win32file.SetFileAttributes(f, win32file.FILE_ATTRIBUTE_NORMAL)
@ -315,9 +338,7 @@ class WindowsAtomicFolderMove(object):
else:
raise ValueError(u'The file %r does not exist'%path)
try:
win32file.CreateHardLink(dest, path)
if os.path.getsize(dest) != os.path.getsize(path):
raise Exception('This apparently can happen on network shares. Sigh.')
windows_hardlink(path, dest)
return
except:
pass
@ -355,10 +376,8 @@ class WindowsAtomicFolderMove(object):
def hardlink_file(src, dest):
if iswindows:
import win32file
win32file.CreateHardLink(dest, src)
if os.path.getsize(dest) != os.path.getsize(src):
raise Exception('This apparently can happen on network shares. Sigh.')
windows_hardlink(src, dest)
return
os.link(src, dest)

View File

@ -62,7 +62,7 @@ def _parse(source, beautifulsoup, makeelement, **bsargs):
if makeelement is None:
makeelement = html.html_parser.makeelement
if 'convertEntities' not in bsargs:
bsargs['convertEntities'] = 'html'
bsargs['convertEntities'] = 'xhtml' # Changed by Kovid, otherwise &apos; is mangled, see https://bugs.launchpad.net/calibre/+bug/1197585
tree = beautifulsoup(source, **bsargs)
root = _convert_tree(tree, makeelement)
# from ET: wrap the document in a html root element, if necessary

View File

@ -128,6 +128,8 @@ def download_resources(browser, resource_cache, output_dir):
else:
img_counter += 1
ext = what(None, raw) or 'jpg'
if ext == 'jpeg':
ext = 'jpg' # Apparently Moon+ cannot handle .jpeg
href = 'img_%d.%s' % (img_counter, ext)
dest = os.path.join(output_dir, href)
resource_cache[h] = dest