From 8f30c17486701bf2cc29a84c98af5336de75fc56 Mon Sep 17 00:00:00 2001 From: GRiker Date: Mon, 24 Jun 2013 14:14:08 -0700 Subject: [PATCH 01/57] Fixed typo(?) in set_metadata() for touched files. --- src/calibre/utils/podofo/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/utils/podofo/__init__.py b/src/calibre/utils/podofo/__init__.py index 13c12a9bb3..a0b5d85331 100644 --- a/src/calibre/utils/podofo/__init__.py +++ b/src/calibre/utils/podofo/__init__.py @@ -36,7 +36,7 @@ def set_metadata(stream, mi): except WorkerError as e: raise Exception('Failed to set PDF metadata: %s'%e.orig_tb) if touched: - with open(os.path.join(tdir, u'output.pdf'), 'rb') as f: + with open(os.path.join(tdir, u'input.pdf'), 'rb') as f: f.seek(0, 2) if f.tell() > 100: f.seek(0) From 2096dce1cdcea621f314145f71d5ad5a7390a13e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 25 Jun 2013 13:09:51 +0530 Subject: [PATCH 02/57] Move User Manual and staging to the download server --- setup/hosting.py | 113 ++++++++++++++++++++++++++++++++++++++++++++--- setup/upload.py | 11 +++-- 2 files changed, 112 insertions(+), 12 deletions(-) diff --git a/setup/hosting.py b/setup/hosting.py index 76ab3992a0..1e78f4694d 100644 --- a/setup/hosting.py +++ b/setup/hosting.py @@ -7,16 +7,14 @@ __license__ = 'GPL v3' __copyright__ = '2011, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import os, time, sys, traceback, subprocess, urllib2, re, base64, httplib +import os, time, sys, traceback, subprocess, urllib2, re, base64, httplib, shutil from argparse import ArgumentParser, FileType from subprocess import check_call from tempfile import NamedTemporaryFile from collections import OrderedDict -import mechanize -from lxml import html - def login_to_google(username, password): # {{{ + import mechanize br = mechanize.Browser() br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:9.0) Gecko/20100101 Firefox/9.0')] @@ -246,6 +244,7 @@ class GoogleCode(Base): # {{{ return login_to_google(self.username, self.gmail_password) def get_files_hosted_by_google_code(self): + from lxml import html self.info('Getting existing files in google code:', self.gc_project) raw = urllib2.urlopen(self.files_list).read() root = html.fromstring(raw) @@ -380,11 +379,111 @@ class SourceForge(Base): # {{{ # }}} +def generate_index(): # {{{ + os.chdir('/srv/download') + releases = set() + for x in os.listdir('.'): + if os.path.isdir(x) and '.' in x: + releases.add(tuple((int(y) for y in x.split('.')))) + rmap = OrderedDict() + for rnum in sorted(releases, reverse=True): + series = rnum[:2] if rnum[0] == 0 else rnum[:1] + if series not in rmap: + rmap[series] = [] + rmap[series].append(rnum) + + template = '''\n {title}

{title}

{msg}

{body} ''' # noqa + style = ''' + body { font-family: sans-serif; background-color: #eee; } + a { text-decoration: none; } + a:visited { color: blue } + a:hover { color: red } + ul { list-style-type: none } + li { padding-bottom: 1ex } + dd li { text-indent: 0; margin: 0 } + dd ul { padding: 0; margin: 0 } + dt { font-weight: bold } + dd { margin-bottom: 2ex } + ''' + body = [] + for series in rmap: + body.append('
  • {0}.x\xa0\xa0\xa0[{1} releases]
  • '.format( # noqa + '.'.join(map(type(''), series)), len(rmap[series]))) + body = '
      {0}
    '.format(' '.join(body)) + index = template.format(title='Previous calibre releases', style=style, msg='Choose a series of calibre releases', body=body) + with open('index.html', 'wb') as f: + f.write(index.encode('utf-8')) + + for series, releases in rmap.iteritems(): + sname = '.'.join(map(type(''), series)) + body = [ + '
  • {0}
  • '.format('.'.join(map(type(''), r))) + for r in releases] + body = '
      {0}
    '.format(' '.join(body)) + index = template.format(title='Previous calibre releases (%s.x)' % sname, style=style, + msg='Choose a calibre release', body=body) + with open('%s.html' % sname, 'wb') as f: + f.write(index.encode('utf-8')) + + for r in releases: + rname = '.'.join(map(type(''), r)) + os.chdir(rname) + try: + body = [] + files = os.listdir('.') + windows = [x for x in files if x.endswith('.msi')] + if windows: + windows = ['
  • {1}
  • '.format( + x, 'Windows 64-bit Installer' if '64bit' in x else 'Windows 32-bit Installer') + for x in windows] + body.append('
    Windows
      {0}
    '.format(' '.join(windows))) + portable = [x for x in files if '-portable-' in x] + if portable: + body.append('
    Calibre Portable
    {1}
    '.format( + portable[0], 'Calibre Portable Installer')) + osx = [x for x in files if x.endswith('.dmg')] + if osx: + body.append('
    Apple Mac
    {1}
    '.format( + osx[0], 'OS X Disk Image (.dmg)')) + linux = [x for x in files if x.endswith('.bz2')] + if linux: + linux = ['
  • {1}
  • '.format( + x, 'Linux 64-bit binary' if 'x86_64' in x else 'Linux 32-bit binary') + for x in linux] + body.append('
    Linux
      {0}
    '.format(' '.join(linux))) + source = [x for x in files if x.endswith('.xz') or x.endswith('.gz')] + if source: + body.append('
    Source Code
    {1}
    '.format( + source[0], 'Source code (all platforms)')) + + body = '
    {0}
    '.format(''.join(body)) + index = template.format(title='calibre release (%s)' % rname, style=style, + msg='', body=body) + with open('index.html', 'wb') as f: + f.write(index.encode('utf-8')) + finally: + os.chdir('..') + +# }}} + def upload_to_servers(files, version): # {{{ - for server, rdir in {'files':'/usr/share/nginx/html'}.iteritems(): + base = '/srv/download/' + dest = os.path.join(base, version) + if not os.path.exists(dest): + os.mkdir(dest) + for src in files: + shutil.copyfile(src, os.path.join(dest, os.path.basename(src))) + generate_index() + + for server, rdir in {'files':'/srv/download/'}.iteritems(): print('Uploading to server:', server) server = '%s.calibre-ebook.com' % server - rdir = '%s/%s/' % (rdir, version) + # Copy the generated index files + print ('Copying generated index') + check_call(['rsync', '-hzr', '-e', 'ssh -x', '--include', '*.html', + '--filter', '-! */', base, 'root@%s:%s' % (server, rdir)]) + # Copy the release files + rdir = '%s%s/' % (rdir, version) for x in files: start = time.time() print ('Uploading', x) @@ -400,6 +499,7 @@ def upload_to_servers(files, version): # {{{ else: break print ('Uploaded in', int(time.time() - start), 'seconds\n\n') + # }}} def upload_to_dbs(files, version): # {{{ @@ -530,3 +630,4 @@ if __name__ == '__main__': # }}} + diff --git a/setup/upload.py b/setup/upload.py index 8a4e467dd0..639a2e98d5 100644 --- a/setup/upload.py +++ b/setup/upload.py @@ -19,10 +19,9 @@ from setup import Command, __version__, installer_name, __appname__ PREFIX = "/var/www/calibre-ebook.com" DOWNLOADS = PREFIX+"/htdocs/downloads" BETAS = DOWNLOADS +'/betas' -USER_MANUAL = '/var/www/localhost/htdocs/' HTML2LRF = "calibre/ebooks/lrf/html/demo" TXT2LRF = "src/calibre/ebooks/lrf/txt/demo" -STAGING_HOST = '67.207.135.179' +STAGING_HOST = 'download.calibre-ebook.com' STAGING_USER = 'root' STAGING_DIR = '/root/staging' @@ -141,8 +140,8 @@ class UploadInstallers(Command): # {{{ os.mkdir(backup) try: self.upload_to_staging(tdir, backup, files) - self.upload_to_sourceforge() self.upload_to_calibre() + self.upload_to_sourceforge() self.upload_to_dbs() # self.upload_to_google(opts.replace) finally: @@ -219,9 +218,9 @@ class UploadUserManual(Command): # {{{ for x in glob.glob(self.j(path, '*')): self.build_plugin_example(x) - check_call(' '.join(['rsync', '-z', '-r', '--progress', - 'manual/.build/html/', - 'bugs:%s'%USER_MANUAL]), shell=True) + for host in ('download', 'files'): + check_call(' '.join(['rsync', '-z', '-r', '--progress', + 'manual/.build/html/', '%s:/srv/manual/' % host]), shell=True) # }}} class UploadDemo(Command): # {{{ From e4876b579eba58402b507b58557a8c92db5037f4 Mon Sep 17 00:00:00 2001 From: Hakan Tandogan Date: Tue, 25 Jun 2013 19:47:48 +0200 Subject: [PATCH 03/57] On book merge, merge identifiers as well. In case of conflict, the target wins. --- src/calibre/gui2/actions/edit_metadata.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/calibre/gui2/actions/edit_metadata.py b/src/calibre/gui2/actions/edit_metadata.py index e5a9bfbc7d..84b1d367c6 100644 --- a/src/calibre/gui2/actions/edit_metadata.py +++ b/src/calibre/gui2/actions/edit_metadata.py @@ -399,8 +399,7 @@ class EditMetadataAction(InterfaceAction): if safe_merge: if not confirm('

    '+_( 'Book formats and metadata from the selected books ' - 'will be added to the first selected book (%s). ' - 'ISBN will not be merged.

    ' + 'will be added to the first selected book (%s).
    ' 'The second and subsequently selected books will not ' 'be deleted or changed.

    ' 'Please confirm you want to proceed.')%title @@ -413,7 +412,7 @@ class EditMetadataAction(InterfaceAction): 'Book formats from the selected books will be merged ' 'into the first selected book (%s). ' 'Metadata in the first selected book will not be changed. ' - 'Author, Title, ISBN and all other metadata will not be merged.

    ' + 'Author, Title and all other metadata will not be merged.

    ' 'After merger the second and subsequently ' 'selected books, with any metadata they have will be deleted.

    ' 'All book formats of the first selected book will be kept ' @@ -427,8 +426,7 @@ class EditMetadataAction(InterfaceAction): else: if not confirm('

    '+_( 'Book formats and metadata from the selected books will be merged ' - 'into the first selected book (%s). ' - 'ISBN will not be merged.

    ' + 'into the first selected book (%s).
    ' 'After merger the second and ' 'subsequently selected books will be deleted.

    ' 'All book formats of the first selected book will be kept ' @@ -490,11 +488,13 @@ class EditMetadataAction(InterfaceAction): def merge_metadata(self, dest_id, src_ids): db = self.gui.library_view.model().db dest_mi = db.get_metadata(dest_id, index_is_id=True) + merged_identifiers = db.get_identifiers(dest_id, index_is_id=True) orig_dest_comments = dest_mi.comments dest_cover = db.cover(dest_id, index_is_id=True) had_orig_cover = bool(dest_cover) for src_id in src_ids: src_mi = db.get_metadata(src_id, index_is_id=True) + if src_mi.comments and orig_dest_comments != src_mi.comments: if not dest_mi.comments: dest_mi.comments = src_mi.comments @@ -523,7 +523,15 @@ class EditMetadataAction(InterfaceAction): if not dest_mi.series: dest_mi.series = src_mi.series dest_mi.series_index = src_mi.series_index + + src_identifiers = db.get_identifiers(src_id, index_is_id=True) + src_identifiers.update(merged_identifiers) + merged_identifiers = src_identifiers.copy() + db.set_metadata(dest_id, dest_mi, ignore_errors=False) + + db.set_identifiers(dest_id, merged_identifiers) + if not had_orig_cover and dest_cover: db.set_cover(dest_id, dest_cover) From 7b6a742f2542ba5265898785a5dace388c11e5f5 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 26 Jun 2013 00:07:00 +0530 Subject: [PATCH 04/57] ... --- src/calibre/gui2/actions/edit_metadata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/gui2/actions/edit_metadata.py b/src/calibre/gui2/actions/edit_metadata.py index babd690384..729de33c7f 100644 --- a/src/calibre/gui2/actions/edit_metadata.py +++ b/src/calibre/gui2/actions/edit_metadata.py @@ -426,7 +426,7 @@ class EditMetadataAction(InterfaceAction): else: if not confirm('

    '+_( 'Book formats and metadata from the selected books will be merged ' - 'into the first selected book (%s).
    ' + 'into the first selected book (%s).

    ' 'After merger the second and ' 'subsequently selected books will be deleted.

    ' 'All book formats of the first selected book will be kept ' From 30cea5df3a8f5b3c125912854b78699f9cbd4219 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 26 Jun 2013 10:40:00 +0530 Subject: [PATCH 05/57] ... --- src/calibre/web/fetch/javascript.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/calibre/web/fetch/javascript.py b/src/calibre/web/fetch/javascript.py index 56460c18bf..6e9ef86ff1 100644 --- a/src/calibre/web/fetch/javascript.py +++ b/src/calibre/web/fetch/javascript.py @@ -128,6 +128,8 @@ def download_resources(browser, resource_cache, output_dir): else: img_counter += 1 ext = what(None, raw) or 'jpg' + if ext == 'jpeg': + ext = 'jpg' # Apparently Moon+ cannot handle .jpeg href = 'img_%d.%s' % (img_counter, ext) dest = os.path.join(output_dir, href) resource_cache[h] = dest From 03452d2a038873d8df345512c7433019ada7efa2 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 26 Jun 2013 14:59:46 +0530 Subject: [PATCH 06/57] Conversion: Add option to embed all referenced fonts Conversion: Add an option to embed all fonts that are referenced in the input document but are not already embedded. This will search your system for the referenced font, and if found, the font will be embedded. Only works if the output format supports font embedding (for example: EPUB or AZW3). --- src/calibre/ebooks/conversion/cli.py | 5 +- src/calibre/ebooks/conversion/plumber.py | 17 + .../ebooks/oeb/transforms/embed_fonts.py | 233 +++++++++++ src/calibre/ebooks/oeb/transforms/flatcss.py | 2 +- src/calibre/ebooks/oeb/transforms/subset.py | 208 +++++----- src/calibre/gui2/convert/look_and_feel.py | 2 +- src/calibre/gui2/convert/look_and_feel.ui | 371 +++++++++--------- 7 files changed, 561 insertions(+), 277 deletions(-) create mode 100644 src/calibre/ebooks/oeb/transforms/embed_fonts.py diff --git a/src/calibre/ebooks/conversion/cli.py b/src/calibre/ebooks/conversion/cli.py index f2e5f4e3c9..a0abebc5fe 100644 --- a/src/calibre/ebooks/conversion/cli.py +++ b/src/calibre/ebooks/conversion/cli.py @@ -136,7 +136,7 @@ def add_pipeline_options(parser, plumber): [ 'base_font_size', 'disable_font_rescaling', 'font_size_mapping', 'embed_font_family', - 'subset_embedded_fonts', + 'subset_embedded_fonts', 'embed_all_fonts', 'line_height', 'minimum_line_height', 'linearize_tables', 'extra_css', 'filter_css', @@ -320,7 +320,7 @@ def main(args=sys.argv): opts.search_replace = read_sr_patterns(opts.search_replace, log) recommendations = [(n.dest, getattr(opts, n.dest), - OptionRecommendation.HIGH) \ + OptionRecommendation.HIGH) for n in parser.options_iter() if n.dest] plumber.merge_ui_recommendations(recommendations) @@ -342,3 +342,4 @@ def main(args=sys.argv): if __name__ == '__main__': sys.exit(main()) + diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index 1f459229c8..a96574e904 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -205,6 +205,16 @@ OptionRecommendation(name='embed_font_family', 'with some output formats, principally EPUB and AZW3.') ), +OptionRecommendation(name='embed_all_fonts', + recommended_value=False, level=OptionRecommendation.LOW, + help=_( + 'Embed every font that is referenced in the input document ' + 'but not already embedded. This will search your system for the ' + 'fonts, and if found, they will be embedded. Embedding will only work ' + 'if the format you are converting to supports embedded fonts, such as ' + 'EPUB, AZW3 or PDF.' + )), + OptionRecommendation(name='subset_embedded_fonts', recommended_value=False, level=OptionRecommendation.LOW, help=_( @@ -965,6 +975,9 @@ OptionRecommendation(name='search_replace', if self.for_regex_wizard and hasattr(self.opts, 'no_process'): self.opts.no_process = True self.flush() + if self.opts.embed_all_fonts or self.opts.embed_font_family: + # Start the threaded font scanner now, for performance + from calibre.utils.fonts.scanner import font_scanner # noqa import cssutils, logging cssutils.log.setLevel(logging.WARN) get_types_map() # Ensure the mimetypes module is intialized @@ -1129,6 +1142,10 @@ OptionRecommendation(name='search_replace', RemoveFakeMargins()(self.oeb, self.log, self.opts) RemoveAdobeMargins()(self.oeb, self.log, self.opts) + if self.opts.embed_all_fonts: + from calibre.ebooks.oeb.transforms.embed_fonts import EmbedFonts + EmbedFonts()(self.oeb, self.log, self.opts) + if self.opts.subset_embedded_fonts and self.output_plugin.file_type != 'pdf': from calibre.ebooks.oeb.transforms.subset import SubsetFonts SubsetFonts()(self.oeb, self.log, self.opts) diff --git a/src/calibre/ebooks/oeb/transforms/embed_fonts.py b/src/calibre/ebooks/oeb/transforms/embed_fonts.py new file mode 100644 index 0000000000..027b8af1de --- /dev/null +++ b/src/calibre/ebooks/oeb/transforms/embed_fonts.py @@ -0,0 +1,233 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2013, Kovid Goyal ' + +import logging +from collections import defaultdict + +import cssutils +from lxml import etree + +from calibre import guess_type +from calibre.ebooks.oeb.base import XPath, CSS_MIME, XHTML +from calibre.ebooks.oeb.transforms.subset import get_font_properties, find_font_face_rules, elem_style +from calibre.utils.filenames import ascii_filename +from calibre.utils.fonts.scanner import font_scanner, NoFonts + +def used_font(style, embedded_fonts): + ff = [unicode(f) for f in style.get('font-family', []) if unicode(f).lower() not in { + 'serif', 'sansserif', 'sans-serif', 'fantasy', 'cursive', 'monospace'}] + if not ff: + return False, None + lnames = {unicode(x).lower() for x in ff} + + matching_set = [] + + # Filter on font-family + for ef in embedded_fonts: + flnames = {x.lower() for x in ef.get('font-family', [])} + if not lnames.intersection(flnames): + continue + matching_set.append(ef) + if not matching_set: + return True, None + + # Filter on font-stretch + widths = {x:i for i, x in enumerate(('ultra-condensed', + 'extra-condensed', 'condensed', 'semi-condensed', 'normal', + 'semi-expanded', 'expanded', 'extra-expanded', 'ultra-expanded' + ))} + + width = widths[style.get('font-stretch', 'normal')] + for f in matching_set: + f['width'] = widths[style.get('font-stretch', 'normal')] + + min_dist = min(abs(width-f['width']) for f in matching_set) + if min_dist > 0: + return True, None + nearest = [f for f in matching_set if abs(width-f['width']) == + min_dist] + if width <= 4: + lmatches = [f for f in nearest if f['width'] <= width] + else: + lmatches = [f for f in nearest if f['width'] >= width] + matching_set = (lmatches or nearest) + + # Filter on font-style + fs = style.get('font-style', 'normal') + matching_set = [f for f in matching_set if f.get('font-style', 'normal') == fs] + + # Filter on font weight + fw = int(style.get('font-weight', '400')) + matching_set = [f for f in matching_set if f.get('weight', 400) == fw] + + if not matching_set: + return True, None + return True, matching_set[0] + + +class EmbedFonts(object): + + ''' + Embed all referenced fonts, if found on system. Must be called after CSS flattening. + ''' + + def __call__(self, oeb, log, opts): + self.oeb, self.log, self.opts = oeb, log, opts + self.sheet_cache = {} + self.find_style_rules() + self.find_embedded_fonts() + self.parser = cssutils.CSSParser(loglevel=logging.CRITICAL, log=logging.getLogger('calibre.css')) + self.warned = set() + self.warned2 = set() + + for item in oeb.spine: + if not hasattr(item.data, 'xpath'): + continue + sheets = [] + for href in XPath('//h:link[@href and @type="text/css"]/@href')(item.data): + sheet = self.oeb.manifest.hrefs.get(item.abshref(href), None) + if sheet is not None: + sheets.append(sheet) + if sheets: + self.process_item(item, sheets) + + def find_embedded_fonts(self): + ''' + Find all @font-face rules and extract the relevant info from them. + ''' + self.embedded_fonts = [] + for item in self.oeb.manifest: + if not hasattr(item.data, 'cssRules'): + continue + self.embedded_fonts.extend(find_font_face_rules(item, self.oeb)) + + def find_style_rules(self): + ''' + Extract all font related style information from all stylesheets into a + dict mapping classes to font properties specified by that class. All + the heavy lifting has already been done by the CSS flattening code. + ''' + rules = defaultdict(dict) + for item in self.oeb.manifest: + if not hasattr(item.data, 'cssRules'): + continue + for i, rule in enumerate(item.data.cssRules): + if rule.type != rule.STYLE_RULE: + continue + props = {k:v for k,v in + get_font_properties(rule).iteritems() if v} + if not props: + continue + for sel in rule.selectorList: + sel = sel.selectorText + if sel and sel.startswith('.'): + # We dont care about pseudo-selectors as the worst that + # can happen is some extra characters will remain in + # the font + sel = sel.partition(':')[0] + rules[sel[1:]].update(props) + + self.style_rules = dict(rules) + + def get_page_sheet(self): + if self.page_sheet is None: + manifest = self.oeb.manifest + id_, href = manifest.generate('page_css', 'page_styles.css') + self.page_sheet = manifest.add(id_, href, CSS_MIME, data=self.parser.parseString('', validate=False)) + head = self.current_item.xpath('//*[local-name()="head"][1]') + if head: + href = self.current_item.relhref(href) + l = etree.SubElement(head[0], XHTML('link'), + rel='stylesheet', type=CSS_MIME, href=href) + l.tail = '\n' + else: + self.log.warn('No cannot embed font rules') + return self.page_sheet + + def process_item(self, item, sheets): + ff_rules = [] + self.current_item = item + self.page_sheet = None + for sheet in sheets: + if 'page_css' in sheet.id: + ff_rules.extend(find_font_face_rules(sheet, self.oeb)) + self.page_sheet = sheet + + base = {'font-family':['serif'], 'font-weight': '400', + 'font-style':'normal', 'font-stretch':'normal'} + + for body in item.data.xpath('//*[local-name()="body"]'): + self.find_usage_in(body, base, ff_rules) + + def find_usage_in(self, elem, inherited_style, ff_rules): + style = elem_style(self.style_rules, elem.get('class', '') or '', inherited_style) + for child in elem: + self.find_usage_in(child, style, ff_rules) + has_font, existing = used_font(style, ff_rules) + if not has_font: + return + if existing is None: + in_book = used_font(style, self.embedded_fonts)[1] + if in_book is None: + # Try to find the font in the system + added = self.embed_font(style) + if added is not None: + ff_rules.append(added) + self.embedded_fonts.append(added) + else: + # TODO: Create a page rule from the book rule (cannot use it + # directly as paths might be different) + item = in_book['item'] + sheet = self.parser.parseString(in_book['rule'].cssText, validate=False) + rule = sheet.cssRules[0] + page_sheet = self.get_page_sheet() + href = page_sheet.abshref(item.href) + rule.style.setProperty('src', 'url(%s)' % href) + ff_rules.append(find_font_face_rules(sheet, self.oeb)[0]) + page_sheet.data.insertRule(rule, len(page_sheet.data.cssRules)) + + def embed_font(self, style): + ff = [unicode(f) for f in style.get('font-family', []) if unicode(f).lower() not in { + 'serif', 'sansserif', 'sans-serif', 'fantasy', 'cursive', 'monospace'}] + if not ff: + return + ff = ff[0] + if ff in self.warned: + return + try: + fonts = font_scanner.fonts_for_family(ff) + except NoFonts: + self.log.warn('Failed to find fonts for family:', ff, 'not embedding') + self.warned.add(ff) + return + try: + weight = int(style.get('font-weight', '400')) + except (ValueError, TypeError, AttributeError): + w = style['font-weight'] + if w not in self.warned2: + self.log.warn('Invalid weight in font style: %r' % w) + self.warned2.add(w) + return + for f in fonts: + if f['weight'] == weight and f['font-style'] == style.get('font-style', 'normal') and f['font-stretch'] == style.get('font-stretch', 'normal'): + self.log('Embedding font %s from %s' % (f['full_name'], f['path'])) + data = font_scanner.get_font_data(f) + name = f['full_name'] + ext = 'otf' if f['is_otf'] else 'ttf' + name = ascii_filename(name).replace(' ', '-').replace('(', '').replace(')', '') + fid, href = self.oeb.manifest.generate(id=u'font', href=u'fonts/%s.%s'%(name, ext)) + item = self.oeb.manifest.add(fid, href, guess_type('dummy.'+ext)[0], data=data) + item.unload_data_from_memory() + page_sheet = self.get_page_sheet() + href = page_sheet.relhref(item.href) + css = '''@font-face { font-family: "%s"; font-weight: %s; font-style: %s; font-stretch: %s; src: url(%s) }''' % ( + f['font-family'], f['font-weight'], f['font-style'], f['font-stretch'], href) + sheet = self.parser.parseString(css, validate=False) + page_sheet.data.insertRule(sheet.cssRules[0], len(page_sheet.data.cssRules)) + return find_font_face_rules(sheet, self.oeb)[0] + diff --git a/src/calibre/ebooks/oeb/transforms/flatcss.py b/src/calibre/ebooks/oeb/transforms/flatcss.py index dd2d20333d..9c08934938 100644 --- a/src/calibre/ebooks/oeb/transforms/flatcss.py +++ b/src/calibre/ebooks/oeb/transforms/flatcss.py @@ -194,7 +194,7 @@ class CSSFlattener(object): for i, font in enumerate(faces): ext = 'otf' if font['is_otf'] else 'ttf' fid, href = self.oeb.manifest.generate(id=u'font', - href=u'%s.%s'%(ascii_filename(font['full_name']).replace(u' ', u'-'), ext)) + href=u'fonts/%s.%s'%(ascii_filename(font['full_name']).replace(u' ', u'-'), ext)) item = self.oeb.manifest.add(fid, href, guess_type('dummy.'+ext)[0], data=font_scanner.get_font_data(font)) diff --git a/src/calibre/ebooks/oeb/transforms/subset.py b/src/calibre/ebooks/oeb/transforms/subset.py index 744e37b193..96170bd49c 100644 --- a/src/calibre/ebooks/oeb/transforms/subset.py +++ b/src/calibre/ebooks/oeb/transforms/subset.py @@ -12,6 +12,111 @@ from collections import defaultdict from calibre.ebooks.oeb.base import urlnormalize from calibre.utils.fonts.sfnt.subset import subset, NoGlyphs, UnsupportedFont +def get_font_properties(rule, default=None): + ''' + Given a CSS rule, extract normalized font properties from + it. Note that shorthand font property should already have been expanded + by the CSS flattening code. + ''' + props = {} + s = rule.style + for q in ('font-family', 'src', 'font-weight', 'font-stretch', + 'font-style'): + g = 'uri' if q == 'src' else 'value' + try: + val = s.getProperty(q).propertyValue[0] + val = getattr(val, g) + if q == 'font-family': + val = [x.value for x in s.getProperty(q).propertyValue] + if val and val[0] == 'inherit': + val = None + except (IndexError, KeyError, AttributeError, TypeError, ValueError): + val = None if q in {'src', 'font-family'} else default + if q in {'font-weight', 'font-stretch', 'font-style'}: + val = unicode(val).lower() if (val or val == 0) else val + if val == 'inherit': + val = default + if q == 'font-weight': + val = {'normal':'400', 'bold':'700'}.get(val, val) + if val not in {'100', '200', '300', '400', '500', '600', '700', + '800', '900', 'bolder', 'lighter'}: + val = default + if val == 'normal': + val = '400' + elif q == 'font-style': + if val not in {'normal', 'italic', 'oblique'}: + val = default + elif q == 'font-stretch': + if val not in {'normal', 'ultra-condensed', 'extra-condensed', + 'condensed', 'semi-condensed', 'semi-expanded', + 'expanded', 'extra-expanded', 'ultra-expanded'}: + val = default + props[q] = val + return props + + +def find_font_face_rules(sheet, oeb): + ''' + Find all @font-face rules in the given sheet and extract the relevant info from them. + sheet can be either a ManifestItem or a CSSStyleSheet. + ''' + ans = [] + try: + rules = sheet.data.cssRules + except AttributeError: + rules = sheet.cssRules + + for i, rule in enumerate(rules): + if rule.type != rule.FONT_FACE_RULE: + continue + props = get_font_properties(rule, default='normal') + if not props['font-family'] or not props['src']: + continue + + try: + path = sheet.abshref(props['src']) + except AttributeError: + path = props['src'] + ff = oeb.manifest.hrefs.get(urlnormalize(path), None) + if not ff: + continue + props['item'] = ff + if props['font-weight'] in {'bolder', 'lighter'}: + props['font-weight'] = '400' + props['weight'] = int(props['font-weight']) + props['rule'] = rule + props['chars'] = set() + ans.append(props) + + return ans + + +def elem_style(style_rules, cls, inherited_style): + ''' + Find the effective style for the given element. + ''' + classes = cls.split() + style = inherited_style.copy() + for cls in classes: + style.update(style_rules.get(cls, {})) + wt = style.get('font-weight', None) + pwt = inherited_style.get('font-weight', '400') + if wt == 'bolder': + style['font-weight'] = { + '100':'400', + '200':'400', + '300':'400', + '400':'700', + '500':'700', + }.get(pwt, '900') + elif wt == 'lighter': + style['font-weight'] = { + '600':'400', '700':'400', + '800':'700', '900':'700'}.get(pwt, '100') + + return style + + class SubsetFonts(object): ''' @@ -76,72 +181,15 @@ class SubsetFonts(object): self.log('Reduced total font size to %.1f%% of original'% (totals[0]/totals[1] * 100)) - def get_font_properties(self, rule, default=None): - ''' - Given a CSS rule, extract normalized font properties from - it. Note that shorthand font property should already have been expanded - by the CSS flattening code. - ''' - props = {} - s = rule.style - for q in ('font-family', 'src', 'font-weight', 'font-stretch', - 'font-style'): - g = 'uri' if q == 'src' else 'value' - try: - val = s.getProperty(q).propertyValue[0] - val = getattr(val, g) - if q == 'font-family': - val = [x.value for x in s.getProperty(q).propertyValue] - if val and val[0] == 'inherit': - val = None - except (IndexError, KeyError, AttributeError, TypeError, ValueError): - val = None if q in {'src', 'font-family'} else default - if q in {'font-weight', 'font-stretch', 'font-style'}: - val = unicode(val).lower() if (val or val == 0) else val - if val == 'inherit': - val = default - if q == 'font-weight': - val = {'normal':'400', 'bold':'700'}.get(val, val) - if val not in {'100', '200', '300', '400', '500', '600', '700', - '800', '900', 'bolder', 'lighter'}: - val = default - if val == 'normal': val = '400' - elif q == 'font-style': - if val not in {'normal', 'italic', 'oblique'}: - val = default - elif q == 'font-stretch': - if val not in { 'normal', 'ultra-condensed', 'extra-condensed', - 'condensed', 'semi-condensed', 'semi-expanded', - 'expanded', 'extra-expanded', 'ultra-expanded'}: - val = default - props[q] = val - return props - def find_embedded_fonts(self): ''' Find all @font-face rules and extract the relevant info from them. ''' self.embedded_fonts = [] for item in self.oeb.manifest: - if not hasattr(item.data, 'cssRules'): continue - for i, rule in enumerate(item.data.cssRules): - if rule.type != rule.FONT_FACE_RULE: - continue - props = self.get_font_properties(rule, default='normal') - if not props['font-family'] or not props['src']: - continue - - path = item.abshref(props['src']) - ff = self.oeb.manifest.hrefs.get(urlnormalize(path), None) - if not ff: - continue - props['item'] = ff - if props['font-weight'] in {'bolder', 'lighter'}: - props['font-weight'] = '400' - props['weight'] = int(props['font-weight']) - props['chars'] = set() - props['rule'] = rule - self.embedded_fonts.append(props) + if not hasattr(item.data, 'cssRules'): + continue + self.embedded_fonts.extend(find_font_face_rules(item, self.oeb)) def find_style_rules(self): ''' @@ -151,12 +199,13 @@ class SubsetFonts(object): ''' rules = defaultdict(dict) for item in self.oeb.manifest: - if not hasattr(item.data, 'cssRules'): continue + if not hasattr(item.data, 'cssRules'): + continue for i, rule in enumerate(item.data.cssRules): if rule.type != rule.STYLE_RULE: continue props = {k:v for k,v in - self.get_font_properties(rule).iteritems() if v} + get_font_properties(rule).iteritems() if v} if not props: continue for sel in rule.selectorList: @@ -172,41 +221,17 @@ class SubsetFonts(object): def find_font_usage(self): for item in self.oeb.manifest: - if not hasattr(item.data, 'xpath'): continue + if not hasattr(item.data, 'xpath'): + continue for body in item.data.xpath('//*[local-name()="body"]'): base = {'font-family':['serif'], 'font-weight': '400', 'font-style':'normal', 'font-stretch':'normal'} self.find_usage_in(body, base) - def elem_style(self, cls, inherited_style): - ''' - Find the effective style for the given element. - ''' - classes = cls.split() - style = inherited_style.copy() - for cls in classes: - style.update(self.style_rules.get(cls, {})) - wt = style.get('font-weight', None) - pwt = inherited_style.get('font-weight', '400') - if wt == 'bolder': - style['font-weight'] = { - '100':'400', - '200':'400', - '300':'400', - '400':'700', - '500':'700', - }.get(pwt, '900') - elif wt == 'lighter': - style['font-weight'] = { - '600':'400', '700':'400', - '800':'700', '900':'700'}.get(pwt, '100') - - return style - def used_font(self, style): ''' Given a style find the embedded font that matches it. Returns None if - no match is found ( can happen if not family matches). + no match is found (can happen if no family matches). ''' ff = style.get('font-family', []) lnames = {unicode(x).lower() for x in ff} @@ -222,7 +247,7 @@ class SubsetFonts(object): return None # Filter on font-stretch - widths = {x:i for i, x in enumerate(( 'ultra-condensed', + widths = {x:i for i, x in enumerate(('ultra-condensed', 'extra-condensed', 'condensed', 'semi-condensed', 'normal', 'semi-expanded', 'expanded', 'extra-expanded', 'ultra-expanded' ))} @@ -280,7 +305,7 @@ class SubsetFonts(object): return ans def find_usage_in(self, elem, inherited_style): - style = self.elem_style(elem.get('class', '') or '', inherited_style) + style = elem_style(self.style_rules, elem.get('class', '') or '', inherited_style) for child in elem: self.find_usage_in(child, style) font = self.used_font(style) @@ -290,3 +315,4 @@ class SubsetFonts(object): font['chars'] |= chars + diff --git a/src/calibre/gui2/convert/look_and_feel.py b/src/calibre/gui2/convert/look_and_feel.py index 24ee288cc6..a3e364b9ca 100644 --- a/src/calibre/gui2/convert/look_and_feel.py +++ b/src/calibre/gui2/convert/look_and_feel.py @@ -32,7 +32,7 @@ class LookAndFeelWidget(Widget, Ui_Form): Widget.__init__(self, parent, ['change_justification', 'extra_css', 'base_font_size', 'font_size_mapping', 'line_height', 'minimum_line_height', - 'embed_font_family', 'subset_embedded_fonts', + 'embed_font_family', 'embed_all_fonts', 'subset_embedded_fonts', 'smarten_punctuation', 'unsmarten_punctuation', 'disable_font_rescaling', 'insert_blank_line', 'remove_paragraph_spacing', diff --git a/src/calibre/gui2/convert/look_and_feel.ui b/src/calibre/gui2/convert/look_and_feel.ui index 43736fb1f2..e9d9caeed7 100644 --- a/src/calibre/gui2/convert/look_and_feel.ui +++ b/src/calibre/gui2/convert/look_and_feel.ui @@ -14,6 +14,70 @@ Form + + + + Keep &ligatures + + + + + + + &Linearize tables + + + + + + + Base &font size: + + + opt_base_font_size + + + + + + + &Line size: + + + Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter + + + opt_insert_blank_line_size + + + + + + + true + + + + + + + Remove &spacing between paragraphs + + + + + + + &Indent size: + + + Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter + + + opt_remove_paragraph_spacing_indent_size + + + @@ -24,6 +88,57 @@ + + + + Insert &blank line between paragraphs + + + + + + + em + + + 1 + + + + + + + Text &justification: + + + opt_change_justification + + + + + + + + + + Smarten &punctuation + + + + + + + &Transliterate unicode characters to ASCII + + + + + + + &UnSmarten punctuation + + + @@ -44,51 +159,6 @@ - - - - % - - - 1 - - - 900.000000000000000 - - - - - - - pt - - - 1 - - - 0.000000000000000 - - - 50.000000000000000 - - - 1.000000000000000 - - - 15.000000000000000 - - - - - - - Font size &key: - - - opt_font_size_mapping - - - @@ -133,56 +203,72 @@ - - - - true - - - - - - - Remove &spacing between paragraphs - - - - - - - &Indent size: - - - Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter - - - opt_remove_paragraph_spacing_indent_size - - - - - - - <p>When calibre removes inter paragraph spacing, it automatically sets a paragraph indent, to ensure that paragraphs can be easily distinguished. This option controls the width of that indent. - - - No change - + + - em + % + + + 1 + + + 900.000000000000000 + + + + + + + pt 1 - -0.100000000000000 + 0.000000000000000 + + + 50.000000000000000 - 0.100000000000000 + 1.000000000000000 + + + 15.000000000000000 - + + + + &Disable font size rescaling + + + + + + + + + + Font size &key: + + + opt_font_size_mapping + + + + + + + &Embed font family: + + + opt_embed_font_family + + + + 0 @@ -300,121 +386,42 @@ - - - - Insert &blank line between paragraphs - - - - + + + <p>When calibre removes inter paragraph spacing, it automatically sets a paragraph indent, to ensure that paragraphs can be easily distinguished. This option controls the width of that indent. + + + No change + em 1 - - - - - - Text &justification: + + -0.100000000000000 - - opt_change_justification + + 0.100000000000000 - - - - - - - Smarten &punctuation - - - - - - - &Transliterate unicode characters to ASCII - - - - - - - &UnSmarten punctuation - - - - - - - Keep &ligatures - - - - - - - &Linearize tables - - - - - - - Base &font size: - - - opt_base_font_size - - - - - - - &Line size: - - - Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter - - - opt_insert_blank_line_size - - - - - - - &Embed font family: - - - opt_embed_font_family - - - - - - - &Disable font size rescaling - - - - - - - + &Subset all embedded fonts + + + + &Embed referenced fonts + + + From 63d133ea5c1a3ee8d4949e95ce9cf8e9e9c9d644 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 26 Jun 2013 15:46:41 +0530 Subject: [PATCH 07/57] AZW3 Input: Add support for page-progression-direction AZW3 Input: Add support for the page-progression-direction that is used to indicate page turns should happen from right to left. The attribute is passed into EPUB when converting. Fixes #1194766 [Incorrect conversion japanese MOBI](https://bugs.launchpad.net/calibre/+bug/1194766) --- src/calibre/ebooks/metadata/opf2.py | 11 +++++++++++ src/calibre/ebooks/mobi/reader/mobi8.py | 12 +++++++++--- src/calibre/ebooks/mobi/utils.py | 11 ++++++++++- src/calibre/ebooks/oeb/base.py | 3 +++ src/calibre/ebooks/oeb/reader.py | 3 +++ 5 files changed, 36 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/metadata/opf2.py b/src/calibre/ebooks/metadata/opf2.py index fb80cc8bfe..77e334dd3e 100644 --- a/src/calibre/ebooks/metadata/opf2.py +++ b/src/calibre/ebooks/metadata/opf2.py @@ -1047,6 +1047,14 @@ class OPF(object): # {{{ if raw: return raw.rpartition(':')[-1] + @property + def page_progression_direction(self): + spine = self.XPath('descendant::*[re:match(name(), "spine", "i")][1]')(self.root) + if spine: + for k, v in spine[0].attrib.iteritems(): + if k == 'page-progression-direction' or k.endswith('}page-progression-direction'): + return v + def guess_cover(self): ''' Try to guess a cover. Needed for some old/badly formed OPF files. @@ -1185,6 +1193,7 @@ class OPFCreator(Metadata): ''' Metadata.__init__(self, title='', other=other) self.base_path = os.path.abspath(base_path) + self.page_progression_direction = None if self.application_id is None: self.application_id = str(uuid.uuid4()) if not isinstance(self.toc, TOC): @@ -1356,6 +1365,8 @@ class OPFCreator(Metadata): spine = E.spine() if self.toc is not None: spine.set('toc', 'ncx') + if self.page_progression_direction is not None: + spine.set('page-progression-direction', self.page_progression_direction) if self.spine is not None: for ref in self.spine: if ref.id is not None: diff --git a/src/calibre/ebooks/mobi/reader/mobi8.py b/src/calibre/ebooks/mobi/reader/mobi8.py index aff79d65c2..97d38a9660 100644 --- a/src/calibre/ebooks/mobi/reader/mobi8.py +++ b/src/calibre/ebooks/mobi/reader/mobi8.py @@ -20,7 +20,7 @@ from calibre.ebooks.mobi.reader.ncx import read_ncx, build_toc from calibre.ebooks.mobi.reader.markup import expand_mobi8_markup from calibre.ebooks.metadata.opf2 import Guide, OPFCreator from calibre.ebooks.metadata.toc import TOC -from calibre.ebooks.mobi.utils import read_font_record +from calibre.ebooks.mobi.utils import read_font_record, read_resc_record from calibre.ebooks.oeb.parse_utils import parse_html from calibre.ebooks.oeb.base import XPath, XHTML, xml2text from calibre.utils.imghdr import what @@ -65,6 +65,7 @@ class Mobi8Reader(object): self.mobi6_reader, self.log = mobi6_reader, log self.header = mobi6_reader.book_header self.encrypted_fonts = [] + self.resc_data = {} def __call__(self): self.mobi6_reader.check_for_drm() @@ -389,9 +390,11 @@ class Mobi8Reader(object): data = sec[0] typ = data[:4] href = None - if typ in {b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n', - b'RESC', b'BOUN', b'FDST', b'DATP', b'AUDI', b'VIDE'}: + if typ in {b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n', b'BOUN', + b'FDST', b'DATP', b'AUDI', b'VIDE'}: pass # Ignore these records + elif typ == b'RESC': + self.resc_data = read_resc_record(data) elif typ == b'FONT': font = read_font_record(data) href = "fonts/%05d.%s" % (fname_idx, font['ext']) @@ -452,6 +455,9 @@ class Mobi8Reader(object): opf.create_manifest_from_files_in([os.getcwdu()], exclude=exclude) opf.create_spine(spine) opf.set_toc(toc) + ppd = self.resc_data.get('page-progression-direction', None) + if ppd: + opf.page_progression_direction = ppd with open('metadata.opf', 'wb') as of, open('toc.ncx', 'wb') as ncx: opf.render(of, ncx, 'toc.ncx') diff --git a/src/calibre/ebooks/mobi/utils.py b/src/calibre/ebooks/mobi/utils.py index e9bc4f669f..008b33a0ff 100644 --- a/src/calibre/ebooks/mobi/utils.py +++ b/src/calibre/ebooks/mobi/utils.py @@ -7,7 +7,7 @@ __license__ = 'GPL v3' __copyright__ = '2011, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import struct, string, zlib, os +import struct, string, zlib, os, re from collections import OrderedDict from io import BytesIO @@ -393,6 +393,15 @@ def mobify_image(data): data = im.export('gif') return data +def read_resc_record(data): + ans = {} + match = re.search(br''']*page-progression-direction=['"](.+?)['"]''', data) + if match is not None: + ppd = match.group(1).lower() + if ppd in {b'ltr', b'rtl'}: + ans['page-progression-direction'] = ppd.decode('ascii') + return ans + # Font records {{{ def read_font_record(data, extent=1040): ''' diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index d4b3a2b7ab..29fc27ee3f 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -1210,6 +1210,7 @@ class Spine(object): def __init__(self, oeb): self.oeb = oeb self.items = [] + self.page_progression_direction = None def _linear(self, linear): if isinstance(linear, basestring): @@ -1896,4 +1897,6 @@ class OEBBook(object): attrib={'media-type': PAGE_MAP_MIME}) spine.attrib['page-map'] = id results[PAGE_MAP_MIME] = (href, self.pages.to_page_map()) + if self.spine.page_progression_direction in {'ltr', 'rtl'}: + spine.attrib['page-progression-direction'] = self.spine.page_progression_direction return results diff --git a/src/calibre/ebooks/oeb/reader.py b/src/calibre/ebooks/oeb/reader.py index eb7e2eca4c..cb10b4ccce 100644 --- a/src/calibre/ebooks/oeb/reader.py +++ b/src/calibre/ebooks/oeb/reader.py @@ -330,6 +330,9 @@ class OEBReader(object): if len(spine) == 0: raise OEBError("Spine is empty") self._spine_add_extra() + for val in xpath(opf, '/o2:package/o2:spine/@page-progression-direction'): + if val in {'ltr', 'rtl'}: + spine.page_progression_direction = val def _guide_from_opf(self, opf): guide = self.oeb.guide From f63f142618a503acd4d8597bc97117d69a93840b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 26 Jun 2013 16:55:12 +0530 Subject: [PATCH 08/57] PDF Output: Fix add ToC option not being used PDF Output: Fix Table of Contents being added tot he end of the PDF even without the Add Table of Contents option being enabled. Fixes #1194836 [When convert to PDF, it always create TOC at the end](https://bugs.launchpad.net/calibre/+bug/1194836) --- src/calibre/ebooks/pdf/render/from_html.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/pdf/render/from_html.py b/src/calibre/ebooks/pdf/render/from_html.py index 5b9f58e326..8ea1d8203e 100644 --- a/src/calibre/ebooks/pdf/render/from_html.py +++ b/src/calibre/ebooks/pdf/render/from_html.py @@ -253,7 +253,7 @@ class PDFWriter(QObject): return self.loop.exit(1) try: if not self.render_queue: - if self.toc is not None and len(self.toc) > 0 and not hasattr(self, 'rendered_inline_toc'): + if self.opts.pdf_add_toc and self.toc is not None and len(self.toc) > 0 and not hasattr(self, 'rendered_inline_toc'): return self.render_inline_toc() self.loop.exit() else: From 3743d26d35badcc978a93db0d3d2aa53eae032ee Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 27 Jun 2013 10:14:10 +0530 Subject: [PATCH 09/57] Save dist file sizes for bandwidth calculation Also fix a typo in copying dist files to tdir and backup. --- setup/upload.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/setup/upload.py b/setup/upload.py index 639a2e98d5..dd59067c0c 100644 --- a/setup/upload.py +++ b/setup/upload.py @@ -134,6 +134,8 @@ class UploadInstallers(Command): # {{{ available = set(glob.glob('dist/*')) files = {x:installer_description(x) for x in all_possible.intersection(available)} + sizes = {os.path.basename(x):os.path.getsize(x) for x in files} + self.record_sizes(sizes) tdir = mkdtemp() backup = os.path.join('/mnt/external/calibre/%s' % __version__) if not os.path.exists(backup): @@ -147,6 +149,11 @@ class UploadInstallers(Command): # {{{ finally: shutil.rmtree(tdir, ignore_errors=True) + def record_sizes(self, sizes): + print ('\nRecording dist sizes') + args = ['%s:%s:%s' % (__version__, fname, size) for fname, size in sizes.iteritems()] + check_call(['ssh', 'divok', 'dist_sizes'] + args) + def upload_to_staging(self, tdir, backup, files): os.mkdir(tdir+'/dist') hosting = os.path.join(os.path.dirname(os.path.abspath(__file__)), @@ -154,9 +161,9 @@ class UploadInstallers(Command): # {{{ shutil.copyfile(hosting, os.path.join(tdir, 'hosting.py')) for f in files: - for x in (tdir, backup): - dest = os.path.join(x, f) - shutil.copyfile(f, dest) + for x in (tdir+'/dist', backup): + dest = os.path.join(x, os.path.basename(f)) + shutil.copy2(f, x) os.chmod(dest, stat.S_IREAD|stat.S_IWRITE|stat.S_IRGRP|stat.S_IROTH) with open(os.path.join(tdir, 'fmap'), 'wb') as fo: From 87dda89378e8a95663897eba720b0ae04d958d7d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 27 Jun 2013 12:41:46 +0530 Subject: [PATCH 10/57] Add notes on provisioning a file hosting server --- setup/file_hosting_servers.rst | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 setup/file_hosting_servers.rst diff --git a/setup/file_hosting_servers.rst b/setup/file_hosting_servers.rst new file mode 100644 index 0000000000..7121628744 --- /dev/null +++ b/setup/file_hosting_servers.rst @@ -0,0 +1,32 @@ +Provisioning a file hosting server +==================================== + +Create the ssh authorized keys file. + +Edit /etc/ssh/sshd_config and change PermitRootLogin to without-password. +Restart sshd. + +apt-get install vim nginx zsh python-lxml python-mechanize iotop htop smartmontools +chsh -s /bin/zsh + +mkdir -p /root/staging /root/work/vim /srv/download /srv/manual + +scp .zshrc .vimrc server: +scp -r ~/work/vim/zsh-syntax-highlighting server:work/vim + +If the server has a backup hard-disk, mount it at /mnt/backup and edit /etc/fstab so that it is auto-mounted. +Then, add the following to crontab +@daily /usr/bin/rsync -ha /srv /mnt/backup +@daily /usr/bin/rsync -ha /etc /mnt/backup + +Nginx +------ + +Copy over /etc/nginx/sites-available/default from another file server. When +copying, remember to use cat instead of cp to preserve hardlinks (the file is a +hardlink to /etc/nginx/sites-enabled/default) + +rsync /srv from another file server + +service nginx start + From c3009256c498893370e0cc6f61bd018abe17a38e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 27 Jun 2013 13:01:52 +0530 Subject: [PATCH 11/57] ToC Editor: Use filenames when generating from files ToC Editor: When generating a ToC from files, if the file has no text, do not skip it. Instead create an entry using the filename of the file. --- src/calibre/ebooks/oeb/polish/toc.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/oeb/polish/toc.py b/src/calibre/ebooks/oeb/polish/toc.py index 8be23bdc38..a364da58f5 100644 --- a/src/calibre/ebooks/oeb/polish/toc.py +++ b/src/calibre/ebooks/oeb/polish/toc.py @@ -281,15 +281,18 @@ def find_text(node): def from_files(container): toc = TOC() - for spinepath in container.spine_items: + for i, spinepath in enumerate(container.spine_items): name = container.abspath_to_name(spinepath) root = container.parsed(name) body = XPath('//h:body')(root) if not body: continue text = find_text(body[0]) - if text: - toc.add(text, name) + if not text: + text = name.rpartition('/')[-1] + if i == 0 and text.rpartition('.')[0].lower() in {'titlepage', 'cover'}: + text = _('Cover') + toc.add(text, name) return toc def add_id(container, name, loc): From f8509fe8260e409c6e9f21d309d7ce8b9fd6a529 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 27 Jun 2013 13:27:31 +0530 Subject: [PATCH 12/57] Log the wait before sending email When waiting before sending email, log the wait. Fixes #1195173 [Feature Request - Multiple books emailed to device](https://bugs.launchpad.net/calibre/+bug/1195173) --- src/calibre/gui2/email.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/calibre/gui2/email.py b/src/calibre/gui2/email.py index 9ebb94b00a..9b077fa39f 100644 --- a/src/calibre/gui2/email.py +++ b/src/calibre/gui2/email.py @@ -92,7 +92,11 @@ class Sendmail(object): raise worker.exception def sendmail(self, attachment, aname, to, subject, text, log): + logged = False while time.time() - self.last_send_time <= self.rate_limit: + if not logged: + log('Waiting %s seconds before sending, to avoid being marked as spam.\nYou can control this delay via Preferences->Tweaks' % self.rate_limit) + logged = True time.sleep(1) try: opts = email_config().parse() From 86691f22a24c96061b1a78f1c332d58b9b52b6db Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 27 Jun 2013 13:28:06 +0530 Subject: [PATCH 13/57] ... --- src/calibre/gui2/email.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/gui2/email.py b/src/calibre/gui2/email.py index 9b077fa39f..52da1909fe 100644 --- a/src/calibre/gui2/email.py +++ b/src/calibre/gui2/email.py @@ -94,7 +94,7 @@ class Sendmail(object): def sendmail(self, attachment, aname, to, subject, text, log): logged = False while time.time() - self.last_send_time <= self.rate_limit: - if not logged: + if not logged and self.rate_limit > 0: log('Waiting %s seconds before sending, to avoid being marked as spam.\nYou can control this delay via Preferences->Tweaks' % self.rate_limit) logged = True time.sleep(1) From 32fccdb9010d161f6e2acbb4b1d66cf99162f57b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 27 Jun 2013 13:29:09 +0530 Subject: [PATCH 14/57] pep8 --- src/calibre/gui2/email.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/calibre/gui2/email.py b/src/calibre/gui2/email.py index 52da1909fe..f8c7552437 100644 --- a/src/calibre/gui2/email.py +++ b/src/calibre/gui2/email.py @@ -32,7 +32,7 @@ class Worker(Thread): self.func, self.args = func, args def run(self): - #time.sleep(1000) + # time.sleep(1000) try: self.func(*self.args) except Exception as e: @@ -46,7 +46,7 @@ class Worker(Thread): class Sendmail(object): MAX_RETRIES = 1 - TIMEOUT = 15 * 60 # seconds + TIMEOUT = 15 * 60 # seconds def __init__(self): self.calculate_rate_limit() @@ -166,7 +166,7 @@ def email_news(mi, remove, get_fmts, done, job_manager): plugboard_email_value = 'email' plugboard_email_formats = ['epub', 'mobi', 'azw3'] -class EmailMixin(object): # {{{ +class EmailMixin(object): # {{{ def send_by_mail(self, to, fmts, delete_from_library, subject='', send_ids=None, do_auto_convert=True, specific_format=None): @@ -208,10 +208,10 @@ class EmailMixin(object): # {{{ if not components: components = [mi.title] subjects.append(os.path.join(*components)) - a = authors_to_string(mi.authors if mi.authors else \ + a = authors_to_string(mi.authors if mi.authors else [_('Unknown')]) - texts.append(_('Attached, you will find the e-book') + \ - '\n\n' + t + '\n\t' + _('by') + ' ' + a + '\n\n' + \ + texts.append(_('Attached, you will find the e-book') + + '\n\n' + t + '\n\t' + _('by') + ' ' + a + '\n\n' + _('in the %s format.') % os.path.splitext(f)[1][1:].upper()) prefix = ascii_filename(t+' - '+a) @@ -231,7 +231,7 @@ class EmailMixin(object): # {{{ auto = [] if _auto_ids != []: for id in _auto_ids: - if specific_format == None: + if specific_format is None: dbfmts = self.library_view.model().db.formats(id, index_is_id=True) formats = [f.lower() for f in (dbfmts.split(',') if dbfmts else [])] @@ -302,8 +302,9 @@ class EmailMixin(object): # {{{ sent_mails = email_news(mi, remove, get_fmts, self.email_sent, self.job_manager) if sent_mails: - self.status_bar.show_message(_('Sent news to')+' '+\ + self.status_bar.show_message(_('Sent news to')+' '+ ', '.join(sent_mails), 3000) # }}} + From 13f31c7839ca98b1663c34a5bf7daef359b21f61 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 27 Jun 2013 17:11:23 +0530 Subject: [PATCH 15/57] ... --- setup/file_hosting_servers.rst | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/setup/file_hosting_servers.rst b/setup/file_hosting_servers.rst index 7121628744..563c7bc64a 100644 --- a/setup/file_hosting_servers.rst +++ b/setup/file_hosting_servers.rst @@ -6,6 +6,11 @@ Create the ssh authorized keys file. Edit /etc/ssh/sshd_config and change PermitRootLogin to without-password. Restart sshd. +hostname whatever +Edit /etc/hosts and put in FQDN in the appropriate places, for example:: + 27.0.1.1 download.calibre-ebook.com download + 46.28.49.116 download.calibre-ebook.com download + apt-get install vim nginx zsh python-lxml python-mechanize iotop htop smartmontools chsh -s /bin/zsh @@ -15,9 +20,9 @@ scp .zshrc .vimrc server: scp -r ~/work/vim/zsh-syntax-highlighting server:work/vim If the server has a backup hard-disk, mount it at /mnt/backup and edit /etc/fstab so that it is auto-mounted. -Then, add the following to crontab -@daily /usr/bin/rsync -ha /srv /mnt/backup -@daily /usr/bin/rsync -ha /etc /mnt/backup +Then, add the following to crontab:: + @daily /usr/bin/rsync -ha /srv /mnt/backup + @daily /usr/bin/rsync -ha /etc /mnt/backup Nginx ------ From 836074e37d9b3780093b89548c035cdb29603349 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 27 Jun 2013 17:15:52 +0530 Subject: [PATCH 16/57] ... --- setup/file_hosting_servers.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup/file_hosting_servers.rst b/setup/file_hosting_servers.rst index 563c7bc64a..8dd0afe098 100644 --- a/setup/file_hosting_servers.rst +++ b/setup/file_hosting_servers.rst @@ -11,7 +11,7 @@ Edit /etc/hosts and put in FQDN in the appropriate places, for example:: 27.0.1.1 download.calibre-ebook.com download 46.28.49.116 download.calibre-ebook.com download -apt-get install vim nginx zsh python-lxml python-mechanize iotop htop smartmontools +apt-get install vim nginx zsh python-lxml python-mechanize iotop htop smartmontools mosh chsh -s /bin/zsh mkdir -p /root/staging /root/work/vim /srv/download /srv/manual From 952b95d3ad566dca005863f9403e3a846b5e1e8e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 27 Jun 2013 17:34:01 +0530 Subject: [PATCH 17/57] pep8 --- recipes/miradasalsur.recipe | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/recipes/miradasalsur.recipe b/recipes/miradasalsur.recipe index 4794503384..b931fcb1d7 100644 --- a/recipes/miradasalsur.recipe +++ b/recipes/miradasalsur.recipe @@ -4,9 +4,7 @@ sur.infonews.com ''' import datetime -from calibre import strftime from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import Tag class MiradasAlSur(BasicNewsRecipe): title = 'Miradas al Sur' @@ -25,7 +23,7 @@ class MiradasAlSur(BasicNewsRecipe): extra_css = """ body{font-family: Arial,Helvetica,sans-serif} h1{font-family: Georgia,Times,serif} - .field-field-story-author{color: gray; font-size: small} + .field-field-story-author{color: gray; font-size: small} """ conversion_options = { 'comment' : description @@ -34,22 +32,22 @@ class MiradasAlSur(BasicNewsRecipe): , 'language' : language , 'series' : title } - + keep_only_tags = [dict(name='div', attrs={'id':['content-header', 'content-area']})] - remove_tags = [ - dict(name=['link','meta','iframe','embed','object']), + remove_tags = [ + dict(name=['link','meta','iframe','embed','object']), dict(name='form', attrs={'class':'fivestar-widget'}), dict(attrs={'class':lambda x: x and 'terms-inline' in x.split()}) ] feeds = [ - (u'Politica' , u'http://sur.infonews.com/taxonomy/term/1/0/feed' ), - (u'Internacional' , u'http://sur.infonews.com/taxonomy/term/2/0/feed' ), + (u'Politica' , u'http://sur.infonews.com/taxonomy/term/1/0/feed'), + (u'Internacional' , u'http://sur.infonews.com/taxonomy/term/2/0/feed'), (u'Informe Especial' , u'http://sur.infonews.com/taxonomy/term/14/0/feed'), - (u'Delitos y pesquisas', u'http://sur.infonews.com/taxonomy/term/6/0/feed' ), - (u'Lesa Humanidad' , u'http://sur.infonews.com/taxonomy/term/7/0/feed' ), - (u'Cultura' , u'http://sur.infonews.com/taxonomy/term/8/0/feed' ), - (u'Deportes' , u'http://sur.infonews.com/taxonomy/term/9/0/feed' ), + (u'Delitos y pesquisas', u'http://sur.infonews.com/taxonomy/term/6/0/feed'), + (u'Lesa Humanidad' , u'http://sur.infonews.com/taxonomy/term/7/0/feed'), + (u'Cultura' , u'http://sur.infonews.com/taxonomy/term/8/0/feed'), + (u'Deportes' , u'http://sur.infonews.com/taxonomy/term/9/0/feed'), (u'Contratapa' , u'http://sur.infonews.com/taxonomy/term/10/0/feed'), ] @@ -60,10 +58,10 @@ class MiradasAlSur(BasicNewsRecipe): cdate = datetime.date.today() todayweekday = cdate.isoweekday() if (todayweekday != 7): - cdate -= datetime.timedelta(days=todayweekday) - cover_page_url = cdate.strftime('http://sur.infonews.com/ediciones/%Y-%m-%d/tapa'); + cdate -= datetime.timedelta(days=todayweekday) + cover_page_url = cdate.strftime('http://sur.infonews.com/ediciones/%Y-%m-%d/tapa') soup = self.index_to_soup(cover_page_url) cover_item = soup.find('img', attrs={'class':lambda x: x and 'imagecache-tapa_edicion_full' in x.split()}) if cover_item: - cover_url = cover_item['src'] + cover_url = cover_item['src'] return cover_url From 986ab2a0787a538bc5fe0c512da97f982e331dbc Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 28 Jun 2013 08:14:25 +0530 Subject: [PATCH 18/57] ebook-convert: Add option to read metadata from OPF --- src/calibre/ebooks/conversion/cli.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/conversion/cli.py b/src/calibre/ebooks/conversion/cli.py index a0abebc5fe..f2795005d8 100644 --- a/src/calibre/ebooks/conversion/cli.py +++ b/src/calibre/ebooks/conversion/cli.py @@ -94,6 +94,8 @@ def option_recommendation_to_cli_option(add_option, rec): if opt.long_switch == 'verbose': attrs['action'] = 'count' attrs.pop('type', '') + if opt.name == 'read_metadata_from_opf': + switches.append('--from-opf') if opt.name in DEFAULT_TRUE_OPTIONS and rec.recommended_value is True: switches = ['--disable-'+opt.long_switch] add_option(Option(*switches, **attrs)) @@ -190,7 +192,7 @@ def add_pipeline_options(parser, plumber): ), 'METADATA' : (_('Options to set metadata in the output'), - plumber.metadata_option_names, + plumber.metadata_option_names + ['read_metadata_from_opf'], ), 'DEBUG': (_('Options to help with debugging the conversion'), [ From 837adb4eabda66083840b3fb15b165946b31376a Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 28 Jun 2013 08:50:14 +0530 Subject: [PATCH 19/57] version 0.9.37 --- Changelog.yaml | 44 ++++++++++++++++++++++++++++++++++++++++ src/calibre/constants.py | 2 +- 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/Changelog.yaml b/Changelog.yaml index f952b961e2..7439a02986 100644 --- a/Changelog.yaml +++ b/Changelog.yaml @@ -20,6 +20,50 @@ # new recipes: # - title: +- version: 0.9.37 + date: 2013-06-28 + + new features: + - title: "Conversion: Add option to embed all referenced fonts" + type: major + description: "Add an option to embed all fonts that are referenced in the input document but are not already embedded. This will search your system for the referenced font, and if found, the font will be embedded. Only works if the output format supports font embedding (for example: EPUB or AZW3). The option is under the Look & Feel section of the conversion dialog." + + - title: "ToC Editor: When generating a ToC from files, if the file has no text, do not skip it. Instead create an entry using the filename of the file." + + - title: "AZW3 Input: Add support for the page-progression-direction that is used to indicate page turns should happen from right to left. The attribute is passed into EPUB when converting." + tickets: [1194766] + + - title: "ebook-convert: Add a --from-opf option to read metadata from OPF files directly, instead of having to run ebook-meta --from-opf after conversion" + + bug fixes: + - title: "PDF Output: Fix Table of Contents being added to the end of the PDF even without the Add Table of Contents option being enabled." + tickets: [1194836] + + - title: "When auto-merging books on add, also merge identifiers." + + - title: "Fix an error when using the Template Editor to create a template that uses custom columns." + tickets: [1193763] + + - title: "LRF Output: Fix " entities in attribute values causing problems" + + - title: "News download: Apply the default page margin conversion settings. Also, when converting to PDF, apply the pdf conversion defaults." + tickets: [1193912] + + - title: "Fix a regression that broke scanning for books on all devices that used the Aluratek Color driver." + tickets: [1192940] + + - title: "fetch-ebbok-metadata: Fix --opf argument erroneously requiring a value" + + - title: "When waiting before sending email, log the wait." + tickets: [1195173] + + improved recipes: + - taz.de (RSS) + - Miradas al sur + - Frontline + - La Nacion (Costa Rica) + + - version: 0.9.36 date: 2013-06-21 diff --git a/src/calibre/constants.py b/src/calibre/constants.py index 6834a4e66d..a4edca6bd5 100644 --- a/src/calibre/constants.py +++ b/src/calibre/constants.py @@ -4,7 +4,7 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __docformat__ = 'restructuredtext en' __appname__ = u'calibre' -numeric_version = (0, 9, 36) +numeric_version = (0, 9, 37) __version__ = u'.'.join(map(unicode, numeric_version)) __author__ = u"Kovid Goyal " From 6579327a6d99635411ee8a7dcf1d143f5fb5a789 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 28 Jun 2013 12:26:56 +0530 Subject: [PATCH 20/57] Various minor fixes in the publish process --- setup/file_hosting_servers.rst | 10 ++++++++++ setup/hosting.py | 8 ++++++-- setup/upload.py | 2 -- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/setup/file_hosting_servers.rst b/setup/file_hosting_servers.rst index 8dd0afe098..261241e24d 100644 --- a/setup/file_hosting_servers.rst +++ b/setup/file_hosting_servers.rst @@ -35,3 +35,13 @@ rsync /srv from another file server service nginx start +Services +--------- + +SSH into sourceforge and downloadbestsoftware so that their host keys are +stored. + + ssh -oStrictHostKeyChecking=no kovid@www.downloadbestsoft-mirror1.com + ssh -oStrictHostKeyChecking=no kovidgoyal,calibre@frs.sourceforge.net + ssh -oStrictHostKeyChecking=no files.calibre-ebook.com (and whatever other mirrors are present) + diff --git a/setup/hosting.py b/setup/hosting.py index 1e78f4694d..d97373cdbc 100644 --- a/setup/hosting.py +++ b/setup/hosting.py @@ -473,14 +473,18 @@ def upload_to_servers(files, version): # {{{ os.mkdir(dest) for src in files: shutil.copyfile(src, os.path.join(dest, os.path.basename(src))) - generate_index() + cwd = os.getcwd() + try: + generate_index() + finally: + os.chdir(cwd) for server, rdir in {'files':'/srv/download/'}.iteritems(): print('Uploading to server:', server) server = '%s.calibre-ebook.com' % server # Copy the generated index files print ('Copying generated index') - check_call(['rsync', '-hzr', '-e', 'ssh -x', '--include', '*.html', + check_call(['rsync', '-hza', '-e', 'ssh -x', '--include', '*.html', '--filter', '-! */', base, 'root@%s:%s' % (server, rdir)]) # Copy the release files rdir = '%s%s/' % (rdir, version) diff --git a/setup/upload.py b/setup/upload.py index dd59067c0c..0475773e01 100644 --- a/setup/upload.py +++ b/setup/upload.py @@ -255,8 +255,6 @@ class UploadToServer(Command): # {{{ description = 'Upload miscellaneous data to calibre server' def run(self, opts): - check_call('ssh divok rm -f %s/calibre-\*.tar.xz'%DOWNLOADS, shell=True) - # check_call('scp dist/calibre-*.tar.xz divok:%s/'%DOWNLOADS, shell=True) check_call('gpg --armor --detach-sign dist/calibre-*.tar.xz', shell=True) check_call('scp dist/calibre-*.tar.xz.asc divok:%s/signatures/'%DOWNLOADS, From 54a9a7c98e7bbf995201fc6f323ad45cb0ca20f2 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 28 Jun 2013 12:44:44 +0530 Subject: [PATCH 21/57] pep8 and small perf improvement for human_readable() --- src/calibre/__init__.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py index 5e940efcd9..07ad906247 100644 --- a/src/calibre/__init__.py +++ b/src/calibre/__init__.py @@ -310,9 +310,9 @@ def get_parsed_proxy(typ='http', debug=True): proxy = proxies.get(typ, None) if proxy: pattern = re.compile(( - '(?:ptype://)?' \ - '(?:(?P\w+):(?P.*)@)?' \ - '(?P[\w\-\.]+)' \ + '(?:ptype://)?' + '(?:(?P\w+):(?P.*)@)?' + '(?P[\w\-\.]+)' '(?::(?P\d+))?').replace('ptype', typ) ) @@ -535,7 +535,7 @@ def entity_to_unicode(match, exceptions=[], encoding='cp1252', ent = match.group(1) if ent in exceptions: return '&'+ent+';' - if ent in {'apos', 'squot'}: # squot is generated by some broken CMS software + if ent in {'apos', 'squot'}: # squot is generated by some broken CMS software return check("'") if ent == 'hellips': ent = 'hellip' @@ -565,7 +565,7 @@ def entity_to_unicode(match, exceptions=[], encoding='cp1252', return '&'+ent+';' _ent_pat = re.compile(r'&(\S+?);') -xml_entity_to_unicode = partial(entity_to_unicode, result_exceptions = { +xml_entity_to_unicode = partial(entity_to_unicode, result_exceptions={ '"' : '"', "'" : ''', '<' : '<', @@ -670,8 +670,8 @@ def human_readable(size, sep=' '): """ Convert a size in bytes into a human readable form """ divisor, suffix = 1, "B" for i, candidate in enumerate(('B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB')): - if size < 1024**(i+1): - divisor, suffix = 1024**(i), candidate + if size < (1 << ((i + 1) * 10)): + divisor, suffix = (1 << (i * 10)), candidate break size = str(float(size)/divisor) if size.find(".") > -1: From 4b4b89bc6ebf58602ab2dfca1a3c076298b1deed Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 29 Jun 2013 00:15:38 +0530 Subject: [PATCH 22/57] Update iprofessional Fixes #1195826 [Updated recipe for iprofessional](https://bugs.launchpad.net/calibre/+bug/1195826) --- recipes/iprofesional.recipe | 66 +++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 35 deletions(-) diff --git a/recipes/iprofesional.recipe b/recipes/iprofesional.recipe index e8edbbc7a3..82d1b81674 100644 --- a/recipes/iprofesional.recipe +++ b/recipes/iprofesional.recipe @@ -1,5 +1,4 @@ -__license__ = 'GPL v3' -__copyright__ = '2011, Darko Miletic ' +__copyright__ = '2011-2013, Darko Miletic ' ''' www.iprofesional.com ''' @@ -19,13 +18,15 @@ class iProfesional(BasicNewsRecipe): use_embedded_content = False language = 'es_AR' remove_empty_feeds = True - publication_type = 'nesportal' - masthead_url = 'http://www.iprofesional.com/img/logo-iprofesional.png' + publication_type = 'newsportal' + masthead_url = 'http://www.iprofesional.com/img/header/logoiprofesional.png' extra_css = """ - body{font-family: Arial,Helvetica,sans-serif } + body{font-family: 'Droid Sans',Arial,sans-serif } img{margin-bottom: 0.4em; display:block} - .titulo-interior{font-family: Georgia,"Times New Roman",Times,serif} - .autor-nota{font-size: small; font-weight: bold; font-style: italic; color: gray} + .titulo{font-family: WhitneyBoldWhitneyBold,Arial,Helvetica,sans-serif; color: blue} + .fecha-archivo{font-weight: bold; color: rgb(205, 150, 24)} + .description{font-weight: bold; color: gray } + .firma{font-size: small} """ conversion_options = { @@ -35,27 +36,21 @@ class iProfesional(BasicNewsRecipe): , 'language' : language } - keep_only_tags = [dict(attrs={'class':['fecha','interior-nota']})] - - remove_tags = [ - dict(name=['meta','link','base','embed','object','iframe']) - ,dict(attrs={'class':['menu-imprimir','guardarNota','IN-widget','fin','permalink']}) - ] - remove_attributes=['lang','xmlns:og','xmlns:fb'] - + keep_only_tags = [dict(attrs={'class':'desarrollo'})] + remove_tags = [dict(name=['meta','link','base','embed','object','iframe'])] feeds = [ (u'Ultimas noticias' , u'http://feeds.feedburner.com/iprofesional-principales-noticias') - ,(u'Finanzas' , u'http://feeds.feedburner.com/iprofesional-finanzas' ) - ,(u'Impuestos' , u'http://feeds.feedburner.com/iprofesional-impuestos' ) - ,(u'Negocios' , u'http://feeds.feedburner.com/iprofesional-economia' ) - ,(u'Comercio Exterior' , u'http://feeds.feedburner.com/iprofesional-comercio-exterior' ) - ,(u'Tecnologia' , u'http://feeds.feedburner.com/iprofesional-tecnologia' ) - ,(u'Management' , u'http://feeds.feedburner.com/iprofesional-managment' ) - ,(u'Marketing' , u'http://feeds.feedburner.com/iprofesional-marketing' ) - ,(u'Legales' , u'http://feeds.feedburner.com/iprofesional-legales' ) - ,(u'Autos' , u'http://feeds.feedburner.com/iprofesional-autos' ) - ,(u'Vinos' , u'http://feeds.feedburner.com/iprofesional-vinos-bodegas' ) + ,(u'Finanzas' , u'http://feeds.feedburner.com/iprofesional-finanzas') + ,(u'Impuestos' , u'http://feeds.feedburner.com/iprofesional-impuestos') + ,(u'Negocios' , u'http://feeds.feedburner.com/iprofesional-economia') + ,(u'Comercio Exterior' , u'http://feeds.feedburner.com/iprofesional-comercio-exterior') + ,(u'Tecnologia' , u'http://feeds.feedburner.com/iprofesional-tecnologia') + ,(u'Management' , u'http://feeds.feedburner.com/iprofesional-managment') + ,(u'Marketing' , u'http://feeds.feedburner.com/iprofesional-marketing') + ,(u'Legales' , u'http://feeds.feedburner.com/iprofesional-legales') + ,(u'Autos' , u'http://feeds.feedburner.com/iprofesional-autos') + ,(u'Vinos' , u'http://feeds.feedburner.com/iprofesional-vinos-bodegas') ] def preprocess_html(self, soup): @@ -64,16 +59,17 @@ class iProfesional(BasicNewsRecipe): for item in soup.findAll('a'): limg = item.find('img') if item.string is not None: - str = item.string - item.replaceWith(str) + str = item.string + item.replaceWith(str) else: - if limg: - item.name = 'div' - item.attrs = [] - else: - str = self.tag_to_string(item) - item.replaceWith(str) + if limg: + item.name = 'div' + item.attrs = [] + else: + str = self.tag_to_string(item) + item.replaceWith(str) for item in soup.findAll('img'): - if not item.has_key('alt'): - item['alt'] = 'image' + if 'alt' not in item: + item['alt'] = 'image' return soup + From 9e857d1ed7a44bc6e56d125faae626486a0d6830 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 29 Jun 2013 09:39:36 +0530 Subject: [PATCH 23/57] DOCX Input: Support horizontal rules DOCX Input: Add support for horizontal rules created by typing three hyphens and pressing enter. --- src/calibre/ebooks/docx/cleanup.py | 13 +++++++++++++ src/calibre/ebooks/docx/images.py | 22 +++++++++++++++++++++- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/docx/cleanup.py b/src/calibre/ebooks/docx/cleanup.py index a55f8449d8..10bfd9a78f 100644 --- a/src/calibre/ebooks/docx/cleanup.py +++ b/src/calibre/ebooks/docx/cleanup.py @@ -8,6 +8,8 @@ __copyright__ = '2013, Kovid Goyal ' import os +from calibre.ebooks.docx.names import ancestor + def mergeable(previous, current): if previous.tail or current.tail: return False @@ -97,6 +99,16 @@ def before_count(root, tag, limit=10): return limit def cleanup_markup(log, root, styles, dest_dir, detect_cover): + # Move


    s outside paragraphs, if possible. + for hr in root.xpath('//span/hr'): + p = ancestor(hr, 'p') + descendants = tuple(p.iterdescendants()) + if descendants[-1] is hr: + parent = p.getparent() + idx = parent.index(p) + parent.insert(idx+1, hr) + hr.tail = '\n\t' + # Merge consecutive spans that have the same styling current_run = [] for span in root.xpath('//span'): @@ -165,3 +177,4 @@ def cleanup_markup(log, root, styles, dest_dir, detect_cover): return path + diff --git a/src/calibre/ebooks/docx/images.py b/src/calibre/ebooks/docx/images.py index 85e957a589..b0a5348d90 100644 --- a/src/calibre/ebooks/docx/images.py +++ b/src/calibre/ebooks/docx/images.py @@ -8,7 +8,7 @@ __copyright__ = '2013, Kovid Goyal ' import os -from lxml.html.builder import IMG +from lxml.html.builder import IMG, HR from calibre.ebooks.docx.names import XPath, get, barename from calibre.utils.filenames import ascii_filename @@ -163,6 +163,26 @@ class Images(object): yield ans def pict_to_html(self, pict, page): + # First see if we have an
    + is_hr = len(pict) == 1 and get(pict[0], 'o:hr') in {'t', 'true'} + if is_hr: + style = {} + hr = HR() + try: + pct = float(get(pict[0], 'o:hrpct')) + except (ValueError, TypeError, AttributeError): + pass + else: + if pct > 0: + style['width'] = '%.3g%%' % pct + align = get(pict[0], 'o:hralign', 'center') + if align in {'left', 'right'}: + style['margin-left'] = '0' if align == 'left' else 'auto' + style['margin-right'] = 'auto' if align == 'left' else '0' + if style: + hr.set('style', '; '.join(('%s:%s' % (k, v) for k, v in style.iteritems()))) + yield hr + for imagedata in XPath('descendant::v:imagedata[@r:id]')(pict): rid = get(imagedata, 'r:id') if rid in self.rid_map: From fa2f92e96cb17740e19630bd290c47cc3f902e47 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 29 Jun 2013 10:42:18 +0530 Subject: [PATCH 24/57] ... --- src/calibre/ebooks/conversion/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/conversion/cli.py b/src/calibre/ebooks/conversion/cli.py index f2795005d8..45b7841347 100644 --- a/src/calibre/ebooks/conversion/cli.py +++ b/src/calibre/ebooks/conversion/cli.py @@ -27,7 +27,7 @@ specified as the first two arguments to the command. The output ebook format is guessed from the file extension of \ output_file. output_file can also be of the special format .EXT where \ EXT is the output file extension. In this case, the name of the output \ -file is derived the name of the input file. Note that the filenames must \ +file is derived from the name of the input file. Note that the filenames must \ not start with a hyphen. Finally, if output_file has no extension, then \ it is treated as a directory and an "open ebook" (OEB) consisting of HTML \ files is written to that directory. These files are the files that would \ From 373b3c7a222f11e59b40dee03327d8e0599f065c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 29 Jun 2013 12:30:56 +0530 Subject: [PATCH 25/57] Fix #22 (Remove double article entries which are already part in other rss feeds of the same recipe) --- recipes/taz_rss.recipe | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/recipes/taz_rss.recipe b/recipes/taz_rss.recipe index 3ccbe2a4f1..0535b6ef3a 100644 --- a/recipes/taz_rss.recipe +++ b/recipes/taz_rss.recipe @@ -1,23 +1,43 @@ -__license__ = 'GPL v3' -__copyright__ = '2010, Alexander Schremmer ' +__license__ = 'GPL v3' +__copyright__ = '2013, Alexander Schremmer , Robert Riemann ' +import re from calibre.web.feeds.news import BasicNewsRecipe class TazRSSRecipe(BasicNewsRecipe): - title = u'Taz.de (die tageszeitung) RSS Feed - German' - __author__ = 'Alexander Schremmer' + title = u'Taz - die Tageszeitung' + description = u'Taz.de - die tageszeitung' + __author__ = 'Alexander Schremmer, Robert Riemann' language = 'de' lang = 'de-DE' oldest_article = 7 max_articles_per_feed = 100 publisher = 'taz Entwicklungs GmbH & Co. Medien KG' + # masthead_url = u'http://galeria-autonomica.de/wp-content/uploads/a_taz-logo.gif' + masthead_url = u'http://upload.wikimedia.org/wikipedia/de/thumb/1/15/Die-Tageszeitung-Logo.svg/500px-Die-Tageszeitung-Logo.svg.png' conversion_options = {'publisher': publisher, 'language': lang, } - - feeds = [(u'TAZ main feed', u'http://www.taz.de/rss.xml')] + feeds = [ + (u'Schlagzeilen', u'http://www.taz.de/!p3270;rss/'), + (u'Politik', u'http://www.taz.de/Politik/!p2;rss/'), + (u'Zukunft', u'http://www.taz.de/Zukunft/!p4;rss/'), + (u'Netz', u'http://www.taz.de/Netz/!p5;rss/'), + (u'Debatte', u'http://www.taz.de/Debatte/!p9;rss/'), + (u'Leben', u'http://www.taz.de/Leben/!p10;rss/'), + (u'Sport', u'http://www.taz.de/Sport/!p12;rss/'), + (u'Wahrheit', u'http://www.taz.de/Wahrheit/!p13;rss/'), + (u'Berlin', u'http://www.taz.de/Berlin/!p14;rss/'), + (u'Nord', u'http://www.taz.de/Nord/!p11;rss/') + ] keep_only_tags = [dict(name='div', attrs={'class': 'sect sect_article'})] remove_tags = [ - dict(name=['div'], attrs={'class': 'artikelwerbung'}), - dict(name=['ul'], attrs={'class': 'toolbar'}),] + dict(name=['div'], attrs={'class': 'artikelwerbung'}), + dict(name=['ul'], attrs={'class': 'toolbar'}), + # remove: taz paywall + dict(name=['div'], attrs={'id': 'tzi_paywall'}), + # remove: Artikel zum Thema (not working on Kindle) + dict(name=['div'], attrs={'class': re.compile(r".*\bsect_seealso\b.*")}), + dict(name=['div'], attrs={'class': 'sectfoot'}) + ] From e8ada93ddad20094425cfd037a96f675a4f9e70b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Sun, 30 Jun 2013 00:58:24 +0200 Subject: [PATCH 26/57] deduplicate code snippet --- recipes/gosc_niedzielny.recipe | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/recipes/gosc_niedzielny.recipe b/recipes/gosc_niedzielny.recipe index ba2280c2a5..7ff2d48f84 100644 --- a/recipes/gosc_niedzielny.recipe +++ b/recipes/gosc_niedzielny.recipe @@ -47,13 +47,7 @@ class GN(BasicNewsRecipe): return feeds def find_articles(self, main_block): - for a in main_block.findAll('div', attrs={'class':'prev_doc2'}): - art = a.find('a') - yield { - 'title' : self.tag_to_string(art), - 'url' : 'http://www.gosc.pl' + art['href'] - } - for a in main_block.findAll('div', attrs={'class':'sr-document'}): + for a in main_block.findAll('div', attrs={'class':['prev_doc2', 'sr-document']}): art = a.find('a') yield { 'title' : self.tag_to_string(art), From 3db0f1895a2547d75d3015fcea54f1647b5f2afd Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 30 Jun 2013 10:45:57 +0530 Subject: [PATCH 27/57] ... --- src/calibre/ebooks/oeb/transforms/embed_fonts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/oeb/transforms/embed_fonts.py b/src/calibre/ebooks/oeb/transforms/embed_fonts.py index 027b8af1de..879e05da8f 100644 --- a/src/calibre/ebooks/oeb/transforms/embed_fonts.py +++ b/src/calibre/ebooks/oeb/transforms/embed_fonts.py @@ -197,7 +197,7 @@ class EmbedFonts(object): if not ff: return ff = ff[0] - if ff in self.warned: + if ff in self.warned or ff == 'inherit': return try: fonts = font_scanner.fonts_for_family(ff) From 0223f60c9d28fc4433e63233e91230b8ddb6f708 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 30 Jun 2013 20:03:57 +0530 Subject: [PATCH 28/57] ... --- src/calibre/ebooks/conversion/plumber.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index a96574e904..14b5482a04 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -134,8 +134,7 @@ OptionRecommendation(name='output_profile', help=_('Specify the output profile. The output profile ' 'tells the conversion system how to optimize the ' 'created document for the specified device. In some cases, ' - 'an output profile is required to produce documents that ' - 'will work on a device. For example EPUB on the SONY reader. ' + 'an output profile can be used to optimize the output for a particular device, but this is rarely necessary. ' 'Choices are:') + ', '.join([x.short_name for x in output_profiles()]) ), From 47626ee0cccb1ae7199b254ed01b7cea1f26f5b5 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 1 Jul 2013 09:03:55 +0530 Subject: [PATCH 29/57] ... --- setup/file_hosting_servers.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/setup/file_hosting_servers.rst b/setup/file_hosting_servers.rst index 261241e24d..c72998958e 100644 --- a/setup/file_hosting_servers.rst +++ b/setup/file_hosting_servers.rst @@ -11,6 +11,10 @@ Edit /etc/hosts and put in FQDN in the appropriate places, for example:: 27.0.1.1 download.calibre-ebook.com download 46.28.49.116 download.calibre-ebook.com download +dpkg-reconfigure tzdata +set timezone to Asia/Kolkata +service cron restart + apt-get install vim nginx zsh python-lxml python-mechanize iotop htop smartmontools mosh chsh -s /bin/zsh From 10c4c8f69f7b2904350e7f474ef14ce77da8b172 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 1 Jul 2013 12:05:28 +0530 Subject: [PATCH 30/57] ... --- src/calibre/gui2/tag_browser/ui.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/gui2/tag_browser/ui.py b/src/calibre/gui2/tag_browser/ui.py index 79d4a85f84..8760ccd23b 100644 --- a/src/calibre/gui2/tag_browser/ui.py +++ b/src/calibre/gui2/tag_browser/ui.py @@ -334,7 +334,7 @@ class TagBrowserWidget(QWidget): # {{{ search_layout = QHBoxLayout() self._layout.addLayout(search_layout) self.item_search = HistoryLineEdit(parent) - self.item_search.setMinimumContentsLength(10) + self.item_search.setMinimumContentsLength(5) self.item_search.setSizeAdjustPolicy(self.item_search.AdjustToMinimumContentsLengthWithIcon) try: self.item_search.lineEdit().setPlaceholderText( From 59346348c5ab9db1510e6d022d36b8a19078aa44 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 1 Jul 2013 13:46:43 +0530 Subject: [PATCH 31/57] ... --- src/calibre/library/catalogs/epub_mobi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/library/catalogs/epub_mobi.py b/src/calibre/library/catalogs/epub_mobi.py index 4984fbf9e2..673d764593 100644 --- a/src/calibre/library/catalogs/epub_mobi.py +++ b/src/calibre/library/catalogs/epub_mobi.py @@ -149,7 +149,7 @@ class EPUB_MOBI(CatalogPlugin): default=None, dest='output_profile', action=None, - help=_("Specifies the output profile. In some cases, an output profile is required to optimize the catalog for the device. For example, 'kindle' or 'kindle_dx' creates a structured Table of Contents with Sections and Articles.\n" + help=_("Specifies the output profile. In some cases, an output profile is required to optimize the catalog for the device. For example, 'kindle' or 'kindle_dx' creates a structured Table of Contents with Sections and Articles.\n" "Default: '%default'\n" "Applies to: AZW3, ePub, MOBI output formats")), Option('--prefix-rules', From 144f66c13b6e020dee1a6b225a728967d7dc21a1 Mon Sep 17 00:00:00 2001 From: GRiker Date: Mon, 1 Jul 2013 03:04:32 -0600 Subject: [PATCH 32/57] Added mkdir() to libiMobileDevice --- .../devices/idevice/libimobiledevice.py | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/calibre/devices/idevice/libimobiledevice.py b/src/calibre/devices/idevice/libimobiledevice.py index ca6ed57a77..ba0f4d0f38 100644 --- a/src/calibre/devices/idevice/libimobiledevice.py +++ b/src/calibre/devices/idevice/libimobiledevice.py @@ -418,6 +418,14 @@ class libiMobileDevice(): if False: self._idevice_set_debug_level(DEBUG) + def mkdir(self, path): + ''' + Mimic mkdir(), creating a directory at path. Does not create + intermediate folders + ''' + self._log_location("'%s'" % path) + return self._afc_make_directory(path) + def mount_ios_app(self, app_name=None, app_id=None): ''' Convenience method to get iDevice ready to talk to app_name or app_id @@ -1007,6 +1015,27 @@ class libiMobileDevice(): self.log(" %s: %s" % (key, file_stats[key])) return file_stats + def _afc_make_directory(self, path): + ''' + Creates a directory on the device. Does not create intermediate dirs. + + Args: + client: (AFC_CLIENT_T) The client to use to make a directory + dir: (const char *) The directory's fully-qualified path + + Result: + error: AFC_E_SUCCESS on success or an AFC_E_* error value + ''' + self._log_location("%s" % repr(path)) + + error = self.lib.afc_make_directory(byref(self.afc), + str(path)) & 0xFFFF + if error: + if self.verbose: + self.log(" ERROR: %s" % self._afc_error(error)) + + return error + def _afc_read_directory(self, directory=''): ''' Gets a directory listing of the directory requested From 9952abad4aa8a973a5ed06a3a9575ecda1a446e0 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 1 Jul 2013 15:08:54 +0530 Subject: [PATCH 33/57] Polish: Add option to embed referenced fonts Book polishing: Add option to embed all referenced fonts when polishing books using the "Polish Books" tool. Fixes #1196038 [[enhancement] embed font without conversion](https://bugs.launchpad.net/calibre/+bug/1196038) --- resources/compiled_coffeescript.zip | Bin 71177 -> 71881 bytes src/calibre/ebooks/oeb/polish/embed.py | 158 ++++++++++++++++++ .../ebooks/oeb/polish/font_stats.coffee | 12 ++ src/calibre/ebooks/oeb/polish/main.py | 18 +- src/calibre/ebooks/oeb/polish/stats.py | 70 ++++++-- src/calibre/gui2/actions/polish.py | 2 + 6 files changed, 245 insertions(+), 15 deletions(-) create mode 100644 src/calibre/ebooks/oeb/polish/embed.py diff --git a/resources/compiled_coffeescript.zip b/resources/compiled_coffeescript.zip index cb48a58bd2714e22fdeb5e827bef753f51998001..e092b53157f3c30f95712db0b597358b744dc08a 100644 GIT binary patch delta 454 zcmeBN!*X&Xi$s7oGm8iV2+S(D=k%>F=<5+a28JVi+qKvkr*co#KP6R1p{O(mN*3!V6qi)yq)u<(W0dBy=28HI#JtH91>`25 zwG*3se=7H6PAi+~feRRi?{vGRjS#z{MzK1~FPeAu~-u1L7P_g=(mUFuBCM;%JCmEYO;gqSDmK z{)eR||49^{-0+=!^80f3=@SGPg{H3)Vq891w}q#XZz~@o8@n_klL#}mFkxVjpI#u$ zC^Oxfol#_ZurQ+*6C?k02T?}R=~ILmO_^@1KsXPC87-JL=z%$tAINh|Hx~gaHRyy0 zBvf%tZvqMEOoj-=v~Wy64HCG$VET6vMt3HK6(9*$W&=*f>5ig|B9jw-aftxjQrDU#ThxKFBD@mWx6Fgy+E8%boyH{Mhm9(@(_-VIHM7heiex0 z$)w!^5xf8ryftz1_XV=k6(tz8nB-?dB$6Z;4Vd(QK{zWV7 + ans = {} + for node in document.getElementsByTagName('*') + rules = document.defaultView.getMatchedCSSRules(node, '') + if rules + for rule in rules + style = rule.style + family = style.getPropertyValue('font-family') + if family + ans[family] = true + py_bridge.value = ans + if window? window.font_stats = new FontStats() diff --git a/src/calibre/ebooks/oeb/polish/main.py b/src/calibre/ebooks/oeb/polish/main.py index 08b5004c91..ff46288643 100644 --- a/src/calibre/ebooks/oeb/polish/main.py +++ b/src/calibre/ebooks/oeb/polish/main.py @@ -14,6 +14,7 @@ from functools import partial from calibre.ebooks.oeb.polish.container import get_container from calibre.ebooks.oeb.polish.stats import StatsCollector from calibre.ebooks.oeb.polish.subset import subset_all_fonts +from calibre.ebooks.oeb.polish.embed import embed_all_fonts from calibre.ebooks.oeb.polish.cover import set_cover from calibre.ebooks.oeb.polish.replace import smarten_punctuation from calibre.ebooks.oeb.polish.jacket import ( @@ -21,6 +22,7 @@ from calibre.ebooks.oeb.polish.jacket import ( from calibre.utils.logging import Log ALL_OPTS = { + 'embed': False, 'subset': False, 'opf': None, 'cover': None, @@ -47,6 +49,12 @@ changes needed for the desired effect.

    Note that polishing only works on files in the %s formats.

    \ ''')%_(' or ').join('%s'%x for x in SUPPORTED), +'embed': _('''\ +

    Embed all fonts that are referenced in the document and are not already embedded. +This will scan your computer for the fonts, and if they are found, they will be +embedded into the document.

    +'''), + 'subset': _('''\

    Subsetting fonts means reducing an embedded font to contain only the characters used from that font in the book. This @@ -118,8 +126,8 @@ def polish(file_map, opts, log, report): ebook = get_container(inbook, log) jacket = None - if opts.subset: - stats = StatsCollector(ebook) + if opts.subset or opts.embed: + stats = StatsCollector(ebook, do_embed=opts.embed) if opts.opf: rt(_('Updating metadata')) @@ -159,6 +167,11 @@ def polish(file_map, opts, log, report): smarten_punctuation(ebook, report) report('') + if opts.embed: + rt(_('Embedding referenced fonts')) + embed_all_fonts(ebook, stats, report) + report('') + if opts.subset: rt(_('Subsetting embedded fonts')) subset_all_fonts(ebook, stats.font_stats, report) @@ -197,6 +210,7 @@ def option_parser(): parser = OptionParser(usage=USAGE) a = parser.add_option o = partial(a, default=False, action='store_true') + o('--embed-fonts', '-e', dest='embed', help=CLI_HELP['embed']) o('--subset-fonts', '-f', dest='subset', help=CLI_HELP['subset']) a('--cover', '-c', help=_( 'Path to a cover image. Changes the cover specified in the ebook. ' diff --git a/src/calibre/ebooks/oeb/polish/stats.py b/src/calibre/ebooks/oeb/polish/stats.py index d4a5c96111..77b99ff9b6 100644 --- a/src/calibre/ebooks/oeb/polish/stats.py +++ b/src/calibre/ebooks/oeb/polish/stats.py @@ -7,10 +7,11 @@ __license__ = 'GPL v3' __copyright__ = '2013, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import json, sys, os +import json, sys, os, logging from urllib import unquote +from collections import defaultdict -from cssutils import parseStyle +from cssutils import CSSParser from PyQt4.Qt import (pyqtProperty, QString, QEventLoop, Qt, QSize, QTimer, pyqtSlot) from PyQt4.QtWebKit import QWebPage, QWebView @@ -41,14 +42,14 @@ def normalize_font_properties(font): 'extra-expanded', 'ultra-expanded'}: val = 'normal' font['font-stretch'] = val + return font -widths = {x:i for i, x in enumerate(( 'ultra-condensed', +widths = {x:i for i, x in enumerate(('ultra-condensed', 'extra-condensed', 'condensed', 'semi-condensed', 'normal', 'semi-expanded', 'expanded', 'extra-expanded', 'ultra-expanded' ))} def get_matching_rules(rules, font): - normalize_font_properties(font) matches = [] # Filter on family @@ -100,7 +101,7 @@ def get_matching_rules(rules, font): return m return [] -class Page(QWebPage): # {{{ +class Page(QWebPage): # {{{ def __init__(self, log): self.log = log @@ -157,10 +158,12 @@ class Page(QWebPage): # {{{ class StatsCollector(object): - def __init__(self, container): + def __init__(self, container, do_embed=False): self.container = container self.log = self.logger = container.log + self.do_embed = do_embed must_use_qt() + self.parser = CSSParser(loglevel=logging.CRITICAL, log=logging.getLogger('calibre.css')) self.loop = QEventLoop() self.view = QWebView() @@ -173,6 +176,10 @@ class StatsCollector(object): self.render_queue = list(container.spine_items) self.font_stats = {} + self.font_usage_map = {} + self.font_spec_map = {} + self.font_rule_map = {} + self.all_font_rules = {} QTimer.singleShot(0, self.render_book) @@ -235,27 +242,35 @@ class StatsCollector(object): rules = [] for rule in font_face_rules: ff = rule.get('font-family', None) - if not ff: continue - style = parseStyle('font-family:%s'%ff, validate=False) + if not ff: + continue + style = self.parser.parseStyle('font-family:%s'%ff, validate=False) ff = [x.value for x in style.getProperty('font-family').propertyValue] if not ff or ff[0] == 'inherit': continue rule['font-family'] = frozenset(icu_lower(f) for f in ff) src = rule.get('src', None) - if not src: continue - style = parseStyle('background-image:%s'%src, validate=False) + if not src: + continue + style = self.parser.parseStyle('background-image:%s'%src, validate=False) src = style.getProperty('background-image').propertyValue[0].uri name = self.href_to_name(src, '@font-face rule') + if name is None: + continue rule['src'] = name normalize_font_properties(rule) rule['width'] = widths[rule['font-stretch']] rule['weight'] = int(rule['font-weight']) rules.append(rule) - if not rules: + if not rules and not self.do_embed: return + self.font_rule_map[self.container.abspath_to_name(self.current_item)] = rules + for rule in rules: + self.all_font_rules[rule['src']] = rule + for rule in rules: if rule['src'] not in self.font_stats: self.font_stats[rule['src']] = set() @@ -265,19 +280,48 @@ class StatsCollector(object): if not isinstance(font_usage, list): raise Exception('Unknown error occurred while reading font usage') exclude = {'\n', '\r', '\t'} + self.font_usage_map[self.container.abspath_to_name(self.current_item)] = fu = defaultdict(dict) + bad_fonts = {'serif', 'sans-serif', 'monospace', 'cursive', 'fantasy', 'sansserif', 'inherit'} for font in font_usage: text = set() for t in font['text']: text |= frozenset(t) text.difference_update(exclude) - if not text: continue + if not text: + continue + normalize_font_properties(font) for rule in get_matching_rules(rules, font): self.font_stats[rule['src']] |= text + if self.do_embed: + ff = [icu_lower(x) for x in font.get('font-family', [])] + if ff and ff[0] not in bad_fonts: + keys = {'font-weight', 'font-style', 'font-stretch', 'font-family'} + key = frozenset(((k, ff[0] if k == 'font-family' else v) for k, v in font.iteritems() if k in keys)) + val = fu[key] + if not val: + val.update({k:(font[k][0] if k == 'font-family' else font[k]) for k in keys}) + val['text'] = set() + val['text'] |= text + self.font_usage_map[self.container.abspath_to_name(self.current_item)] = dict(fu) + + if self.do_embed: + self.page.evaljs('window.font_stats.get_font_families()') + font_families = self.page.bridge_value + if not isinstance(font_families, dict): + raise Exception('Unknown error occurred while reading font families') + self.font_spec_map[self.container.abspath_to_name(self.current_item)] = fs = set() + for raw in font_families.iterkeys(): + style = self.parser.parseStyle('font-family:' + raw, validate=False).getProperty('font-family') + for x in style.propertyValue: + x = x.value + if x and x.lower() not in bad_fonts: + fs.add(x) if __name__ == '__main__': from calibre.ebooks.oeb.polish.container import get_container from calibre.utils.logging import default_log default_log.filter_level = default_log.DEBUG ebook = get_container(sys.argv[-1], default_log) - print (StatsCollector(ebook).font_stats) + print (StatsCollector(ebook, do_embed=True).font_stats) + diff --git a/src/calibre/gui2/actions/polish.py b/src/calibre/gui2/actions/polish.py index eb21fb2626..0f21807afb 100644 --- a/src/calibre/gui2/actions/polish.py +++ b/src/calibre/gui2/actions/polish.py @@ -45,6 +45,7 @@ class Polish(QDialog): # {{{ ORIGINAL_* format before running it.

    ''') ), + 'embed':_('

    Embed referenced fonts

    %s')%HELP['embed'], 'subset':_('

    Subsetting fonts

    %s')%HELP['subset'], 'smarten_punctuation': @@ -75,6 +76,7 @@ class Polish(QDialog): # {{{ count = 0 self.all_actions = OrderedDict([ + ('embed', _('&Embed all referenced fonts')), ('subset', _('&Subset all embedded fonts')), ('smarten_punctuation', _('Smarten &punctuation')), ('metadata', _('Update &metadata in the book files')), From c3cc6a2278facf35cfbc260e8b1e9fd978008913 Mon Sep 17 00:00:00 2001 From: GRiker Date: Mon, 1 Jul 2013 03:46:32 -0600 Subject: [PATCH 34/57] Revert "Fixed typo(?) in set_metadata() for touched files." This reverts commit 8f30c17486701bf2cc29a84c98af5336de75fc56. --- src/calibre/utils/podofo/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/utils/podofo/__init__.py b/src/calibre/utils/podofo/__init__.py index a0b5d85331..13c12a9bb3 100644 --- a/src/calibre/utils/podofo/__init__.py +++ b/src/calibre/utils/podofo/__init__.py @@ -36,7 +36,7 @@ def set_metadata(stream, mi): except WorkerError as e: raise Exception('Failed to set PDF metadata: %s'%e.orig_tb) if touched: - with open(os.path.join(tdir, u'input.pdf'), 'rb') as f: + with open(os.path.join(tdir, u'output.pdf'), 'rb') as f: f.seek(0, 2) if f.tell() > 100: f.seek(0) From 6af87c05e31d886101feb065cd373021472ba888 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 1 Jul 2013 15:33:20 +0530 Subject: [PATCH 35/57] Driver for PocketBook Surfpad 2 Fixes #1182850 [Private bug](https://bugs.launchpad.net/calibre/+bug/1182850) --- src/calibre/devices/android/driver.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py index a39c190d05..31b60389ad 100644 --- a/src/calibre/devices/android/driver.py +++ b/src/calibre/devices/android/driver.py @@ -96,7 +96,7 @@ class ANDROID(USBMS): # Google 0x18d1 : { - 0x0001 : [0x0223, 0x230, 0x9999], + 0x0001 : [0x0222, 0x0223, 0x230, 0x9999], 0x0002 : [0x9999], 0x0003 : [0x0230, 0x9999], 0x4e11 : [0x0100, 0x226, 0x227], @@ -219,7 +219,7 @@ class ANDROID(USBMS): 'POCKET', 'ONDA_MID', 'ZENITHIN', 'INGENIC', 'PMID701C', 'PD', 'PMP5097C', 'MASS', 'NOVO7', 'ZEKI', 'COBY', 'SXZ', 'USB_2.0', 'COBY_MID', 'VS', 'AINOL', 'TOPWISE', 'PAD703', 'NEXT8D12', - 'MEDIATEK', 'KEENHI', 'TECLAST', 'SURFTAB', 'XENTA',] + 'MEDIATEK', 'KEENHI', 'TECLAST', 'SURFTAB', 'XENTA', 'OBREEY_S'] WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'A953', 'INC.NEXUS_ONE', '__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897', 'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', @@ -241,7 +241,7 @@ class ANDROID(USBMS): 'S5830I_CARD', 'MID7042', 'LINK-CREATE', '7035', 'VIEWPAD_7E', 'NOVO7', 'MB526', '_USB#WYK7MSF8KE', 'TABLET_PC', 'F', 'MT65XX_MS', 'ICS', 'E400', '__FILE-STOR_GADG', 'ST80208-1', 'GT-S5660M_CARD', 'XT894', '_USB', - 'PROD_TAB13-201', + 'PROD_TAB13-201', 'URFPAD2', ] WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897', 'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD', @@ -254,7 +254,7 @@ class ANDROID(USBMS): 'UMS_COMPOSITE', 'PRO', '.KOBO_VOX', 'SGH-T989_CARD', 'SGH-I727', 'USB_FLASH_DRIVER', 'ANDROID', 'MID7042', '7035', 'VIEWPAD_7E', 'NOVO7', 'ADVANCED', 'TABLET_PC', 'F', 'E400_SD_CARD', 'ST80208-1', 'XT894', - '_USB', 'PROD_TAB13-201', + '_USB', 'PROD_TAB13-201', 'URFPAD2' ] OSX_MAIN_MEM = 'Android Device Main Memory' From e83738653ca3688569f37ef1b8f5bf3fe70060ad Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 2 Jul 2013 09:14:21 +0530 Subject: [PATCH 36/57] Fix #25 (bugfix: pass book_id to FileType plugins on postimport instead of __builtins__.id()) --- src/calibre/library/database2.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index 98e0190ecd..435d8edeeb 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -1500,8 +1500,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): format = os.path.splitext(npath)[-1].lower().replace('.', '').upper() stream = lopen(npath, 'rb') format = check_ebook_format(stream, format) - retval = self.add_format(index, format, stream, replace=replace, - index_is_id=index_is_id, path=path, notify=notify) + id = index if index_is_id else self.id(index) + retval = self.add_format(id, format, stream, replace=replace, + index_is_id=True, path=path, notify=notify) run_plugins_on_postimport(self, id, format) return retval From acbd785af90eaac58432bceae8e9fb9171b8d5c1 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 2 Jul 2013 09:24:09 +0530 Subject: [PATCH 37/57] Fix docstring of is_image_collection --- src/calibre/customize/conversion.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/calibre/customize/conversion.py b/src/calibre/customize/conversion.py index fe0d563206..38ffcef71f 100644 --- a/src/calibre/customize/conversion.py +++ b/src/calibre/customize/conversion.py @@ -77,7 +77,7 @@ class OptionRecommendation(object): self.option.choices: raise ValueError('OpRec: %s: Recommended value not in choices'% self.option.name) - if not (isinstance(self.recommended_value, (int, float, str, unicode))\ + if not (isinstance(self.recommended_value, (int, float, str, unicode)) or self.recommended_value is None): raise ValueError('OpRec: %s:'%self.option.name + repr(self.recommended_value) + @@ -139,8 +139,10 @@ class InputFormatPlugin(Plugin): file_types = set([]) #: If True, this input plugin generates a collection of images, - #: one per HTML file. You can obtain access to the images via - #: convenience method, :meth:`get_image_collection`. + #: one per HTML file. This can be set dynamically, in the convert method + #: if the input files can be both image collections and non-image collections. + #: If you set this to True, you must implement the get_images() method that returns + #: a list of images. is_image_collection = False #: Number of CPU cores used by this plugin @@ -238,7 +240,6 @@ class InputFormatPlugin(Plugin): ret = self.convert(stream, options, file_ext, log, accelerators) - return ret def postprocess_book(self, oeb, opts, log): @@ -313,7 +314,6 @@ class OutputFormatPlugin(Plugin): Plugin.__init__(self, *args) self.report_progress = DummyReporter() - def convert(self, oeb_book, output, input_plugin, opts, log): ''' Render the contents of `oeb_book` (which is an instance of @@ -363,3 +363,4 @@ class OutputFormatPlugin(Plugin): + From 3c81080eadc76b5fd40a5320ee207e65f96017fb Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 2 Jul 2013 11:17:08 +0530 Subject: [PATCH 38/57] Democracy Now by Antoine Beaupre --- recipes/democracy_now.recipe | 45 ++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 recipes/democracy_now.recipe diff --git a/recipes/democracy_now.recipe b/recipes/democracy_now.recipe new file mode 100644 index 0000000000..f7868c19dd --- /dev/null +++ b/recipes/democracy_now.recipe @@ -0,0 +1,45 @@ +# vim:fileencoding=utf-8 +from calibre.web.feeds.news import BasicNewsRecipe + +class DemocracyNowRecipe(BasicNewsRecipe): + title = u'Democracy now!' + __author__ = u'Antoine Beaupré' + description = 'A daily TV/radio news program, hosted by Amy Goodman and Juan Gonzalez, airing on over 1,100 stations, pioneering the largest community media collaboration in the United States.' # noqa + language = 'en' + cover_url = 'http://www.democracynow.org/images/dn-logo-for-podcast.png' + + oldest_article = 1 + max_articles_per_feed = 10 + publication_type = 'magazine' + + auto_cleanup = False + use_embedded_content = False + no_stylesheets = True + remove_javascript = True + + feeds = [ + (u'Daily news', u'http://www.democracynow.org/democracynow.rss')] + + keep_only_tags = [dict(name='div', attrs={'id': 'page'}), ] + remove_tags = [dict(name='div', attrs={'id': 'topics_list'}), + dict(name='div', attrs={'id': 'header'}), + dict(name='div', attrs={'id': 'footer'}), + dict(name='div', attrs={'id': 'right'}), + dict(name='div', attrs={'id': 'left-panel'}), + dict(name='div', attrs={'id': 'top-video-content'}), + dict(name='div', attrs={'id': 'google-news-date'}), + dict(name='div', attrs={'id': 'story-donate'}), + dict( + name='div', attrs={'id': 'transcript-expand-collapse'}), + dict(name='span', attrs={'class': 'show-links'}), + dict(name='span', attrs={'class': 'storyNav'}), + dict(name='div', attrs={'class': 'headline_share'}), + dict(name='div', attrs={'class': 'mediaBar'}), + dict(name='div', attrs={'class': 'shareAndPrinterBar'}), + dict(name='div', attrs={'class': 'utility-navigation'}), + dict(name='div', attrs={'class': 'bottomContentNav'}), + dict(name='div', attrs={'class': 'recentShows'}), + dict( + name='div', attrs={'class': 'printer-and-transcript-links'}), + ] + From 19016a109d651c93a50beee6a2743de5f24fe737 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 2 Jul 2013 11:22:53 +0530 Subject: [PATCH 39/57] Add warning about font licensing to embed options --- src/calibre/ebooks/conversion/plumber.py | 3 ++- src/calibre/ebooks/oeb/polish/main.py | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index 14b5482a04..5778bbbabc 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -211,7 +211,8 @@ OptionRecommendation(name='embed_all_fonts', 'but not already embedded. This will search your system for the ' 'fonts, and if found, they will be embedded. Embedding will only work ' 'if the format you are converting to supports embedded fonts, such as ' - 'EPUB, AZW3 or PDF.' + 'EPUB, AZW3 or PDF. Please ensure that you have the proper license for embedding ' + 'the fonts used in this book.' )), OptionRecommendation(name='subset_embedded_fonts', diff --git a/src/calibre/ebooks/oeb/polish/main.py b/src/calibre/ebooks/oeb/polish/main.py index ff46288643..69d03098c7 100644 --- a/src/calibre/ebooks/oeb/polish/main.py +++ b/src/calibre/ebooks/oeb/polish/main.py @@ -53,6 +53,7 @@ changes needed for the desired effect.

    Embed all fonts that are referenced in the document and are not already embedded. This will scan your computer for the fonts, and if they are found, they will be embedded into the document.

    +

    Please ensure that you have the proper license for embedding the fonts used in this book.

    '''), 'subset': _('''\ From 9394f87c14df4f866d50f32350863cafb145a96b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 2 Jul 2013 11:23:59 +0530 Subject: [PATCH 40/57] ... --- src/calibre/ebooks/conversion/plumber.py | 2 +- src/calibre/ebooks/oeb/polish/main.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index 5778bbbabc..d4bdeb4562 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -212,7 +212,7 @@ OptionRecommendation(name='embed_all_fonts', 'fonts, and if found, they will be embedded. Embedding will only work ' 'if the format you are converting to supports embedded fonts, such as ' 'EPUB, AZW3 or PDF. Please ensure that you have the proper license for embedding ' - 'the fonts used in this book.' + 'the fonts used in this document.' )), OptionRecommendation(name='subset_embedded_fonts', diff --git a/src/calibre/ebooks/oeb/polish/main.py b/src/calibre/ebooks/oeb/polish/main.py index 69d03098c7..c5a7d4db6d 100644 --- a/src/calibre/ebooks/oeb/polish/main.py +++ b/src/calibre/ebooks/oeb/polish/main.py @@ -53,7 +53,7 @@ changes needed for the desired effect.

    Embed all fonts that are referenced in the document and are not already embedded. This will scan your computer for the fonts, and if they are found, they will be embedded into the document.

    -

    Please ensure that you have the proper license for embedding the fonts used in this book.

    +

    Please ensure that you have the proper license for embedding the fonts used in this document.

    '''), 'subset': _('''\ From f5db5d9c00fcbc47358d992b7e9d8b62ca7fbf76 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 2 Jul 2013 18:37:21 +0530 Subject: [PATCH 41/57] Driver for Trekstor Pyrus Maxi Fixes #1196931 [Device not recogized](https://bugs.launchpad.net/calibre/+bug/1196931) --- src/calibre/devices/misc.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/calibre/devices/misc.py b/src/calibre/devices/misc.py index b20ec3ca6e..e35db8f03d 100644 --- a/src/calibre/devices/misc.py +++ b/src/calibre/devices/misc.py @@ -227,16 +227,17 @@ class TREKSTOR(USBMS): VENDOR_ID = [0x1e68] PRODUCT_ID = [0x0041, 0x0042, 0x0052, 0x004e, 0x0056, 0x0067, # This is for the Pyrus Mini + 0x006f, # This is for the Pyrus Maxi 0x003e, # This is for the EBOOK_PLAYER_5M https://bugs.launchpad.net/bugs/792091 0x5cL, # This is for the 4ink http://www.mobileread.com/forums/showthread.php?t=191318 ] - BCD = [0x0002, 0x100] + BCD = [0x0002, 0x100, 0x0222] EBOOK_DIR_MAIN = 'Ebooks' VENDOR_NAME = 'TREKSTOR' WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['EBOOK_PLAYER_7', - 'EBOOK_PLAYER_5M', 'EBOOK-READER_3.0', 'EREADER_PYRUS', 'PYRUS_MINI'] + 'EBOOK_PLAYER_5M', 'EBOOK-READER_3.0', 'EREADER_PYRUS', 'PYRUS_MINI', 'PYRUS_MAXI'] SUPPORTS_SUB_DIRS = True SUPPORTS_SUB_DIRS_DEFAULT = False From e8839bc8dc5bbf96ead2c12bc303c99ed9447c88 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 3 Jul 2013 08:12:33 +0530 Subject: [PATCH 42/57] Prefix version tags with v --- setup/publish.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup/publish.py b/setup/publish.py index ac56942273..40d002ee8a 100644 --- a/setup/publish.py +++ b/setup/publish.py @@ -113,6 +113,6 @@ class TagRelease(Command): def run(self, opts): self.info('Tagging release') - subprocess.check_call('git tag -a {0} -m "version-{0}"'.format(__version__).split()) - subprocess.check_call('git push origin {0}'.format(__version__).split()) + subprocess.check_call('git tag -a v{0} -m "version-{0}"'.format(__version__).split()) + subprocess.check_call('git push origin v{0}'.format(__version__).split()) From 3b4094a890120df33b78d073bfa61b41261d30ff Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 3 Jul 2013 10:58:48 +0530 Subject: [PATCH 43/57] DOCX: Handle hyperlinks created as fields See https://bugs.launchpad.net/calibre/+bug/1196728 for an example. --- src/calibre/ebooks/docx/fields.py | 106 +++++++++++++++++++++++++++++ src/calibre/ebooks/docx/to_html.py | 22 ++++++ 2 files changed, 128 insertions(+) create mode 100644 src/calibre/ebooks/docx/fields.py diff --git a/src/calibre/ebooks/docx/fields.py b/src/calibre/ebooks/docx/fields.py new file mode 100644 index 0000000000..9b0d053cd0 --- /dev/null +++ b/src/calibre/ebooks/docx/fields.py @@ -0,0 +1,106 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2013, Kovid Goyal ' + +import re + +from calibre.ebooks.docx.names import XPath, get + +class Field(object): + + def __init__(self, start): + self.start = start + self.end = None + self.contents = [] + self.instructions = [] + + def add_instr(self, elem): + raw = elem.text + if not raw: + return + name, rest = raw.strip().partition(' ')[0::2] + self.instructions.append((name, rest.strip())) + +WORD, FLAG = 0, 1 +scanner = re.Scanner([ + (r'\\\S{1}', lambda s, t: (t, FLAG)), # A flag of the form \x + (r'"[^"]*"', lambda s, t: (t[1:-1], WORD)), # Quoted word + (r'[^\s\\"]\S*', lambda s, t: (t, WORD)), # A non-quoted word, must not start with a backslash or a space or a quote + (r'\s+', None), +], flags=re.DOTALL) + + +def parse_hyperlink(raw, log): + ans = {} + last_option = None + for token, token_type in scanner.scan(raw)[0]: + if not ans: + if token_type is not WORD: + log('Invalid hyperlink, first token is not a URL (%s)' % raw) + return ans + ans['url'] = token + if token_type is FLAG: + last_option = {'l':'anchor', 'm':'image-map', 'n':'target', 'o':'title', 't':'target'}.get(token[1], None) + if last_option is not None: + ans[last_option] = None + elif token_type is WORD: + if last_option is not None: + ans[last_option] = token + return ans + + +class Fields(object): + + def __init__(self): + self.fields = [] + + def __call__(self, doc, log): + stack = [] + for elem in XPath( + '//*[name()="w:p" or name()="w:r" or name()="w:instrText" or (name()="w:fldChar" and (@w:fldCharType="begin" or @w:fldCharType="end"))]')(doc): + if elem.tag.endswith('}fldChar'): + typ = get(elem, 'w:fldCharType') + if typ == 'begin': + stack.append(Field(elem)) + self.fields.append(stack[-1]) + else: + try: + stack.pop().end = elem + except IndexError: + pass + elif elem.tag.endswith('}instrText'): + if stack: + stack[-1].add_instr(elem) + else: + if stack: + stack[-1].contents.append(elem) + + # Parse hyperlink fields + self.hyperlink_fields = [] + for field in self.fields: + if len(field.instructions) == 1 and field.instructions[0][0] == 'HYPERLINK': + hl = parse_hyperlink(field.instructions[0][1], log) + if hl: + if 'target' in hl and hl['target'] is None: + hl['target'] = '_blank' + all_runs = [] + current_runs = [] + # We only handle spans in a single paragraph + # being wrapped in + for x in field.contents: + if x.tag.endswith('}p'): + if current_runs: + all_runs.append(current_runs) + current_runs = [] + elif x.tag.endswith('}r'): + current_runs.append(x) + if current_runs: + all_runs.append(current_runs) + for runs in all_runs: + self.hyperlink_fields.append((hl, runs)) + + diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py index 79020d9c0a..647b021205 100644 --- a/src/calibre/ebooks/docx/to_html.py +++ b/src/calibre/ebooks/docx/to_html.py @@ -26,6 +26,7 @@ from calibre.ebooks.docx.footnotes import Footnotes from calibre.ebooks.docx.cleanup import cleanup_markup from calibre.ebooks.docx.theme import Theme from calibre.ebooks.docx.toc import create_toc +from calibre.ebooks.docx.fields import Fields from calibre.ebooks.metadata.opf2 import OPFCreator from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1 @@ -52,6 +53,7 @@ class Convert(object): self.body = BODY() self.theme = Theme() self.tables = Tables() + self.fields = Fields() self.styles = Styles(self.tables) self.images = Images() self.object_map = OrderedDict() @@ -79,6 +81,7 @@ class Convert(object): def __call__(self): doc = self.docx.document relationships_by_id, relationships_by_type = self.docx.document_relationships + self.fields(doc, self.log) self.read_styles(relationships_by_type) self.images(relationships_by_id) self.layers = OrderedDict() @@ -396,6 +399,25 @@ class Convert(object): # hrefs that point nowhere give epubcheck a hernia. The element # should be styled explicitly by Word anyway. # span.set('href', '#') + rmap = {v:k for k, v in self.object_map.iteritems()} + for hyperlink, runs in self.fields.hyperlink_fields: + spans = [rmap[r] for r in runs if r in rmap] + if not spans: + continue + if len(spans) > 1: + span = self.wrap_elems(spans, SPAN()) + span.tag = 'a' + tgt = hyperlink.get('target', None) + if tgt: + span.set('target', tgt) + tt = hyperlink.get('title', None) + if tt: + span.set('title', tt) + url = hyperlink['url'] + if url in self.anchor_map: + span.set('href', '#' + self.anchor_map[url]) + continue + span.set('href', url) def convert_run(self, run): ans = SPAN() From 80f3e7f8674a9069ca6894e033d9afe279c44e96 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 3 Jul 2013 11:23:26 +0530 Subject: [PATCH 44/57] DOCX: Insert page breaks at the start of every new section See https://bugs.launchpad.net/calibre/+bug/1196728 for an example --- src/calibre/ebooks/docx/styles.py | 5 +++++ src/calibre/ebooks/docx/to_html.py | 8 +++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/docx/styles.py b/src/calibre/ebooks/docx/styles.py index 21f45616fa..4572eb59f2 100644 --- a/src/calibre/ebooks/docx/styles.py +++ b/src/calibre/ebooks/docx/styles.py @@ -403,6 +403,11 @@ class Styles(object): ps.margin_top = 0 last_para = p + def apply_section_page_breaks(self, paras): + for p in paras: + ps = self.resolve_paragraph(p) + ps.pageBreakBefore = True + def register(self, css, prefix): h = hash(frozenset(css.iteritems())) ans, _ = self.classes.get(h, (None, None)) diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py index 647b021205..1fdd24267a 100644 --- a/src/calibre/ebooks/docx/to_html.py +++ b/src/calibre/ebooks/docx/to_html.py @@ -100,6 +100,9 @@ class Convert(object): self.body.append(p) paras.append(wp) self.styles.apply_contextual_spacing(paras) + # Apply page breaks at the start of every section, except the first + # section (since that will be the start of the file) + self.styles.apply_section_page_breaks(self.section_starts[1:]) notes_header = None if self.footnotes.has_notes: @@ -180,6 +183,7 @@ class Convert(object): def read_page_properties(self, doc): current = [] self.page_map = OrderedDict() + self.section_starts = [] for p in descendants(doc, 'w:p', 'w:tbl'): if p.tag.endswith('}tbl'): @@ -189,8 +193,10 @@ class Convert(object): sect = tuple(descendants(p, 'w:sectPr')) if sect: pr = PageProperties(sect) - for x in current + [p]: + paras = current + [p] + for x in paras: self.page_map[x] = pr + self.section_starts.append(paras[0]) current = [] else: current.append(p) From 584beceee347f2a18c70728bdd4830381fabe85c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 3 Jul 2013 11:24:12 +0530 Subject: [PATCH 45/57] DOCX: handle bookmarks defined at the paragraph level See https://bugs.launchpad.net/calibre/+bug/1196728 for an example. --- src/calibre/ebooks/docx/to_html.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py index 1fdd24267a..fae521d807 100644 --- a/src/calibre/ebooks/docx/to_html.py +++ b/src/calibre/ebooks/docx/to_html.py @@ -99,6 +99,7 @@ class Convert(object): p = self.convert_p(wp) self.body.append(p) paras.append(wp) + self.read_block_anchors(doc) self.styles.apply_contextual_spacing(paras) # Apply page breaks at the start of every section, except the first # section (since that will be the start of the file) @@ -296,6 +297,22 @@ class Convert(object): opf.render(of, ncx, 'toc.ncx') return os.path.join(self.dest_dir, 'metadata.opf') + def read_block_anchors(self, doc): + doc_anchors = frozenset(XPath('./w:body/w:bookmarkStart[@w:name]')(doc)) + if doc_anchors: + current_bm = None + rmap = {v:k for k, v in self.object_map.iteritems()} + for p in descendants(doc, 'w:p', 'w:bookmarkStart[@w:name]'): + if p.tag.endswith('}p'): + if current_bm and p in rmap: + para = rmap[p] + if 'id' not in para.attrib: + para.set('id', generate_anchor(current_bm, frozenset(self.anchor_map.itervalues()))) + self.anchor_map[current_bm] = para.get('id') + current_bm = None + elif p in doc_anchors: + current_bm = get(p, 'w:name') + def convert_p(self, p): dest = P() self.object_map[dest] = p From 26e23ac7a6f793bde281517ea8ade1e4c878263f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 3 Jul 2013 12:15:55 +0530 Subject: [PATCH 46/57] Splitting: Handle the tail of the split point correctly EPUB/AZW3 Output: Fix splitting on page-break-after with plain text immediately following the split point causing the text to be added before rather than after the split point. --- src/calibre/ebooks/oeb/transforms/split.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/calibre/ebooks/oeb/transforms/split.py b/src/calibre/ebooks/oeb/transforms/split.py index 605a58a31f..36fe6b3167 100644 --- a/src/calibre/ebooks/oeb/transforms/split.py +++ b/src/calibre/ebooks/oeb/transforms/split.py @@ -339,6 +339,8 @@ class FlowSplitter(object): # We want to keep the descendants of the split point in # Tree 1 keep_descendants = True + # We want the split point element, but not its tail + elem.tail = '\n' continue if hit_split_point: @@ -357,6 +359,18 @@ class FlowSplitter(object): for elem in tuple(body2.iterdescendants()): if elem is split_point2: if not before: + # Keep the split point element's tail, if it contains non-whitespace + # text + tail = elem.tail + if tail and not tail.isspace(): + parent = elem.getparent() + idx = parent.index(elem) + if idx == 0: + parent.text = (parent.text or '') + tail + else: + sib = parent[idx-1] + sib.tail = (sib.tail or '') + tail + # Remove the element itself nix_element(elem) break if elem in ancestors: From bbaf2ff574d19981e79a6bd2fcd6bf147a44d4ac Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 3 Jul 2013 13:05:47 +0530 Subject: [PATCH 47/57] DOCX: Hyperlinked images DOCX Input: Add support for clickable (hyperlinked) images --- src/calibre/ebooks/docx/container.py | 2 +- src/calibre/ebooks/docx/images.py | 19 ++++++++++++++++--- src/calibre/ebooks/docx/to_html.py | 21 +++++++++++++++++++++ 3 files changed, 38 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/docx/container.py b/src/calibre/ebooks/docx/container.py index 68f74a3c82..deaf5bd4d0 100644 --- a/src/calibre/ebooks/docx/container.py +++ b/src/calibre/ebooks/docx/container.py @@ -183,7 +183,7 @@ class DOCX(object): root = fromstring(raw) for item in root.xpath('//*[local-name()="Relationships"]/*[local-name()="Relationship" and @Type and @Target]'): target = item.get('Target') - if item.get('TargetMode', None) != 'External': + if item.get('TargetMode', None) != 'External' and not target.startswith('#'): target = '/'.join((base, target.lstrip('/'))) typ = item.get('Type') Id = item.get('Id') diff --git a/src/calibre/ebooks/docx/images.py b/src/calibre/ebooks/docx/images.py index b0a5348d90..3be3d51c05 100644 --- a/src/calibre/ebooks/docx/images.py +++ b/src/calibre/ebooks/docx/images.py @@ -96,6 +96,7 @@ class Images(object): self.used = {} self.names = set() self.all_images = set() + self.links = [] def __call__(self, relationships_by_id): self.rid_map = relationships_by_id @@ -125,8 +126,18 @@ class Images(object): self.all_images.add('images/' + name) return name - def pic_to_img(self, pic, alt=None): + def pic_to_img(self, pic, alt, parent): name = None + link = None + for hl in XPath('descendant::a:hlinkClick[@r:id]')(parent): + link = {'id':get(hl, 'r:id')} + tgt = hl.get('tgtFrame', None) + if tgt: + link['target'] = tgt + title = hl.get('tooltip', None) + if title: + link['title'] = title + for pr in XPath('descendant::pic:cNvPr')(pic): name = pr.get('name', None) if name: @@ -138,6 +149,8 @@ class Images(object): src = self.generate_filename(rid, name) img = IMG(src='images/%s' % src) img.set('alt', alt or 'Image') + if link is not None: + self.links.append((img, link)) return img def drawing_to_html(self, drawing, page): @@ -145,7 +158,7 @@ class Images(object): for inline in XPath('./wp:inline')(drawing): style, alt = get_image_properties(inline) for pic in XPath('descendant::pic:pic')(inline): - ans = self.pic_to_img(pic, alt) + ans = self.pic_to_img(pic, alt, inline) if ans is not None: if style: ans.set('style', '; '.join('%s: %s' % (k, v) for k, v in style.iteritems())) @@ -156,7 +169,7 @@ class Images(object): style, alt = get_image_properties(anchor) self.get_float_properties(anchor, style, page) for pic in XPath('descendant::pic:pic')(anchor): - ans = self.pic_to_img(pic, alt) + ans = self.pic_to_img(pic, alt, anchor) if ans is not None: if style: ans.set('style', '; '.join('%s: %s' % (k, v) for k, v in style.iteritems())) diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py index fae521d807..26e50f9b9d 100644 --- a/src/calibre/ebooks/docx/to_html.py +++ b/src/calibre/ebooks/docx/to_html.py @@ -442,6 +442,27 @@ class Convert(object): continue span.set('href', url) + for img, link in self.images.links: + parent = img.getparent() + idx = parent.index(img) + a = A(img) + a.tail, img.tail = img.tail, None + parent.insert(idx, a) + tgt = link.get('target', None) + if tgt: + a.set('target', tgt) + tt = link.get('title', None) + if tt: + a.set('title', tt) + rid = link['id'] + if rid in relationships_by_id: + dest = relationships_by_id[rid] + if dest.startswith('#'): + if dest[1:] in self.anchor_map: + a.set('href', '#' + self.anchor_map[dest[1:]]) + else: + a.set('href', dest) + def convert_run(self, run): ans = SPAN() self.object_map[ans] = run From c8c3741d342cd644781137bc26811f08e1efcb0d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 3 Jul 2013 13:36:30 +0530 Subject: [PATCH 48/57] DOCX: Handle redundant bookmarks DOCX Input: Fix links pointing to locations in the same document that contain multiple, redundant bookmarks not working. --- src/calibre/ebooks/docx/to_html.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py index 26e50f9b9d..01808657ea 100644 --- a/src/calibre/ebooks/docx/to_html.py +++ b/src/calibre/ebooks/docx/to_html.py @@ -342,7 +342,13 @@ class Convert(object): elif x.tag.endswith('}bookmarkStart'): anchor = get(x, 'w:name') if anchor and anchor not in self.anchor_map: + old_anchor = current_anchor self.anchor_map[anchor] = current_anchor = generate_anchor(anchor, frozenset(self.anchor_map.itervalues())) + if old_anchor is not None: + # The previous anchor was not applied to any element + for a, t in tuple(self.anchor_map.iteritems()): + if t == old_anchor: + self.anchor_map[a] = current_anchor elif x.tag.endswith('}hyperlink'): current_hyperlink = x From 36386d06cc954909a173736f1e08a6460084ef0f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 3 Jul 2013 14:42:08 +0530 Subject: [PATCH 49/57] PDF Output: Fix mangling of links PDF Output: Fix links that point to URLs with query parameters being mangled by the conversion process. Fixes #1197006 [Broken links in PDF in Adobe reader.](https://bugs.launchpad.net/calibre/+bug/1197006) --- src/calibre/ebooks/pdf/render/links.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/calibre/ebooks/pdf/render/links.py b/src/calibre/ebooks/pdf/render/links.py index 500bbbf6c1..6ec7c500a8 100644 --- a/src/calibre/ebooks/pdf/render/links.py +++ b/src/calibre/ebooks/pdf/render/links.py @@ -8,9 +8,8 @@ __copyright__ = '2012, Kovid Goyal ' __docformat__ = 'restructuredtext en' import os -from future_builtins import map -from urlparse import urlparse, urlunparse -from urllib2 import quote, unquote +from urlparse import urlparse +from urllib2 import unquote from calibre.ebooks.pdf.render.common import Array, Name, Dictionary, String @@ -84,10 +83,6 @@ class Links(object): action = Dictionary({ 'Type':Name('Action'), 'S':Name('URI'), }) - parts = (x.encode('utf-8') if isinstance(x, type(u'')) else - x for x in purl) - url = urlunparse(map(quote, map(unquote, - parts))).decode('ascii') action['URI'] = String(url) annot['A'] = action if 'A' in annot or 'Dest' in annot: From a149790b93f554038fb6b28a2dfe229d3ac9c9c5 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 3 Jul 2013 15:17:46 +0530 Subject: [PATCH 50/57] ... --- src/calibre/ebooks/pdf/render/links.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/calibre/ebooks/pdf/render/links.py b/src/calibre/ebooks/pdf/render/links.py index 6ec7c500a8..4d0d588771 100644 --- a/src/calibre/ebooks/pdf/render/links.py +++ b/src/calibre/ebooks/pdf/render/links.py @@ -83,6 +83,8 @@ class Links(object): action = Dictionary({ 'Type':Name('Action'), 'S':Name('URI'), }) + # Do not try to normalize/quote/unquote this URL as if it + # has a query part, it will get corrupted action['URI'] = String(url) annot['A'] = action if 'A' in annot or 'Dest' in annot: From a3adb69d944dc2535895eedf9d0159302a07e839 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 3 Jul 2013 21:18:32 +0530 Subject: [PATCH 51/57] Metadata download dialog: Prevent the buttons from being re-ordered when the Next button is clicked. --- src/calibre/gui2/metadata/single_download.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/calibre/gui2/metadata/single_download.py b/src/calibre/gui2/metadata/single_download.py index 3e9bb87687..ed378745a5 100644 --- a/src/calibre/gui2/metadata/single_download.py +++ b/src/calibre/gui2/metadata/single_download.py @@ -1006,7 +1006,7 @@ class FullFetch(QDialog): # {{{ l.addWidget(self.bb) self.bb.rejected.connect(self.reject) self.bb.accepted.connect(self.accept) - self.next_button = self.bb.addButton(_('Next'), self.bb.AcceptRole) + self.next_button = self.bb.addButton(_('Next'), self.bb.ActionRole) self.next_button.setDefault(True) self.next_button.setEnabled(False) self.next_button.setIcon(QIcon(I('ok.png'))) @@ -1019,7 +1019,7 @@ class FullFetch(QDialog): # {{{ self.log_button = self.bb.addButton(_('View log'), self.bb.ActionRole) self.log_button.clicked.connect(self.view_log) self.log_button.setIcon(QIcon(I('debug.png'))) - self.ok_button.setVisible(False) + self.ok_button.setEnabled(False) self.prev_button.setVisible(False) self.identify_widget = IdentifyWidget(self.log, self) @@ -1044,7 +1044,7 @@ class FullFetch(QDialog): # {{{ def book_selected(self, book, caches): self.next_button.setVisible(False) - self.ok_button.setVisible(True) + self.ok_button.setEnabled(True) self.prev_button.setVisible(True) self.book = book self.stack.setCurrentIndex(1) @@ -1055,8 +1055,9 @@ class FullFetch(QDialog): # {{{ def back_clicked(self): self.next_button.setVisible(True) - self.ok_button.setVisible(False) + self.ok_button.setEnabled(False) self.prev_button.setVisible(False) + self.next_button.setFocus() self.stack.setCurrentIndex(0) self.covers_widget.cancel() self.covers_widget.reset_covers() @@ -1081,6 +1082,7 @@ class FullFetch(QDialog): # {{{ self.next_button.setEnabled(True) def next_clicked(self, *args): + gprefs['metadata_single_gui_geom'] = bytearray(self.saveGeometry()) self.identify_widget.get_result() def ok_clicked(self, *args): From 509cc82d805e7c49057ddba6aaf1109e982b7226 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 3 Jul 2013 21:58:44 +0530 Subject: [PATCH 52/57] Allow running python setup.py develop froma git checkout --- setup/install.py | 2 +- setup/translations.py | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/setup/install.py b/setup/install.py index b1698d88ed..d1bb2058f1 100644 --- a/setup/install.py +++ b/setup/install.py @@ -56,7 +56,7 @@ class Develop(Command): short_description = 'Setup a development environment for calibre' MODE = 0o755 - sub_commands = ['build', 'resources', 'iso639', 'gui',] + sub_commands = ['build', 'resources', 'gui',] def add_postinstall_options(self, parser): parser.add_option('--make-errors-fatal', action='store_true', default=False, diff --git a/setup/translations.py b/setup/translations.py index 17c8d10018..e8b0fecdf1 100644 --- a/setup/translations.py +++ b/setup/translations.py @@ -21,7 +21,12 @@ def qt_sources(): class POT(Command): # {{{ description = 'Update the .pot translation template and upload it' - LP_BASE = os.path.join(os.path.dirname(os.path.dirname(Command.SRC)), 'calibre-translations') + LP_BASE = os.path.join(os.path.dirname(Command.SRC)) + if not os.path.exists(os.path.join(LP_BASE, 'setup', 'iso_639')): + # We are in a git checkout, translations are assumed to be in a + # directory called calibre-translations at the same level as the + # calibre directory. + LP_BASE = os.path.join(os.path.dirname(os.path.dirname(Command.SRC)), 'calibre-translations') LP_SRC = os.path.join(LP_BASE, 'src') LP_PATH = os.path.join(LP_SRC, os.path.join(__appname__, 'translations')) LP_ISO_PATH = os.path.join(LP_BASE, 'setup', 'iso_639') From bd4e828668d2e76dae900ce3530db27482787dda Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 3 Jul 2013 22:23:49 +0530 Subject: [PATCH 53/57] Restore building iso639 in develop as it is needed --- setup/install.py | 2 +- setup/iso_639_3.xml | 39178 ++++++++++++++++++++++++++++++++++++++++ setup/translations.py | 11 +- 3 files changed, 39186 insertions(+), 5 deletions(-) create mode 100644 setup/iso_639_3.xml diff --git a/setup/install.py b/setup/install.py index d1bb2058f1..b1698d88ed 100644 --- a/setup/install.py +++ b/setup/install.py @@ -56,7 +56,7 @@ class Develop(Command): short_description = 'Setup a development environment for calibre' MODE = 0o755 - sub_commands = ['build', 'resources', 'gui',] + sub_commands = ['build', 'resources', 'iso639', 'gui',] def add_postinstall_options(self, parser): parser.add_option('--make-errors-fatal', action='store_true', default=False, diff --git a/setup/iso_639_3.xml b/setup/iso_639_3.xml new file mode 100644 index 0000000000..6b94a3850b --- /dev/null +++ b/setup/iso_639_3.xml @@ -0,0 +1,39178 @@ + + + + + + + +]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/setup/translations.py b/setup/translations.py index e8b0fecdf1..786d44a6d6 100644 --- a/setup/translations.py +++ b/setup/translations.py @@ -322,21 +322,24 @@ class GetTranslations(Translations): # {{{ class ISO639(Command): # {{{ - description = 'Compile translations for ISO 639 codes' + description = 'Compile language code maps for performance' DEST = os.path.join(os.path.dirname(POT.SRC), 'resources', 'localization', 'iso639.pickle') def run(self, opts): - src = POT.LP_ISO_PATH + src = self.j(self.d(self.SRC), 'setup', 'iso_639_3.xml') if not os.path.exists(src): raise Exception(src + ' does not exist') dest = self.DEST + base = self.d(dest) + if not os.path.exists(base): + os.makedirs(base) if not self.newer(dest, [src, __file__]): self.info('Pickled code is up to date') return self.info('Pickling ISO-639 codes to', dest) from lxml import etree - root = etree.fromstring(open(self.j(src, 'iso_639_3.xml'), 'rb').read()) + root = etree.fromstring(open(src, 'rb').read()) by_2 = {} by_3b = {} by_3t = {} @@ -350,7 +353,7 @@ class ISO639(Command): # {{{ threet = x.get('id') threeb = x.get('part2_code', None) if threeb is None: - # Only recognize langauges in ISO-639-2 + # Only recognize languages in ISO-639-2 continue name = x.get('name') From 32fa93d5847ab3daf3ec0c9d2ade969b812c827d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Thu, 4 Jul 2013 00:02:19 +0200 Subject: [PATCH 54/57] update Woblink plugin for website changes --- .../gui2/store/stores/woblink_plugin.py | 28 ++++++------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/src/calibre/gui2/store/stores/woblink_plugin.py b/src/calibre/gui2/store/stores/woblink_plugin.py index 9b99271192..6eb598d65b 100644 --- a/src/calibre/gui2/store/stores/woblink_plugin.py +++ b/src/calibre/gui2/store/stores/woblink_plugin.py @@ -56,20 +56,20 @@ class WoblinkStore(BasicStoreConfig, StorePlugin): counter = max_results with closing(br.open(url, timeout=timeout)) as f: doc = html.fromstring(f.read()) - for data in doc.xpath('//div[@class="book-item backgroundmix"]'): + for data in doc.xpath('//div[@class="nw_katalog_lista_ksiazka"]'): if counter <= 0: break - id = ''.join(data.xpath('.//td[@class="w10 va-t mYHaveItYes"]/a[1]/@href')) + id = ''.join(data.xpath('.//div[@class="nw_katalog_lista_ksiazka_okladka nw_okladka"]/a[1]/@href')) if not id: continue - cover_url = ''.join(data.xpath('.//td[@class="w10 va-t mYHaveItYes"]/a[1]/img/@src')) - title = ''.join(data.xpath('.//h2[@class="title"]/a[1]/text()')) - author = ', '.join(data.xpath('.//td[@class="va-t"]/h3/a/text()')) - price = ''.join(data.xpath('.//div[@class="prices"]/span[1]/strong/span/text()')) + cover_url = ''.join(data.xpath('.//div[@class="nw_katalog_lista_ksiazka_okladka nw_okladka"]/a[1]/img/@src')) + title = ''.join(data.xpath('.//h2[@class="nw_katalog_lista_ksiazka_detale_tytul"]/a[1]/text()')) + author = ', '.join(data.xpath('.//h3[@class="nw_katalog_lista_ksiazka_detale_autor"]/a/text()')) + price = ''.join(data.xpath('.//div[@class="nw_katalog_lista_ksiazka_opcjezakupu_cena"]/span/text()')) price = re.sub('\.', ',', price) - formats = [ form[8:-4].split('.')[0] for form in data.xpath('.//p[3]/img/@src')] + formats = ', '.join(data.xpath('.//p[@class="nw_katalog_lista_ksiazka_detale_formaty"]/span/text()')) s = SearchResult() s.cover_url = 'http://woblink.com' + cover_url @@ -77,25 +77,15 @@ class WoblinkStore(BasicStoreConfig, StorePlugin): s.author = author.strip() s.price = price + ' zł' s.detail_item = id.strip() + s.formats = formats - if 'epub_drm' in formats: + if 'EPUB DRM' in formats: s.drm = SearchResult.DRM_LOCKED - s.formats = 'EPUB' - - counter -= 1 - yield s - elif 'pdf' in formats: - s.drm = SearchResult.DRM_LOCKED - s.formats = 'PDF' counter -= 1 yield s else: s.drm = SearchResult.DRM_UNLOCKED - if 'MOBI_nieb' in formats: - formats.remove('MOBI_nieb') - formats.append('MOBI') - s.formats = ', '.join(formats).upper() counter -= 1 yield s From 2aa8f23d6c0b3ab585897d148c086899d80d02c6 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 4 Jul 2013 09:13:37 +0530 Subject: [PATCH 55/57] When parsing fields, handle escapes --- src/calibre/ebooks/docx/fields.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/calibre/ebooks/docx/fields.py b/src/calibre/ebooks/docx/fields.py index 9b0d053cd0..91dcd87596 100644 --- a/src/calibre/ebooks/docx/fields.py +++ b/src/calibre/ebooks/docx/fields.py @@ -37,7 +37,9 @@ scanner = re.Scanner([ def parse_hyperlink(raw, log): ans = {} last_option = None + raw = raw.replace('\\\\', '\x01').replace('\\"', '\x02') for token, token_type in scanner.scan(raw)[0]: + token = token.replace('\x01', '\\').replace('\x02', '"') if not ans: if token_type is not WORD: log('Invalid hyperlink, first token is not a URL (%s)' % raw) From 852bd4945394242940948e4ed2e0efb759445ce4 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 4 Jul 2013 09:54:53 +0530 Subject: [PATCH 56/57] MOBI Input: Fix regression parsing ' MOBI Input: Fix a regression that broke parsing of MOBI files with malformed markup that also used entities for apostrophes. Fixes #1197585 [Private bug](https://bugs.launchpad.net/calibre/+bug/1197585) --- src/calibre/utils/soupparser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/utils/soupparser.py b/src/calibre/utils/soupparser.py index 403f57baad..efbcf8b970 100644 --- a/src/calibre/utils/soupparser.py +++ b/src/calibre/utils/soupparser.py @@ -62,7 +62,7 @@ def _parse(source, beautifulsoup, makeelement, **bsargs): if makeelement is None: makeelement = html.html_parser.makeelement if 'convertEntities' not in bsargs: - bsargs['convertEntities'] = 'html' + bsargs['convertEntities'] = 'xhtml' # Changed by Kovid, otherwise ' is mangled, see https://bugs.launchpad.net/calibre/+bug/1197585 tree = beautifulsoup(source, **bsargs) root = _convert_tree(tree, makeelement) # from ET: wrap the document in a html root element, if necessary From ef4efd57688b3cebe801557a6fe28f9997af04a5 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 4 Jul 2013 11:11:22 +0530 Subject: [PATCH 57/57] Refactor creation of hardlinks on windows After creating the hardlink, open and close the file, to ensure that the directory entry for the file contains the correct file size, see http://blogs.msdn.com/b/oldnewthing/archive/2011/12/26/10251026.aspx --- src/calibre/utils/filenames.py | 45 ++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/src/calibre/utils/filenames.py b/src/calibre/utils/filenames.py index 8c4b7edfe5..54ce568539 100644 --- a/src/calibre/utils/filenames.py +++ b/src/calibre/utils/filenames.py @@ -70,9 +70,11 @@ def shorten_components_to(length, components, more_to_take=0): else: if x is components[-1]: b, e = os.path.splitext(x) - if e == '.': e = '' + if e == '.': + e = '' r = shorten_component(b, delta)+e - if r.startswith('.'): r = x[0]+r + if r.startswith('.'): + r = x[0]+r else: r = shorten_component(x, delta) r = r.strip() @@ -115,7 +117,7 @@ def is_case_sensitive(path): os.remove(f1) return is_case_sensitive -def case_preserving_open_file(path, mode='wb', mkdir_mode=0777): +def case_preserving_open_file(path, mode='wb', mkdir_mode=0o777): ''' Open the file pointed to by path with the specified mode. If any directories in path do not exist, they are created. Returns the @@ -211,7 +213,8 @@ def samefile_windows(src, dst): handles = [] def get_fileid(x): - if isbytestring(x): x = x.decode(filesystem_encoding) + if isbytestring(x): + x = x.decode(filesystem_encoding) try: h = win32file.CreateFile(x, 0, 0, None, win32file.OPEN_EXISTING, win32file.FILE_FLAG_BACKUP_SEMANTICS, 0) @@ -254,6 +257,24 @@ def samefile(src, dst): os.path.normcase(os.path.abspath(dst))) return samestring +def windows_hardlink(src, dest): + import win32file, pywintypes + msg = u'Creating hardlink from %s to %s failed: %%s' % (src, dest) + try: + win32file.CreateHardLink(dest, src) + except pywintypes.error as e: + raise Exception(msg % e) + # We open and close dest, to ensure its directory entry is updated + # see http://blogs.msdn.com/b/oldnewthing/archive/2011/12/26/10251026.aspx + h = win32file.CreateFile( + dest, 0, win32file.FILE_SHARE_READ | win32file.FILE_SHARE_WRITE | win32file.FILE_SHARE_DELETE, + None, win32file.OPEN_EXISTING, 0, None) + sz = win32file.GetFileSize(h) + win32file.CloseHandle(h) + + if sz != os.path.getsize(src): + raise Exception(msg % ('hardlink size: %d not the same as source size' % sz)) + class WindowsAtomicFolderMove(object): ''' @@ -270,14 +291,16 @@ class WindowsAtomicFolderMove(object): import win32file, winerror from pywintypes import error - if isbytestring(path): path = path.decode(filesystem_encoding) + if isbytestring(path): + path = path.decode(filesystem_encoding) if not os.path.exists(path): return for x in os.listdir(path): f = os.path.normcase(os.path.abspath(os.path.join(path, x))) - if not os.path.isfile(f): continue + if not os.path.isfile(f): + continue try: # Ensure the file is not read-only win32file.SetFileAttributes(f, win32file.FILE_ATTRIBUTE_NORMAL) @@ -315,9 +338,7 @@ class WindowsAtomicFolderMove(object): else: raise ValueError(u'The file %r does not exist'%path) try: - win32file.CreateHardLink(dest, path) - if os.path.getsize(dest) != os.path.getsize(path): - raise Exception('This apparently can happen on network shares. Sigh.') + windows_hardlink(path, dest) return except: pass @@ -355,10 +376,8 @@ class WindowsAtomicFolderMove(object): def hardlink_file(src, dest): if iswindows: - import win32file - win32file.CreateHardLink(dest, src) - if os.path.getsize(dest) != os.path.getsize(src): - raise Exception('This apparently can happen on network shares. Sigh.') + windows_hardlink(src, dest) return os.link(src, dest) +