This commit is contained in:
Sengian 2010-12-05 23:59:53 +01:00
commit 085e0af936
50 changed files with 1359 additions and 502 deletions

View File

@ -217,3 +217,15 @@ generate_cover_foot_font = None
# open_viewer, do_nothing, edit_cell. Default: open_viewer.
# Example: doubleclick_on_library_view = 'do_nothing'
doubleclick_on_library_view = 'open_viewer'
# Language to use when sorting. Setting this tweak will force sorting to use the
# collating order for the specified language. This might be useful if you run
# calibre in English but want sorting to work in the language where you live.
# Set the tweak to the desired ISO 639-1 language code, in lower case.
# You can find the list of supported locales at
# http://publib.boulder.ibm.com/infocenter/iseries/v5r3/topic/nls/rbagsicusortsequencetables.htm
# Default: locale_for_sorting = '' -- use the language calibre displays in
# Example: locale_for_sorting = 'fr' -- sort using French rules.
# Example: locale_for_sorting = 'nb' -- sort using Norwegian rules.
locale_for_sorting = ''

View File

@ -91,11 +91,15 @@ podofo_inc = '/usr/include/podofo'
podofo_lib = '/usr/lib'
chmlib_inc_dirs = chmlib_lib_dirs = []
sqlite_inc_dirs = []
icu_inc_dirs = []
icu_lib_dirs = []
if iswindows:
prefix = r'C:\cygwin\home\kovid\sw'
sw_inc_dir = os.path.join(prefix, 'include')
sw_lib_dir = os.path.join(prefix, 'lib')
icu_inc_dirs = [sw_inc_dir]
icu_lib_dirs = [sw_lib_dir]
sqlite_inc_dirs = [sw_inc_dir]
fc_inc = os.path.join(sw_inc_dir, 'fontconfig')
fc_lib = sw_lib_dir

View File

@ -18,7 +18,8 @@ from setup.build_environment import fc_inc, fc_lib, chmlib_inc_dirs, \
QMAKE, msvc, MT, win_inc, win_lib, png_inc_dirs, win_ddk, \
magick_inc_dirs, magick_lib_dirs, png_lib_dirs, png_libs, \
magick_error, magick_libs, ft_lib_dirs, ft_libs, jpg_libs, \
jpg_lib_dirs, chmlib_lib_dirs, sqlite_inc_dirs
jpg_lib_dirs, chmlib_lib_dirs, sqlite_inc_dirs, icu_inc_dirs, \
icu_lib_dirs
MT
isunix = islinux or isosx or isfreebsd
@ -56,8 +57,25 @@ pdfreflow_libs = []
if iswindows:
pdfreflow_libs = ['advapi32', 'User32', 'Gdi32', 'zlib']
icu_libs = ['icudata', 'icui18n', 'icuuc', 'icuio']
icu_cflags = []
if iswindows:
icu_libs = ['icudt', 'icuin', 'icuuc', 'icuio']
if isosx:
icu_libs = ['icucore']
icu_cflags = ['-DU_DISABLE_RENAMING'] # Needed to use system libicucore.dylib
extensions = [
Extension('icu',
['calibre/utils/icu.c'],
libraries=icu_libs,
lib_dirs=icu_lib_dirs,
inc_dirs=icu_inc_dirs,
cflags=icu_cflags
),
Extension('sqlite_custom',
['calibre/library/sqlite_custom.c'],
inc_dirs=sqlite_inc_dirs

View File

@ -14,7 +14,8 @@ from setup import Command, modules, basenames, functions, __version__, \
SITE_PACKAGES = ['IPython', 'PIL', 'dateutil', 'dns', 'PyQt4', 'mechanize',
'sip.so', 'BeautifulSoup.py', 'cssutils', 'encutils', 'lxml',
'sipconfig.py', 'xdg']
'sipconfig.py', 'xdg', 'dbus', '_dbus_bindings.so', 'dbus_bindings.py',
'_dbus_glib_bindings.so']
QTDIR = '/usr/lib/qt4'
QTDLLS = ('QtCore', 'QtGui', 'QtNetwork', 'QtSvg', 'QtXml', 'QtWebKit', 'QtDBus')
@ -49,6 +50,10 @@ binary_includes = [
'/lib/libreadline.so.6',
'/usr/lib/libchm.so.0',
'/usr/lib/liblcms2.so.2',
'/usr/lib/libicudata.so.46',
'/usr/lib/libicui18n.so.46',
'/usr/lib/libicuuc.so.46',
'/usr/lib/libicuio.so.46',
]
binary_includes += [os.path.join(QTDIR, 'lib%s.so.4'%x) for x in QTDLLS]

View File

@ -199,7 +199,7 @@ class Win32Freeze(Command, WixMixIn):
for pat in ('*.dll',):
for f in glob.glob(os.path.join(bindir, pat)):
ok = True
for ex in ('expatw',):
for ex in ('expatw', 'testplug'):
if ex in f.lower():
ok = False
if not ok: continue

View File

@ -77,6 +77,15 @@ Test it on the target system with
calibre-debug -c "import _imaging, _imagingmath, _imagingft, _imagingcms"
ICU
-------
Download the win32 msvc9 binary from http://www.icu-project.org/download/4.4.html
Note that 4.4 is the last version of ICU that can be compiled (is precompiled) with msvc9
Put the DLLs into sw/bin, the unicode directory into sw/include, and the contents of lib into sw/lib
Libunrar
----------

View File

@ -67,7 +67,8 @@ if plugins is None:
'pdfreflow',
'progress_indicator',
'chmlib',
'chm_extra'
'chm_extra',
'icu',
] + \
(['winutil'] if iswindows else []) + \
(['usbobserver'] if isosx else []):

View File

@ -13,6 +13,7 @@ from calibre.devices.interface import BookList as _BookList
from calibre.constants import preferred_encoding
from calibre import isbytestring
from calibre.utils.config import prefs, tweaks
from calibre.utils.icu import sort_key
class Book(Metadata):
def __init__(self, prefix, lpath, size=None, other=None):
@ -215,14 +216,17 @@ class CollectionsBookList(BookList):
elif is_series:
if doing_dc:
collections[cat_name][lpath] = \
(book, book.get('series_index', sys.maxint), '')
(book, book.get('series_index', sys.maxint),
book.get('title_sort', 'zzzz'))
else:
collections[cat_name][lpath] = \
(book, book.get(attr+'_index', sys.maxint), '')
(book, book.get(attr+'_index', sys.maxint),
book.get('title_sort', 'zzzz'))
else:
if lpath not in collections[cat_name]:
collections[cat_name][lpath] = \
(book, book.get('title_sort', 'zzzz'), '')
(book, book.get('title_sort', 'zzzz'),
book.get('title_sort', 'zzzz'))
# Sort collections
result = {}
@ -230,14 +234,19 @@ class CollectionsBookList(BookList):
x = xx[1]
y = yy[1]
if x is None and y is None:
# No sort_key needed here, because defaults are ascii
return cmp(xx[2], yy[2])
if x is None:
return 1
if y is None:
return -1
c = cmp(x, y)
if isinstance(x, (unicode, str)):
c = cmp(sort_key(x), sort_key(y))
else:
c = cmp(x, y)
if c != 0:
return c
# same as above -- no sort_key needed here
return cmp(xx[2], yy[2])
for category, lpaths in collections.items():

View File

@ -142,6 +142,9 @@ class EPUBOutput(OutputFormatPlugin):
def convert(self, oeb, output_path, input_plugin, opts, log):
self.log, self.opts, self.oeb = log, opts, oeb
#from calibre.ebooks.oeb.transforms.filenames import UniqueFilenames
#UniqueFilenames()(oeb, opts)
self.workaround_ade_quirks()
self.workaround_webkit_quirks()
self.upshift_markup()

View File

@ -8,15 +8,11 @@ __docformat__ = 'restructuredtext en'
Transform OEB content into FB2 markup
'''
import cStringIO
from base64 import b64encode
from datetime import datetime
from mimetypes import types_map
import re
try:
from PIL import Image
Image
except ImportError:
import Image
import uuid
from lxml import etree
@ -25,40 +21,12 @@ from calibre.constants import __appname__, __version__
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
TAG_MAP = {
'b' : 'strong',
'i' : 'emphasis',
'p' : 'p',
'li' : 'p',
'div': 'p',
'br' : 'p',
}
TAG_SPACE = []
TAG_IMAGES = [
'img',
]
TAG_LINKS = [
'a',
]
BLOCK = [
'p',
]
STYLES = [
('font-weight', {'bold' : 'strong', 'bolder' : 'strong'}),
('font-style', {'italic' : 'emphasis'}),
]
from calibre.utils.magick import Image
class FB2MLizer(object):
'''
Todo: * Ensure all style tags are inside of the p tags.
* Include more FB2 specific tags in the conversion.
* Handle reopening of a tag properly.
Todo: * Include more FB2 specific tags in the conversion.
* Handle a tags.
* Figure out some way to turn oeb_book.toc items into <section><title>
<p> to allow for readers to generate toc from the document.
'''
@ -66,29 +34,36 @@ class FB2MLizer(object):
def __init__(self, log):
self.log = log
self.image_hrefs = {}
self.link_hrefs = {}
self.reset_state()
def reset_state(self):
    """Reset the per-conversion bookkeeping to its initial values."""
    # OEB allows images with the same name to live in different
    # directories, while FB2 stores every image in one flat layout. Images
    # are therefore renamed with a sequential numbering scheme, and this
    # dict maps each original href to its new, collision-free name.
    self.image_hrefs = {}
    # True while a <p> element is open; used to keep text and inline tags
    # inside <p> ... </p>.
    self.in_p = False
def extract_content(self, oeb_book, opts):
self.log.info('Converting XHTML to FB2 markup...')
self.oeb_book = oeb_book
self.opts = opts
return self.fb2mlize_spine()
def fb2mlize_spine(self):
self.image_hrefs = {}
self.link_hrefs = {}
self.reset_state()
output = [self.fb2_header()]
output.append(self.get_cover_page())
output.append(u'ghji87yhjko0Caliblre-toc-placeholder-for-insertion-later8ujko0987yjk')
output.append(self.get_text())
output.append(self.fb2_body_footer())
output.append(self.fb2mlize_images())
output.append(self.fb2_footer())
output = ''.join(output).replace(u'ghji87yhjko0Caliblre-toc-placeholder-for-insertion-later8ujko0987yjk', self.get_toc())
output = self.clean_text(output)
if self.opts.sectionize_chapters:
output = self.sectionize_chapters(output)
return u'<?xml version="1.0" encoding="UTF-8"?>\n%s' % etree.tostring(etree.fromstring(output), encoding=unicode, pretty_print=True)
output = self.clean_text(u''.join(output))
if self.opts.pretty_print:
return u'<?xml version="1.0" encoding="UTF-8"?>\n%s' % etree.tostring(etree.fromstring(output), encoding=unicode, pretty_print=True)
else:
return u'<?xml version="1.0" encoding="UTF-8"?>' + output
def clean_text(self, text):
text = re.sub(r'(?miu)<section>\s*</section>', '', text)
@ -101,113 +76,85 @@ class FB2MLizer(object):
return text
def fb2_header(self):
author_first = u''
author_middle = u''
author_last = u''
metadata = {}
metadata['author_first'] = u''
metadata['author_middle'] = u''
metadata['author_last'] = u''
metadata['title'] = self.oeb_book.metadata.title[0].value
metadata['appname'] = __appname__
metadata['version'] = __version__
metadata['date'] = '%i.%i.%i' % (datetime.now().day, datetime.now().month, datetime.now().year)
metadata['lang'] = u''.join(self.oeb_book.metadata.lang) if self.oeb_book.metadata.lang else 'en'
metadata['id'] = '%s' % uuid.uuid4()
author_parts = self.oeb_book.metadata.creator[0].value.split(' ')
if len(author_parts) == 1:
author_last = author_parts[0]
metadata['author_last'] = author_parts[0]
elif len(author_parts) == 2:
author_first = author_parts[0]
author_last = author_parts[1]
metadata['author_first'] = author_parts[0]
metadata['author_last'] = author_parts[1]
else:
author_first = author_parts[0]
author_middle = ' '.join(author_parts[1:-2])
author_last = author_parts[-1]
metadata['author_first'] = author_parts[0]
metadata['author_middle'] = ' '.join(author_parts[1:-2])
metadata['author_last'] = author_parts[-1]
return u'<FictionBook xmlns:xlink="http://www.w3.org/1999/xlink" ' \
'xmlns="http://www.gribuser.ru/xml/fictionbook/2.0">\n' \
'<description>\n<title-info>\n ' \
'<author>\n<first-name>%s</first-name>\n<middle-name>%s' \
'</middle-name>\n<last-name>%s</last-name>\n</author>\n' \
'<book-title>%s</book-title> ' \
'</title-info><document-info> ' \
'<program-used>%s - %s</program-used></document-info>\n' \
'</description>\n<body>\n<section>' % tuple(map(prepare_string_for_xml,
(author_first, author_middle,
author_last, self.oeb_book.metadata.title[0].value,
__appname__, __version__)))
for key, value in metadata.items():
metadata[key] = prepare_string_for_xml(value)
def get_cover_page(self):
output = u''
if 'cover' in self.oeb_book.guide:
output += '<image xlink:href="#cover.jpg" />'
self.image_hrefs[self.oeb_book.guide['cover'].href] = 'cover.jpg'
if 'titlepage' in self.oeb_book.guide:
self.log.debug('Generating cover page...')
href = self.oeb_book.guide['titlepage'].href
item = self.oeb_book.manifest.hrefs[href]
if item.spine_position is None:
stylizer = Stylizer(item.data, item.href, self.oeb_book,
self.opts, self.opts.output_profile)
output += ''.join(self.dump_text(item.data.find(XHTML('body')), stylizer, item))
return output
def get_toc(self):
toc = []
if self.opts.inline_toc:
self.log.debug('Generating table of contents...')
toc.append(u'<p>%s</p>' % _('Table of Contents:'))
for item in self.oeb_book.toc:
if item.href in self.link_hrefs.keys():
toc.append('<p><a xlink:href="#%s">%s</a></p>\n' % (self.link_hrefs[item.href], item.title))
else:
self.oeb.warn('Ignoring toc item: %s not found in document.' % item)
return ''.join(toc)
def sectionize_chapters(self, text):
def remove_p(t):
t = t.replace('<p>', '')
t = t.replace('</p>', '')
return t
text = re.sub(r'(?imsu)(<p>)\s*(?P<anchor><a\s+id="calibre_link-\d+"\s*/>)\s*(</p>)\s*(<p>)\s*(?P<strong><strong>.+?</strong>)\s*(</p>)', lambda mo: '</section><section>%s<title><p>%s</p></title>' % (mo.group('anchor'), remove_p(mo.group('strong'))), text)
text = re.sub(r'(?imsu)(<p>)\s*(?P<anchor><a\s+id="calibre_link-\d+"\s*/>)\s*(</p>)\s*(?P<strong><strong>.+?</strong>)', lambda mo: '</section><section>%s<title><p>%s</p></title>' % (mo.group('anchor'), remove_p(mo.group('strong'))), text)
text = re.sub(r'(?imsu)(?P<anchor><a\s+id="calibre_link-\d+"\s*/>)\s*(<p>)\s*(?P<strong><strong>.+?</strong>)\s*(</p>)', lambda mo: '</section><section>%s<title><p>%s</p></title>' % (mo.group('anchor'), remove_p(mo.group('strong'))), text)
text = re.sub(r'(?imsu)(<p>)\s*(?P<anchor><a\s+id="calibre_link-\d+"\s*/>)\s*(?P<strong><strong>.+?</strong>)\s*(</p>)', lambda mo: '</section><section>%s<title><p>%s</p></title>' % (mo.group('anchor'), remove_p(mo.group('strong'))), text)
text = re.sub(r'(?imsu)(?P<anchor><a\s+id="calibre_link-\d+"\s*/>)\s*(?P<strong><strong>.+?</strong>)', lambda mo: '</section><section>%s<title><p>%s</p></title>' % (mo.group('anchor'), remove_p(mo.group('strong'))), text)
return text
def get_text(self):
text = []
for i, item in enumerate(self.oeb_book.spine):
if self.opts.sectionize_chapters_using_file_structure and i is not 0:
text.append('<section>')
self.log.debug('Converting %s to FictionBook2 XML' % item.href)
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
text.append(self.add_page_anchor(item))
text += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
if self.opts.sectionize_chapters_using_file_structure and i is not len(self.oeb_book.spine) - 1:
text.append('</section>')
return ''.join(text)
def fb2_body_footer(self):
return u'\n</section>\n</body>'
return u'<FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:xlink="http://www.w3.org/1999/xlink">' \
'<description>' \
'<title-info>' \
'<genre>antique</genre>' \
'<author>' \
'<first-name>%(author_first)s</first-name>' \
'<middle-name>%(author_middle)s</middle-name>' \
'<last-name>%(author_last)s</last-name>' \
'</author>' \
'<book-title>%(title)s</book-title>' \
'<lang>%(lang)s</lang>' \
'</title-info>' \
'<document-info>' \
'<author>' \
'<first-name></first-name>' \
'<middle-name></middle-name>' \
'<last-name></last-name>' \
'</author>' \
'<program-used>%(appname)s %(version)s</program-used>' \
'<date>%(date)s</date>' \
'<id>%(id)s</id>' \
'<version>1.0</version>' \
'</document-info>' \
'</description>' % metadata
def fb2_footer(self):
    """Return the closing tag of the FictionBook root element."""
    closing_root = u'</FictionBook>'
    return closing_root
def add_page_anchor(self, page):
return self.get_anchor(page, '')
def get_anchor(self, page, aid):
aid = prepare_string_for_xml(aid)
aid = '%s#%s' % (page.href, aid)
if aid not in self.link_hrefs.keys():
self.link_hrefs[aid] = 'calibre_link-%s' % len(self.link_hrefs.keys())
aid = self.link_hrefs[aid]
return '<a id="%s" />' % aid
def get_text(self):
    """Convert every spine item to FB2 markup and wrap the result in <body>.

    Each spine item becomes its own <section>; the section content comes
    from dump_text() run on the item's XHTML body element.

    @return: The complete <body>...</body> markup as a single string.
    """
    parts = ['<body>']
    for spine_item in self.oeb_book.spine:
        self.log.debug('Converting %s to FictionBook2 XML' % spine_item.href)
        stylizer = Stylizer(spine_item.data, spine_item.href, self.oeb_book, self.opts, self.opts.output_profile)
        parts.append('<section>')
        parts.extend(self.dump_text(spine_item.data.find(XHTML('body')), stylizer, spine_item))
        parts.append('</section>')
    return ''.join(parts) + '</body>'
def fb2mlize_images(self):
'''
This function uses the self.image_hrefs dictionary mapping. It is populated by the dump_text function.
'''
images = []
for item in self.oeb_book.manifest:
# Don't write the image if it's not referenced in the document's text.
if item.href not in self.image_hrefs:
continue
if item.media_type in OEB_RASTER_IMAGES:
try:
im = Image.open(cStringIO.StringIO(item.data)).convert('RGB')
data = cStringIO.StringIO()
im.save(data, 'JPEG')
data = data.getvalue()
if not item.media_type == types_map['.jpeg'] or not item.media_type == types_map['.jpg']:
im = Image()
im.load(item.data)
im.set_compression_quality(70)
data = im.export('jpg')
raw_data = b64encode(data)
# Don't put the encoded image on a single line.
data = ''
@ -218,114 +165,167 @@ class FB2MLizer(object):
col = 1
col += 1
data += char
images.append('<binary id="%s" content-type="%s">%s\n</binary>' % (self.image_hrefs.get(item.href, '0000.JPEG'), item.media_type, data))
images.append('<binary id="%s" content-type="image/jpeg">%s\n</binary>' % (self.image_hrefs[item.href], data))
except Exception as e:
self.log.error('Error: Could not include file %s becuase ' \
self.log.error('Error: Could not include file %s because ' \
'%s.' % (item.href, e))
return ''.join(images)
def dump_text(self, elem, stylizer, page, tag_stack=[]):
if not isinstance(elem.tag, basestring) \
or namespace(elem.tag) != XHTML_NS:
def ensure_p(self):
    """Open a paragraph unless one is already open.

    @return: A pair (markup, tags). When no paragraph was open this is
             (['<p>'], ['p']) and self.in_p is set to True; when one was
             already open both lists are empty and nothing changes.
    """
    if not self.in_p:
        self.in_p = True
        return ['<p>'], ['p']
    return [], []
def close_open_p(self, tags):
    """Produce the markup that starts a fresh paragraph.

    If a paragraph is currently open, every tag in ``tags`` is closed from
    the innermost outwards up to and including the first 'p' found, and the
    same tags are then reopened in their original order. If no paragraph is
    open, a new <p> is opened and self.in_p is set.

    @param tags: Open FB2 tags, outermost first. The list is not modified.
    @return: A pair (text, added_p). text is the list of markup strings
             (it always starts with an empty string); added_p is True only
             when a new <p> was opened rather than an existing one cycled.
    """
    text = ['']
    added_p = False
    if self.in_p:
        # Close all up to p. Close p. Reopen all closed tags including p.
        closed_tags = []
        # reversed() walks innermost-first without reversing the caller's
        # list in place, which the previous tags.reverse() call did.
        for t in reversed(tags):
            text.append('</%s>' % t)
            closed_tags.append(t)
            if t == 'p':
                break
        for t in reversed(closed_tags):
            text.append('<%s>' % t)
    else:
        text.append('<p>')
        added_p = True
        self.in_p = True
    return text, added_p
def handle_simple_tag(self, tag, tags):
    """Open an inline FB2 tag unless it is already open.

    @param tag: FB2 tag name to open (for example 'strong').
    @param tags: FB2 tags that are currently open.
    @return: A pair (markup, opened_tags). Both lists are empty when tag is
             already in tags. Otherwise they hold whatever ensure_p()
             produced, followed by the opening markup / name for tag.
    """
    if tag in tags:
        return [], []
    # The inline tag must live inside a paragraph, so make sure one is open.
    p_out, p_tags = self.ensure_p()
    markup = list(p_out)
    markup.append('<%s>' % tag)
    opened = list(p_tags)
    opened.append(tag)
    return markup, opened
def dump_text(self, elem_tree, stylizer, page, tag_stack=[]):
'''
This function is intended to be used in a recursive manner. dump_text will
run through all elements in the elem_tree and call itself on each element.
self.image_hrefs will be populated by calling this function.
@param elem_tree: etree representation of XHTML content to be transformed.
@param stylizer: Used to track the style of elements within the tree.
@param page: OEB page used to determine absolute urls.
@param tag_stack: List of open FB2 tags to take into account.
@return: List of string representing the XHTML converted to FB2 markup.
'''
# Skip nodes whose tag is not a string or whose first tag is not part of the XHTML namespace.
if not isinstance(elem_tree.tag, basestring) or namespace(elem_tree.tag) != XHTML_NS:
return []
style = stylizer.style(elem)
if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
or style['visibility'] == 'hidden':
style = stylizer.style(elem_tree)
if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') or style['visibility'] == 'hidden':
return []
fb2_text = []
# FB2 generated output.
fb2_out = []
# FB2 tags in the order they are opened. This will be used to close the tags.
tags = []
# First tag in tree
tag = barename(elem_tree.tag)
tag = barename(elem.tag)
if tag in TAG_IMAGES:
if elem.attrib.get('src', None):
if page.abshref(elem.attrib['src']) not in self.image_hrefs.keys():
self.image_hrefs[page.abshref(elem.attrib['src'])] = '%s.jpg' % len(self.image_hrefs.keys())
fb2_text.append('<image xlink:href="#%s" />' % self.image_hrefs[page.abshref(elem.attrib['src'])])
if tag in TAG_LINKS:
href = elem.get('href')
if href:
href = prepare_string_for_xml(page.abshref(href))
href = href.replace('"', '&quot;')
if '://' in href:
fb2_text.append('<a xlink:href="%s">' % href)
else:
if href.startswith('#'):
href = href[1:]
if href not in self.link_hrefs.keys():
self.link_hrefs[href] = 'calibre_link-%s' % len(self.link_hrefs.keys())
href = self.link_hrefs[href]
fb2_text.append('<a xlink:href="#%s">' % href)
tags.append('a')
# Anchor ids
id_name = elem.get('id')
if id_name:
fb2_text.append(self.get_anchor(page, id_name))
# Process the XHTML tag if it needs to be converted to an FB2 tag.
if tag == 'h1' and self.opts.h1_to_title or tag == 'h2' and self.opts.h2_to_title or tag == 'h3' and self.opts.h3_to_title:
fb2_text.append('<title>')
fb2_out.append('<title>')
tags.append('title')
fb2_tag = TAG_MAP.get(tag, None)
if fb2_tag == 'p':
if 'p' in tag_stack+tags:
# Close all up to p. Close p. Reopen all closed tags including p.
all_tags = tag_stack+tags
if tag == 'img':
if elem_tree.attrib.get('src', None):
# Only write the image tag if it is in the manifest.
if page.abshref(elem_tree.attrib['src']) in self.oeb_book.manifest.hrefs.keys():
if page.abshref(elem_tree.attrib['src']) not in self.image_hrefs.keys():
self.image_hrefs[page.abshref(elem_tree.attrib['src'])] = '_%s.jpg' % len(self.image_hrefs.keys())
p_txt, p_tag = self.ensure_p()
fb2_out += p_txt
tags += p_tag
fb2_out.append('<image xlink:href="#%s" />' % self.image_hrefs[page.abshref(elem_tree.attrib['src'])])
elif tag == 'br':
if self.in_p:
closed_tags = []
all_tags.reverse()
for t in all_tags:
fb2_text.append('</%s>' % t)
open_tags = tag_stack+tags
open_tags.reverse()
for t in open_tags:
fb2_out.append('</%s>' % t)
closed_tags.append(t)
if t == 'p':
break
fb2_out.append('<empty-line />')
closed_tags.reverse()
for t in closed_tags:
fb2_text.append('<%s>' % t)
fb2_out.append('<%s>' % t)
else:
fb2_text.append('<p>')
fb2_out.append('<empty-line />')
elif tag in ('div', 'li', 'p'):
p_text, added_p = self.close_open_p(tag_stack+tags)
fb2_out += p_text
if added_p:
tags.append('p')
elif fb2_tag and fb2_tag not in tag_stack+tags:
fb2_text.append('<%s>' % fb2_tag)
tags.append(fb2_tag)
elif tag == 'b':
s_out, s_tags = self.handle_simple_tag('strong', tag_stack+tags)
fb2_out += s_out
tags += s_tags
elif tag == 'i':
s_out, s_tags = self.handle_simple_tag('emphasis', tag_stack+tags)
fb2_out += s_out
tags += s_tags
# Processes style information
for s in STYLES:
style_tag = s[1].get(style[s[0]], None)
if style_tag and style_tag not in tag_stack+tags:
fb2_text.append('<%s>' % style_tag)
tags.append(style_tag)
# Processes style information.
if style['font-style'] == 'italic':
s_out, s_tags = self.handle_simple_tag('emphasis', tag_stack+tags)
fb2_out += s_out
tags += s_tags
elif style['font-weight'] in ('bold', 'bolder'):
s_out, s_tags = self.handle_simple_tag('strong', tag_stack+tags)
fb2_out += s_out
tags += s_tags
if tag in TAG_SPACE:
if not fb2_text or fb2_text[-1] != ' ' or not fb2_text[-1].endswith(' '):
fb2_text.append(' ')
# Process element text.
if hasattr(elem_tree, 'text') and elem_tree.text:
if not self.in_p:
fb2_out.append('<p>')
fb2_out.append(prepare_string_for_xml(elem_tree.text))
if not self.in_p:
fb2_out.append('</p>')
if hasattr(elem, 'text') and elem.text:
if 'p' not in tag_stack+tags:
fb2_text.append('<p>%s</p>' % prepare_string_for_xml(elem.text))
else:
fb2_text.append(prepare_string_for_xml(elem.text))
for item in elem:
fb2_text += self.dump_text(item, stylizer, page, tag_stack+tags)
# Process sub-elements.
for item in elem_tree:
fb2_out += self.dump_text(item, stylizer, page, tag_stack+tags)
# Close open FB2 tags.
tags.reverse()
fb2_text += self.close_tags(tags)
fb2_out += self.close_tags(tags)
if hasattr(elem, 'tail') and elem.tail:
if 'p' not in tag_stack:
fb2_text.append('<p>%s</p>' % prepare_string_for_xml(elem.tail))
else:
fb2_text.append(prepare_string_for_xml(elem.tail))
# Process element text that comes after the close of the XHTML tag but before the next XHTML tag.
if hasattr(elem_tree, 'tail') and elem_tree.tail:
if not self.in_p:
fb2_out.append('<p>')
fb2_out.append(prepare_string_for_xml(elem_tree.tail))
if not self.in_p:
fb2_out.append('</p>')
return fb2_text
return fb2_out
def close_tags(self, tags):
    """Build the closing markup for the given tags, in the order given.

    Closing a 'p' tag also clears self.in_p.

    @param tags: Names of the FB2 tags to close.
    @return: List of closing-tag strings, one per entry in tags.
    """
    closing = []
    for name in tags:
        closing.append('</%s>' % name)
        if name == 'p':
            # The paragraph is no longer open once its end tag is emitted.
            self.in_p = False
    return closing

View File

@ -16,20 +16,6 @@ class FB2Output(OutputFormatPlugin):
file_type = 'fb2'
options = set([
OptionRecommendation(name='inline_toc',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Add Table of Contents to beginning of the book.')),
OptionRecommendation(name='sectionize_chapters',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Try to turn chapters into individual sections. ' \
'WARNING: ' \
'This option is experimental. It can cause conversion ' \
'to fail. It can also produce unexpected output.')),
OptionRecommendation(name='sectionize_chapters_using_file_structure',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Try to turn chapters into individual sections using the ' \
'internal structure of the ebook. This works well for EPUB ' \
'books that have been internally split by chapter.')),
OptionRecommendation(name='h1_to_title',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Wrap all h1 tags with fb2 title elements.')),
@ -43,6 +29,14 @@ class FB2Output(OutputFormatPlugin):
def convert(self, oeb_book, output_path, input_plugin, opts, log):
from calibre.ebooks.oeb.transforms.jacket import linearize_jacket
from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer, Unavailable
try:
rasterizer = SVGRasterizer()
rasterizer(oeb_book, opts)
except Unavailable:
self.log.warn('SVG rasterizer unavailable, SVG will not be converted')
linearize_jacket(oeb_book)
fb2mlizer = FB2MLizer(log)

View File

@ -3,12 +3,11 @@ __license__ = 'GPL 3'
__copyright__ = '2010, sengian <sengian1@gmail.com>'
__docformat__ = 'restructuredtext en'
import sys, textwrap, re
import sys, textwrap, re, traceback, socket
from urllib import urlencode
from lxml import html, etree
from lxml.html import soupparser
from lxml.etree import tostring
from lxml import html
from lxml.html import soupparser, tostring
from calibre import browser, preferred_encoding
from calibre.ebooks.chardet import xml_to_unicode
@ -18,6 +17,7 @@ from calibre.library.comments import sanitize_comments_html
from calibre.ebooks.metadata.fetch import MetadataSource
from calibre.utils.config import OptionParser
from calibre.utils.date import parse_date, utcnow
from calibre.utils.cleantext import clean_ascii_char
class Fictionwise(MetadataSource): # {{{
@ -37,10 +37,11 @@ class Fictionwise(MetadataSource): # {{{
# }}}
class FictionwiseError(Exception):
pass
def report(verbose):
if verbose:
import traceback
traceback.print_exc()
class Query(object):
@ -86,18 +87,20 @@ class Query(object):
q = q.encode('utf-8')
self.urldata = urlencode(q)
def __call__(self, browser, verbose):
def __call__(self, browser, verbose, timeout = 5.):
if verbose:
print 'Query:', self.BASE_URL+self.urldata
print _('Query: %s') % self.BASE_URL+self.urldata
try:
raw = browser.open_novisit(self.BASE_URL, self.urldata).read()
raw = browser.open_novisit(self.BASE_URL, self.urldata, timeout=timeout).read()
except Exception, e:
report(verbose)
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
return
raise
if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
raise FictionwiseError(_('Fictionwise timed out. Try again later.'))
raise FictionwiseError(_('Fictionwise encountered an error.'))
if '<title>404 - ' in raw:
return
raw = xml_to_unicode(raw, strip_encoding_pats=True,
@ -105,7 +108,11 @@ class Query(object):
try:
feed = soupparser.fromstring(raw)
except:
return
try:
#remove ASCII invalid chars
feed = soupparser.fromstring(clean_ascii_char(raw))
except:
return None
# get list of results as links
results = feed.xpath("//table[3]/tr/td[2]/table/tr/td/p/table[2]/tr[@valign]")
@ -139,12 +146,41 @@ class ResultList(list):
self.reisbn = re.compile(r'.*ISBN\s*:\s*', re.I)
def strip_tags_etree(self, etreeobj, invalid_tags):
for itag in invalid_tags:
for elt in etreeobj.getiterator(itag):
elt.drop_tag()
return etreeobj
for (itag, rmv) in invalid_tags.iteritems():
if rmv:
for elts in etreeobj.getiterator(itag):
elts.drop_tree()
else:
for elts in etreeobj.getiterator(itag):
elts.drop_tag()
def clean_entry(self, entry,
def clean_entry(self, entry, invalid_tags = {'script': True},
        invalid_id = (), invalid_class=(), invalid_xpath = ()):
    """Strip unwanted markup from entry, modifying it in place.

    @param entry: Parsed HTML element to clean.
    @param invalid_tags: Mapping of tag name to a flag, passed on to
        strip_tags_etree(). The default dict is shared between calls and
        is only read here.
    @param invalid_id: Ids of elements to drop; ids that are not present
        are ignored.
    @param invalid_class: CSS classes whose elements are dropped.
    @param invalid_xpath: XPath expressions whose matches are dropped.
    @return: None.
    """
    #invalid_tags: remove tag and keep content if False else remove
    #remove tags
    if invalid_tags:
        self.strip_tags_etree(entry, invalid_tags)
    #remove xpath
    if invalid_xpath:
        for eltid in invalid_xpath:
            for el in entry.xpath(eltid):
                el.drop_tree()
    #remove id
    if invalid_id:
        for eltid in invalid_id:
            # Without a default, get_element_by_id raises KeyError for a
            # missing id, so the None check below could never be reached.
            elt = entry.get_element_by_id(eltid, None)
            if elt is not None:
                elt.drop_tree()
    #remove class
    if invalid_class:
        for eltclass in invalid_class:
            elts = entry.find_class(eltclass)
            if elts is not None:
                for elt in elts:
                    elt.drop_tree()
def clean_entry_dffdfbdjbf(self, entry,
invalid_tags = ('font', 'strong', 'b', 'ul', 'span', 'a'),
remove_tags_trees = ('script',)):
for it in entry[0].iterchildren(tag='table'):
@ -170,7 +206,6 @@ class ResultList(list):
authortext = entry.find('./br').tail
if not self.rechkauth.search(authortext):
return []
#TODO: parse all tag if necessary
authortext = self.rechkauth.sub('', authortext)
return [a.strip() for a in authortext.split('&')]
@ -185,7 +220,7 @@ class ResultList(list):
float(image.get('height', default=0))) \
for image in entrytable.getiterator('img'))
#ratings as x/5
return 1.25*sum(k*v for (k, v) in hval.iteritems())/sum(hval.itervalues())
return float(1.25*sum(k*v for (k, v) in hval.iteritems())/sum(hval.itervalues()))
def get_description(self, entry):
description = self.output_entry(entry.find('./p'),htmlrm="")
@ -221,7 +256,6 @@ class ResultList(list):
self.resplitbr.split(date))
if not len(date):
return None
#TODO: parse all tag if necessary
try:
d = self.redate.sub('', date[0])
if d:
@ -279,9 +313,14 @@ class ResultList(list):
return feed.xpath("//table[3]/tr/td[2]/table[1]/tr/td/font/table/tr/td")
def populate(self, entries, browser, verbose=False):
for x in entries:
inv_tags ={'script': True, 'a': False, 'font': False, 'strong': False, 'b': False,
'ul': False, 'span': False, 'table': True}
inv_xpath =('descendant-or-self::p[1]',)
#single entry
if len(entries) == 1 and not isinstance(entries[0], str):
try:
entry = self.get_individual_metadata(browser, x, verbose)
entry = entries.xpath("//table[3]/tr/td[2]/table[1]/tr/td/font/table/tr/td")
self.clean_entry(entry, invalid_tags=inv_tags, invalid_xpath=inv_xpath)
entry = self.clean_entry(entry)
title = self.get_title(entry)
#ratings: get table for rating then drop
@ -292,28 +331,29 @@ class ResultList(list):
authors = self.get_authors(entry)
except Exception, e:
if verbose:
print 'Failed to get all details for an entry'
print _('Failed to get all details for an entry')
print e
continue
return
self.append(self.fill_MI(entry, title, authors, ratings, verbose))
def populate_single(self, feed, verbose=False):
try:
entry = feed.xpath("//table[3]/tr/td[2]/table[1]/tr/td/font/table/tr/td")
entry = self.clean_entry(entry)
title = self.get_title(entry)
#ratings: get table for rating then drop
for elt in entry.getiterator('table'):
ratings = self.get_rating(elt, verbose)
elt.getprevious().drop_tree()
elt.drop_tree()
authors = self.get_authors(entry)
except Exception, e:
if verbose:
print 'Failed to get all details for an entry'
print e
return
self.append(self.fill_MI(entry, title, authors, ratings, verbose))
else:
#multiple entries
for x in entries:
try:
entry = self.get_individual_metadata(browser, x, verbose)
self.clean_entry(entry, invalid_tags=inv_tags, invalid_xpath=inv_xpath)
title = self.get_title(entry)
#ratings: get table for rating then drop
for elt in entry.getiterator('table'):
ratings = self.get_rating(elt, verbose)
elt.getprevious().drop_tree()
elt.drop_tree()
authors = self.get_authors(entry)
except Exception, e:
if verbose:
print _('Failed to get all details for an entry')
print e
continue
self.append(self.fill_MI(entry, title, authors, ratings, verbose))
def search(title=None, author=None, publisher=None, isbn=None,
@ -321,35 +361,32 @@ def search(title=None, author=None, publisher=None, isbn=None,
keywords=None):
br = browser()
entries = Query(title=title, author=author, publisher=publisher,
keywords=keywords, max_results=max_results)(br, verbose)
keywords=keywords, max_results=max_results)(br, verbose, timeout = 10.)
#List of entry
ans = ResultList()
if len(entries) > 1:
ans.populate(entries, br, verbose)
else:
ans.populate_single(entries[0], verbose)
ans.populate(entries, br, verbose)
return ans
def option_parser():
parser = OptionParser(textwrap.dedent(\
'''\
_('''\
%prog [options]
Fetch book metadata from Fictionwise. You must specify one of title, author,
or keywords. No ISBN specification possible. Will fetch a maximum of 20 matches,
so you should make your query as specific as possible.
'''
''')
))
parser.add_option('-t', '--title', help='Book title')
parser.add_option('-a', '--author', help='Book author(s)')
parser.add_option('-p', '--publisher', help='Book publisher')
parser.add_option('-k', '--keywords', help='Keywords')
parser.add_option('-t', '--title', help=_('Book title'))
parser.add_option('-a', '--author', help=_('Book author(s)'))
parser.add_option('-p', '--publisher', help=_('Book publisher'))
parser.add_option('-k', '--keywords', help=_('Keywords'))
parser.add_option('-m', '--max-results', default=20,
help='Maximum number of results to fetch')
help=_('Maximum number of results to fetch'))
parser.add_option('-v', '--verbose', default=0, action='count',
help='Be more verbose about errors')
help=_('Be more verbose about errors'))
return parser
def main(args=sys.argv):
@ -362,6 +399,9 @@ def main(args=sys.argv):
report(True)
parser.print_help()
return 1
if results is None or len(results) == 0:
print _('No result found for this search!')
return 0
for result in results:
print unicode(result).encode(preferred_encoding, 'replace')
print

View File

@ -10,7 +10,8 @@ from copy import deepcopy
from lxml.html import soupparser
from calibre.utils.date import parse_date, utcnow
from calibre.utils.date import parse_date, utcnow, replace_months
from calibre.utils.cleantext import clean_ascii_char
from calibre import browser, preferred_encoding
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata import MetaInformation, check_isbn, \
@ -71,31 +72,16 @@ class NiceBooksCovers(CoverDownload):
traceback.format_exc(), self.name))
class NiceBooksError(Exception):
pass
class ISBNNotFound(NiceBooksError):
pass
def report(verbose):
    """Print the traceback of the exception currently being handled.

    Does nothing unless ``verbose`` is truthy.
    """
    if not verbose:
        return
    # Imported lazily: only needed on the verbose error path.
    import traceback
    traceback.print_exc()
def replace_monthsfr(datefr):
# Replace french months by english equivalent for parse_date
frtoen = {
u'[jJ]anvier': u'jan',
u'[fF].vrier': u'feb',
u'[mM]ars': u'mar',
u'[aA]vril': u'apr',
u'[mM]ai': u'may',
u'[jJ]uin': u'jun',
u'[jJ]uillet': u'jul',
u'[aA]o.t': u'aug',
u'[sS]eptembre': u'sep',
u'[Oo]ctobre': u'oct',
u'[nN]ovembre': u'nov',
u'[dD].cembre': u'dec' }
for k in frtoen.iterkeys():
tmp = re.sub(k, frtoen[k], datefr)
if tmp <> datefr: break
return tmp
class Query(object):
BASE_URL = 'http://fr.nicebooks.com/'
@ -119,7 +105,7 @@ class Query(object):
def __call__(self, browser, verbose, timeout = 5.):
if verbose:
print 'Query:', self.BASE_URL+self.urldata
print _('Query: %s') % self.BASE_URL+self.urldata
try:
raw = browser.open_novisit(self.BASE_URL+self.urldata, timeout=timeout).read()
@ -128,7 +114,9 @@ class Query(object):
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
return
raise
if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
raise NiceBooksError(_('Nicebooks timed out. Try again later.'))
raise NiceBooksError(_('Nicebooks encountered an error.'))
if '<title>404 - ' in raw:
return
raw = xml_to_unicode(raw, strip_encoding_pats=True,
@ -136,7 +124,11 @@ class Query(object):
try:
feed = soupparser.fromstring(raw)
except:
return
try:
#remove ASCII invalid chars
feed = soupparser.fromstring(clean_ascii_char(raw))
except:
return None
#nb of page to call
try:
@ -161,7 +153,11 @@ class Query(object):
try:
feed = soupparser.fromstring(raw)
except:
continue
try:
#remove ASCII invalid chars
feed = soupparser.fromstring(clean_ascii_char(raw))
except:
continue
pages.append(feed)
results = []
@ -180,14 +176,12 @@ class ResultList(list):
self.reautclean = re.compile(u'\s*\(.*\)\s*')
def get_title(self, entry):
# title = deepcopy(entry.find("div[@id='book-info']"))
title = deepcopy(entry)
title.remove(title.find("dl[@title='Informations sur le livre']"))
title = ' '.join([i.text_content() for i in title.iterchildren()])
return unicode(title.replace('\n', ''))
def get_authors(self, entry):
# author = entry.find("div[@id='book-info']/dl[@title='Informations sur le livre']")
author = entry.find("dl[@title='Informations sur le livre']")
authortext = []
for x in author.getiterator('dt'):
@ -223,7 +217,7 @@ class ResultList(list):
d = x.getnext().text_content()
try:
default = utcnow().replace(day=15)
d = replace_monthsfr(d)
d = replace_months(d, 'fr')
d = parse_date(d, assume_utc=True, default=default)
mi.pubdate = d
except:
@ -234,11 +228,6 @@ class ResultList(list):
mi = MetaInformation(title, authors)
mi.author_sort = authors_to_sort_string(authors)
mi.comments = self.get_description(entry, verbose)
# entry = entry.find("dl[@title='Informations sur le livre']")
# mi.publisher = self.get_publisher(entry)
# mi.pubdate = self.get_date(entry, verbose)
# mi.isbn = self.get_ISBN(entry)
# mi.language = self.get_language(entry)
return self.get_book_info(entry, mi, verbose)
def get_individual_metadata(self, browser, linkdata, verbose):
@ -249,7 +238,9 @@ class ResultList(list):
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
return
raise
if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
raise NiceBooksError(_('Nicebooks timed out. Try again later.'))
raise NiceBooksError(_('Nicebooks encountered an error.'))
if '<title>404 - ' in raw:
report(verbose)
return
@ -258,7 +249,11 @@ class ResultList(list):
try:
feed = soupparser.fromstring(raw)
except:
return
try:
#remove ASCII invalid chars
feed = soupparser.fromstring(clean_ascii_char(raw))
except:
return None
# get results
return feed.xpath("//div[@id='container']")[0]
@ -292,13 +287,6 @@ class ResultList(list):
continue
self.append(self.fill_MI(entry, title, authors, verbose))
class NiceBooksError(Exception):
pass
class ISBNNotFound(NiceBooksError):
pass
class Covers(object):
def __init__(self, isbn = None):
@ -329,11 +317,10 @@ class Covers(object):
return cover, ext if ext else 'jpg'
except Exception, err:
if isinstance(getattr(err, 'args', [None])[0], socket.timeout):
err = NiceBooksError(_('Nicebooks timed out. Try again later.'))
raise err
raise NiceBooksError(_('Nicebooks timed out. Try again later.'))
if not len(self.urlimg):
if not self.isbnf:
raise ISBNNotFound('ISBN: '+self.isbn+_(' not found.'))
raise ISBNNotFound(_('ISBN: %s not found.') % self.isbn)
raise NiceBooksError(_('An errror occured with Nicebooks cover fetcher'))
@ -341,10 +328,10 @@ def search(title=None, author=None, publisher=None, isbn=None,
max_results=5, verbose=False, keywords=None):
br = browser()
entries = Query(title=title, author=author, isbn=isbn, publisher=publisher,
keywords=keywords, max_results=max_results)(br, verbose)
keywords=keywords, max_results=max_results)(br, verbose,timeout = 10.)
if entries is None or len(entries) == 0:
return
return None
#List of entry
ans = ResultList()
@ -364,28 +351,28 @@ def cover_from_isbn(isbn, timeout = 5.):
def option_parser():
parser = OptionParser(textwrap.dedent(\
'''\
_('''\
%prog [options]
Fetch book metadata from Nicebooks. You must specify one of title, author,
ISBN, publisher or keywords. Will fetch a maximum of 20 matches,
so you should make your query as specific as possible.
It can also get covers if the option is activated.
'''
''')
))
parser.add_option('-t', '--title', help='Book title')
parser.add_option('-a', '--author', help='Book author(s)')
parser.add_option('-p', '--publisher', help='Book publisher')
parser.add_option('-i', '--isbn', help='Book ISBN')
parser.add_option('-k', '--keywords', help='Keywords')
parser.add_option('-t', '--title', help=_('Book title'))
parser.add_option('-a', '--author', help=_('Book author(s)'))
parser.add_option('-p', '--publisher', help=_('Book publisher'))
parser.add_option('-i', '--isbn', help=_('Book ISBN'))
parser.add_option('-k', '--keywords', help=_('Keywords'))
parser.add_option('-c', '--covers', default=0,
help='Covers: 1-Check/ 2-Download')
help=_('Covers: 1-Check/ 2-Download'))
# No short flag here: '-p' is already taken by --publisher, and optparse
# raises OptionConflictError when two options share an option string.
parser.add_option('--coverspath', default='',
                  help=_('Covers files path'))
parser.add_option('-m', '--max-results', default=20,
help='Maximum number of results to fetch')
help=_('Maximum number of results to fetch'))
parser.add_option('-v', '--verbose', default=0, action='count',
help='Be more verbose about errors')
help=_('Be more verbose about errors'))
return parser
def main(args=sys.argv):
@ -400,15 +387,15 @@ def main(args=sys.argv):
parser.print_help()
return 1
if results is None or len(results) == 0:
print 'No result found for this search!'
print _('No result found for this search!')
return 0
for result in results:
print unicode(result).encode(preferred_encoding, 'replace')
covact = int(opts.covers)
if covact == 1:
textcover = 'No cover found!'
textcover = _('No cover found!')
if check_for_cover(result.isbn):
textcover = 'A cover was found for this book'
textcover = _('A cover was found for this book')
print textcover
elif covact == 2:
cover_data, ext = cover_from_isbn(result.isbn)
@ -417,7 +404,7 @@ def main(args=sys.argv):
cpath = os.path.normpath(opts.coverspath + '/' + result.isbn)
oname = os.path.abspath(cpath+'.'+ext)
open(oname, 'wb').write(cover_data)
print 'Cover saved to file ', oname
print _('Cover saved to file '), oname
print
if __name__ == '__main__':

View File

@ -775,6 +775,7 @@ class Manifest(object):
return u'Item(id=%r, href=%r, media_type=%r)' \
% (self.id, self.href, self.media_type)
# Parsing {{{
def _parse_xml(self, data):
data = xml_to_unicode(data, strip_encoding_pats=True,
assume_utf8=True, resolve_entities=True)[0]
@ -1035,6 +1036,8 @@ class Manifest(object):
data = item.data.cssText
return ('utf-8', data)
# }}}
@dynamic_property
def data(self):
doc = """Provides MIME type sensitive access to the manifest

View File

@ -0,0 +1,130 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import posixpath
from urlparse import urldefrag
from lxml import etree
import cssutils
from calibre.ebooks.oeb.base import rewrite_links, urlnormalize
class RenameFiles(object):

    '''
    Apply a map of old hrefs to new hrefs to every link in the book: links
    inside parsed markup and stylesheets, guide references and TOC entries.
    Note that the spine and manifest are not touched by this transform.
    '''

    def __init__(self, rename_map):
        # Lookups use the normalized, fragment-free href as the key.
        self.rename_map = rename_map

    def __call__(self, oeb, opts):
        '''Rewrite all links in ``oeb`` according to the rename map.'''
        self.log, self.opts, self.oeb = oeb.logger, opts, oeb

        for item in oeb.manifest.items:
            # url_replacer resolves links relative to the item being processed
            self.current_item = item
            if etree.iselement(item.data):
                rewrite_links(item.data, self.url_replacer)
            elif hasattr(item.data, 'cssText'):
                cssutils.replaceUrls(item.data, self.url_replacer)

        guide = self.oeb.guide
        if guide:
            for ref in guide.values():
                renamed = self._renamed_href(ref.href)
                if renamed is not None:
                    ref.href = renamed

        toc = self.oeb.toc
        if toc:
            self.fix_toc_entry(toc)

    def _renamed_href(self, href):
        '''
        Return the replacement for ``href`` with its fragment (if any)
        re-attached, or None when ``href`` is not in the rename map.
        '''
        base, frag = urldefrag(urlnormalize(href))
        target = self.rename_map.get(base)
        if target is None:
            return None
        return (target + '#' + frag) if frag else target

    def fix_toc_entry(self, toc):
        '''Update the href of ``toc`` and, recursively, of all its children.'''
        if toc.href:
            renamed = self._renamed_href(toc.href)
            if renamed is not None:
                toc.href = renamed

        for child in toc:
            self.fix_toc_entry(child)

    def url_replacer(self, orig_url):
        '''
        Link rewriting callback: return the new URL for ``orig_url`` relative
        to the current item, or ``orig_url`` unchanged if its target was not
        renamed.
        '''
        path, frag = urldefrag(urlnormalize(orig_url))
        item = self.current_item
        target = self.rename_map.get(item.abshref(path))
        if target is None:
            return orig_url
        relative = item.relhref(target)
        return (relative + '#' + frag) if frag else relative
class UniqueFilenames(object):

    'Ensure that every item in the manifest has a unique filename'

    def __call__(self, oeb, opts):
        '''
        Give every manifest item whose basename was already seen a new href
        with a unique basename, then rewrite all links to the renamed items.
        '''
        self.log = oeb.logger
        self.opts = opts
        self.oeb = oeb

        self.seen_filenames = set([])
        self.rename_map = {}

        # Iterate over a copy, the manifest is modified inside the loop
        for item in list(oeb.manifest.items):
            fname = posixpath.basename(item.href)
            if fname not in self.seen_filenames:
                self.seen_filenames.add(fname)
                continue

            suffix = self.unique_suffix(fname)
            data = item.data
            base, ext = posixpath.splitext(item.href)
            nhref = base + suffix + ext
            nhref = oeb.manifest.generate(href=nhref)[1]

            # Detach the old item before adding its replacement: both share
            # the same id, so removing the old item last could evict the
            # replacement from any id-keyed index kept by the manifest.
            spine_pos, linear = item.spine_position, item.linear
            if spine_pos is not None:
                oeb.spine.remove(item)
            oeb.manifest.remove(item)

            nitem = oeb.manifest.add(item.id, nhref, item.media_type,
                    data=data, fallback=item.fallback)
            self.seen_filenames.add(posixpath.basename(nhref))
            self.rename_map[item.href] = nhref
            if spine_pos is not None:
                # Put the replacement back where the old item used to be
                oeb.spine.insert(spine_pos, nitem, linear)

        if self.rename_map:
            self.log('Found non-unique filenames, renaming to support broken'
                    ' EPUB readers like FBReader, Aldiko and Stanza...')
            from pprint import pformat
            self.log.debug(pformat(self.rename_map))

            renamer = RenameFiles(self.rename_map)
            renamer(oeb, opts)

    def unique_suffix(self, fname):
        '''
        Return the first suffix of the form ``_uN`` (N = 1, 2, ...) for which
        base + suffix + extension of ``fname`` is not a filename seen so far.
        '''
        base, ext = posixpath.splitext(fname)
        c = 0
        while True:
            c += 1
            suffix = '_u%d'%c
            candidate = base + suffix + ext
            if candidate not in self.seen_filenames:
                return suffix

View File

@ -179,8 +179,7 @@ class TXTMLizer(object):
text.append(u'\n\n')
if tag in SPACE_TAGS:
if not end.endswith('u ') and hasattr(elem, 'text') and elem.text:
text.append(u' ')
text.append(u' ')
# Process tags that contain text.
if hasattr(elem, 'text') and elem.text:

View File

@ -16,6 +16,7 @@ from calibre.gui2.dialogs.metadata_bulk import MetadataBulkDialog
from calibre.gui2.dialogs.confirm_delete import confirm
from calibre.gui2.dialogs.tag_list_editor import TagListEditor
from calibre.gui2.actions import InterfaceAction
from calibre.utils.icu import sort_key
class EditMetadataAction(InterfaceAction):
@ -363,8 +364,7 @@ class EditMetadataAction(InterfaceAction):
def edit_device_collections(self, view, oncard=None):
model = view.model()
result = model.get_collections_with_ids()
compare = (lambda x,y:cmp(x.lower(), y.lower()))
d = TagListEditor(self.gui, tag_to_match=None, data=result, compare=compare)
d = TagListEditor(self.gui, tag_to_match=None, data=result, key=sort_key)
d.exec_()
if d.result() == d.Accepted:
to_rename = d.to_rename # dict of new text to old ids

View File

@ -19,6 +19,7 @@ from calibre.ebooks import BOOK_EXTENSIONS
from calibre.constants import preferred_encoding
from calibre.library.comments import comments_to_html
from calibre.gui2 import config, open_local_file
from calibre.utils.icu import sort_key
# render_rows(data) {{{
WEIGHTS = collections.defaultdict(lambda : 100)
@ -31,8 +32,8 @@ WEIGHTS[_('Tags')] = 4
def render_rows(data):
keys = data.keys()
# First sort by name. The WEIGHTS sort will preserve this sub-order
keys.sort(cmp=lambda x, y: cmp(x.lower(), y.lower()))
keys.sort(cmp=lambda x, y: cmp(WEIGHTS[x], WEIGHTS[y]))
keys.sort(key=sort_key)
keys.sort(key=lambda x: WEIGHTS[x])
rows = []
for key in keys:
txt = data[key]

View File

@ -17,8 +17,6 @@ class PluginWidget(Widget, Ui_Form):
ICON = I('mimetypes/fb2.png')
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
Widget.__init__(self, parent, ['inline_toc', 'sectionize_chapters',
'sectionize_chapters_using_file_structure', 'h1_to_title',
'h2_to_title', 'h3_to_title'])
Widget.__init__(self, parent, ['h1_to_title', 'h2_to_title', 'h3_to_title'])
self.db, self.book_id = db, book_id
self.initialize_options(get_option, get_help, db, book_id)

View File

@ -14,7 +14,7 @@
<string>Form</string>
</property>
<layout class="QGridLayout" name="gridLayout">
<item row="6" column="0">
<item row="3" column="0">
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
@ -28,41 +28,20 @@
</spacer>
</item>
<item row="0" column="0">
<widget class="QCheckBox" name="opt_inline_toc">
<property name="text">
<string>&amp;Inline TOC</string>
</property>
</widget>
</item>
<item row="1" column="0">
<widget class="QCheckBox" name="opt_sectionize_chapters">
<property name="text">
<string>Sectionize Chapters (Use with care!)</string>
</property>
</widget>
</item>
<item row="2" column="0">
<widget class="QCheckBox" name="opt_sectionize_chapters_using_file_structure">
<property name="text">
<string>Sectionize Chapters using file structure</string>
</property>
</widget>
</item>
<item row="3" column="0">
<widget class="QCheckBox" name="opt_h1_to_title">
<property name="text">
<string>Wrap h1 tags with &lt;title&gt; elements</string>
</property>
</widget>
</item>
<item row="4" column="0">
<item row="1" column="0">
<widget class="QCheckBox" name="opt_h2_to_title">
<property name="text">
<string>Wrap h2 tags with &lt;title&gt; elements</string>
</property>
</widget>
</item>
<item row="5" column="0">
<item row="2" column="0">
<widget class="QCheckBox" name="opt_h3_to_title">
<property name="text">
<string>Wrap h3 tags with &lt;title&gt; elements</string>

View File

@ -17,6 +17,7 @@ from calibre.ebooks.metadata import authors_to_string, string_to_authors, \
from calibre.ebooks.metadata.opf2 import metadata_to_opf
from calibre.ptempfile import PersistentTemporaryFile
from calibre.gui2.convert import Widget
from calibre.utils.icu import sort_key
def create_opf_file(db, book_id):
mi = db.get_metadata(book_id, index_is_id=True)
@ -102,7 +103,7 @@ class MetadataWidget(Widget, Ui_Form):
def initalize_authors(self):
all_authors = self.db.all_authors()
all_authors.sort(cmp=lambda x, y : cmp(x[1], y[1]))
all_authors.sort(key=lambda x : sort_key(x[1]))
for i in all_authors:
id, name = i
@ -117,7 +118,7 @@ class MetadataWidget(Widget, Ui_Form):
def initialize_series(self):
all_series = self.db.all_series()
all_series.sort(cmp=lambda x, y : cmp(x[1], y[1]))
all_series.sort(key=lambda x : sort_key(x[1]))
for i in all_series:
id, name = i
@ -126,7 +127,7 @@ class MetadataWidget(Widget, Ui_Form):
def initialize_publisher(self):
all_publishers = self.db.all_publishers()
all_publishers.sort(cmp=lambda x, y : cmp(x[1], y[1]))
all_publishers.sort(key=lambda x : sort_key(x[1]))
for i in all_publishers:
id, name = i

View File

@ -17,6 +17,7 @@ from calibre.utils.date import qt_to_dt, now
from calibre.gui2.widgets import TagsLineEdit, EnComboBox
from calibre.gui2 import UNDEFINED_QDATE, error_dialog
from calibre.utils.config import tweaks
from calibre.utils.icu import sort_key
class Base(object):
@ -207,7 +208,7 @@ class Text(Base):
def setup_ui(self, parent):
values = self.all_values = list(self.db.all_custom(num=self.col_id))
values.sort(cmp = lambda x,y: cmp(x.lower(), y.lower()))
values.sort(key=sort_key)
if self.col_metadata['is_multiple']:
w = TagsLineEdit(parent, values)
w.setSizePolicy(QSizePolicy.Minimum, QSizePolicy.Preferred)
@ -256,7 +257,7 @@ class Series(Base):
def setup_ui(self, parent):
values = self.all_values = list(self.db.all_custom(num=self.col_id))
values.sort(cmp = lambda x,y: cmp(x.lower(), y.lower()))
values.sort(key=sort_key)
w = EnComboBox(parent)
w.setSizeAdjustPolicy(w.AdjustToMinimumContentsLengthWithIcon)
w.setMinimumContentsLength(25)
@ -365,11 +366,10 @@ widgets = {
'enumeration': Enumeration
}
def field_sort(y, z, x=None):
m1, m2 = x[y], x[z]
def field_sort_key(y, x=None):
m1 = x[y]
n1 = 'zzzzz' if m1['datatype'] == 'comments' else m1['name']
n2 = 'zzzzz' if m2['datatype'] == 'comments' else m2['name']
return cmp(n1.lower(), n2.lower())
return sort_key(n1)
def populate_metadata_page(layout, db, book_id, bulk=False, two_column=False, parent=None):
def widget_factory(type, col):
@ -381,7 +381,7 @@ def populate_metadata_page(layout, db, book_id, bulk=False, two_column=False, pa
return w
x = db.custom_column_num_map
cols = list(x)
cols.sort(cmp=partial(field_sort, x=x))
cols.sort(key=partial(field_sort_key, x=x))
count_non_comment = len([c for c in cols if x[c]['datatype'] != 'comments'])
layout.setColumnStretch(1, 10)
@ -526,7 +526,7 @@ class BulkSeries(BulkBase):
def setup_ui(self, parent):
values = self.all_values = list(self.db.all_custom(num=self.col_id))
values.sort(cmp = lambda x,y: cmp(x.lower(), y.lower()))
values.sort(key=sort_key)
w = EnComboBox(parent)
w.setSizeAdjustPolicy(w.AdjustToMinimumContentsLengthWithIcon)
w.setMinimumContentsLength(25)
@ -678,7 +678,7 @@ class BulkText(BulkBase):
def setup_ui(self, parent):
values = self.all_values = list(self.db.all_custom(num=self.col_id))
values.sort(cmp = lambda x,y: cmp(x.lower(), y.lower()))
values.sort(key=sort_key)
if self.col_metadata['is_multiple']:
w = TagsLineEdit(parent, values)
w.setSizePolicy(QSizePolicy.Minimum, QSizePolicy.Preferred)

View File

@ -17,6 +17,7 @@ from calibre.gui2 import error_dialog
from calibre.gui2.progress_indicator import ProgressIndicator
from calibre.utils.config import dynamic
from calibre.utils.titlecase import titlecase
from calibre.utils.icu import sort_key
class MyBlockingBusy(QDialog):
@ -594,7 +595,7 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
def initalize_authors(self):
all_authors = self.db.all_authors()
all_authors.sort(cmp=lambda x, y : cmp(x[1].lower(), y[1].lower()))
all_authors.sort(key=lambda x : sort_key(x[1]))
for i in all_authors:
id, name = i
@ -604,7 +605,7 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
def initialize_series(self):
all_series = self.db.all_series()
all_series.sort(cmp=lambda x, y : cmp(x[1], y[1]))
all_series.sort(key=lambda x : sort_key(x[1]))
for i in all_series:
id, name = i
@ -613,7 +614,7 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
def initialize_publisher(self):
all_publishers = self.db.all_publishers()
all_publishers.sort(cmp=lambda x, y : cmp(x[1], y[1]))
all_publishers.sort(key=lambda x : sort_key(x[1]))
for i in all_publishers:
id, name = i

View File

@ -28,6 +28,7 @@ from calibre.ebooks.metadata.meta import get_metadata
from calibre.ebooks.metadata import MetaInformation
from calibre.utils.config import prefs, tweaks
from calibre.utils.date import qt_to_dt, local_tz, utcfromtimestamp
from calibre.utils.icu import sort_key
from calibre.customize.ui import run_plugins_on_import, get_isbndb_key
from calibre.gui2.preferences.social import SocialMetadata
from calibre.gui2.custom_column_widgets import populate_metadata_page
@ -660,7 +661,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
def initalize_authors(self):
all_authors = self.db.all_authors()
all_authors.sort(cmp=lambda x, y : cmp(x[1], y[1]))
all_authors.sort(key=lambda x : sort_key(x[1]))
for i in all_authors:
id, name = i
name = [name.strip().replace('|', ',') for n in name.split(',')]
@ -675,7 +676,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
def initialize_series(self):
self.series.setSizeAdjustPolicy(self.series.AdjustToContentsOnFirstShow)
all_series = self.db.all_series()
all_series.sort(cmp=lambda x, y : cmp(x[1], y[1]))
all_series.sort(key=lambda x : sort_key(x[1]))
series_id = self.db.series_id(self.row)
idx, c = None, 0
for i in all_series:
@ -692,7 +693,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
def initialize_publisher(self):
all_publishers = self.db.all_publishers()
all_publishers.sort(cmp=lambda x, y : cmp(x[1], y[1]))
all_publishers.sort(key=lambda x : sort_key(x[1]))
publisher_id = self.db.publisher_id(self.row)
idx, c = None, 0
for i in all_publishers:

View File

@ -8,6 +8,7 @@ from PyQt4.QtGui import QDialog
from calibre.gui2.dialogs.saved_search_editor_ui import Ui_SavedSearchEditor
from calibre.utils.search_query_parser import saved_searches
from calibre.utils.icu import sort_key
from calibre.gui2.dialogs.confirm_delete import confirm
class SavedSearchEditor(QDialog, Ui_SavedSearchEditor):
@ -34,7 +35,7 @@ class SavedSearchEditor(QDialog, Ui_SavedSearchEditor):
def populate_search_list(self):
self.search_name_box.clear()
for name in sorted(self.searches.keys()):
for name in sorted(self.searches.keys(), key=sort_key):
self.search_name_box.addItem(name)
def add_search(self):

View File

@ -8,6 +8,7 @@ from PyQt4.QtGui import QDialog, QDialogButtonBox
from calibre.gui2.dialogs.search_ui import Ui_Dialog
from calibre.library.caches import CONTAINS_MATCH, EQUALS_MATCH
from calibre.gui2 import gprefs
from calibre.utils.icu import sort_key
box_values = {}
@ -18,8 +19,7 @@ class SearchDialog(QDialog, Ui_Dialog):
self.setupUi(self)
self.mc = ''
searchables = sorted(db.field_metadata.searchable_fields(),
lambda x, y: cmp(x if x[0] != '#' else x[1:],
y if y[0] != '#' else y[1:]))
key=lambda x: sort_key(x if x[0] != '#' else x[1:]))
self.general_combo.addItems(searchables)
self.box_last_values = copy.deepcopy(box_values)

View File

@ -9,6 +9,7 @@ from PyQt4.QtGui import QDialog, QIcon, QListWidgetItem
from calibre.gui2.dialogs.tag_categories_ui import Ui_TagCategories
from calibre.gui2.dialogs.confirm_delete import confirm
from calibre.constants import islinux
from calibre.utils.icu import sort_key
class Item:
def __init__(self, name, label, index, icon, exists):
@ -85,7 +86,7 @@ class TagCategories(QDialog, Ui_TagCategories):
# remove any references to a category that no longer exists
del self.categories[cat][item]
self.all_items_sorted = sorted(self.all_items, cmp=lambda x,y: cmp(x.name.lower(), y.name.lower()))
self.all_items_sorted = sorted(self.all_items, key=lambda x: sort_key(x.name))
self.display_filtered_categories(0)
for v in category_names:
@ -135,7 +136,7 @@ class TagCategories(QDialog, Ui_TagCategories):
index = self.all_items[node.data(Qt.UserRole).toPyObject()].index
if index not in self.applied_items:
self.applied_items.append(index)
self.applied_items.sort(cmp=lambda x, y:cmp(self.all_items[x].name.lower(), self.all_items[y].name.lower()))
# applied_items holds indices into all_items; sort them by the item's name
# (sort_key expects the text, not the Item object).
self.applied_items.sort(key=lambda x:sort_key(self.all_items[x].name))
self.display_filtered_categories(None)
def unapply_tags(self, node=None):
@ -198,5 +199,5 @@ class TagCategories(QDialog, Ui_TagCategories):
self.categories[self.current_cat_name] = l
def populate_category_list(self):
for n in sorted(self.categories.keys(), cmp=lambda x,y: cmp(x.lower(), y.lower())):
for n in sorted(self.categories.keys(), key=sort_key):
self.category_box.addItem(n)

View File

@ -6,12 +6,10 @@ from PyQt4.QtGui import QDialog
from calibre.gui2.dialogs.tag_editor_ui import Ui_TagEditor
from calibre.gui2 import question_dialog, error_dialog
from calibre.constants import islinux
from calibre.utils.icu import sort_key
class TagEditor(QDialog, Ui_TagEditor):
def tag_cmp(self, x, y):
return cmp(x.lower(), y.lower())
def __init__(self, window, db, index=None):
QDialog.__init__(self, window)
Ui_TagEditor.__init__(self)
@ -25,7 +23,7 @@ class TagEditor(QDialog, Ui_TagEditor):
tags = []
if tags:
tags = [tag.strip() for tag in tags.split(',') if tag.strip()]
tags.sort(cmp=self.tag_cmp)
tags.sort(key=sort_key)
for tag in tags:
self.applied_tags.addItem(tag)
else:
@ -35,7 +33,7 @@ class TagEditor(QDialog, Ui_TagEditor):
all_tags = [tag for tag in self.db.all_tags()]
all_tags = list(set(all_tags))
all_tags.sort(cmp=self.tag_cmp)
all_tags.sort(key=sort_key)
for tag in all_tags:
if tag not in tags:
self.available_tags.addItem(tag)
@ -82,7 +80,7 @@ class TagEditor(QDialog, Ui_TagEditor):
self.tags.append(tag)
self.available_tags.takeItem(self.available_tags.row(item))
self.tags.sort(cmp=self.tag_cmp)
self.tags.sort(key=sort_key)
self.applied_tags.clear()
for tag in self.tags:
self.applied_tags.addItem(tag)
@ -96,14 +94,14 @@ class TagEditor(QDialog, Ui_TagEditor):
self.tags.remove(tag)
self.available_tags.addItem(tag)
self.tags.sort(cmp=self.tag_cmp)
self.tags.sort(key=sort_key)
self.applied_tags.clear()
for tag in self.tags:
self.applied_tags.addItem(tag)
items = [unicode(self.available_tags.item(x).text()) for x in
range(self.available_tags.count())]
items.sort(cmp=self.tag_cmp)
items.sort(key=sort_key)
self.available_tags.clear()
for item in items:
self.available_tags.addItem(item)
@ -117,7 +115,7 @@ class TagEditor(QDialog, Ui_TagEditor):
if tag not in self.tags:
self.tags.append(tag)
self.tags.sort(cmp=self.tag_cmp)
self.tags.sort(key=sort_key)
self.applied_tags.clear()
for tag in self.tags:
self.applied_tags.addItem(tag)

View File

@ -39,7 +39,7 @@ class ListWidgetItem(QListWidgetItem):
class TagListEditor(QDialog, Ui_TagListEditor):
def __init__(self, window, tag_to_match, data, compare):
def __init__(self, window, tag_to_match, data, key):
QDialog.__init__(self, window)
Ui_TagListEditor.__init__(self)
self.setupUi(self)
@ -54,7 +54,7 @@ class TagListEditor(QDialog, Ui_TagListEditor):
for k,v in data:
self.all_tags[v] = k
for tag in sorted(self.all_tags.keys(), cmp=compare):
for tag in sorted(self.all_tags.keys(), key=key):
item = ListWidgetItem(tag)
item.setData(Qt.UserRole, self.all_tags[tag])
self.available_tags.addItem(item)

View File

@ -13,6 +13,7 @@ from calibre.gui2 import error_dialog, question_dialog, open_url, \
choose_files, ResizableDialog, NONE
from calibre.gui2.widgets import PythonHighlighter
from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.icu import sort_key
class CustomRecipeModel(QAbstractListModel):
@ -256,7 +257,7 @@ class %(classname)s(%(base_class)s):
def add_builtin_recipe(self):
from calibre.web.feeds.recipes.collection import \
get_builtin_recipe_by_title, get_builtin_recipe_titles
items = sorted(get_builtin_recipe_titles())
items = sorted(get_builtin_recipe_titles(), key=sort_key)
title, ok = QInputDialog.getItem(self, _('Pick recipe'), _('Pick the recipe to customize'),

View File

@ -20,6 +20,7 @@ from calibre.gui2.widgets import EnLineEdit, TagsLineEdit
from calibre.utils.date import now, format_date
from calibre.utils.config import tweaks
from calibre.utils.formatter import validation_formatter
from calibre.utils.icu import sort_key
from calibre.gui2.dialogs.comments_dialog import CommentsDialog
class RatingDelegate(QStyledItemDelegate): # {{{
@ -173,7 +174,8 @@ class TagsDelegate(QStyledItemDelegate): # {{{
editor = TagsLineEdit(parent, self.db.all_tags())
else:
editor = TagsLineEdit(parent,
sorted(list(self.db.all_custom(label=self.db.field_metadata.key_to_label(col)))))
sorted(list(self.db.all_custom(label=self.db.field_metadata.key_to_label(col))),
key=sort_key))
return editor
else:
editor = EnLineEdit(parent)
@ -245,7 +247,8 @@ class CcTextDelegate(QStyledItemDelegate): # {{{
editor.setDecimals(2)
else:
editor = EnLineEdit(parent)
complete_items = sorted(list(m.db.all_custom(label=m.db.field_metadata.key_to_label(col))))
complete_items = sorted(list(m.db.all_custom(label=m.db.field_metadata.key_to_label(col))),
key=sort_key)
completer = QCompleter(complete_items, self)
completer.setCaseSensitivity(Qt.CaseInsensitive)
completer.setCompletionMode(QCompleter.PopupCompletion)

View File

@ -18,6 +18,7 @@ from calibre.ebooks.metadata import fmt_sidx, authors_to_string, string_to_autho
from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.config import tweaks, prefs
from calibre.utils.date import dt_factory, qt_to_dt, isoformat
from calibre.utils.icu import sort_key
from calibre.ebooks.metadata.meta import set_metadata as _set_metadata
from calibre.utils.search_query_parser import SearchQueryParser
from calibre.library.caches import _match, CONTAINS_MATCH, EQUALS_MATCH, \
@ -305,9 +306,10 @@ class BooksModel(QAbstractTableModel): # {{{
cdata = self.cover(idx)
if cdata:
data['cover'] = cdata
tags = self.db.tags(idx)
tags = list(self.db.get_tags(self.db.id(idx)))
if tags:
tags = tags.replace(',', ', ')
tags.sort(key=sort_key)
tags = ', '.join(tags)
else:
tags = _('None')
data[_('Tags')] = tags
@ -544,7 +546,7 @@ class BooksModel(QAbstractTableModel): # {{{
def tags(r, idx=-1):
tags = self.db.data[r][idx]
if tags:
return QVariant(', '.join(sorted(tags.split(','))))
return QVariant(', '.join(sorted(tags.split(','), key=sort_key)))
return None
def series_type(r, idx=-1, siix=-1):
@ -595,7 +597,7 @@ class BooksModel(QAbstractTableModel): # {{{
def text_type(r, mult=False, idx=-1):
text = self.db.data[r][idx]
if text and mult:
return QVariant(', '.join(sorted(text.split('|'))))
return QVariant(', '.join(sorted(text.split('|'),key=sort_key)))
return QVariant(text)
def number_type(r, idx=-1):
@ -1033,8 +1035,8 @@ class DeviceBooksModel(BooksModel): # {{{
x, y = int(self.db[x].size), int(self.db[y].size)
return cmp(x, y)
def tagscmp(x, y):
x = ','.join(sorted(getattr(self.db[x], 'device_collections', []))).lower()
y = ','.join(sorted(getattr(self.db[y], 'device_collections', []))).lower()
x = ','.join(sorted(getattr(self.db[x], 'device_collections', []),key=sort_key))
y = ','.join(sorted(getattr(self.db[y], 'device_collections', []),key=sort_key))
return cmp(x, y)
def libcmp(x, y):
x, y = self.db[x].in_library, self.db[y].in_library
@ -1211,7 +1213,7 @@ class DeviceBooksModel(BooksModel): # {{{
elif cname == 'collections':
tags = self.db[self.map[row]].device_collections
if tags:
tags.sort(cmp=lambda x,y: cmp(x.lower(), y.lower()))
tags.sort(key=sort_key)
return QVariant(', '.join(tags))
elif DEBUG and cname == 'inlibrary':
return QVariant(self.db[self.map[row]].in_library)

View File

@ -19,6 +19,7 @@ from calibre.utils.search_query_parser import saved_searches
from calibre.ebooks import BOOK_EXTENSIONS
from calibre.ebooks.oeb.iterator import is_supported
from calibre.constants import iswindows
from calibre.utils.icu import sort_key
class ConfigWidget(ConfigWidgetBase, Ui_Form):
@ -45,8 +46,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
choices = [(x.upper(), x) for x in output_formats]
r('output_format', prefs, choices=choices)
restrictions = sorted(saved_searches().names(),
cmp=lambda x,y: cmp(x.lower(), y.lower()))
restrictions = sorted(saved_searches().names(), key=sort_key)
choices = [('', '')] + [(x, x) for x in restrictions]
r('gui_restriction', db.prefs, choices=choices)
r('new_book_tags', prefs, setting=CommaSeparatedList)

View File

@ -152,7 +152,7 @@
<item>
<widget class="QLineEdit" name="composite_box">
<property name="sizePolicy">
<sizepolicy hsizetype="Maximum" vsizetype="Fixed">
<sizepolicy hsizetype="Expanding" vsizetype="Fixed">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
@ -211,27 +211,27 @@
<layout class="QHBoxLayout" name="horizontalLayout_2">
<item>
<widget class="QLineEdit" name="enum_box">
<property name="sizePolicy">
<sizepolicy hsizetype="Expanding" vsizetype="Fixed">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="toolTip">
<string>A comma-separated list of permitted values. The empty value is always
included, and is the default. For example, the list 'one,two,three' has
four values, the first of them being the empty value.</string>
</property>
<property name="sizePolicy">
<sizepolicy hsizetype="Maximum" vsizetype="Fixed">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
</widget>
</item>
<item>
<widget class="QLabel" name="enum_default_label">
<property name="text">
<string>Default: (nothing)</string>
</property>
<property name="toolTip">
<string>The empty string is always the first value</string>
</property>
<property name="text">
<string>Default: (nothing)</string>
</property>
</widget>
</item>
</layout>

View File

@ -17,6 +17,7 @@ from calibre.gui2.dialogs.confirm_delete import confirm
from calibre.gui2.dialogs.saved_search_editor import SavedSearchEditor
from calibre.gui2.dialogs.search import SearchDialog
from calibre.utils.search_query_parser import saved_searches
from calibre.utils.icu import sort_key
class SearchLineEdit(QLineEdit): # {{{
key_pressed = pyqtSignal(object)
@ -204,7 +205,7 @@ class SearchBox2(QComboBox): # {{{
self.blockSignals(yes)
self.line_edit.blockSignals(yes)
def set_search_string(self, txt, store_in_history=False):
def set_search_string(self, txt, store_in_history=False, emit_changed=True):
self.setFocus(Qt.OtherFocusReason)
if not txt:
self.clear()
@ -212,7 +213,8 @@ class SearchBox2(QComboBox): # {{{
self.normalize_state()
self.setEditText(txt)
self.line_edit.end(False)
self.changed.emit()
if emit_changed:
self.changed.emit()
self._do_search(store_in_history=store_in_history)
self.focus_to_library.emit()
@ -292,7 +294,7 @@ class SavedSearchBox(QComboBox): # {{{
self.search_box.clear()
self.setEditText(qname)
return
self.search_box.set_search_string(u'search:"%s"' % qname)
self.search_box.set_search_string(u'search:"%s"' % qname, emit_changed=False)
self.setEditText(qname)
self.setToolTip(saved_searches().lookup(qname))
@ -417,7 +419,7 @@ class SavedSearchBoxMixin(object): # {{{
b.setStatusTip(b.toolTip())
def saved_searches_changed(self):
p = sorted(saved_searches().names(), cmp=lambda x,y: cmp(x.lower(), y.lower()))
p = sorted(saved_searches().names(), key=sort_key)
t = unicode(self.search_restriction.currentText())
# rebuild the restrictions combobox using current saved searches
self.search_restriction.clear()

View File

@ -14,6 +14,7 @@ from PyQt4.Qt import QAbstractListModel, Qt, QKeySequence, QListView, \
from calibre.gui2 import NONE, error_dialog
from calibre.utils.config import XMLConfig
from calibre.utils.icu import sort_key
from calibre.gui2.shortcuts_ui import Ui_Frame
DEFAULTS = Qt.UserRole
@ -175,8 +176,7 @@ class Shortcuts(QAbstractListModel):
for k, v in shortcuts.items():
self.keys[k] = v[0]
self.order = list(shortcuts)
self.order.sort(cmp=lambda x,y : cmp(self.descriptions[x],
self.descriptions[y]))
self.order.sort(key=lambda x : sort_key(self.descriptions[x]))
self.sequences = {}
for k, v in self.keys.items():
self.sequences[k] = [QKeySequence(x) for x in v]

View File

@ -18,6 +18,7 @@ from PyQt4.Qt import Qt, QTreeView, QApplication, pyqtSignal, \
from calibre.ebooks.metadata import title_sort
from calibre.gui2 import config, NONE
from calibre.library.field_metadata import TagsIcons, category_icon_map
from calibre.utils.icu import sort_key
from calibre.utils.search_query_parser import saved_searches
from calibre.gui2 import error_dialog
from calibre.gui2.dialogs.confirm_delete import confirm
@ -225,7 +226,7 @@ class TagsView(QTreeView): # {{{
partial(self.context_menu_handler, action='hide', category=category))
if self.hidden_categories:
m = self.context_menu.addMenu(_('Show category'))
for col in sorted(self.hidden_categories, cmp=lambda x,y: cmp(x.lower(), y.lower())):
for col in sorted(self.hidden_categories, key=sort_key):
m.addAction(col,
partial(self.context_menu_handler, action='show', category=col))
@ -599,7 +600,8 @@ class TagsModel(QAbstractItemModel): # {{{
# Reconstruct the user categories, putting them into metadata
self.db.field_metadata.remove_dynamic_categories()
tb_cats = self.db.field_metadata
for user_cat in sorted(self.db.prefs.get('user_categories', {}).keys()):
for user_cat in sorted(self.db.prefs.get('user_categories', {}).keys(),
key=sort_key):
cat_name = user_cat+':' # add the ':' to avoid name collision
tb_cats.add_user_category(label=cat_name, name=user_cat)
if len(saved_searches().names()):
@ -878,13 +880,13 @@ class TagBrowserMixin(object): # {{{
db=self.library_view.model().db
if category == 'tags':
result = db.get_tags_with_ids()
compare = (lambda x,y:cmp(x.lower(), y.lower()))
key = sort_key
elif category == 'series':
result = db.get_series_with_ids()
compare = (lambda x,y:cmp(title_sort(x).lower(), title_sort(y).lower()))
key = lambda x:sort_key(title_sort(x))
elif category == 'publisher':
result = db.get_publishers_with_ids()
compare = (lambda x,y:cmp(x.lower(), y.lower()))
key = sort_key
else: # should be a custom field
cc_label = None
if category in db.field_metadata:
@ -892,9 +894,9 @@ class TagBrowserMixin(object): # {{{
result = db.get_custom_items_with_ids(label=cc_label)
else:
result = []
compare = (lambda x,y:cmp(x.lower(), y.lower()))
key = sort_key
d = TagListEditor(self, tag_to_match=tag, data=result, compare=compare)
d = TagListEditor(self, tag_to_match=tag, data=result, key=key)
d.exec_()
if d.result() == d.Accepted:
to_rename = d.to_rename # dict of new text to old id

View File

@ -796,11 +796,13 @@ class SortKey(object):
class SortKeyGenerator(object):
def __init__(self, fields, field_metadata, data):
from calibre.utils.icu import sort_key
self.field_metadata = field_metadata
self.orders = [-1 if x[1] else 1 for x in fields]
self.entries = [(x[0], field_metadata[x[0]]) for x in fields]
self.library_order = tweaks['title_series_sorting'] == 'library_order'
self.data = data
self.string_sort_key = sort_key
def __call__(self, record):
values = tuple(self.itervals(self.data[record]))
@ -821,17 +823,14 @@ class SortKeyGenerator(object):
if val is None:
val = ('', 1)
else:
val = val.lower()
if self.library_order:
val = title_sort(val)
sidx_fm = self.field_metadata[name + '_index']
sidx = record[sidx_fm['rec_index']]
val = (val, sidx)
val = (self.string_sort_key(val), sidx)
elif dt in ('text', 'comments', 'composite', 'enumeration'):
if val is None:
val = ''
val = val.lower()
val = self.string_sort_key(val)
elif dt == 'bool':
val = {True: 1, False: 2, None: 3}.get(val, 3)

View File

@ -14,6 +14,7 @@ from operator import itemgetter
from PyQt4.QtGui import QImage
from calibre.ebooks.metadata import title_sort, author_to_author_sort
from calibre.ebooks.metadata.opf2 import metadata_to_opf
from calibre.library.database import LibraryDatabase
@ -33,6 +34,7 @@ from calibre import isbytestring
from calibre.utils.filenames import ascii_filename
from calibre.utils.date import utcnow, now as nowf, utcfromtimestamp
from calibre.utils.config import prefs, tweaks
from calibre.utils.icu import sort_key
from calibre.utils.search_query_parser import saved_searches, set_saved_searches
from calibre.ebooks import BOOK_EXTENSIONS, check_ebook_format
from calibre.utils.magick.draw import save_cover_data_to
@ -287,7 +289,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
# Assumption is that someone else will fix them if they change.
self.field_metadata.remove_dynamic_categories()
tb_cats = self.field_metadata
for user_cat in sorted(self.prefs.get('user_categories', {}).keys()):
for user_cat in sorted(self.prefs.get('user_categories', {}).keys(), key=sort_key):
cat_name = user_cat+':' # add the ':' to avoid name collision
tb_cats.add_user_category(label=cat_name, name=user_cat)
if len(saved_searches().names()):
@ -1065,7 +1067,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
if sort == 'popularity':
query += ' ORDER BY count DESC, sort ASC'
elif sort == 'name':
query += ' ORDER BY sort ASC'
query += ' ORDER BY sort COLLATE icucollate'
else:
query += ' ORDER BY avg_rating DESC, sort ASC'
data = self.conn.get(query)
@ -1137,6 +1139,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
if sort == 'popularity':
categories['formats'].sort(key=lambda x: x.count, reverse=True)
else: # no ratings exist to sort on
# No need for ICU here.
categories['formats'].sort(key = lambda x:x.name)
#### Now do the user-defined categories. ####
@ -1151,7 +1154,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
for c in categories.keys():
taglist[c] = dict(map(lambda t:(t.name, t), categories[c]))
for user_cat in sorted(user_categories.keys()):
for user_cat in sorted(user_categories.keys(), key=sort_key):
items = []
for (name,label,ign) in user_categories[user_cat]:
if label in taglist and name in taglist[label]:
@ -1167,7 +1170,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
sorted(items, key=lambda x: x.count, reverse=True)
elif sort == 'name':
categories[cat_name] = \
sorted(items, key=lambda x: x.sort.lower())
sorted(items, key=lambda x: sort_key(x.sort))
else:
categories[cat_name] = \
sorted(items, key=lambda x:x.avg_rating, reverse=True)

View File

@ -16,6 +16,7 @@ from calibre import isbytestring, force_unicode, fit_image, \
from calibre.utils.ordered_dict import OrderedDict
from calibre.utils.filenames import ascii_filename
from calibre.utils.config import prefs
from calibre.utils.icu import sort_key
from calibre.utils.magick import Image
from calibre.library.comments import comments_to_html
from calibre.library.server import custom_fields_to_display
@ -273,7 +274,7 @@ class BrowseServer(object):
opts = ['<option %svalue="%s">%s</option>' % (
'selected="selected" ' if k==sort else '',
xml(k), xml(n), ) for k, n in
sorted(sort_opts, key=operator.itemgetter(1)) if k and n]
sorted(sort_opts, key=lambda x: sort_key(operator.itemgetter(1)(x))) if k and n]
ans = ans.replace('{sort_select_options}', ('\n'+' '*20).join(opts))
lp = self.db.library_path
if isbytestring(lp):
@ -337,8 +338,7 @@ class BrowseServer(object):
return category_meta[x]['name'].lower()
displayed_custom_fields = custom_fields_to_display(self.db)
for category in sorted(categories,
cmp=lambda x,y: cmp(getter(x), getter(y))):
for category in sorted(categories, key=lambda x: sort_key(getter(x))):
if len(categories[category]) == 0:
continue
if category == 'formats':
@ -375,12 +375,7 @@ class BrowseServer(object):
def browse_sort_categories(self, items, sort):
if sort not in ('rating', 'name', 'popularity'):
sort = 'name'
def sorter(x):
ans = getattr(x, 'sort', x.name)
if hasattr(ans, 'upper'):
ans = ans.upper()
return ans
items.sort(key=sorter)
items.sort(key=lambda x: sort_key(getattr(x, 'sort', x.name)))
if sort == 'popularity':
items.sort(key=operator.attrgetter('count'), reverse=True)
elif sort == 'rating':
@ -703,7 +698,7 @@ class BrowseServer(object):
args[field]
fields.append((m['name'], r))
fields.sort(key=lambda x: x[0].lower())
fields.sort(key=lambda x: sort_key(x[0]))
fields = [u'<div class="field">{0}</div>'.format(f[1]) for f in
fields]
fields = u'<div class="fields">%s</div>'%('\n\n'.join(fields))

View File

@ -21,6 +21,7 @@ from calibre.constants import __appname__
from calibre import human_readable, isbytestring
from calibre.utils.date import utcfromtimestamp
from calibre.utils.filenames import ascii_filename
from calibre.utils.icu import sort_key
def CLASS(*args, **kwargs): # class is a reserved word in Python
kwargs['class'] = ' '.join(args)
@ -211,8 +212,7 @@ class MobileServer(object):
CFM = self.db.field_metadata
CKEYS = [key for key in sorted(custom_fields_to_display(self.db),
cmp=lambda x,y: cmp(CFM[x]['name'].lower(),
CFM[y]['name'].lower()))]
key=lambda x:sort_key(CFM[x]['name']))]
# This method uses its own book dict, not the Metadata dict. The loop
# below could be changed to use db.get_metadata instead of reading
# info directly from the record made by the view, but it doesn't seem

View File

@ -20,6 +20,7 @@ from calibre.library.comments import comments_to_html
from calibre.library.server import custom_fields_to_display
from calibre.library.server.utils import format_tag_string, Offsets
from calibre import guess_type
from calibre.utils.icu import sort_key
from calibre.utils.ordered_dict import OrderedDict
BASE_HREFS = {
@ -279,8 +280,7 @@ class AcquisitionFeed(NavFeed):
NavFeed.__init__(self, id_, updated, version, offsets, page_url, up_url)
CFM = db.field_metadata
CKEYS = [key for key in sorted(custom_fields_to_display(db),
cmp=lambda x,y: cmp(CFM[x]['name'].lower(),
CFM[y]['name'].lower()))]
key=lambda x: sort_key(CFM[x]['name']))]
for item in items:
self.root.append(ACQUISITION_ENTRY(item, version, db, updated,
CFM, CKEYS, prefix))
@ -492,7 +492,7 @@ class OPDSServer(object):
val = 'A'
starts.add(val[0].upper())
category_groups = OrderedDict()
for x in sorted(starts, cmp=lambda x,y:cmp(x.lower(), y.lower())):
for x in sorted(starts, key=sort_key):
category_groups[x] = len([y for y in items if
getattr(y, 'sort', y.name).startswith(x)])
items = [Group(x, y) for x, y in category_groups.items()]
@ -571,8 +571,7 @@ class OPDSServer(object):
]
def getter(x):
return category_meta[x]['name'].lower()
for category in sorted(categories,
cmp=lambda x,y: cmp(getter(x), getter(y))):
for category in sorted(categories, key=lambda x: sort_key(getter(x))):
if len(categories[category]) == 0:
continue
if category == 'formats':

View File

@ -13,6 +13,7 @@ import cherrypy
from calibre import strftime as _strftime, prints, isbytestring
from calibre.utils.date import now as nowf
from calibre.utils.config import tweaks
from calibre.utils.icu import sort_key
class Offsets(object):
'Calculate offsets for a paginated view'
@ -73,7 +74,7 @@ def format_tag_string(tags, sep, ignore_max=False, no_tag_count=False):
tlist = [t.strip() for t in tags.split(sep)]
else:
tlist = []
tlist.sort(cmp=lambda x,y:cmp(x.lower(), y.lower()))
tlist.sort(key=sort_key)
if len(tlist) > MAX:
tlist = tlist[:MAX]+['...']
if no_tag_count:

View File

@ -17,6 +17,7 @@ from calibre.ebooks.metadata import fmt_sidx
from calibre.constants import preferred_encoding
from calibre import isbytestring
from calibre.utils.filenames import ascii_filename
from calibre.utils.icu import sort_key
E = ElementMaker()
@ -101,8 +102,7 @@ class XMLServer(object):
CFM = self.db.field_metadata
CKEYS = [key for key in sorted(custom_fields_to_display(self.db),
cmp=lambda x,y: cmp(CFM[x]['name'].lower(),
CFM[y]['name'].lower()))]
key=lambda x: sort_key(CFM[x]['name']))]
custcols = []
for key in CKEYS:
def concat(name, val):

View File

@ -115,6 +115,9 @@ def pynocase(one, two, encoding='utf-8'):
pass
return cmp(one.lower(), two.lower())
def icu_collator(s1, s2, func=None):
return cmp(func(unicode(s1)), func(unicode(s2)))
def load_c_extensions(conn, debug=DEBUG):
try:
conn.enable_load_extension(True)
@ -166,6 +169,8 @@ class DBThread(Thread):
self.conn.create_function('uuid4', 0, lambda : str(uuid.uuid4()))
# Dummy functions for dynamically created filters
self.conn.create_function('books_list_filter', 1, lambda x: 1)
from calibre.utils.icu import sort_key
self.conn.create_collation('icucollate', partial(icu_collator, func=sort_key))
def run(self):
try:

421
src/calibre/utils/icu.c Normal file
View File

@ -0,0 +1,421 @@
#define UNICODE
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <unicode/utypes.h>
#include <unicode/uclean.h>
#include <unicode/ucol.h>
#include <unicode/ustring.h>
// Collator object definition {{{
// Instance layout of icu.Collator: the Python object header plus one ICU
// collator handle.
typedef struct {
PyObject_HEAD
// Type-specific fields go here.
// Handle obtained from ucol_open() in icu_Collator_new and released with
// ucol_close() in icu_Collator_dealloc; NULL when no collator is held.
UCollator *collator;
} icu_Collator;
// tp_dealloc for icu.Collator: close the ICU collator, if any, then free the
// Python object through its type's tp_free slot.
static void
icu_Collator_dealloc(icu_Collator* self)
{
    UCollator *handle = self->collator;

    if (handle != NULL) {
        ucol_close(handle);
    }
    // Never leave a closed handle reachable through the object.
    self->collator = NULL;
    self->ob_type->tp_free((PyObject*)self);
}
// tp_new for icu.Collator. Takes a single locale string and opens an ICU
// collator for it. Returns a new reference, or NULL with a Python exception
// set when argument parsing, allocation or ucol_open() fails.
static PyObject *
icu_Collator_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
    icu_Collator *self;
    const char *loc;
    UErrorCode status = U_ZERO_ERROR;

    if (!PyArg_ParseTuple(args, "s", &loc)) return NULL;

    self = (icu_Collator *)type->tp_alloc(type, 0);
    if (self == NULL) return NULL;

    self->collator = ucol_open(loc, &status);
    if (self->collator != NULL && !U_FAILURE(status)) {
        return (PyObject *)self;
    }

    // Opening failed: report it and drop the half-built object. The handle is
    // cleared first so that dealloc does not try to close it.
    PyErr_SetString(PyExc_Exception, "Failed to create collator.");
    self->collator = NULL;
    Py_DECREF(self);
    return NULL;
}
// Collator.display_name {{{
// Getter for Collator.display_name: the English display name of the locale
// this collator actually uses. Returns a new string object, or NULL with an
// exception set on failure.
static PyObject *
icu_Collator_display_name(icu_Collator *self, void *closure) {
    const char *loc = NULL;
    UErrorCode status = U_ZERO_ERROR;
    UChar dname[400];
    // A UTF-16 code unit expands to at most 3 UTF-8 bytes, so this can hold
    // any name that fits in dname, plus a terminator.
    char buf[3 * 400 + 1];
    int32_t ulen = 0, blen = 0;

    loc = ucol_getLocaleByType(self->collator, ULOC_ACTUAL_LOCALE, &status);
    if (loc == NULL || U_FAILURE(status)) {
        PyErr_SetString(PyExc_Exception, "Failed to get actual locale"); return NULL;
    }

    // Use the full capacity of dname and keep the returned length: ICU only
    // raises a warning (not a failure) when the output exactly fills the
    // buffer, in which case the text is NOT NUL-terminated.
    ulen = ucol_getDisplayName(loc, "en", dname, (int32_t)(sizeof(dname)/sizeof(dname[0])), &status);
    // NOTE: kept as MemoryError for compatibility with existing callers.
    if (U_FAILURE(status)) return PyErr_NoMemory();

    // Convert exactly ulen units and record the byte length, so neither buffer
    // is ever read as a NUL-terminated string.
    u_strToUTF8(buf, (int32_t)sizeof(buf) - 1, &blen, dname, ulen, &status);
    if (U_FAILURE(status)) {
        PyErr_SetString(PyExc_Exception, "Failed to convert dname to UTF-8"); return NULL;
    }
    return Py_BuildValue("s#", buf, (Py_ssize_t)blen);
}
// }}}
// Collator.actual_locale {{{
// Getter for Collator.actual_locale: the locale name ICU reports as actually
// backing this collator. Returns a new string object, or NULL with an
// exception set when ICU cannot supply it.
static PyObject *
icu_Collator_actual_locale(icu_Collator *self, void *closure) {
    UErrorCode status = U_ZERO_ERROR;
    const char *loc = ucol_getLocaleByType(self->collator, ULOC_ACTUAL_LOCALE, &status);

    if (loc != NULL && !U_FAILURE(status)) {
        return Py_BuildValue("s", loc);
    }

    PyErr_SetString(PyExc_Exception, "Failed to get actual locale");
    return NULL;
}
// }}}
// Collator.sort_key {{{
// Collator.sort_key(text) -> bytestring sort key for `text`.
//
// The argument is encoded to UTF-8 by the argument parser, converted to
// UTF-16 for ICU and passed to ucol_getSortKey(). If the UTF-8 -> UTF-16
// conversion fails an empty bytestring is returned (best effort). Returns
// NULL with an exception set on argument or memory errors.
//
// Every exit after argument parsing goes through the single cleanup label so
// that `input`, `buf` and `buf2` are released on all paths, including
// allocation failures.
static PyObject *
icu_Collator_sort_key(icu_Collator *self, PyObject *args, PyObject *kwargs) {
    char *input = NULL;
    Py_ssize_t sz;
    UChar *buf = NULL;
    uint8_t *buf2 = NULL;
    PyObject *ans = NULL;
    int32_t key_size = 0;
    UErrorCode status = U_ZERO_ERROR;

    // "es" allocates `input`; it must be released with PyMem_Free().
    if (!PyArg_ParseTuple(args, "es", "UTF-8", &input)) return NULL;

    sz = strlen(input);

    buf = (UChar*)calloc(sz*4 + 1, sizeof(UChar));
    if (buf == NULL) { PyErr_NoMemory(); goto end; }

    u_strFromUTF8(buf, sz*4 + 1, &key_size, input, sz, &status);

    if (!U_SUCCESS(status)) {
        // Conversion failed: fall back to the empty key rather than raising.
        ans = PyBytes_FromString("");
        goto end;
    }

    buf2 = (uint8_t*)calloc(7*sz+1, sizeof(uint8_t));
    if (buf2 == NULL) { PyErr_NoMemory(); goto end; }

    key_size = ucol_getSortKey(self->collator, buf, -1, buf2, 7*sz+1);

    if (key_size == 0) {
        ans = PyBytes_FromString("");
    } else {
        if (key_size >= 7*sz+1) {
            // First guess was too small; ICU reported the size it needs.
            free(buf2);
            buf2 = (uint8_t*)calloc(key_size+1, sizeof(uint8_t));
            if (buf2 == NULL) { PyErr_NoMemory(); goto end; }
            ucol_getSortKey(self->collator, buf, -1, buf2, key_size+1);
        }
        ans = PyBytes_FromString((char *)buf2);
    }

end:
    // free(NULL) and PyMem_Free(NULL) are no-ops, so this is safe on all paths.
    free(buf2);
    free(buf);
    PyMem_Free(input);
    // When ans is NULL an exception has already been set above or by
    // PyBytes_FromString().
    return ans;
} // }}}
// Collator.strcmp {{{
// Collator.strcmp(a, b) -> integer result of ucol_strcoll() for the two
// strings under this collator.
//
// Both arguments are encoded to UTF-8 by the argument parser and converted to
// UTF-16 for ICU. If either conversion fails the strings are reported as equal
// (UCOL_EQUAL). Returns NULL with an exception set on argument or memory
// errors.
//
// All buffers are released through one cleanup label so that an allocation
// failure no longer leaks the parser-allocated strings or the other buffer.
static PyObject *
icu_Collator_strcmp(icu_Collator *self, PyObject *args, PyObject *kwargs) {
    char *a_ = NULL, *b_ = NULL;
    size_t asz, bsz;
    UChar *a = NULL, *b = NULL;
    PyObject *ans = NULL;
    UErrorCode status = U_ZERO_ERROR;
    UCollationResult res = UCOL_EQUAL;

    // "es" allocates a_ and b_; they must be released with PyMem_Free().
    if (!PyArg_ParseTuple(args, "eses", "UTF-8", &a_, "UTF-8", &b_)) return NULL;

    asz = strlen(a_); bsz = strlen(b_);

    a = (UChar*)calloc(asz*4 + 1, sizeof(UChar));
    b = (UChar*)calloc(bsz*4 + 1, sizeof(UChar));
    if (a == NULL || b == NULL) { PyErr_NoMemory(); goto end; }

    // The second call is a no-op if the first one already set a failure status.
    u_strFromUTF8(a, asz*4 + 1, NULL, a_, asz, &status);
    u_strFromUTF8(b, bsz*4 + 1, NULL, b_, bsz, &status);

    if (U_SUCCESS(status))
        res = ucol_strcoll(self->collator, a, -1, b, -1);

    ans = Py_BuildValue("i", res);

end:
    // free(NULL) and PyMem_Free(NULL) are no-ops, so this is safe on all paths.
    free(a); free(b);
    PyMem_Free(a_); PyMem_Free(b_);
    return ans;
} // }}}
// Method table for icu.Collator instances. Both entries take positional
// arguments only (METH_VARARGS); the table ends with a NULL sentinel.
static PyMethodDef icu_Collator_methods[] = {
{"sort_key", (PyCFunction)icu_Collator_sort_key, METH_VARARGS,
"sort_key(unicode object) -> Return a sort key for the given object as a bytestring. The idea is that these bytestring will sort using the builtin cmp function, just like the original unicode strings would sort in the current locale with ICU."
},
{"strcmp", (PyCFunction)icu_Collator_strcmp, METH_VARARGS,
"strcmp(unicode object, unicode object) -> strcmp(a, b) <=> cmp(sorty_key(a), sort_key(b)), but faster."
},
{NULL} /* Sentinel */
};
// Attribute table for icu.Collator instances. Both attributes have a getter
// and no setter (NULL in the setter slot); the table ends with a NULL sentinel.
static PyGetSetDef icu_Collator_getsetters[] = {
{(char *)"actual_locale",
(getter)icu_Collator_actual_locale, NULL,
(char *)"Actual locale used by this collator.",
NULL},
{(char *)"display_name",
(getter)icu_Collator_display_name, NULL,
(char *)"Display name of this collator in English. The name reflects the actual data source used.",
NULL},
{NULL} /* Sentinel */
};
// Type object for icu.Collator. The only slots filled in are the name, the
// instance size, the destructor, the flags (default + subclassable), the doc
// string, the method and getset tables, and tp_new; everything else is 0.
static PyTypeObject icu_CollatorType = { // {{{
PyObject_HEAD_INIT(NULL)
0, /*ob_size*/
"icu.Collator", /*tp_name*/
sizeof(icu_Collator), /*tp_basicsize*/
0, /*tp_itemsize*/
(destructor)icu_Collator_dealloc, /*tp_dealloc*/
0, /*tp_print*/
0, /*tp_getattr*/
0, /*tp_setattr*/
0, /*tp_compare*/
0, /*tp_repr*/
0, /*tp_as_number*/
0, /*tp_as_sequence*/
0, /*tp_as_mapping*/
0, /*tp_hash */
0, /*tp_call*/
0, /*tp_str*/
0, /*tp_getattro*/
0, /*tp_setattro*/
0, /*tp_as_buffer*/
Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
"Collator", /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
icu_Collator_methods, /* tp_methods */
0, /* tp_members */
icu_Collator_getsetters, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
0, /* tp_init */
0, /* tp_alloc */
icu_Collator_new, /* tp_new */
}; // }}}
// }}
// }}}
// }}}
// Module initialization {{{
// upper {{{
// upper(locale, text) -> unicode object holding `text` upper-cased with ICU
// according to the rules of `locale`.
//
// `text` is encoded to UTF-8 by the argument parser, converted to UTF-16,
// case-mapped and converted back. If any ICU step fails, or the mapped string
// is empty, the original input is returned unchanged. Returns NULL with an
// exception set on argument or memory errors.
//
// All buffers are released through one cleanup label so that no error path
// leaks `input`, `buf`, `buf2` or `buf3`.
static PyObject *
icu_upper(PyObject *self, PyObject *args) {
    char *input = NULL, *ans, *buf3 = NULL;
    const char *loc;
    size_t sz;
    UChar *buf = NULL, *buf2 = NULL;
    PyObject *ret = NULL;
    UErrorCode status = U_ZERO_ERROR;

    // "es" allocates `input`; it must be released with PyMem_Free().
    if (!PyArg_ParseTuple(args, "ses", &loc, "UTF-8", &input)) return NULL;

    sz = strlen(input);

    buf = (UChar*)calloc(sz*4 + 1, sizeof(UChar));
    buf2 = (UChar*)calloc(sz*8 + 1, sizeof(UChar));
    if (buf == NULL || buf2 == NULL) { PyErr_NoMemory(); goto end; }

    // Capacities are one less than allocated so the zeroed last element always
    // terminates the string. The second call is a no-op if the first failed.
    u_strFromUTF8(buf, sz*4, NULL, input, sz, &status);
    u_strToUpper(buf2, sz*8, buf, -1, loc, &status);

    ans = input;
    sz = u_strlen(buf2);
    if (U_SUCCESS(status) && sz > 0) {
        buf3 = (char*)calloc(sz*5+1, sizeof(char));
        if (buf3 == NULL) { PyErr_NoMemory(); goto end; }
        u_strToUTF8(buf3, sz*5, NULL, buf2, -1, &status);
        if (U_SUCCESS(status)) ans = buf3;
    }

    // On failure PyUnicode_DecodeUTF8 has already set the exception.
    ret = PyUnicode_DecodeUTF8(ans, strlen(ans), "replace");

end:
    // free(NULL) and PyMem_Free(NULL) are no-ops, so this is safe on all paths.
    free(buf);
    free(buf2);
    free(buf3);
    PyMem_Free(input);
    return ret;
}
// lower {{{
// lower(locale, text) -> unicode object holding `text` lower-cased with ICU
// according to the rules of `locale`.
//
// `text` is encoded to UTF-8 by the argument parser, converted to UTF-16,
// case-mapped and converted back. If any ICU step fails, or the mapped string
// is empty, the original input is returned unchanged. Returns NULL with an
// exception set on argument or memory errors.
//
// All buffers are released through one cleanup label so that no error path
// leaks `input`, `buf`, `buf2` or `buf3`.
static PyObject *
icu_lower(PyObject *self, PyObject *args) {
    char *input = NULL, *ans, *buf3 = NULL;
    const char *loc;
    size_t sz;
    UChar *buf = NULL, *buf2 = NULL;
    PyObject *ret = NULL;
    UErrorCode status = U_ZERO_ERROR;

    // "es" allocates `input`; it must be released with PyMem_Free().
    if (!PyArg_ParseTuple(args, "ses", &loc, "UTF-8", &input)) return NULL;

    sz = strlen(input);

    buf = (UChar*)calloc(sz*4 + 1, sizeof(UChar));
    buf2 = (UChar*)calloc(sz*8 + 1, sizeof(UChar));
    if (buf == NULL || buf2 == NULL) { PyErr_NoMemory(); goto end; }

    // Capacities are one less than allocated so the zeroed last element always
    // terminates the string. The second call is a no-op if the first failed.
    u_strFromUTF8(buf, sz*4, NULL, input, sz, &status);
    u_strToLower(buf2, sz*8, buf, -1, loc, &status);

    ans = input;
    sz = u_strlen(buf2);
    if (U_SUCCESS(status) && sz > 0) {
        buf3 = (char*)calloc(sz*5+1, sizeof(char));
        if (buf3 == NULL) { PyErr_NoMemory(); goto end; }
        u_strToUTF8(buf3, sz*5, NULL, buf2, -1, &status);
        if (U_SUCCESS(status)) ans = buf3;
    }

    // On failure PyUnicode_DecodeUTF8 has already set the exception.
    ret = PyUnicode_DecodeUTF8(ans, strlen(ans), "replace");

end:
    // free(NULL) and PyMem_Free(NULL) are no-ops, so this is safe on all paths.
    free(buf);
    free(buf2);
    free(buf3);
    PyMem_Free(input);
    return ret;
}
// title {{{
// title(locale, text) -> unicode object holding `text` title-cased with ICU
// according to the rules of `locale`. No break iterator is supplied (NULL is
// passed to u_strToTitle).
//
// `text` is encoded to UTF-8 by the argument parser, converted to UTF-16,
// case-mapped and converted back. If any ICU step fails, or the mapped string
// is empty, the original input is returned unchanged. Returns NULL with an
// exception set on argument or memory errors.
//
// All buffers are released through one cleanup label so that no error path
// leaks `input`, `buf`, `buf2` or `buf3`.
static PyObject *
icu_title(PyObject *self, PyObject *args) {
    char *input = NULL, *ans, *buf3 = NULL;
    const char *loc;
    size_t sz;
    UChar *buf = NULL, *buf2 = NULL;
    PyObject *ret = NULL;
    UErrorCode status = U_ZERO_ERROR;

    // "es" allocates `input`; it must be released with PyMem_Free().
    if (!PyArg_ParseTuple(args, "ses", &loc, "UTF-8", &input)) return NULL;

    sz = strlen(input);

    buf = (UChar*)calloc(sz*4 + 1, sizeof(UChar));
    buf2 = (UChar*)calloc(sz*8 + 1, sizeof(UChar));
    if (buf == NULL || buf2 == NULL) { PyErr_NoMemory(); goto end; }

    // Capacities are one less than allocated so the zeroed last element always
    // terminates the string. The second call is a no-op if the first failed.
    u_strFromUTF8(buf, sz*4, NULL, input, sz, &status);
    u_strToTitle(buf2, sz*8, buf, -1, NULL, loc, &status);

    ans = input;
    sz = u_strlen(buf2);
    if (U_SUCCESS(status) && sz > 0) {
        buf3 = (char*)calloc(sz*5+1, sizeof(char));
        if (buf3 == NULL) { PyErr_NoMemory(); goto end; }
        u_strToUTF8(buf3, sz*5, NULL, buf2, -1, &status);
        if (U_SUCCESS(status)) ans = buf3;
    }

    // On failure PyUnicode_DecodeUTF8 has already set the exception.
    ret = PyUnicode_DecodeUTF8(ans, strlen(ans), "replace");

end:
    // free(NULL) and PyMem_Free(NULL) are no-ops, so this is safe on all paths.
    free(buf);
    free(buf2);
    free(buf3);
    PyMem_Free(input);
    return ret;
}
// Module-level function table for the icu module. All three functions take
// positional arguments only (METH_VARARGS): a locale string followed by the
// text to transform. The table ends with a NULL sentinel.
static PyMethodDef icu_methods[] = {
{"upper", icu_upper, METH_VARARGS,
"upper(locale, unicode object) -> upper cased unicode object using locale rules."
},
{"lower", icu_lower, METH_VARARGS,
"lower(locale, unicode object) -> lower cased unicode object using locale rules."
},
{"title", icu_title, METH_VARARGS,
"title(locale, unicode object) -> Title cased unicode object using locale rules."
},
{NULL} /* Sentinel */
};
PyMODINIT_FUNC
initicu(void)
{
PyObject* m;
UErrorCode status = U_ZERO_ERROR;
u_init(&status);
if (PyType_Ready(&icu_CollatorType) < 0)
return;
m = Py_InitModule3("icu", icu_methods,
"Wrapper for the ICU internationalization library");
Py_INCREF(&icu_CollatorType);
PyModule_AddObject(m, "Collator", (PyObject *)&icu_CollatorType);
// uint8_t must be the same size as char
PyModule_AddIntConstant(m, "ok", (U_SUCCESS(status) && sizeof(uint8_t) == sizeof(char)) ? 1 : 0);
}
// }}}

229
src/calibre/utils/icu.py Normal file
View File

@ -0,0 +1,229 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
# Setup code {{{
from functools import partial
from calibre.constants import plugins
from calibre.utils.config import tweaks
# Lazily initialised module state: the ICU plugin module and the Collator
# built from it. Both stay None until load_icu()/load_collator() succeed.
_icu = _collator = None
# Cached result of get_locale(); None until it has been computed.
_locale = None
# Sort keys returned for falsy input: a unicode string by the pure-python key
# functions, a bytestring by the ICU-based ones.
_none = u''
_none2 = b''
def get_locale():
    '''
    Return the locale name used for ICU operations, computing it on first use.

    A non-empty ``locale_for_sorting`` tweak takes precedence; otherwise the
    value returned by ``get_lang()`` is used. The result is cached in the
    module global ``_locale``.
    '''
    global _locale
    if _locale is not None:
        return _locale
    from calibre.utils.localization import get_lang
    override = tweaks['locale_for_sorting']
    _locale = override if override else get_lang()
    return _locale
def load_icu():
global _icu
if _icu is None:
_icu = plugins['icu'][0]
if _icu is None:
print plugins['icu'][1]
else:
if not _icu.ok:
print 'icu not ok'
_icu = None
return _icu
def load_collator():
    '''
    Return the shared Collator for ``get_locale()``, creating it on first use.

    The collator is cached in the module global ``_collator``. None is
    returned when ``load_icu()`` yields no module.
    '''
    global _collator
    if _collator is not None:
        return _collator
    icu_module = load_icu()
    if icu_module is None:
        return _collator
    _collator = icu_module.Collator(get_locale())
    return _collator
def py_sort_key(obj):
    '''
    Pure-python, case-insensitive sort key: the lower-cased string, or the
    empty unicode key ``_none`` for falsy input.
    '''
    return obj.lower() if obj else _none
def icu_sort_key(collator, obj):
    '''
    Case-insensitive ICU sort key: ``collator.sort_key`` applied to the
    lower-cased string, or the empty bytes key ``_none2`` for falsy input.
    '''
    if obj:
        lowered = obj.lower()
        return collator.sort_key(lowered)
    return _none2
def py_case_sensitive_sort_key(obj):
    '''
    Pure-python, case-sensitive sort key: the string itself, or the empty
    unicode key ``_none`` for falsy input.
    '''
    return obj or _none
def icu_case_sensitive_sort_key(collator, obj):
    '''
    Case-sensitive ICU sort key: ``collator.sort_key`` applied to the string
    as given, or the empty bytes key ``_none2`` for falsy input.
    '''
    if obj:
        return collator.sort_key(obj)
    return _none2
def icu_strcmp(collator, a, b):
    '''
    Case-insensitive comparison: lower-case both strings, then return the
    result of ``collator.strcmp`` on them.
    '''
    left, right = a.lower(), b.lower()
    return collator.strcmp(left, right)
def py_strcmp(a, b):
return cmp(a.lower(), b.lower())
def icu_case_sensitive_strcmp(collator, a, b):
    '''
    Case-sensitive comparison: the result of ``collator.strcmp`` on the two
    strings exactly as given.
    '''
    compare = collator.strcmp
    return compare(a, b)
# Try to load the ICU plugin and build the collator once, at import time.
load_icu()
load_collator()
# True when either step failed; used below to choose between the ICU-backed
# string functions and their pure-python fallbacks.
_icu_not_ok = _icu is None or _collator is None
# }}}
################# The string functions ########################################
# Public string functions. Each name is bound once, at import time, to either
# the pure-python fallback (ICU unavailable) or the ICU implementation.
#
# The icu_* helpers take the collator as their first argument, so every one of
# them is bound to _collator with partial(); callers always use the same
# signatures as the python fallbacks: key(obj) and strcmp(a, b).
sort_key = py_sort_key if _icu_not_ok else partial(icu_sort_key, _collator)

strcmp = py_strcmp if _icu_not_ok else partial(icu_strcmp, _collator)

# Previously bound without the collator, which made the ICU variant raise
# TypeError when called as case_sensitive_sort_key(obj).
case_sensitive_sort_key = py_case_sensitive_sort_key if _icu_not_ok else \
        partial(icu_case_sensitive_sort_key, _collator)

# Same fix as above: bind the collator so that the call signature matches cmp.
case_sensitive_strcmp = cmp if _icu_not_ok else \
        partial(icu_case_sensitive_strcmp, _collator)

# Case transforms use the locale from get_locale() when ICU is available.
upper = (lambda s: s.upper()) if _icu_not_ok else \
        partial(_icu.upper, get_locale())

lower = (lambda s: s.lower()) if _icu_not_ok else \
        partial(_icu.lower, get_locale())

title_case = (lambda s: s.title()) if _icu_not_ok else \
        partial(_icu.title, get_locale())
################################################################################
def test(): # {{{
# Manual smoke test for the ICU wrapper; prints its results to stdout.
# It sorts the German and French word lists below with Collator('de') and
# Collator('fr') and compares each result with the matching *_good list,
# then checks that strcmp() agrees with cmp() of sort_key() for every pair,
# and finally prints python vs. ICU case transforms for two sample strings.
# NOTE(review): uses _icu.Collator directly, so it presumably needs the ICU
# plugin to have loaded (_icu not None) -- confirm before calling.
# Data {{{
german = '''
Sonntag
Montag
Dienstag
Januar
Februar
März
Fuße
Fluße
Flusse
flusse
fluße
flüße
flüsse
'''
german_good = '''
Dienstag
Februar
flusse
Flusse
fluße
Fluße
flüsse
flüße
Fuße
Januar
März
Montag
Sonntag'''
french = '''
dimanche
lundi
mardi
janvier
février
mars
déjà
Meme
deja
même
dejà
bpef
bœg
Boef
Mémé
bœf
boef
bnef
pêche
pèché
pêché
pêche
pêché'''
french_good = '''
bnef
boef
Boef
bœf
bœg
bpef
deja
dejà
déjà
dimanche
février
janvier
lundi
mardi
mars
Meme
Mémé
même
pèché
pêche
pêche
pêché
pêché'''
# }}}
# Decode a UTF-8 block of text into a list of its non-blank, stripped lines.
def create(l):
l = l.decode('utf-8').splitlines()
return [x.strip() for x in l if x.strip()]
# Report every pair for which strcmp() and cmp() of the sort keys disagree.
def test_strcmp(entries):
for x in entries:
for y in entries:
if strcmp(x, y) != cmp(sort_key(x), sort_key(y)):
print 'strcmp failed for %r, %r'%(x, y)
german = create(german)
c = _icu.Collator('de')
print 'Sorted german:: (%s)'%c.actual_locale
gs = list(sorted(german, key=c.sort_key))
for x in gs:
print '\t', x.encode('utf-8')
# Stop at the first language whose order differs from the expected list.
if gs != create(german_good):
print 'German failed'
return
print
french = create(french)
c = _icu.Collator('fr')
print 'Sorted french:: (%s)'%c.actual_locale
fs = list(sorted(french, key=c.sort_key))
for x in fs:
print '\t', x.encode('utf-8')
if fs != create(french_good):
print 'French failed (note that French fails with icu < 4.6 i.e. on windows and OS X)'
return
# Uses the module-level strcmp/sort_key, i.e. the current locale's collator.
test_strcmp(german + french)
print '\nTesting case transforms in current locale'
for x in ('a', 'Alice\'s code'):
print 'Upper:', x, '->', 'py:', x.upper().encode('utf-8'), 'icu:', upper(x).encode('utf-8')
print 'Lower:', x, '->', 'py:', x.lower().encode('utf-8'), 'icu:', lower(x).encode('utf-8')
print 'Title:', x, '->', 'py:', x.title().encode('utf-8'), 'icu:', title_case(x).encode('utf-8')
print
# }}}

View File

@ -414,7 +414,7 @@ magick_Image_load(magick_Image *self, PyObject *args, PyObject *kwargs) {
// }}}
// Image.load {{{
// Image.open {{{
static PyObject *
magick_Image_read(magick_Image *self, PyObject *args, PyObject *kwargs) {
const char *data;

View File

@ -22,6 +22,7 @@ from calibre.utils.pyparsing import CaselessKeyword, Group, Forward, \
CharsNotIn, Suppress, OneOrMore, MatchFirst, CaselessLiteral, \
Optional, NoMatch, ParseException, QuotedString
from calibre.constants import preferred_encoding
from calibre.utils.icu import sort_key
@ -65,8 +66,7 @@ class SavedSearchQueries(object):
self.db.prefs[self.opt_name] = self.queries
def names(self):
return sorted(self.queries.keys(),
cmp=lambda x,y: cmp(x.lower(), y.lower()))
return sorted(self.queries.keys(),key=sort_key)
'''
Create a global instance of the saved searches. It is global so that the searches

View File

@ -115,14 +115,14 @@ class FeedTemplate(Template):
hr.tail = '| '
if f+1 < len(feeds):
link = A('Next section', href='../feed_%d/index.html'%(f+1))
link = A(_('Next section'), href='../feed_%d/index.html'%(f+1))
link.tail = ' | '
navbar.append(link)
link = A('Main menu', href="../index.html")
link = A(_('Main menu'), href="../index.html")
link.tail = ' | '
navbar.append(link)
if f > 0:
link = A('Previous section', href='../feed_%d/index.html'%(f-1))
link = A(_('Previous section'), href='../feed_%d/index.html'%(f-1))
link.tail = ' |'
navbar.append(link)
if top:
@ -208,17 +208,17 @@ class NavBarTemplate(Template):
up = '../..' if art == number_of_articles_in_feed - 1 else '..'
href = '%s%s/%s/index.html'%(prefix, up, next)
navbar.text = '| '
navbar.append(A('Next', href=href))
navbar.append(A(_('Next'), href=href))
href = '%s../index.html#article_%d'%(prefix, art)
navbar.iterchildren(reversed=True).next().tail = ' | '
navbar.append(A('Section Menu', href=href))
navbar.append(A(_('Section Menu'), href=href))
href = '%s../../index.html#feed_%d'%(prefix, feed)
navbar.iterchildren(reversed=True).next().tail = ' | '
navbar.append(A('Main Menu', href=href))
navbar.append(A(_('Main Menu'), href=href))
if art > 0 and not bottom:
href = '%s../article_%d/index.html'%(prefix, art-1)
navbar.iterchildren(reversed=True).next().tail = ' | '
navbar.append(A('Previous', href=href))
navbar.append(A(_('Previous'), href=href))
navbar.iterchildren(reversed=True).next().tail = ' | '
if not bottom:
navbar.append(HR())
@ -300,7 +300,7 @@ class TouchscreenFeedTemplate(Template):
navbar_tr.append(TD(CLASS('feed_prev'),link))
# Up to Sections
link = A('Sections', href="../index.html")
link = A(_('Sections'), href="../index.html")
navbar_tr.append(TD(CLASS('feed_up'),link))
# Next Section
@ -381,16 +381,16 @@ class TouchscreenNavBarTemplate(Template):
# | Previous
if art > 0:
link = A(CLASS('article_link'),'Previous',href='%s../article_%d/index.html'%(prefix, art-1))
link = A(CLASS('article_link'),_('Previous'),href='%s../article_%d/index.html'%(prefix, art-1))
navbar_tr.append(TD(CLASS('article_prev'),link))
else:
navbar_tr.append(TD(CLASS('article_prev'),''))
# | Articles | Sections |
link = A(CLASS('articles_link'),'Articles', href='%s../index.html#article_%d'%(prefix, art))
link = A(CLASS('articles_link'),_('Articles'), href='%s../index.html#article_%d'%(prefix, art))
navbar_tr.append(TD(CLASS('article_articles_list'),link))
link = A(CLASS('sections_link'),'Sections', href='%s../../index.html#feed_%d'%(prefix, feed))
link = A(CLASS('sections_link'),_('Sections'), href='%s../../index.html#feed_%d'%(prefix, feed))
navbar_tr.append(TD(CLASS('article_sections_list'),link))
# | Next
@ -398,7 +398,7 @@ class TouchscreenNavBarTemplate(Template):
else 'article_%d'%(art+1)
up = '../..' if art == number_of_articles_in_feed - 1 else '..'
link = A(CLASS('article_link'),'Next', href='%s%s/%s/index.html'%(prefix, up, next))
link = A(CLASS('article_link'), _('Next'), href='%s%s/%s/index.html'%(prefix, up, next))
navbar_tr.append(TD(CLASS('article_next'),link))
navbar_t.append(navbar_tr)
navbar.append(navbar_t)