class SfntlyExtension(Extension, SfntlyBuilderMixin):
    """Extension that is also a SfntlyBuilderMixin.

    Its preflight hook runs the mixin's ``__call__`` before the extension's
    own sources are compiled.
    """

    def __init__(self, *args, **kwargs):
        # Extension.__init__ must run first: SfntlyBuilderMixin.__init__
        # reads self.libraries, which is presumably populated by
        # Extension.__init__ from the ``libraries`` keyword argument.
        Extension.__init__(self, *args, **kwargs)
        SfntlyBuilderMixin.__init__(self)

    def preflight(self, *args, **kwargs):
        """Forward the preflight arguments to the instance's ``__call__``.

        The build command invokes this as
        ``preflight(obj_dir, compiler, linker, builder, cflags, ldflags)``.
        """
        self(*args, **kwargs)
class Group(object):
    """One directory of sfntly ``*.cc`` sources compiled with a common flag set.

    :param name: sub-directory name, also used for the object directory
    :param base: directory that is globbed for ``*.h`` and ``*.cc`` files
    :param build_base: directory under which the object directory is created
    :param cflags: list of compiler flags used for every source in the group
    """

    def __init__(self, name, base, build_base, cflags):
        self.name = name
        self.cflags = cflags
        self.headers = frozenset(glob(os.path.join(base, '*.h')))
        self.src_files = glob(os.path.join(base, '*.cc'))
        self.bdir = os.path.abspath(os.path.join(build_base, name))
        if not os.path.exists(self.bdir):
            os.makedirs(self.bdir)
        # One object file per source file, same base name, platform suffix.
        obj_ext = '.obj' if iswindows else '.o'
        self.objects = [
            os.path.join(self.bdir,
                         os.path.basename(x).rpartition('.')[0] + obj_ext)
            for x in self.src_files]

    def __call__(self, compiler, linker, builder, all_headers):
        """Compile every source whose object is out of date.

        An object is rebuilt when ``builder.newer`` reports it stale with
        respect to its source or any header in ``all_headers``.
        ``linker`` is accepted but not used here.
        """
        headers = list(all_headers)
        for src, obj in zip(self.src_files, self.objects):
            if builder.newer(obj, [src] + headers):
                sinc = ['/Tp'+src] if iswindows else ['-c', src]
                oinc = ['/Fo'+obj] if iswindows else ['-o', obj]
                cmd = [compiler] + self.cflags + sinc + oinc
                builder.info(' '.join(cmd))
                builder.check_call(cmd)


class SfntlyBuilderMixin(object):
    """Mixin that compiles the bundled sfntly sources for an extension.

    The host class is expected to provide ``libraries``, ``inc_dirs``,
    ``cflags`` and ``absolutize`` (they are read here but defined
    elsewhere).  After ``__call__`` the compiled object paths are available
    as ``self.extra_objs``.
    """

    def __init__(self):
        self.sfntly_cflags = [
            '-DSFNTLY_NO_EXCEPTION',
            '-DSFNTLY_EXPERIMENTAL',
        ]
        if iswindows:
            self.sfntly_cflags += [
                '-D_UNICODE', '-DUNICODE',
            ] + shlex.split('/Zi /nologo /W4 /WX /O2 /Ob2 /Oy /GF /Gm- /MT /GS /Gy '
                            '/fp:precise /Zc:wchar_t /Zc:forScope /GR-')
        else:
            self.sfntly_cflags += [
                '-Werror',
                '-fno-exceptions',
            ]
        # NOTE(review): when several ICU libraries were requested only
        # 'icuuc' is kept -- presumably all sfntly needs; confirm.
        if len(self.libraries) > 1:
            self.libraries = ['icuuc']
        if not iswindows:
            # Rebind instead of ``+=``: self.libraries may be the very list
            # object the caller passed in (e.g. a module-level list shared
            # with other extensions), and ``+=`` would mutate it in place.
            self.libraries = self.libraries + ['pthread']

    def __call__(self, obj_dir, compiler, linker, builder, cflags, ldflags):
        """Compile all sfntly source groups and record their object files.

        :param obj_dir: object directory of the extension; sfntly objects go
            into its ``sfntly`` sub-directory
        :param builder: object providing ``inc_dirs_to_cflags``, ``newer``,
            ``info`` and ``check_call``
        :param cflags: base compiler flags, extended with the sfntly include
            flags, ``self.sfntly_cflags`` and ``self.cflags``
        :param ldflags: accepted for signature compatibility, unused here
        """
        self.sfntly_build_dir = os.path.join(obj_dir, 'sfntly')

        groups = []
        all_headers = set()
        all_objects = []
        src_dir = self.absolutize([os.path.join('sfntly', 'src')])[0]
        inc_dirs = [src_dir]
        # Rebind rather than extend in place, for the same aliasing reason
        # as self.libraries in __init__.
        self.inc_dirs = self.inc_dirs + inc_dirs
        inc_flags = builder.inc_dirs_to_cflags(inc_dirs)
        group_cflags = cflags + inc_flags + self.sfntly_cflags + self.cflags
        for loc in ('', 'port', 'data', 'math', 'table', 'table/bitmap',
                    'table/core', 'table/truetype'):
            path = os.path.join(src_dir, 'sfntly', *loc.split('/'))
            gr = Group(loc, path, self.sfntly_build_dir, group_cflags)
            groups.append(gr)
            all_headers |= gr.headers
            all_objects.extend(gr.objects)

        # Compile only after every group is known, so that each source is
        # checked against the headers of all groups.
        for group in groups:
            group(compiler, linker, builder, all_headers)

        self.extra_objs = all_objects
''') parser.add_option('-c', '--command', help='Run python code.', default=None) parser.add_option('-e', '--exec-file', default=None, help='Run the python code in file.') + parser.add_option('-f', '--subset-font', default=False, + action='store_true', help='Subset the specified font') parser.add_option('-d', '--debug-device-driver', default=False, action='store_true', help='Debug the specified device driver.') parser.add_option('-g', '--gui', default=False, action='store_true', @@ -209,6 +211,11 @@ def main(args=sys.argv): execfile(ef, g) return + if len(args) > 1 and args[1] in ('-f', '--subset-font'): + from calibre.utils.fonts.subset import main + main(['subset-font']+args[2:]) + return + opts, args = option_parser().parse_args(args) if opts.gui: from calibre.gui2.main import main diff --git a/src/calibre/utils/fonts/sfntly.cpp b/src/calibre/utils/fonts/sfntly.cpp new file mode 100644 index 0000000000..f156611909 --- /dev/null +++ b/src/calibre/utils/fonts/sfntly.cpp @@ -0,0 +1,606 @@ +/* + * sfntly.cpp + * Copyright (C) 2012 Kovid Goyal + * + * Distributed under terms of the GPL3 license. 
+ */ + +#define _UNICODE +#define UNICODE +#define PY_SSIZE_T_CLEAN +#include +#include "sfntly.h" + +#include + +#include +#include + +static PyObject *Error = NULL; +static PyObject *NoGlyphs = NULL; + +// Predicates {{{ +CompositePredicate::CompositePredicate(IntegerSet &chars, IntegerList &ranges) : + chars(chars), ranges(ranges) {} + +CompositePredicate::~CompositePredicate() {} + +bool CompositePredicate::operator()(int32_t character) const { + for (size_t i = 0; i < ranges.size()/2; i++) { + if (ranges[2*i] <= character && character <= ranges[2*i+1]) return true; + } + return chars.count(character) > 0; +} + +// }}} + +// Font Info {{{ + +GlyphId::GlyphId(int32_t glyph_id, FontId font_id) : glyph_id_(glyph_id), font_id_(font_id) {} + +GlyphId::~GlyphId() {} + +bool GlyphId::operator==(const GlyphId& other) const { return glyph_id_ == other.glyph_id(); } + +bool GlyphId::operator<(const GlyphId& other) const { return glyph_id_ < other.glyph_id(); } + +int32_t GlyphId::glyph_id() const { return glyph_id_; } + +void GlyphId::set_glyph_id(const int32_t glyph_id) { glyph_id_ = glyph_id; } + +FontId GlyphId::font_id() const { return font_id_; } + +void GlyphId::set_font_id(const FontId font_id) { font_id_ = font_id; } + +FontInfo::FontInfo() : chars_to_glyph_ids_(new CharacterMap), + resolved_glyph_ids_(new GlyphIdSet), fonts_(new FontIdMap) { } + +FontInfo::FontInfo(CharacterMap* chars_to_glyph_ids, + GlyphIdSet* resolved_glyph_ids, + FontIdMap* fonts) { + chars_to_glyph_ids_ = new CharacterMap(chars_to_glyph_ids->begin(), + chars_to_glyph_ids->end()); + resolved_glyph_ids_ = new GlyphIdSet(resolved_glyph_ids->begin(), + resolved_glyph_ids->end()); + fonts_ = new FontIdMap(fonts->begin(), fonts->end()); +} + +FontInfo::~FontInfo() { + delete chars_to_glyph_ids_; + delete resolved_glyph_ids_; + delete fonts_; +} + +FontDataTable* FontInfo::GetTable(FontId font_id, int32_t tag) { + if (!fonts_) + return NULL; + FontIdMap::iterator it = fonts_->find(font_id); + if 
(it == fonts_->end()) + return NULL; + return it->second->GetTable(tag); +} + +const TableMap* FontInfo::GetTableMap(FontId font_id) { + if (!fonts_) + return NULL; + FontIdMap::iterator it = fonts_->find(font_id); + if (it == fonts_->end()) + return NULL; + return it->second->GetTableMap(); +} + +CharacterMap* FontInfo::chars_to_glyph_ids() const { return chars_to_glyph_ids_; } + +void FontInfo::set_chars_to_glyph_ids(CharacterMap* chars_to_glyph_ids) { *chars_to_glyph_ids_ = *chars_to_glyph_ids; } + +GlyphIdSet* FontInfo::resolved_glyph_ids() const { return resolved_glyph_ids_; } + +void FontInfo::set_resolved_glyph_ids(GlyphIdSet* resolved_glyph_ids) { *resolved_glyph_ids_ = *resolved_glyph_ids; } + +FontIdMap* FontInfo::fonts() const { return fonts_; } + +void FontInfo::set_fonts(FontIdMap* fonts) { *fonts_ = *fonts; } + +FontSourcedInfoBuilder::FontSourcedInfoBuilder(Font* font, FontId font_id) : font_(font), font_id_(font_id), +predicate_(NULL) { Initialize(); } + +FontSourcedInfoBuilder::FontSourcedInfoBuilder(Font* font, + FontId font_id, + CharacterPredicate* predicate) : + font_(font), font_id_(font_id), predicate_(predicate) { Initialize(); } + +FontSourcedInfoBuilder::~FontSourcedInfoBuilder() { } + +CALLER_ATTACH FontInfo* FontSourcedInfoBuilder::GetFontInfo() { + CharacterMap* chars_to_glyph_ids = new CharacterMap; + bool success = GetCharacterMap(chars_to_glyph_ids); + if (!success) { + delete chars_to_glyph_ids; + PyErr_SetString(Error, "Error creating character map.\n"); + return NULL; + } + GlyphIdSet* resolved_glyph_ids = new GlyphIdSet; + success = ResolveCompositeGlyphs(chars_to_glyph_ids, resolved_glyph_ids); + if (!success) { + delete chars_to_glyph_ids; + delete resolved_glyph_ids; + PyErr_SetString(Error, "Error resolving composite glyphs.\n"); + return NULL; + } + Ptr font_info = new FontInfo; + font_info->set_chars_to_glyph_ids(chars_to_glyph_ids); + font_info->set_resolved_glyph_ids(resolved_glyph_ids); + FontIdMap* font_id_map = new 
FontIdMap; + font_id_map->insert(std::make_pair(font_id_, font_)); + font_info->set_fonts(font_id_map); + delete chars_to_glyph_ids; + delete resolved_glyph_ids; + delete font_id_map; + return font_info.Detach(); +} + +bool FontSourcedInfoBuilder::GetCharacterMap(CharacterMap* chars_to_glyph_ids) { + if (!cmap_ || !chars_to_glyph_ids) + return false; + chars_to_glyph_ids->clear(); + CMapTable::CMap::CharacterIterator* character_iterator = cmap_->Iterator(); + if (!character_iterator) + return false; + while (character_iterator->HasNext()) { + int32_t character = character_iterator->Next(); + if (!predicate_ || (*predicate_)(character)) { + chars_to_glyph_ids->insert + (std::make_pair(character, + GlyphId(cmap_->GlyphId(character), font_id_))); + } + } + delete character_iterator; + return true; +} + +bool FontSourcedInfoBuilder::ResolveCompositeGlyphs(CharacterMap* chars_to_glyph_ids, + GlyphIdSet* resolved_glyph_ids) { + if (!chars_to_glyph_ids || !resolved_glyph_ids) + return false; + resolved_glyph_ids->clear(); + resolved_glyph_ids->insert(GlyphId(0, font_id_)); + IntegerSet* unresolved_glyph_ids = new IntegerSet; + // Since composite glyph elements might themselves be composite, we would need + // to recursively resolve the elements too. To avoid the recursion we + // create two sets, |unresolved_glyph_ids| for the unresolved glyphs, + // initially containing all the ids and |resolved_glyph_ids|, initially empty. + // We'll remove glyph ids from |unresolved_glyph_ids| until it is empty and, + // if the glyph is composite, add its elements to the unresolved set. + for (CharacterMap::iterator it = chars_to_glyph_ids->begin(), + e = chars_to_glyph_ids->end(); it != e; ++it) { + unresolved_glyph_ids->insert(it->second.glyph_id()); + } + // As long as there are unresolved glyph ids. + while (!unresolved_glyph_ids->empty()) { + // Get the corresponding glyph. 
+ int32_t glyph_id = *(unresolved_glyph_ids->begin()); + unresolved_glyph_ids->erase(unresolved_glyph_ids->begin()); + if (glyph_id < 0 || glyph_id > loca_table_->num_glyphs()) { + continue; + } + int32_t length = loca_table_->GlyphLength(glyph_id); + if (length == 0) { + continue; + } + int32_t offset = loca_table_->GlyphOffset(glyph_id); + GlyphPtr glyph; + glyph.Attach(glyph_table_->GetGlyph(offset, length)); + if (glyph == NULL) { + continue; + } + // Mark the glyph as resolved. + resolved_glyph_ids->insert(GlyphId(glyph_id, font_id_)); + // If it is composite, add all its components to the unresolved glyph set. + if (glyph->GlyphType() == GlyphType::kComposite) { + Ptr composite_glyph = + down_cast(glyph.p_); + int32_t num_glyphs = composite_glyph->NumGlyphs(); + for (int32_t i = 0; i < num_glyphs; ++i) { + int32_t glyph_id = composite_glyph->GlyphIndex(i); + if (resolved_glyph_ids->find(GlyphId(glyph_id, -1)) + == resolved_glyph_ids->end()) { + unresolved_glyph_ids->insert(glyph_id); + } + } + } + } + delete unresolved_glyph_ids; + return true; +} + +void FontSourcedInfoBuilder::Initialize() { + Ptr cmap_table = down_cast(font_->GetTable(Tag::cmap)); + // We prefer Windows BMP format 4 cmaps. + cmap_.Attach(cmap_table->GetCMap(CMapTable::WINDOWS_BMP)); + // But if none is found, + if (!cmap_) { + return; + } + loca_table_ = down_cast(font_->GetTable(Tag::loca)); + glyph_table_ = down_cast(font_->GetTable(Tag::glyf)); +} + + +// }}} + +// Font Assembler {{{ + +FontAssembler::FontAssembler(FontInfo* font_info, IntegerSet* table_blacklist) : + table_blacklist_(table_blacklist) { + font_info_ = font_info; + Initialize(); + } + +FontAssembler::FontAssembler(FontInfo* font_info) : table_blacklist_(NULL) { + font_info_ = font_info; + Initialize(); +} + +FontAssembler::~FontAssembler() { } + +// Assemble a new font from the font info object. +CALLER_ATTACH Font* FontAssembler::Assemble() { + // Assemble tables we can subset. 
+ if (!AssembleCMapTable() || !AssembleGlyphAndLocaTables()) { + return NULL; + } + // For all other tables, either include them unmodified or don't at all. + const TableMap* common_table_map = + font_info_->GetTableMap(font_info_->fonts()->begin()->first); + for (TableMap::const_iterator it = common_table_map->begin(), + e = common_table_map->end(); it != e; ++it) { + if (table_blacklist_ + && table_blacklist_->find(it->first) != table_blacklist_->end()) { + continue; + } + font_builder_->NewTableBuilder(it->first, it->second->ReadFontData()); + } + return font_builder_->Build(); +} + +IntegerSet* FontAssembler::table_blacklist() const { return table_blacklist_; } + +void FontAssembler::set_table_blacklist(IntegerSet* table_blacklist) { + table_blacklist_ = table_blacklist; +} + +bool FontAssembler::AssembleCMapTable() { + // Creating the new CMapTable and the new format 4 CMap + Ptr cmap_table_builder = + down_cast + (font_builder_->NewTableBuilder(Tag::cmap)); + if (!cmap_table_builder) + return false; + Ptr cmap_builder = + down_cast + (cmap_table_builder->NewCMapBuilder(CMapFormat::kFormat4, + CMapTable::WINDOWS_BMP)); + if (!cmap_builder) + return false; + // Creating the segments and the glyph id array + CharacterMap* chars_to_glyph_ids = font_info_->chars_to_glyph_ids(); + SegmentList* segment_list = new SegmentList; + IntegerList* glyph_id_array = new IntegerList; + int32_t last_chararacter = -2; + int32_t last_offset = 0; + Ptr current_segment; + + // For simplicity, we will have one segment per contiguous range. + // To test the algorithm, we've replaced the original CMap with the CMap + // generated by this code without removing any character. 
+ // Tuffy.ttf: CMap went from 3146 to 3972 bytes (1.7% to 2.17% of file) + // AnonymousPro.ttf: CMap went from 1524 to 1900 bytes (0.96% to 1.2%) + for (CharacterMap::iterator it = chars_to_glyph_ids->begin(), + e = chars_to_glyph_ids->end(); it != e; ++it) { + int32_t character = it->first; + int32_t glyph_id = it->second.glyph_id(); + if (character != last_chararacter + 1) { // new segment + if (current_segment != NULL) { + current_segment->set_end_count(last_chararacter); + segment_list->push_back(current_segment); + } + // start_code = character + // end_code = -1 (unknown for now) + // id_delta = 0 (we don't use id_delta for this representation) + // id_range_offset = last_offset (offset into the glyph_id_array) + current_segment = + new CMapTable::CMapFormat4::Builder:: + Segment(character, -1, 0, last_offset); + } + glyph_id_array->push_back(glyph_id); + last_offset += DataSize::kSHORT; + last_chararacter = character; + } + // The last segment is still open. + if (glyph_id_array->size() < 1) { + PyErr_SetString(NoGlyphs, "No glyphs for the specified characters found"); + return false; + } + current_segment->set_end_count(last_chararacter); + segment_list->push_back(current_segment); + // Updating the id_range_offset for every segment. + for (int32_t i = 0, num_segs = segment_list->size(); i < num_segs; ++i) { + Ptr segment = segment_list->at(i); + segment->set_id_range_offset(segment->id_range_offset() + + (num_segs - i + 1) * DataSize::kSHORT); + } + // Adding the final, required segment. 
+ current_segment = + new CMapTable::CMapFormat4::Builder::Segment(0xffff, 0xffff, 1, 0); + segment_list->push_back(current_segment); + // Writing the segments and glyph id array to the CMap + cmap_builder->set_segments(segment_list); + cmap_builder->set_glyph_id_array(glyph_id_array); + delete segment_list; + delete glyph_id_array; + return true; +} + +bool FontAssembler::AssembleGlyphAndLocaTables() { + Ptr loca_table_builder = + down_cast + (font_builder_->NewTableBuilder(Tag::loca)); + Ptr glyph_table_builder = + down_cast + (font_builder_->NewTableBuilder(Tag::glyf)); + + GlyphIdSet* resolved_glyph_ids = font_info_->resolved_glyph_ids(); + IntegerList loca_list; + // Basic sanity check: all LOCA tables are of the same size + // This is necessary but not sufficient! + int32_t previous_size = -1; + for (FontIdMap::iterator it = font_info_->fonts()->begin(); + it != font_info_->fonts()->end(); ++it) { + Ptr loca_table = + down_cast(font_info_->GetTable(it->first, Tag::loca)); + int32_t current_size = loca_table->header_length(); + if (previous_size != -1 && current_size != previous_size) { + return false; + } + previous_size = current_size; + } + + // Assuming all fonts referenced by the FontInfo are the subsets of the same + // font, their loca tables should all have the same sizes. + // We'll just get the size of the first font's LOCA table for simplicty. + Ptr first_loca_table = + down_cast + (font_info_->GetTable(font_info_->fonts()->begin()->first, Tag::loca)); + int32_t num_loca_glyphs = first_loca_table->num_glyphs(); + loca_list.resize(num_loca_glyphs); + loca_list.push_back(0); + int32_t last_glyph_id = 0; + int32_t last_offset = 0; + GlyphTable::GlyphBuilderList* glyph_builders = + glyph_table_builder->GlyphBuilders(); + + for (GlyphIdSet::iterator it = resolved_glyph_ids->begin(), + e = resolved_glyph_ids->end(); it != e; ++it) { + // Get the glyph for this resolved_glyph_id. 
+ int32_t resolved_glyph_id = it->glyph_id(); + int32_t font_id = it->font_id(); + // Get the LOCA table for the current glyph id. + Ptr loca_table = + down_cast + (font_info_->GetTable(font_id, Tag::loca)); + int32_t length = loca_table->GlyphLength(resolved_glyph_id); + int32_t offset = loca_table->GlyphOffset(resolved_glyph_id); + + // Get the GLYF table for the current glyph id. + Ptr glyph_table = + down_cast + (font_info_->GetTable(font_id, Tag::glyf)); + GlyphPtr glyph; + glyph.Attach(glyph_table->GetGlyph(offset, length)); + + // The data reference by the glyph is copied into a new glyph and + // added to the glyph_builders belonging to the glyph_table_builder. + // When Build gets called, all the glyphs will be built. + Ptr data = glyph->ReadFontData(); + Ptr copy_data; + copy_data.Attach(WritableFontData::CreateWritableFontData(data->Length())); + data->CopyTo(copy_data); + GlyphBuilderPtr glyph_builder; + glyph_builder.Attach(glyph_table_builder->GlyphBuilder(copy_data)); + glyph_builders->push_back(glyph_builder); + + // If there are missing glyphs between the last glyph_id and the + // current resolved_glyph_id, since the LOCA table needs to have the same + // size, the offset is kept the same. + for (int32_t i = last_glyph_id + 1; i <= resolved_glyph_id; ++i) + loca_list[i] = last_offset; + last_offset += length; + loca_list[resolved_glyph_id + 1] = last_offset; + last_glyph_id = resolved_glyph_id + 1; + } + // If there are missing glyph ids, their loca entries must all point + // to the same offset as the last valid glyph id making them all zero length. + for (int32_t i = last_glyph_id + 1; i <= num_loca_glyphs; ++i) + loca_list[i] = last_offset; + loca_table_builder->SetLocaList(&loca_list); + return true; +} + +void FontAssembler::Initialize() { + font_factory_.Attach(FontFactory::GetInstance()); + font_builder_.Attach(font_factory_->NewFontBuilder()); +} + + +// }}} + +// Subsetters {{{ +// Subsets a given font using a character predicate. 
+ +PredicateSubsetter::PredicateSubsetter(Font* font, CharacterPredicate* predicate) : font_(font), predicate_(predicate) {} + +PredicateSubsetter::~PredicateSubsetter() { } + +// Performs subsetting returning the subsetted font. +CALLER_ATTACH Font* PredicateSubsetter::Subset() { + Ptr info_builder = + new FontSourcedInfoBuilder(font_, 0, predicate_); + + Ptr font_info; + font_info.Attach(info_builder->GetFontInfo()); + if (!font_info) { + PyErr_SetString(Error, "Could not create font info"); + return NULL; + } + + IntegerSet* table_blacklist = new IntegerSet; + table_blacklist->insert(Tag::DSIG); + Ptr font_assembler = new FontAssembler(font_info, + table_blacklist); + Ptr font_subset; + font_subset.Attach(font_assembler->Assemble()); + delete table_blacklist; + if (!font_subset) { if (!PyErr_Occurred()) PyErr_SetString(Error, "Could not subset font"); } + return font_subset.Detach(); +} + + +// }}} + +static void get_stats(Font *font, PyObject *dict) { + PyObject *t; + const TableMap* tables = font->GetTableMap(); + for (TableMap::const_iterator it = tables->begin(), + e = tables->end(); it != e; ++it) { + t = PyInt_FromLong(it->second->DataLength()); + if (t != NULL) { + PyDict_SetItemString(dict, TagToString(it->first), t); + Py_DECREF(t); + } + } +} + +static PyObject* +do_subset(const char *data, Py_ssize_t sz, Ptr &predicate) { + FontPtr font; + Ptr font_factory; + FontArray fonts; + MemoryInputStream stream; + PyObject *stats, *stats2; + + if (!stream.Attach(reinterpret_cast(data), sz)) + return PyErr_NoMemory(); + font_factory.Attach(FontFactory::GetInstance()); + font_factory->LoadFonts(&stream, &fonts); + if (fonts.empty() || fonts[0] == NULL) { + PyErr_SetString(Error, "Failed to load font from provided data."); + return NULL; + } + + font = fonts[0].Detach(); + if (font->num_tables() == 0) { + PyErr_SetString(Error, "Loaded font has 0 tables."); + return NULL; + } + Ptr subsetter = new PredicateSubsetter(font, predicate); + Ptr new_font; + 
new_font.Attach(subsetter->Subset()); + if (!new_font) return NULL; + + Ptr ff; + ff.Attach(FontFactory::GetInstance()); + MemoryOutputStream output_stream; + ff->SerializeFont(new_font, &output_stream); + + stats = PyDict_New(); stats2 = PyDict_New(); + if (stats == NULL || stats2 == NULL) return PyErr_NoMemory(); + get_stats(font, stats); + get_stats(new_font, stats2); + return Py_BuildValue("s#NN", (char*)output_stream.Get(), output_stream.Size(), stats, stats2); +} + +static PyObject* +subset(PyObject *self, PyObject *args) { + const char *data; + Py_ssize_t sz; + PyObject *individual_chars, *ranges, *t; + int32_t temp; + + if (!PyArg_ParseTuple(args, "s#OO", &data, &sz, &individual_chars, &ranges)) return NULL; + + if (!PyTuple_Check(individual_chars) || !PyTuple_Check(ranges)) { + PyErr_SetString(PyExc_TypeError, "individual_chars and ranges must be tuples"); + return NULL; + } + + if (PyTuple_Size(ranges) < 1 && PyTuple_Size(individual_chars) < 1) { + PyErr_SetString(NoGlyphs, "No characters specified"); + return NULL; + } + + IntegerSet chars; + for (Py_ssize_t i = 0; i < PyTuple_Size(individual_chars); i++) { + temp = (int32_t)PyInt_AsLong(PyTuple_GET_ITEM(individual_chars, i)); + if (temp == -1 && PyErr_Occurred()) return NULL; + chars.insert(temp); + } + + IntegerList cranges; + cranges.resize(2*PyTuple_Size(ranges)); + for (Py_ssize_t i = 0; i < PyTuple_Size(ranges); i++) { + t = PyTuple_GET_ITEM(ranges, i); + if (!PyTuple_Check(t) || PyTuple_Size(t) != 2) { + PyErr_SetString(PyExc_TypeError, "ranges must contain only 2-tuples"); + return NULL; + } + for (Py_ssize_t j = 0; j < 2; j++) { + cranges[2*i+j] = (int32_t)PyInt_AsLong(PyTuple_GET_ITEM(t, j)); + if (cranges[2*i+j] == -1 && PyErr_Occurred()) return NULL; + } + } + + Ptr predicate = new (std::nothrow) CompositePredicate(chars, cranges); + if (predicate == NULL) return PyErr_NoMemory(); + + try { + return do_subset(data, sz, predicate); + } catch (std::exception &e) { + PyErr_SetString(Error, 
e.what()); + return NULL; + } catch (...) { + PyErr_SetString(Error, "An unknown exception occurred while subsetting"); + return NULL; + } + +} + +static +PyMethodDef methods[] = { + {"subset", (PyCFunction)subset, METH_VARARGS, + "subset(bytestring, individual_chars, ranges) -> Subset the sfnt in bytestring, keeping only characters specified by individual_chars and ranges. Returns the subset font as a bytestring and the sizes of all font tables in the old and new fonts." + }, + + {NULL, NULL, 0, NULL} +}; + +PyMODINIT_FUNC +initsfntly(void) { + PyObject *m; + + m = Py_InitModule3( + "sfntly", methods, + "Wrapper for the Google sfntly library" + ); + if (m == NULL) return; + + Error = PyErr_NewException((char*)"sfntly.Error", NULL, NULL); + if (Error == NULL) return; + PyModule_AddObject(m, "Error", Error); + + NoGlyphs = PyErr_NewException((char*)"sfntly.NoGlyphs", NULL, NULL); + if (NoGlyphs == NULL) return; + PyModule_AddObject(m, "NoGlyphs", NoGlyphs); +} + + + diff --git a/src/calibre/utils/fonts/sfntly.h b/src/calibre/utils/fonts/sfntly.h new file mode 100644 index 0000000000..8b015d3dd3 --- /dev/null +++ b/src/calibre/utils/fonts/sfntly.h @@ -0,0 +1,196 @@ +/* + * sfntly.h + * Copyright (C) 2012 Kovid Goyal + * + * Distributed under terms of the GPL3 license. 
// Font information used for FontAssembler in the construction of a new font.
// Holds its own heap-allocated character map, glyph id set and font id map;
// the constructor and the setters copy the contents of the containers passed
// in, and the destructor deletes the internal containers.
class FontInfo : public RefCounted {
 public:
  // Empty FontInfo object (allocates empty internal containers).
  FontInfo();

  // chars_to_glyph_ids maps characters to GlyphIds for CMap construction
  // resolved_glyph_ids defines GlyphIds which should be in the final font
  // fonts is a map of font ids to fonts to reference any needed table
  // The contents of all three containers are copied.
  FontInfo(CharacterMap* chars_to_glyph_ids,
           GlyphIdSet* resolved_glyph_ids,
           FontIdMap* fonts);

  virtual ~FontInfo();

  // Gets the table with the specified tag from the font corresponding to
  // font_id or NULL if there is no such font/table.
  // font_id is the id of the font that contains the table
  // tag identifies the table to be obtained
  virtual FontDataTable* GetTable(FontId font_id, int32_t tag);

  // Gets the table map of the font whose id is font_id, or NULL if there is
  // no font with that id.
  virtual const TableMap* GetTableMap(FontId font_id);

  // Returns the internal character map (still owned by this object).
  CharacterMap* chars_to_glyph_ids() const;
  // Copies the contents of chars_to_glyph_ids into the internal map.
  // Does NOT take ownership: the caller remains responsible for freeing
  // the object it passed in.
  void set_chars_to_glyph_ids(CharacterMap* chars_to_glyph_ids);

  // Returns the internal glyph id set (still owned by this object).
  GlyphIdSet* resolved_glyph_ids() const;
  // Copies the contents of resolved_glyph_ids into the internal set.
  // Does NOT take ownership of the argument.
  void set_resolved_glyph_ids(GlyphIdSet* resolved_glyph_ids);

  // Returns the internal font id map (still owned by this object).
  FontIdMap* fonts() const;

  // Copies the contents of fonts into the internal map.
  // Does NOT take ownership of the argument.
  void set_fonts(FontIdMap* fonts);

 private:
  CharacterMap* chars_to_glyph_ids_;
  GlyphIdSet* resolved_glyph_ids_;
  FontIdMap* fonts_;
};
def subset(font_data, individual_chars, ranges):
    '''
    Subset the font in font_data, keeping only the specified characters.

    :param font_data: The raw bytes of the sfnt font
    :param individual_chars: An iterable of single characters to keep
    :param ranges: An iterable of (start, end) pairs of single characters,
                   each denoting an inclusive range of characters to keep.
                   start == end is allowed and denotes a single character.
    :return: Whatever the sfntly plugin's subset() returns
    :raises ValueError: If the start of a range is after its end
    :raises NoGlyphs: If the font has no glyphs for the requested characters
    '''
    individual = tuple(sorted(map(ord, individual_chars)))
    cranges = []
    for s, e in ranges:
        sc, ec = map(ord, (s, e))
        # Only a reversed range is an error; previously a range such as
        # ('a', 'a') was rejected with a message claiming the start was
        # after the end.
        if ec < sc:
            raise ValueError('The start character %s is after the end'
                    ' character %s'%(s, e))
        cranges.append((sc, ec))
    # Load the plugin only after the arguments have been validated
    sfntly = load_sfntly()
    try:
        return sfntly.subset(font_data, individual, tuple(cranges))
    except sfntly.NoGlyphs:
        raise NoGlyphs('No glyphs were found in this font for the'
                ' specified characters. Subsetting is pointless')
def main(args):
    '''
    Command line entry point for subsetting a font.

    :param args: [program name, input font file, output font file,
                 characters_to_keep] plus any options understood by
                 option_parser()

    Exits with status 1 (SystemExit) if the arguments are malformed.
    '''
    import sys, time
    from calibre import prints
    parser = option_parser()
    opts, args = parser.parse_args(args)
    if len(args) != 4:
        parser.print_help()
        raise SystemExit(1)
    iff, off, chars = args[1:]
    with open(iff, 'rb') as f:
        orig = f.read()

    chars = [x.strip() for x in chars.split(',')]
    individual, ranges = set(), set()

    def not_single(c):
        # Also reject empty tokens (e.g. from a trailing comma or an open
        # ended range), which would otherwise fail later inside ord()
        if len(c) != 1:
            prints(c, 'is not a single character', file=sys.stderr)
            raise SystemExit(1)

    for c in chars:
        if '-' in c:
            parts = [x.strip() for x in c.split('-')]
            if len(parts) != 2:
                prints('Invalid range:', c, file=sys.stderr)
                raise SystemExit(1)
            if opts.codes:
                parts = tuple(map(unichr, map(int, parts)))
            # An explicit loop: map() in this module is the lazy
            # future_builtins version, so map(not_single, parts) on its own
            # never actually runs the check
            for part in parts:
                not_single(part)
            ranges.add(tuple(parts))
        else:
            if opts.codes:
                c = unichr(int(c))
            not_single(c)
            individual.add(c)
    st = time.time()
    sf, old_stats, new_stats = subset(orig, individual, ranges)
    taken = time.time() - st
    reduced = (len(sf)/len(orig)) * 100
    def sz(x):
        return '%gKB'%(len(x)/1024.)
    print_stats(old_stats, new_stats)
    prints('Original size:', sz(orig), 'Subset size:', sz(sf), 'Reduced to: %g%%'%(reduced))
    prints('Subsetting took %g seconds'%taken)
    with open(off, 'wb') as f:
        f.write(sf)
    prints('Subset font written to:', off)