diff --git a/session.vim b/session.vim index 2bfb0ea3ab..83336b5993 100644 --- a/session.vim +++ b/session.vim @@ -1,6 +1,9 @@ " Project wide builtins let $PYFLAKES_BUILTINS = "_,dynamic_property,__,P,I,lopen,icu_lower,icu_upper,icu_title,ngettext" +" Include directories for C modules +let g:syntastic_c_include_dirs = [ '/usr/include/podofo'] + fun! CalibreLog() " Setup buffers to edit the calibre changelog and version info prior to " making a release. diff --git a/setup/build_environment.py b/setup/build_environment.py index 46661d79a0..b44c57f069 100644 --- a/setup/build_environment.py +++ b/setup/build_environment.py @@ -6,7 +6,7 @@ __license__ = 'GPL v3' __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import os, socket, struct, subprocess, glob +import os, socket, struct, subprocess from distutils.spawn import find_executable from PyQt4 import pyqtconfig @@ -84,7 +84,6 @@ ft_lib_dirs = [] ft_libs = [] jpg_libs = [] jpg_lib_dirs = [] -poppler_objs = [] fc_inc = '/usr/include/fontconfig' fc_lib = '/usr/lib' podofo_inc = '/usr/include/podofo' @@ -114,12 +113,7 @@ if iswindows: jpg_libs = ['jpeg'] ft_lib_dirs = [sw_lib_dir] ft_libs = ['freetype'] - poppler_inc_dirs = consolidate('POPPLER_INC_DIR', - r'%s\poppler;%s'%(sw_inc_dir, sw_inc_dir)) - poppler_lib_dirs = consolidate('POPPLER_LIB_DIR', sw_lib_dir) - popplerqt4_lib_dirs = poppler_lib_dirs - poppler_libs = ['poppler'] magick_inc_dirs = [os.path.join(prefix, 'build', 'ImageMagick-6.7.6')] magick_lib_dirs = [os.path.join(magick_inc_dirs[0], 'VisualMagick', 'lib')] magick_libs = ['CORE_RL_wand_', 'CORE_RL_magick_'] @@ -128,13 +122,6 @@ if iswindows: elif isosx: fc_inc = '/sw/include/fontconfig' fc_lib = '/sw/lib' - poppler = glob.glob('/sw/build/poppler-*')[-1] - poppler_inc_dirs = consolidate('POPPLER_INC_DIR', - '{0}/poppler:{0}'.format(poppler)) - poppler_lib_dirs = consolidate('POPPLER_LIB_DIR', - '/sw/lib') - poppler_libs = ['poppler'] - popplerqt4_lib_dirs = poppler_lib_dirs podofo_inc = '/sw/podofo' podofo_lib = '/sw/lib' magick_inc_dirs = consolidate('MAGICK_INC', @@ -147,22 +134,15 @@ elif isosx: png_libs = ['png12'] else: # Include directories - poppler_inc_dirs = pkgconfig_include_dirs('poppler', - 'POPPLER_INC_DIR', '/usr/include/poppler') png_inc_dirs = pkgconfig_include_dirs('libpng', 'PNG_INC_DIR', '/usr/include') magick_inc_dirs = pkgconfig_include_dirs('MagickWand', 'MAGICK_INC', '/usr/include/ImageMagick') # Library directories - poppler_lib_dirs = popplerqt4_lib_dirs = pkgconfig_lib_dirs('poppler', 'POPPLER_LIB_DIR', - '/usr/lib') png_lib_dirs = pkgconfig_lib_dirs('libpng', 'PNG_LIB_DIR', '/usr/lib') magick_lib_dirs = pkgconfig_lib_dirs('MagickWand', 'MAGICK_LIB', '/usr/lib') # Libraries - poppler_libs = pkgconfig_libs('poppler', '', '') - if not poppler_libs: - poppler_libs = ['poppler'] magick_libs = pkgconfig_libs('MagickWand', '', '') if not magick_libs: magick_libs = ['MagickWand', 'MagickCore'] @@ -176,26 +156,6 @@ fc_error = None if os.path.exists(os.path.join(fc_inc, 'fontconfig.h')) else \ 'Try setting the FC_INC_DIR and FC_LIB_DIR environment ' 'variables.') - -poppler_error = None -poppler_cflags = ['-DPNG_SKIP_SETJMP_CHECK'] if islinux else [] -if not poppler_inc_dirs or not os.path.exists( - os.path.join(poppler_inc_dirs[0], 'OutputDev.h')): - poppler_error = \ - ('Poppler not found on your system. Various PDF related', - ' functionality will not work. Use the POPPLER_INC_DIR and', - ' POPPLER_LIB_DIR environment variables. calibre requires ' - ' the poppler XPDF headers. If your distro does not ' - ' include them you will have to re-compile poppler ' - ' by hand with --enable-xpdf-headers') -else: - lh = os.path.join(poppler_inc_dirs[0], 'Link.h') - if 'class AnnotLink' not in open(lh, 'rb').read(): - poppler_cflags.append('-DPOPPLER_OLD_LINK_TYPE') - ph = os.path.join(poppler_inc_dirs[0], 'Page.h') - if 'getLinks(Catalog' in open(ph, 'rb').read(): - poppler_cflags.append('-DPOPPLER_PRE_20') - magick_error = None if not magick_inc_dirs or not os.path.exists(os.path.join(magick_inc_dirs[0], 'wand')): diff --git a/setup/commands.py b/setup/commands.py index 9fbc048254..3409a1ad4b 100644 --- a/setup/commands.py +++ b/setup/commands.py @@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en' __all__ = [ 'pot', 'translations', 'get_translations', 'iso639', - 'build', 'build_pdf2xml', 'server', + 'build', 'server', 'gui', 'develop', 'install', 'kakasi', 'coffee', 'resources', @@ -31,9 +31,8 @@ translations = Translations() get_translations = GetTranslations() iso639 = ISO639() -from setup.extensions import Build, BuildPDF2XML +from setup.extensions import Build build = Build() -build_pdf2xml = BuildPDF2XML() from setup.server import Server server = Server() diff --git a/setup/extensions.py b/setup/extensions.py index ccff4b6ff7..dadb84233d 100644 --- a/setup/extensions.py +++ b/setup/extensions.py @@ -12,14 +12,11 @@ from distutils import sysconfig from PyQt4.pyqtconfig import QtGuiModuleMakefile from setup import Command, islinux, isbsd, isosx, SRC, iswindows -from setup.build_environment import (fc_inc, fc_lib, chmlib_inc_dirs, - fc_error, poppler_libs, poppler_lib_dirs, poppler_inc_dirs, podofo_inc, - podofo_lib, podofo_error, poppler_error, pyqt, OSX_SDK, NMAKE, - QMAKE, msvc, MT, win_inc, win_lib, png_inc_dirs, win_ddk, - magick_inc_dirs, magick_lib_dirs, png_lib_dirs, png_libs, - magick_error, magick_libs, ft_lib_dirs, ft_libs, jpg_libs, - jpg_lib_dirs, chmlib_lib_dirs, sqlite_inc_dirs, icu_inc_dirs, - icu_lib_dirs, poppler_cflags) +from setup.build_environment import (fc_inc, fc_lib, chmlib_inc_dirs, fc_error, + podofo_inc, podofo_lib, podofo_error, pyqt, OSX_SDK, NMAKE, QMAKE, + msvc, MT, win_inc, win_lib, win_ddk, magick_inc_dirs, magick_lib_dirs, + magick_libs, chmlib_lib_dirs, sqlite_inc_dirs, icu_inc_dirs, + icu_lib_dirs) MT isunix = islinux or isosx or isbsd @@ -51,7 +48,6 @@ class Extension(object): reflow_sources = glob.glob(os.path.join(SRC, 'calibre', 'ebooks', 'pdf', '*.cpp')) reflow_headers = glob.glob(os.path.join(SRC, 'calibre', 'ebooks', 'pdf', '*.h')) -reflow_error = poppler_error if poppler_error else magick_error pdfreflow_libs = [] if iswindows: @@ -107,16 +103,6 @@ extensions = [ inc_dirs=magick_inc_dirs ), - Extension('pdfreflow', - reflow_sources, - headers=reflow_headers, - libraries=poppler_libs+magick_libs+png_libs+ft_libs+jpg_libs+pdfreflow_libs, - lib_dirs=poppler_lib_dirs+magick_lib_dirs+png_lib_dirs+ft_lib_dirs+jpg_lib_dirs, - inc_dirs=poppler_inc_dirs+magick_inc_dirs+png_inc_dirs, - error=reflow_error, - cflags=poppler_cflags - ), - Extension('lzx', ['calibre/utils/lzx/lzxmodule.c', 'calibre/utils/lzx/compressor.c', @@ -445,48 +431,5 @@ class Build(Command): shutil.rmtree(build_dir) -class BuildPDF2XML(Command): - - description = 'Build command line pdf2xml utility' - - def run(self, opts): - dest = os.path.expanduser('~/bin/pdf2xml') - if iswindows: - dest = r'C:\cygwin\home\kovid\sw\bin\pdf2xml.exe' - odest = self.j(self.d(self.SRC), 'build', 'objects', 'pdf2xml') - if not os.path.exists(odest): - os.makedirs(odest) - - objects = [] - for src in reflow_sources: - if src.endswith('python.cpp'): - continue - obj = self.j(odest, self.b(src+('.obj' if iswindows else '.o'))) - if self.newer(obj, [src]+reflow_headers): - cmd = [cxx, '-pthread', '-pedantic', '-ggdb', '-c', '-Wall', '-I/usr/include/poppler', - '-I/usr/include/ImageMagick', - '-DPDF2XML', '-o', obj, src] - if iswindows: - cmd = [cxx, '/c', '/MD', '/W3', '/EHsc', '/Zi', '/DPDF2XML'] - cmd += ['-I'+x for x in poppler_inc_dirs+magick_inc_dirs] - cmd += ['/Fo'+obj, src] - self.info(*cmd) - self.check_call(cmd) - objects.append(obj) - - if self.newer(dest, objects): - cmd = ['g++', '-ggdb', '-o', dest]+objects+['-lpoppler', '-lMagickWand', - '-lpng', '-lpthread'] - if iswindows: - cmd = [msvc.linker] + '/INCREMENTAL:NO /DEBUG /NODEFAULTLIB:libcmt.lib'.split() - cmd += ['/LIBPATH:'+x for x in magick_lib_dirs+poppler_lib_dirs] - cmd += [x+'.lib' for x in - png_libs+magick_libs+poppler_libs+ft_libs+jpg_libs+pdfreflow_libs] - cmd += ['/OUT:'+dest] + objects - self.info(*cmd) - self.check_call(cmd) - - self.info('Binary installed as', dest) - diff --git a/setup/installer/linux/freeze2.py b/setup/installer/linux/freeze2.py index 88b97c6e2d..81a9b70457 100644 --- a/setup/installer/linux/freeze2.py +++ b/setup/installer/linux/freeze2.py @@ -22,6 +22,8 @@ QTDLLS = ('QtCore', 'QtGui', 'QtNetwork', 'QtSvg', 'QtXml', 'QtWebKit', MAGICK_PREFIX = '/usr' binary_includes = [ '/usr/bin/pdftohtml', + '/usr/bin/pdfinfo', + '/usr/bin/pdftoppm', '/usr/lib/libwmflite-0.2.so.7', '/usr/lib/liblcms.so.1', '/usr/lib/liblzma.so.0', diff --git a/setup/installer/osx/app/main.py b/setup/installer/osx/app/main.py index 9584c80b67..2cf7e1df48 100644 --- a/setup/installer/osx/app/main.py +++ b/setup/installer/osx/app/main.py @@ -387,7 +387,8 @@ class Py2App(object): info('\nAdding poppler') for x in ('libpoppler.25.dylib',): self.install_dylib(os.path.join(SW, 'lib', x)) - self.install_dylib(os.path.join(SW, 'bin', 'pdftohtml'), False) + for x in ('pdftohtml', 'pdftoppm', 'pdfinfo'): + self.install_dylib(os.path.join(SW, 'bin', x), False) @flush def add_libjpeg(self): diff --git a/setup/installer/windows/freeze.py b/setup/installer/windows/freeze.py index 0278e77a23..448d888505 100644 --- a/setup/installer/windows/freeze.py +++ b/setup/installer/windows/freeze.py @@ -260,7 +260,8 @@ class Win32Freeze(Command, WixMixIn): print '\tAdding misc binary deps' bindir = os.path.join(SW, 'bin') - shutil.copy2(os.path.join(bindir, 'pdftohtml.exe'), self.base) + for x in ('pdftohtml', 'pdfinfo', 'pdftoppm'): + shutil.copy2(os.path.join(bindir, x+'.exe'), self.base) for pat in ('*.dll',): for f in glob.glob(os.path.join(bindir, pat)): ok = True diff --git a/setup/installer/windows/notes.rst b/setup/installer/windows/notes.rst index c2e7dca8eb..7fe978d30b 100644 --- a/setup/installer/windows/notes.rst +++ b/setup/installer/windows/notes.rst @@ -293,9 +293,7 @@ In Cmake: disable GTK, Qt, OPenjpeg, cpp, lcms, gtk_tests, qt_tests. Enable qt4, NOTE: poppler must be built as a static library, unless you build the qt4 bindings -Now do the same for the pdftohtml project - -cp poppler/*.h ~/sw/include/poppler && cp goo/*.h ~/sw/include/poppler/goo && cp splash/*.h ~/sw/include/poppler/splash && cp build/Release/poppler.lib ../../lib/ && cp build/utils/Release/pdftohtml.exe ../../bin/ +cp build/utils/Release/*.exe ../../bin/ podofo diff --git a/src/calibre/constants.py b/src/calibre/constants.py index 2a0602afa0..cdded6b149 100644 --- a/src/calibre/constants.py +++ b/src/calibre/constants.py @@ -82,7 +82,6 @@ class Plugins(collections.Mapping): 'podofo', 'cPalmdoc', 'fontconfig', - 'pdfreflow', 'progress_indicator', 'chmlib', 'chm_extra', diff --git a/src/calibre/ebooks/metadata/pdf.py b/src/calibre/ebooks/metadata/pdf.py index 20a4c4659e..53929013dd 100644 --- a/src/calibre/ebooks/metadata/pdf.py +++ b/src/calibre/ebooks/metadata/pdf.py @@ -4,25 +4,80 @@ __copyright__ = '2008, Kovid Goyal ' '''Read meta information from PDF files''' #import re +import os, subprocess, shutil from functools import partial from calibre import prints -from calibre.constants import plugins +from calibre.constants import iswindows +from calibre.ptempfile import TemporaryDirectory from calibre.ebooks.metadata import MetaInformation, string_to_authors - -pdfreflow, pdfreflow_error = plugins['pdfreflow'] +from calibre.utils.ipc.simple_worker import fork_job #_isbn_pat = re.compile(r'ISBN[: ]*([-0-9Xx]+)') +def read_info(outputdir, get_cover): + ''' Read info dict and cover from a pdf file named src.pdf in outputdir. + Note that this function changes the cwd to outputdir and is therefore not + thread safe. Run it using fork_job. This is necessary as there is no safe + way to pass unicode paths via command line arguments. This also ensures + that if poppler crashes, no stale file handles are left for the original + file, only for src.pdf.''' + + from calibre.ebooks.pdf.pdftohtml import PDFTOHTML + os.chdir(outputdir) + base = os.path.dirname(PDFTOHTML) + suffix = '.exe' if iswindows else '' + pdfinfo = os.path.join(base, 'pdfinfo') + suffix + pdftoppm = os.path.join(base, 'pdftoppm') + suffix + + try: + raw = subprocess.check_output([pdfinfo, '-enc', 'UTF-8', 'src.pdf']) + except subprocess.CalledProcessError as e: + prints('pdfinfo errored out with return code: %d'%e.returncode) + return None + try: + raw = raw.decode('utf-8') + except UnicodeDecodeError: + prints('pdfinfo returned no UTF-8 data') + return None + + ans = {} + for line in raw.splitlines(): + if u':' not in line: continue + field, val = line.partition(u':')[::2] + val = val.strip() + if field and val: + ans[field] = val.strip() + + if get_cover: + try: + subprocess.check_call([pdftoppm, '-singlefile', '-jpeg', + 'src.pdf', 'cover']) + except subprocess.CalledProcessError as e: + prints('pdftoppm errored out with return code: %d'%e.returncode) + + return ans + def get_metadata(stream, cover=True): - if pdfreflow is None: - raise RuntimeError(pdfreflow_error) - stream.seek(0) - raw = stream.read() - #isbn = _isbn_pat.search(raw) - #if isbn is not None: - # isbn = isbn.group(1).replace('-', '').replace(' ', '') - info = pdfreflow.get_metadata(raw, cover) + with TemporaryDirectory('_pdf_metadata_read') as pdfpath: + stream.seek(0) + with open(os.path.join(pdfpath, 'src.pdf'), 'wb') as f: + shutil.copyfileobj(stream, f) + res = fork_job('calibre.ebooks.metadata.pdf', 'read_info', (pdfpath, + bool(cover))) + info = res['result'] + with open(res['stdout_stderr'], 'rb') as f: + raw = f.read().strip() + if raw: + prints(raw) + if not info: + raise ValueError('Could not read info dict from PDF') + covpath = os.path.join(pdfpath, 'cover.jpg') + cdata = None + if cover and os.path.exists(covpath): + with open(covpath, 'rb') as f: + cdata = f.read() + title = info.get('Title', None) au = info.get('Author', None) if au is None: @@ -46,12 +101,8 @@ def get_metadata(stream, cover=True): if subject: mi.tags.insert(0, subject) - if cover and 'cover' in info: - data = info['cover'] - if data is None: - prints(title, 'has no pages, cover extraction impossible.') - else: - mi.cover_data = ('png', data) + if cdata: + mi.cover_data = ('jpeg', cdata) return mi diff --git a/src/calibre/ebooks/pdf/fonts.cpp b/src/calibre/ebooks/pdf/fonts.cpp deleted file mode 100644 index c3a709869e..0000000000 --- a/src/calibre/ebooks/pdf/fonts.cpp +++ /dev/null @@ -1,151 +0,0 @@ -/** - * Copyright 2009 Kovid Goyal - * License: GNU GPL v2+ - */ - - - -#include "fonts.h" -#include "utils.h" - -using namespace calibre_reflow; -using namespace std; - -XMLColor::XMLColor(GfxRGB rgb) { - this->r = static_cast(rgb.r/65535.0*255.0); - this->g = static_cast(rgb.g/65535.0*255.0); - this->b = static_cast(rgb.b/65535.0*255.0); - if (!(this->ok(this->r) && this->ok(this->b) && this->ok(this->g))) { - this->r = 0; this->g = 0; this->b = 0; - } -} - -string XMLColor::str() const { - ostringstream oss; - oss << "rgb(" << this->r << "," << this->g << "," << this->b << ")"; - return oss.str(); -} - -static const char *FONT_MODS[7] = { - "-bolditalic", "-boldoblique", "-bold", "-italic", "-oblique", "-roman", - NULL -}; - -#ifdef _WIN32 -#define ap_toupper(c) (toupper(((unsigned char)(c)))) -static inline -const char *strcasestr(const char *h, const char *n ) -{ /* h="haystack", n="needle" */ - const char *a=h, *e=n; - - if( !h || !*h || !n || !*n ) { return 0; } - - while( *a && *e ) { - if( ap_toupper(*a)!=ap_toupper(*e) ) { - ++h; a=h; e=n; - } - else { - ++a; ++e; - } - } - return *e ? 0 : h; -} -#endif - -static string* family_name(const string *font_name) { - if (!font_name) return NULL; - string *fn = new string(*font_name); - size_t pos; - const char *p; - for (size_t i = 0; FONT_MODS[i] != NULL; i++) { - p = strcasestr(fn->c_str(), FONT_MODS[i]); - if (p != NULL) { - pos = p - fn->c_str(); - fn->replace(pos, strlen(FONT_MODS[i]), ""); - break; - } - } - return fn; -} - -XMLFont::XMLFont(string* font_name, double size, GfxRGB rgb) : - size(size-1), line_size(-1.0), italic(false), bold(false), font_name(font_name), - font_family(NULL), color(rgb) { - - - if (!this->font_name) this->font_name = new string(DEFAULT_FONT_FAMILY); - this->font_family = family_name(this->font_name); - if (strcasestr(font_name->c_str(), "bold")) this->bold = true; - - if (strcasestr(font_name->c_str(),"italic")|| - strcasestr(font_name->c_str(),"oblique")) this->italic = true; - - -} - -XMLFont& XMLFont::operator=(const XMLFont& x){ - if (this==&x) return *this; - this->size = x.size; - this->line_size = x.line_size; - this->italic = x.italic; - this->bold = x.bold; - this->color = x.color; - if (this->font_name) delete this->font_name; - this->font_name = new string(*x.font_name); - if (this->font_family) delete this->font_family; - this->font_family = new string(*x.font_family); - return *this; -} - -bool XMLFont::operator==(const XMLFont &f) const { - return (fabs(this->size - f.size) < 0.1) && - (fabs(this->line_size - f.line_size) < 0.1) && - (this->italic == f.italic) && - (this->bold == f.bold) && - (this->color == f.color) && - ((*this->font_family) == (*f.font_family)); -} - -bool XMLFont::eq_upto_inline(const XMLFont &f) const { - return (fabs(this->size - f.size) < 0.1) && - (fabs(this->line_size - f.line_size) < 0.1) && - (this->color == f.color) && - ((*this->font_family) == (*f.font_family)); -} - -string XMLFont::str(Fonts::size_type id) const { - ostringstream oss; - oss << "font_family) << "\" "; - oss << "color=\"" << this->color.str() << "\" "; - oss << setiosflags(ios::fixed) << setprecision(2) - << "size=\"" << this->size << "\""; - oss << "/>"; - return oss.str(); -} - -Fonts::size_type Fonts::add_font(XMLFont *f) { - Fonts::iterator it; - size_type i; - for ( i=0, it=this->begin(); it < this->end(); it++, i++ ) { - if (**it == *f) return i; - } - this->push_back(f); - return this->size()-1; -} - -Fonts::size_type Fonts::add_font(string* font_name, double size, GfxRGB rgb) { - XMLFont *f = NULL; - if (font_name == NULL) - font_name = new string("Unknown"); - // font_name must not be deleted - f = new XMLFont(font_name, size, rgb); - - return this->add_font(f); -} - -Fonts::~Fonts() { - Fonts::iterator it; - for ( it=this->begin(); it < this->end(); it++ ) delete *it; - this->resize(0); -} diff --git a/src/calibre/ebooks/pdf/fonts.h b/src/calibre/ebooks/pdf/fonts.h deleted file mode 100644 index 1b380e1b87..0000000000 --- a/src/calibre/ebooks/pdf/fonts.h +++ /dev/null @@ -1,102 +0,0 @@ -/** - * Copyright 2009 Kovid Goyal - * License: GNU GPL v2+ - */ - - -#pragma once - -#include -#include -#include -#include -#include -#include - -using namespace std; - -#define DEFAULT_FONT_FAMILY "Times New Roman" - -namespace calibre_reflow { - -class XMLColor { - - private: - unsigned int r; - unsigned int g; - unsigned int b; - inline bool ok(unsigned int xcol) const { - return ( (xcol <= 255) && (xcol >= 0) ); - } - - public: - XMLColor():r(0),g(0),b(0){} - - XMLColor(GfxRGB rgb); - - XMLColor(const XMLColor& x) { - this->r=x.r; this->g=x.g; this->b=x.b; - } - - XMLColor& operator=(const XMLColor &x){ - this->r=x.r; this->g=x.g; this->b=x.b; - return *this; - } - - ~XMLColor(){} - - string str() const; - - bool operator==(const XMLColor &col) const { - return ((r==col.r)&&(g==col.g)&&(b==col.b)); - } - -}; - - -class XMLFont { - -private: - double size; - double line_size; - bool italic; - bool bold; - string *font_name; - string *font_family; - XMLColor color; - -public: - XMLFont(const char *font_family=DEFAULT_FONT_FAMILY, double size=12.0) : - size(size), line_size(-1.0), italic(false), bold(false), - font_name(new string(font_family)), font_family(new string(font_family)), - color() {} - - XMLFont(string* font_name, double size, GfxRGB rgb); - XMLFont(const XMLFont& other) : - size(other.size), line_size(other.line_size), italic(other.italic), - bold(other.bold), font_name(new string(*other.font_name)), - font_family(other.font_family), color(other.color) {} - - XMLColor get_color() { return this->color; } - string* get_font_name() { return this->font_name; } - double get_size() const { return this->size; } - double get_line_size() { return this->line_size; } - void set_line_size(double ls) { this->line_size = ls; } - bool is_italic() const { return this->italic; } - bool is_bold() const { return this->bold; } - ~XMLFont() { delete this->font_name; delete this->font_family; } - XMLFont& operator=(const XMLFont& other); - bool operator==(const XMLFont &other) const; - bool eq_upto_inline(const XMLFont &f) const; - string str(vector::size_type id) const; -}; - -class Fonts : public vector { - public: - Fonts::size_type add_font(XMLFont *f); - Fonts::size_type add_font(string* font_name, double size, GfxRGB rgb); - ~Fonts(); -}; - - -} diff --git a/src/calibre/ebooks/pdf/images.cpp b/src/calibre/ebooks/pdf/images.cpp deleted file mode 100644 index 466c69af4b..0000000000 --- a/src/calibre/ebooks/pdf/images.cpp +++ /dev/null @@ -1,433 +0,0 @@ -/** - * Copyright 2009 Kovid Goyal - * License: GNU GPL v2+ - */ - - - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "images.h" -#include "utils.h" - -#ifdef _WIN32 -inline double round(double x) { return (x-floor(x))>0.5 ? ceil(x) : floor(x); } -#endif - -#define xoutRound(x) ( static_cast(round(x)) ) -using namespace std; -using namespace calibre_reflow; - -calibre_reflow::ImageInfo::ImageInfo(GfxState *state) { - // get image position and size - state->transform(0, 0, &xt, &yt); - state->transformDelta(1, 1, &wt, &ht); - if (wt > 0) { - x0 = xoutRound(xt); - w0 = xoutRound(wt); - } else { - x0 = xoutRound(xt + wt); - w0 = xoutRound(-wt); - } - if (ht > 0) { - y0 = xoutRound(yt); - h0 = xoutRound(ht); - } else { - y0 = xoutRound(yt + ht); - h0 = xoutRound(-ht); - } - state->transformDelta(1, 0, &xt, &yt); - rotate = fabs(xt) < fabs(yt); - if (rotate) { - w1 = h0; - h1 = w0; - x_flip = ht < 0; - y_flip = wt > 0; - } else { - w1 = w0; - h1 = h0; - x_flip = wt < 0; - y_flip = ht > 0; - } - //cout << x_flip << "|" << y_flip << endl; -} - -void XMLImages::clear() { - vector::iterator it; - for (it = this->masks.begin(); it < this->masks.end(); it++) - delete *it; - for (it = this->images.begin(); it < this->images.end(); it++) - delete *it; - this->masks.clear(); - this->images.clear(); -} - -void XMLImages::add_mask(GfxState *state, Object *ref, Stream *str, - unsigned int width, unsigned int height, bool invert, - bool interpolate, bool inline_img) { -} - -static void throw_magick_exception(MagickWand *wand) { - ExceptionType severity; - char *description = MagickGetException(wand, &severity); - ostringstream oss; - oss << description << endl; - description=(char *) MagickRelinquishMemory(description); - wand = DestroyMagickWand(wand); - MagickWandTerminus(); - throw ReflowException(oss.str().c_str()); -} - - -static void flip_image(string file_name, bool x_flip, bool y_flip) { - MagickWand *magick_wand; - MagickBooleanType status; - - MagickWandGenesis(); - magick_wand = NewMagickWand(); - status = MagickReadImage(magick_wand, file_name.c_str()); - if (status == MagickFalse) throw_magick_exception(magick_wand); - - if (y_flip) { - status = MagickFlipImage(magick_wand); - if (status == MagickFalse) throw_magick_exception(magick_wand); - } - if (x_flip) { - status = MagickFlopImage(magick_wand); - if (status == MagickFalse) throw_magick_exception(magick_wand); - } - - status = MagickWriteImage(magick_wand, NULL); - if (status == MagickFalse) throw_magick_exception(magick_wand); - - magick_wand = DestroyMagickWand(magick_wand); - MagickWandTerminus(); -} - -void XMLImages::add(GfxState *state, Object *ref, Stream *str, - unsigned int width, unsigned int height, GfxImageColorMap *colorMap, - bool interpolate, int *maskColors, bool inline_img) { - XMLImage *img = new XMLImage(state); - this->images.push_back(img); - img->width = width; img->height = height; - img->type = (str->getKind() == strDCT) ? jpeg : png; - string file_name = this->file_name(img); - - FILE *of = fopen(file_name.c_str(), "wb"); - if (!of) throw ReflowException(strerror(errno)); - - if (img->type == jpeg) { - int c; - str = str->getNextStream(); - str->reset(); - - // copy the stream - while ((c = str->getChar()) != EOF) fputc(c, of); - } else { //Render as PNG - Guchar *p; - GfxRGB rgb; - png_byte *row = (png_byte *) malloc(3 * width); // 3 bytes/pixel: RGB - png_bytep *row_pointer= &row; - - PNGWriter *writer = new PNGWriter(); - writer->init(of, width, height); - - // Initialize the image stream - ImageStream *imgStr = new ImageStream(str, width, - colorMap->getNumPixelComps(), colorMap->getBits()); - imgStr->reset(); - - // For each line... - for (unsigned int y = 0; y < height; y++) { - // Convert into a PNG row - p = imgStr->getLine(); - for (unsigned int x = 0; x < width; x++) { - colorMap->getRGB(p, &rgb); - // Write the RGB pixels into the row - row[3*x]= colToByte(rgb.r); - row[3*x+1]= colToByte(rgb.g); - row[3*x+2]= colToByte(rgb.b); - p += colorMap->getNumPixelComps(); - } - - writer->writeRow(row_pointer); - } - - writer->close(); - delete writer; - - free(row); - imgStr->close(); - delete imgStr; - - } - fclose(of); - img->written = true; - if (img->info.x_flip || img->info.y_flip) - flip_image(file_name, img->info.x_flip, img->info.y_flip); -} - - -string XMLImages::file_name(const XMLImage *img) const { - vector::const_iterator ir, mr; - size_t idx = 0; - bool mask = false; - - ir = find( this->images.begin(), this->images.end(), img); - if (ir == this->images.end()) { - mr = find( this->masks.begin(), this->masks.end(), img); - idx = mr - this->masks.begin(); - mask = true; - } else idx = ir - this->images.begin(); - - ostringstream oss; - oss << ((mask) ? "mask" : "image") << "-" << idx+1 << '.'; - oss << ((img->type == jpeg) ? "jpg" : "png"); - return oss.str(); -} - -vector XMLImages::str() const { - vector ans; - vector ::const_iterator it; - for (it = this->masks.begin(); it < this->masks.end(); it++) { - if ((*it)->written) - ans.push_back(new string((*it)->str(it - this->masks.begin(), true, - this->file_name(*it)))); - } - for (it = this->images.begin(); it < this->images.end(); it++) { - if ((*it)->written) - ans.push_back(new string((*it)->str(it - this->images.begin(), false, - this->file_name(*it)))); - } - return ans; -} - -string XMLImage::str(size_t num, bool mask, string file_name) const { - ostringstream oss; - oss << "width << "\" iheight=\"" << this->height << "\" " - << "rwidth=\"" << this->info.w1 << "\" rheight=\"" << this->info.h1 << "\" " - << setiosflags(ios::fixed) << setprecision(2) - << "top=\"" << this->info.y0 << "\" left=\"" << this->info.x0 << "\"/>"; - return oss.str(); - - -} -PNGWriter::~PNGWriter() -{ - /* cleanup heap allocation */ - png_destroy_write_struct(&png_ptr, &info_ptr); -} - -void PNGWriter::init(FILE *f, int width, int height) -{ - /* initialize stuff */ - png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL); - if (!png_ptr) - throw ReflowException("png_create_write_struct failed"); - - info_ptr = png_create_info_struct(png_ptr); - if (!info_ptr) - throw ReflowException("png_create_info_struct failed"); - - if (setjmp(png_jmpbuf(png_ptr))) - throw ReflowException("png_jmpbuf failed"); - - /* write header */ - png_init_io(png_ptr, f); - if (setjmp(png_jmpbuf(png_ptr))) - throw ReflowException("Error during writing header"); - - // Set up the type of PNG image and the compression level - png_set_compression_level(png_ptr, Z_BEST_COMPRESSION); - - png_byte bit_depth = 8; - png_byte color_type = PNG_COLOR_TYPE_RGB; - png_byte interlace_type = PNG_INTERLACE_NONE; - - png_set_IHDR(png_ptr, info_ptr, width, height, bit_depth, color_type, interlace_type, PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT); - - png_write_info(png_ptr, info_ptr); - if (setjmp(png_jmpbuf(png_ptr))) - throw ReflowException("error during writing png info bytes"); - -} - -void PNGWriter::writePointers(png_bytep *rowPointers) -{ - png_write_image(png_ptr, rowPointers); - /* write bytes */ - if (setjmp(png_jmpbuf(png_ptr))) - throw ReflowException("Error during writing bytes"); -} - -void PNGWriter::writeRow(png_bytep *row) -{ - // Write the row to the file - png_write_rows(png_ptr, row, 1); - if (setjmp(png_jmpbuf(png_ptr))) - throw ReflowException("error during png row write"); -} - -void PNGWriter::close() -{ - /* end write */ - png_write_end(png_ptr, info_ptr); - if (setjmp(png_jmpbuf(png_ptr))) - throw ReflowException("Error during end of write"); -} - -void PNGWriter::write_splash_bitmap(SplashBitmap *bitmap) { - SplashColorPtr row = bitmap->getDataPtr(); - int height = bitmap->getHeight(); - int row_size = bitmap->getRowSize(); - png_bytep *row_pointers = new png_bytep[height]; - - for (int y = 0; y < height; ++y) { - row_pointers[y] = row; - row += row_size; - } - this->writePointers(row_pointers); - delete[] row_pointers; -} - -void calibre_png_mem_write(png_structp png_ptr, png_bytep data, png_size_t length) { - if (!png_ptr || length < 1) return; - vector *buf = static_cast< vector* >(png_get_io_ptr(png_ptr)); - buf->reserve(buf->capacity() + length); - do { - buf->push_back(static_cast(*data)); - data++; length--; - } while(length > 0); -} - -void calibre_png_mem_flush(png_structp png_ptr) {} - -void PNGMemWriter::init(vector *buf, int width, int height) { - /* initialize stuff */ - this->png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL); - if (!this->png_ptr) - throw ReflowException("png_create_write_struct failed"); - - this->info_ptr = png_create_info_struct(png_ptr); - if (!this->info_ptr) - throw ReflowException("png_create_info_struct failed"); - - if (setjmp(png_jmpbuf(this->png_ptr))) - throw ReflowException("png_jmpbuf failed"); - - png_set_write_fn(this->png_ptr, static_cast(buf), - calibre_png_mem_write, calibre_png_mem_flush); - if (setjmp(png_jmpbuf(this->png_ptr))) - throw ReflowException("png_set_write failed"); - - - // Set up the type of PNG image and the compression level - png_set_compression_level(this->png_ptr, Z_BEST_COMPRESSION); - - png_byte bit_depth = 8; - png_byte color_type = PNG_COLOR_TYPE_RGB; - png_byte interlace_type = PNG_INTERLACE_NONE; - - png_set_IHDR(this->png_ptr, this->info_ptr, width, height, - bit_depth, color_type, interlace_type, - PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT); - - png_write_info(png_ptr, info_ptr); - if (setjmp(png_jmpbuf(png_ptr))) - throw ReflowException("error during writing png info bytes"); - -} - -/* -void calibre_jpeg_error_exit (j_common_ptr cinfo) -{ - // cinfo->err really points to a my_error_mgr struct, so coerce pointer - calibre_jpeg_err_mgr *err = (calibre_jpeg_err_mgr *)(cinfo->err); - - // Always display the message. - // We could postpone this until after returning, if we chose. - //(*cinfo->err->output_message) (cinfo); - - // Return control to the setjmp point - longjmp(err->setjmp_buffer, 1); -} - - -JPEGWriter::JPEGWriter() { - this->cinfo.err = jpeg_std_error(&this->jerr.pub); - jpeg_create_compress(&this->cinfo); - this->jerr.pub.error_exit = calibre_jpeg_error_exit; - this->check(); - this->outfile = NULL; -} - -void JPEGWriter::init(int width, int height) { - cinfo.image_width = width; - cinfo.image_height = height; - cinfo.input_components = 3; // # of color components per pixel - cinfo.in_color_space = JCS_RGB; - jpeg_set_defaults(&this->cinfo); - this->check(); - jpeg_start_compress(&this->cinfo, TRUE); - this->check(); -} - -void JPEGWriter::init_io(FILE *f) { - jpeg_stdio_dest(&this->cinfo, f); - this->check(); - this->outfile = f; -} - -void JPEGWriter::check() { - if (setjmp(jerr.setjmp_buffer)) this->raise(); -} - -void JPEGWriter::raise() { - char buffer[JMSG_LENGTH_MAX]; - - // Create the message - (*this->cinfo.err->format_message) ((jpeg_common_struct *)(&this->cinfo), buffer); - jpeg_destroy_compress(&this->cinfo); - throw ReflowException(buffer); -} - -void JPEGWriter::write_image(JSAMPARRAY image_buffer, JDIMENSION num) { - size_t num_written = jpeg_write_scanlines(&this->cinfo, image_buffer, num); - this->check(); - if (num_written != num) { - jpeg_destroy_compress(&this->cinfo); - throw ReflowException("Failed to write all JPEG scanlines."); - } -} - -void JPEGWriter::write_splash_bitmap(SplashBitmap *bitmap) { - SplashColorPtr row = bitmap->getDataPtr(); - int height = bitmap->getHeight(); - int row_size = bitmap->getRowSize(); - JSAMPARRAY row_pointers = new JSAMPLE*[height]; - - for (int y = 0; y < height; ++y) { - row_pointers[y] = row; - row += row_size; - } - this->write_image(row_pointers, height); - delete[] row_pointers; - jpeg_finish_compress(&this->cinfo); - this->check(); - fclose(this->outfile); -} - -JPEGWriter::~JPEGWriter() { - jpeg_destroy_compress(&this->cinfo); -} -*/ diff --git a/src/calibre/ebooks/pdf/images.h b/src/calibre/ebooks/pdf/images.h deleted file mode 100644 index 1b4d9b58bf..0000000000 --- a/src/calibre/ebooks/pdf/images.h +++ /dev/null @@ -1,135 +0,0 @@ -/** - * Copyright 2009 Kovid Goyal - * License: GNU GPL v2+ - */ - - - -#pragma once - -#include -#include -#include -#include -#include -#include "utils.h" - -using namespace std; - -namespace calibre_reflow { - - enum ImageType { - jpeg, png - }; - - class PNGWriter - { - public: - PNGWriter() {} - ~PNGWriter(); - - void init(FILE *f, int width, int height); - - void writePointers(png_bytep *rowPointers); - void writeRow(png_bytep *row); - void write_splash_bitmap(SplashBitmap *bitmap); - void close(); - - protected: - png_structp png_ptr; - png_infop info_ptr; - }; - - - class PNGMemWriter : public PNGWriter - { - - public: - void init(vector *buf, int width, int height); - }; - - class ImageInfo { - public: - - ImageInfo(GfxState *state); - - private: - int x0, y0; // top left corner of image - int w0, h0, w1, h1; // size of image - double xt, yt, wt, ht; - bool rotate, x_flip, y_flip; - - friend class XMLImage; - friend class XMLImages; - - }; - - class XMLImage { - private: - double x, y; - unsigned int width, height; - ImageType type; - bool written; - ImageInfo info; - - friend class XMLImages; - - public: - XMLImage(GfxState *state) : - x(0.), y(0.), width(0), height(0), type(jpeg), written(false), info(state) - {} - - ~XMLImage() {} - - string str(size_t num, bool mask, string file_name) const; - }; - - class XMLImages { - private: - vector images; - vector masks; - - public: - - ~XMLImages() { this->clear(); } - - void add_mask(GfxState *state, Object *ref, Stream *str, - unsigned int width, unsigned int height, bool invert, - bool interpolate, bool inline_img); - - void add(GfxState *state, Object *ref, Stream *str, - unsigned int width, unsigned int height, GfxImageColorMap *colorMap, - bool interpolate, int *maskColors, bool inline_img); - - string file_name(const XMLImage *img) const; - vector str() const; - void clear(); - }; -/* - struct calibre_jpeg_err_mgr { - struct jpeg_error_mgr pub; // "public" fields - - jmp_buf setjmp_buffer; // for return to caller - }; - - class JPEGWriter { - private: - FILE *outfile; - - protected: - struct jpeg_compress_struct cinfo; - struct calibre_jpeg_err_mgr jerr; - - void raise(); - void check(); - - public: - JPEGWriter(); - ~JPEGWriter(); - void init_io(FILE *f); - void init(int width, int height); - void write_image(JSAMPARRAY image_buffer, JDIMENSION number_of_scanlines); - void write_splash_bitmap(SplashBitmap *bitmap); - }; -*/ -} diff --git a/src/calibre/ebooks/pdf/links.cpp b/src/calibre/ebooks/pdf/links.cpp deleted file mode 100644 index 8d28492bab..0000000000 --- a/src/calibre/ebooks/pdf/links.cpp +++ /dev/null @@ -1,56 +0,0 @@ -/** - * Copyright 2009 Kovid Goyal - * License: GNU GPL v2+ - */ - - - -#include "links.h" -#include "utils.h" - -using namespace std; -using namespace calibre_reflow; - -XMLLink& XMLLink::operator=(const XMLLink &x) { - if (this==&x) return *this; - if (this->dest) {delete this->dest; this->dest=NULL;} - this->x_min = x.x_min; - this->y_min = x.y_min; - this->x_max = x.x_max; - this->y_max = x.y_max; - this->dest = new string(*x.dest); - return *this; -} - -bool XMLLink::in_link(double xmin,double ymin,double xmax,double ymax) const { - double y = (ymin + ymax)/2; - if (y > this->y_max) return false; - return (y > this->y_min) && (xmin < this->x_max) && (xmax > this->x_min); -} - -string XMLLink::get_link_start() { - ostringstream oss; - oss << "dest) oss << encode_for_xml(*this->dest); - oss << "\">"; - return oss.str(); -} - -XMLLinks::~XMLLinks() { - for(XMLLinks::iterator i = this->begin(); i != this->end(); i++) - delete *i; - this->clear(); -} - -bool XMLLinks::in_link(double xmin, double ymin, double xmax, - double ymax, XMLLinks::size_type &p) const { - for(XMLLinks::const_iterator i = this->begin(); i != this->end(); i++) { - if ( (*i)->in_link(xmin, ymin, xmax, ymax) ) { - p = (i - this->begin()); - return true; - } - } - return false; -} - - diff --git a/src/calibre/ebooks/pdf/links.h b/src/calibre/ebooks/pdf/links.h deleted file mode 100644 index c43911ddca..0000000000 --- a/src/calibre/ebooks/pdf/links.h +++ /dev/null @@ -1,66 +0,0 @@ -/** - * Copyright 2009 Kovid Goyal - * License: GNU GPL v2+ - */ - - - -#pragma once -#include -#include - -using namespace std; - -namespace calibre_reflow { - -class XMLLink { - -private: - double x_min; - double y_min; - double x_max; - double y_max; - string* dest; - -public: - XMLLink() : dest(NULL) {} - XMLLink(const XMLLink& x) : - x_min(x.x_min), y_min(x.y_min), x_max(x.x_max), - y_max(x.y_max), dest(new string(*x.dest)) {} - XMLLink(double x_min, double y_min, double x_max, - double y_max, const char *dest) : - x_min((x_min < x_max) ? x_min : x_max), - y_min((y_min < y_max) ? y_min : y_max), - x_max((x_max > x_min) ? x_max : x_min), - y_max((y_max > y_min) ? y_max : y_min), - dest(new string(dest)) {} - - ~XMLLink() { delete this->dest; } - - string* get_dest() { return this->dest; } - double get_x1() const {return x_min;} - double get_x2() const {return x_max;} - double get_y1() const {return y_min;} - double get_y2() const {return y_max;} - - XMLLink& operator=(const XMLLink &x); - bool operator==(const XMLLink &x) const { - return (this->dest != NULL) && (x.dest != NULL) && - this->dest->compare(*x.dest) == 0; - } - bool in_link(double xmin, double ymin, double xmax, double ymax) const; - string get_link_start(); - -}; - -class XMLLinks : public vector { - public: - ~XMLLinks(); - - bool in_link(double xmin, double ymin, double xmax, - double ymax, XMLLinks::size_type &p) const; -}; - - -} - diff --git a/src/calibre/ebooks/pdf/main.cpp b/src/calibre/ebooks/pdf/main.cpp deleted file mode 100644 index 869204dc1d..0000000000 --- a/src/calibre/ebooks/pdf/main.cpp +++ /dev/null @@ -1,251 +0,0 @@ -/** - * Copyright 2009 Kovid Goyal - * License: GNU GPL v2+ - */ - - - -#ifndef PDF2XML -#define UNICODE -#define PY_SSIZE_T_CLEAN -#include -#endif - -#include "reflow.h" - -using namespace std; -using namespace calibre_reflow; - -#ifndef PDF2XML - -extern "C" { - - static PyObject * - pdfreflow_reflow(PyObject *self, PyObject *args) { - char *pdfdata; - Py_ssize_t size; - int first_page, last_page, num = 0; - - if (!PyArg_ParseTuple(args, "s#ii", &pdfdata, &size, &first_page, &last_page)) - return NULL; - - try { - Reflow reflow(pdfdata, static_cast(size)); - num = reflow.render(first_page, last_page); - } catch (std::exception &e) { - PyErr_SetString(PyExc_RuntimeError, e.what()); return NULL; - } catch (...) { - PyErr_SetString(PyExc_RuntimeError, - "Unknown exception raised while rendering PDF"); return NULL; - } - - return Py_BuildValue("i", num); - } - - static PyObject * - pdfreflow_get_metadata(PyObject *self, PyObject *args) { - char *pdfdata; - Py_ssize_t size; - map info; - PyObject *cover; - PyObject *ans = PyDict_New(); - - if (!ans) return PyErr_NoMemory(); - - if (!PyArg_ParseTuple(args, "s#O", &pdfdata, &size, &cover)) - return NULL; - - Reflow *reflow = NULL; - try { - reflow = new Reflow(pdfdata, size); - info = reflow->get_info(); - if (PyObject_IsTrue(cover)) { - if (reflow->numpages() > 0) { - vector *data = reflow->render_first_page(); - if (data && data->size() > 0) { - PyObject *d = PyBytes_FromStringAndSize(&((*data)[0]), data->size()); - delete data; - if (d == NULL) {delete reflow; return PyErr_NoMemory();} - if (PyDict_SetItemString(ans, "cover", d) == -1) {delete reflow; return NULL;} - Py_XDECREF(d); - } - } else { - if (PyDict_SetItemString(ans, "cover", Py_None) == -1) {delete reflow; return NULL;} - } - } - } catch (std::exception &e) { - PyErr_SetString(PyExc_RuntimeError, e.what()); delete reflow; return NULL; - } catch (...) { - PyErr_SetString(PyExc_RuntimeError, - "Unknown exception raised while getting metadata from PDF"); delete reflow; return NULL; - } - delete reflow; reflow = NULL; - - - for (map::const_iterator it = info.begin() ; it != info.end(); it++ ) { - PyObject *key = PyUnicode_Decode((*it).first.c_str(), (*it).first.size(), "UTF-8", "replace"); - if (!key) return NULL; - PyObject *val = PyUnicode_Decode((*it).second.c_str(), (*it).second.size(), "UTF-8", "replace"); - if (!val) return NULL; - if (PyDict_SetItem(ans, key, val) == -1) return NULL; - Py_XDECREF(key); Py_XDECREF(val); - } - return ans; - } - - static PyObject * - pdfreflow_get_numpages(PyObject *self, PyObject *args) { - char *pdfdata; - int num = 0; - Py_ssize_t size; - map info; - - if (!PyArg_ParseTuple(args, "s#", &pdfdata, &size)) - return NULL; - - Reflow *reflow = NULL; - try { - reflow = new Reflow(pdfdata, size); - num = reflow->numpages(); - } catch (std::exception &e) { - PyErr_SetString(PyExc_RuntimeError, e.what()); delete reflow; return NULL; - } catch (...) { - PyErr_SetString(PyExc_RuntimeError, - "Unknown exception raised while getting metadata from PDF"); delete reflow; return NULL; - } - - delete reflow; reflow = NULL; - return Py_BuildValue("i", num); - } - - - static PyObject * - pdfreflow_set_metadata(PyObject *self, PyObject *args) { - char *pdfdata; - Py_ssize_t size; - PyObject *info; - - if (!PyArg_ParseTuple(args, "s#O", &pdfdata, &size, &info)) - return NULL; - - if (!PyDict_Check(info)) { - PyErr_SetString(PyExc_ValueError, "Info object must be a dictionary."); - return NULL; - } - - char Title[10] = "Title", Author[10] = "Author", Keywords[10] = "Keywords"; - char *keys[3] = { Title, Author, Keywords }; - map pinfo; - PyObject *val = NULL, *utf8 = NULL; - - for (int i = 0; i < 3; i++) { - val = PyDict_GetItemString(info, keys[i]); - if (!val || !PyUnicode_Check(val)) continue; - utf8 = PyUnicode_AsUTF8String(val); - if (!utf8) continue; - pinfo[keys[i]] = PyString_AS_STRING(utf8); - } - - PyObject *ans = NULL; - try { - Reflow reflow(pdfdata, static_cast(size)); - if (reflow.is_locked()) { - PyErr_SetString(PyExc_ValueError, "Setting metadata not possible in encrypeted PDFs"); - return NULL; - } - string result = reflow.set_info(pinfo); - ans = PyString_FromStringAndSize(result.c_str(), result.size()); - } catch (std::exception &e) { - PyErr_SetString(PyExc_RuntimeError, e.what()); return NULL; - } catch (...) { - PyErr_SetString(PyExc_RuntimeError, - "Unknown exception raised while getting metadata from PDF"); return NULL; - } - return ans; - } - - static - PyMethodDef pdfreflow_methods[] = { - {"reflow", pdfreflow_reflow, METH_VARARGS, - "reflow(pdf_data, first_page, last_page)\n\n" - "Reflow the specified PDF. Returns the number of pages in the PDF. If last_page is -1 renders to end of document." - }, - {"get_metadata", pdfreflow_get_metadata, METH_VARARGS, - "get_metadata(pdf_data, cover)\n\n" - "Get metadata and (optionally) cover from the specified PDF." - }, - {"set_metadata", pdfreflow_set_metadata, METH_VARARGS, - "get_metadata(info_dict)\n\n" - "Set metadata in the specified PDF. Currently broken." - }, - {"get_numpages", pdfreflow_get_numpages, METH_VARARGS, - "get_numpages(pdf_data)\n\n" - "Get number of pages in the PDF." - }, - - {NULL, NULL, 0, NULL} - }; - - - PyMODINIT_FUNC - initpdfreflow(void) - { - PyObject* m; - - m = Py_InitModule3("pdfreflow", pdfreflow_methods, - "Reflow a PDF file"); - - if (m == NULL) return; - - } -} - - -#else - -int main(int argc, char **argv) { - char *memblock; - ifstream::pos_type size; - int ret = 0; - map info; - Reflow *reflow = NULL; - - - if (argc != 2) { - cerr << "Usage: " << argv[0] << " file.pdf" << endl; - return 1; - } - - ifstream file (argv[1], ios::in|ios::binary|ios::ate); - if (file.is_open()) { - size = file.tellg(); - memblock = new char[size]; - file.seekg (0, ios::beg); - file.read (memblock, size); - file.close(); - } else { - cerr << "Unable to read from: " << argv[1] << endl; - return 1; - } - - try { - reflow = new Reflow(memblock, size); - info = reflow->get_info(); - for (map::const_iterator it = info.begin() ; it != info.end(); it++ ) { - cout << (*it).first << " : " << (*it).second << endl; - } - //reflow->render(); - vector *data = reflow->render_first_page(); - ofstream file("cover.png", ios::binary); - file.write(&((*data)[0]), data->size()); - delete data; - file.close(); - } catch(exception &e) { - cerr << e.what() << endl; - ret = 1; - } - delete reflow; - delete[] memblock; - return ret; -} -#endif diff --git a/src/calibre/ebooks/pdf/reflow.cpp b/src/calibre/ebooks/pdf/reflow.cpp deleted file mode 100644 index c01f357d3c..0000000000 --- a/src/calibre/ebooks/pdf/reflow.cpp +++ /dev/null @@ -1,976 +0,0 @@ -/** - * Copyright 2009 Kovid Goyal - * License: GNU GPL v2+ - */ - -#include -#include -#include -#include -#include -#include -#include -#include "reflow.h" -#include "utils.h" - -using namespace std; -using namespace calibre_reflow; - -static const size_t num_info_keys = 8; -static const char* info_keys[num_info_keys] = { - "Title", "Subject", "Keywords", "Author", "Creator", "Producer", - "CreationDate", "ModDate" -}; -static char encoding[10] = "UTF-8"; -static char yes[10] = "yes"; - - -//------------------------------------------------------------------------ -// XMLString -//------------------------------------------------------------------------ - -XMLString::XMLString(GfxState *state, GooString *s, double current_font_size, - Fonts *fonts) : - text(new vector(0)), x_right(new vector(0)), - yx_next(NULL), xy_next(NULL), fonts(fonts), font_idx(0), xml_text(NULL), - link(NULL), x_min(0), x_max(0), y_min(0), y_max(0), col(0), dir(text_dir_unknown) -{ - double x = 0, y = 0; - GfxFont *font; - - state->transform(state->getCurX(), state->getCurY(), &x, &y); - - if ((font = state->getFont())) { - double ascent = font->getAscent(); - double descent = font->getDescent(); - if( ascent > 1.05 ){ - //printf( "ascent=%.15g is too high, descent=%.15g\n", ascent, descent ); - ascent = 1.05; - } - if( descent < -0.4 ){ - //printf( "descent %.15g is too low, ascent=%.15g\n", descent, ascent ); - descent = -0.4; - } - this->y_min = y - ascent * current_font_size; - this->y_max = y - descent * current_font_size; - GfxRGB rgb; - state->getFillRGB(&rgb); - GooString *name = state->getFont()->getName(); - if (!name) - this->font_idx = this->fonts->add_font(NULL, current_font_size-1, rgb); - else - this->font_idx = this->fonts->add_font( - new string(name->getCString()), current_font_size-1, rgb); - - } else { - // this means that the PDF file draws text without a current font, - // which should never happen - this->y_min = y - 0.95 * current_font_size; - this->y_max = y + 0.35 * current_font_size; - } - if (this->y_min == this->y_max) { - // this is a sanity check for a case that shouldn't happen -- but - // if it does happen, we want to avoid dividing by zero later - this->y_min = y; - this->y_max = y + 1; - } -} - -void XMLString::add_char(GfxState *state, double x, double y, - double dx, double dy, Unicode u) { - if (dir == text_dir_unknown) { - //dir = UnicodeMap::getDirection(u); - dir = text_dir_left_right; - } - - if (this->text->capacity() == this->text->size()) { - this->text->reserve(text->size()+16); - this->x_right->reserve(x_right->size()+16); - } - this->text->push_back(u); - if (this->length() == 1) { - this->x_min = x; - } - this->x_max = x + dx; - this->x_right->push_back(x_max); - //printf("added char: %f %f xright = %f\n", x, dx, x+dx); -} - -void XMLString::end_string() -{ - if( this->dir == text_dir_right_left && this->length() > 1 ) - { - //printf("will reverse!\n"); - reverse(this->text->begin(), this->text->end()); - } -} - -static string encode_unicode_chars(const Unicode *u, size_t num) { - ostringstream oss; - UnicodeMap *uMap; - char buf[10]; - int n; - if (!(uMap = globalParams->getTextEncoding())) { - throw ReflowException("Failed to allocate unicode map."); - } - - for (size_t i = 0; i < num; i++) { - switch (u[i]) { - case '&': oss << "&"; break; - case '<': oss << "<"; break; - case '>': oss << ">"; break; - default: - { - // convert unicode to string - if ((n = uMap->mapUnicode(u[i], buf, sizeof(buf))) > 0) { - buf[n] = 0; - oss << buf; - } - } - } - } - uMap->decRefCnt(); - return oss.str(); -} - -void XMLString::encode() { - delete this->xml_text; - this->xml_text = new string(encode_unicode_chars(&((*this->text)[0]), this->text->size())); -} - -string XMLString::str() const { - ostringstream oss; - oss << "font_idx << "\" "; - oss << setiosflags(ios::fixed) << setprecision(2) - << "top=\"" << this->y_min << "\" left=\"" << this->x_min - << "\" width=\"" << this->x_max - this->x_min << "\" " - << "height=\"" << this->y_max - this->y_min << "\">"; - oss << *this->xml_text << ""; - return oss.str(); -} - -XMLString::~XMLString() { - delete this->text; delete this->x_right; delete this->xml_text; -} - - -//------------------------------------------------------------------------ -// XMLPage -//------------------------------------------------------------------------ - -XMLPage::XMLPage(unsigned int num, GfxState *state, ofstream *output, Fonts* fonts) : - current_string(NULL), num(num), output(output), current_font_size(0.0), - yx_strings(NULL), xy_strings(NULL), yx_cur1(NULL), yx_cur2(NULL), - fonts(fonts), links(new XMLLinks()) -{ - (*this->output) << setiosflags(ios::fixed) << setprecision(2) << - "\t\tnum << "\" width=\"" << - state->getPageWidth() << "\" height=\"" << state->getPageHeight() << - "\">" << endl; - if (!(*this->output)) throw ReflowException(strerror(errno)); -} - -XMLPage::~XMLPage() { - (*this->output) << "\t\t" << endl; - if (!(*this->output)) throw ReflowException(strerror(errno)); - for (XMLString *tmp = this->yx_strings; tmp; tmp = tmp->yx_next) - delete tmp; - - delete this->links; -} - -void XMLPage::update_font(GfxState *state) { - GfxFont *font; - double *fm; - char *name; - int code; - double w; - - current_font_size = state->getTransformedFontSize(); - - if ((font = state->getFont()) && font->getType() == fontType3) { - // This is a hack which makes it possible to deal with some Type 3 - // fonts. The problem is that it's impossible to know what the - // base coordinate system used in the font is without actually - // rendering the font. This code tries to guess by looking at the - // width of the character 'm' (which breaks if the font is a - // subset that doesn't contain 'm'). - for (code = 0; code < 256; ++code) { - if ((name = ((Gfx8BitFont *)font)->getCharName(code)) && - name[0] == 'm' && name[1] == '\0') break; - - } - if (code < 256) { - w = ((Gfx8BitFont *)font)->getWidth(code); - if (w != 0) { - // 600 is a generic average 'm' width -- yes, this is a hack - current_font_size *= w / 0.6; - } - } - fm = font->getFontMatrix(); - if (fm[0] != 0) { - current_font_size *= fabs(fm[3] / fm[0]); - } - } - -} - -void XMLPage::draw_char(GfxState *state, double x, double y, - double dx, double dy, - double originX, double originY, - CharCode code, int nBytes, Unicode *u, int uLen) { - if ( (state->getRender() & 3) == 3) return; //Hidden text - double x1, y1, w1, h1, dx2, dy2; - int i; - state->transform(x, y, &x1, &y1); - - // check that new character is in the same direction as current string - // and is not too far away from it before adding - if (this->current_string->character_does_not_belong_to_string(state, x1)) { - this->end_string(); - this->begin_string(state, NULL); - } - state->textTransformDelta(state->getCharSpace() * state->getHorizScaling(), - 0, &dx2, &dy2); - dx -= dx2; - dy -= dy2; - state->transformDelta(dx, dy, &w1, &h1); - if (uLen != 0) { - w1 /= uLen; - h1 /= uLen; - } - for (i = 0; i < uLen; ++i) { - this->current_string->add_char(state, x1 + i*w1, y1 + i*h1, w1, h1, u[i]); - } - -} - -void XMLPage::end_string() { - XMLString *p1 = NULL, *p2 = NULL; - double h, y1, y2; - - // throw away zero-length strings -- they don't have valid xMin/xMax - // values, and they're useless anyway - if (this->current_string->length() == 0) { - delete this->current_string; - this->current_string = NULL; - return; - } - - this->current_string->end_string(); - - // insert string in y-major list - h = this->current_string->height(); - y1 = this->current_string->y_min + 0.5 * h; - y2 = this->current_string->y_min + 0.8 * h; - if (gFalse) { //rawOrder - p1 = this->yx_cur1; - p2 = NULL; - } else if ( - (!this->yx_cur1 || - (y1 >= this->yx_cur1->y_min && - (y2 >= this->yx_cur1->y_max || - this->current_string->x_max >= this->yx_cur1->x_min))) && - (!this->yx_cur2 || - (y1 < this->yx_cur2->y_min || - (y2 < this->yx_cur2->y_max && - this->current_string->x_max < this->yx_cur2->x_min))) - ) { - p1 = this->yx_cur1; - p2 = this->yx_cur2; - } else { - for (p1 = NULL, p2 = this->yx_strings; p2; p1 = p2, p2 = p2->yx_next) { - if (y1 < p2->y_min || (y2 < p2->y_max && this->current_string->x_max < p2->x_min)) - break; - } - this->yx_cur2 = p2; - } - this->yx_cur1 = this->current_string; - if (p1) - p1->yx_next = this->current_string; - else - this->yx_strings = this->current_string; - this->current_string->yx_next = p2; - this->current_string = NULL; -} - -void XMLPage::end() { - XMLLinks::size_type link_index = 0; - Fonts::size_type pos = 0; - XMLFont* h; - - for (XMLString *tmp = this->yx_strings; tmp; tmp = tmp->yx_next) { - pos = tmp->font_idx; - h = this->fonts->at(pos); - - tmp->encode(); - - if (this->links->in_link( - tmp->x_min, tmp->y_min, tmp->x_max, tmp->y_max, link_index)) { - tmp->link = links->at(link_index); - } - } - - this->coalesce(); - - for (XMLString *tmp = yx_strings; tmp; tmp=tmp->yx_next) { - if (tmp->xml_text && tmp->xml_text->size() > 0) { - (*this->output) << "\t\t\t" << tmp->str() << endl; - if (!(*this->output)) throw ReflowException(strerror(errno)); - } - } -} - -static const char *strrstr( const char *s, const char *ss ) -{ - const char *p = strstr( s, ss ); - for( const char *pp = p; pp != NULL; pp = strstr( p+1, ss ) ){ - p = pp; - } - return p; -} - - -static void close_tags( string *xml_text, bool &finish_a, bool &finish_italic, bool &finish_bold ) -{ - const char *last_italic = finish_italic && ( finish_bold || finish_a ) ? strrstr( xml_text->c_str(), "" ) : NULL; - const char *last_bold = finish_bold && ( finish_italic || finish_a ) ? strrstr( xml_text->c_str(), "" ) : NULL; - const char *last_a = finish_a && ( finish_italic || finish_bold ) ? strrstr( xml_text->c_str(), " ( last_italic > last_bold ? last_italic : last_bold ) ) { - xml_text->append(""); - finish_a = false; - } - if( finish_italic && finish_bold && last_italic > last_bold ){ - xml_text->append(""); - finish_italic = false; - } - if( finish_bold ) - xml_text->append(""); - if( finish_italic ) - xml_text->append(""); - if( finish_a ) - xml_text->append(""); -} - -void XMLPage::coalesce() { - XMLString *str1, *str2, *str3; - XMLFont *hfont1, *hfont2; - double space, hor_space, vert_space, vert_overlap, size, x_limit; - bool add_space, found; - int n, i; - double cur_x, cur_y; - - str1 = this->yx_strings; - - if( !str1 ) return; - - //----- discard duplicated text (fake boldface, drop shadows) - - while (str1) - { - size = str1->y_max - str1->y_min; - x_limit = str1->x_min + size * 0.2; - found = false; - for (str2 = str1, str3 = str1->yx_next; - str3 && str3->x_min < x_limit; - str2 = str3, str3 = str2->yx_next) - { - if (str3->length() == str1->length() && - !memcmp(str3->text, str1->text, str1->length() * sizeof(Unicode)) && - fabs(str3->y_min - str1->y_min) < size * 0.2 && - fabs(str3->y_max - str1->y_max) < size * 0.2 && - fabs(str3->x_max - str1->x_max) < size * 0.2) - { - found = true; - //printf("found duplicate!\n"); - break; - } - } - if (found) - { - str2->xy_next = str3->xy_next; - str2->yx_next = str3->yx_next; - delete str3; - } - else - { - str1 = str1->yx_next; - } - } - - str1 = yx_strings; - - hfont1 = this->fonts->at(str1->font_idx); - if( hfont1->is_bold() ) - str1->xml_text->insert(0, ""); - if( hfont1->is_italic() ) - str1->xml_text->insert(0, ""); - if (str1->get_link()) - str1->xml_text->insert(0, str1->get_link()->get_link_start()); - cur_x = str1->x_min; cur_y = str1->y_min; - - while (str1 && (str2 = str1->yx_next)) { - hfont2 = this->fonts->at(str2->font_idx); - space = str1->y_max - str1->y_min; - hor_space = str2->x_min - str1->x_max; - vert_space = str2->y_min - str1->y_max; - - vert_overlap = 0; - if (str2->y_min >= str1->y_min && str2->y_min <= str1->y_max) - { - vert_overlap = str1->y_max - str2->y_min; - } else if (str2->y_max >= str1->y_min && str2->y_max <= str1->y_max) - { - vert_overlap = str2->y_max - str1->y_min; - } - if ( - ( - ( - (str2->y_min < str1->y_max) - && - (hor_space > -0.5 * space && hor_space < space) - ) - ) && - (hfont1->eq_upto_inline(*hfont2)) && - str1->dir == str2->dir // text direction the same - ) - { - n = str1->length() + str2->length(); - if ((add_space = hor_space > 0.1 * space)) { - ++n; - } - - str1->text->reserve((n + 15) & ~15); - str1->x_right->reserve((n + 15) & ~15); - if (add_space) { - str1->text->push_back(0x20); - str1->xml_text->push_back(' '); - str1->x_right->push_back(str2->x_min); - } - - for (i = 0; i < str2->length(); i++) { - str1->text->push_back(str2->text->at(i)); - str1->x_right->push_back(str2->x_right->at(i)); - } - - /* fix , if str1 and str2 differ and handle switch of links */ - XMLLink *hlink1 = str1->get_link(); - XMLLink *hlink2 = str2->get_link(); - bool switch_links = !hlink1 || !hlink2 || !((*hlink1) == (*hlink2)); - bool finish_a = switch_links && hlink1 != NULL; - bool finish_italic = hfont1->is_italic() && ( !hfont2->is_italic() || finish_a ); - bool finish_bold = hfont1->is_bold() && - ( !hfont2->is_bold() || finish_a || finish_italic ); - close_tags( str1->xml_text, finish_a, finish_italic, finish_bold ); - if( switch_links && hlink2 != NULL ) { - string ls = hlink2->get_link_start(); - str1->xml_text->append(ls); - } - if( ( !hfont1->is_italic() || finish_italic ) && hfont2->is_italic() ) - str1->xml_text->append(""); - if( ( !hfont1->is_bold() || finish_bold ) && hfont2->is_bold() ) - str1->xml_text->append(""); - - - str1->xml_text->append(*str2->xml_text); - // str1 now contains href for link of str2 (if it is defined) - str1->link = str2->link; - hfont1 = hfont2; - if (str2->x_max > str1->x_max) { - str1->x_max = str2->x_max; - } - if (str2->y_max > str1->y_max) { - str1->y_max = str2->y_max; - } - str1->yx_next = str2->yx_next; - delete str2; - } else { // keep strings separate - bool finish_a = str1->get_link() != NULL; - bool finish_bold = hfont1->is_bold(); - bool finish_italic = hfont1->is_italic(); - close_tags( str1->xml_text, finish_a, finish_italic, finish_bold ); - - str1->x_min = cur_x; str1->y_min = cur_y; - str1 = str2; - cur_x = str1->x_min; cur_y = str1->y_min; - hfont1 = hfont2; - if ( hfont1->is_bold() ) - str1->xml_text->insert(0, ""); - if( hfont1->is_italic() ) - str1->xml_text->insert(0, ""); - if( str1->get_link() != NULL ) { - str1->xml_text->insert(0, str1->get_link()->get_link_start()); - } - } - } - str1->x_min = cur_x; str1->y_min = cur_y; - - bool finish_bold = hfont1->is_bold(); - bool finish_italic = hfont1->is_italic(); - bool finish_a = str1->get_link() != NULL; - close_tags( str1->xml_text, finish_a, finish_italic, finish_bold ); - -} - - -//------------------------------------------------------------------------ -// XMLOutputDev -//------------------------------------------------------------------------ - -XMLOutputDev::XMLOutputDev(PDFDoc *doc) : - current_page(NULL), output(new ofstream("index.xml", ios::trunc)), - fonts(new Fonts()), catalog(NULL), images(new XMLImages()), doc(doc) -{ - if (!(*this->output)) { - throw ReflowException(strerror(errno)); - } - (*this->output) << "" << endl; - (*this->output) << "\t" << endl; - if (!(*this->output)) throw ReflowException(strerror(errno)); -} - -XMLOutputDev::~XMLOutputDev() { - (*this->output) << "\t" << endl; - if (!(*this->output)) throw ReflowException(strerror(errno)); - (*this->output) << "\t" << endl; - if (!(*this->output)) throw ReflowException(strerror(errno)); - for (Fonts::const_iterator it = this->fonts->begin(); it < this->fonts->end(); it++) { - (*this->output) << "\t\t" << (*it)->str(it - this->fonts->begin()) << endl; - if (!(*this->output)) throw ReflowException(strerror(errno)); - } - (*this->output) << "\t" << endl; - if (!(*this->output)) throw ReflowException(strerror(errno)); - (*this->output) << "" << endl; - if (!(*this->output)) throw ReflowException(strerror(errno)); - this->output->close(); - delete this->output; - delete this->fonts; - delete this->images; -} - -static string get_link_dest(LinkAction *link, PDFDoc *doc) { - unsigned int page = 1; - ostringstream oss; - - switch(link->getKind()) - { - case actionGoTo: - { - LinkGoTo *ha = (LinkGoTo *)link; - LinkDest *dest = NULL; - if (ha->getDest() != NULL) - dest = ha->getDest()->copy(); - else if (ha->getNamedDest() != NULL) { - dest = doc->findDest(ha->getNamedDest()); - } - - if (dest) { - if (dest->isPageRef()) { - Ref pageref = dest->getPageRef(); - page = doc->findPage(pageref.num, pageref.gen); - } - else { - page = dest->getPageNum(); - } - - oss << "#" << page - << setiosflags(ios::fixed) << setprecision(2) - << ":l=" << dest->getLeft() - << "t=" << dest->getTop(); - //<< "r=" << dest->getRight() - //<< "b=" << dest->getBottom(); - delete dest; - } - break; - } - - case actionGoToR: - { - LinkGoToR *ha = (LinkGoToR *) link; - LinkDest *dest = NULL; - bool has_file = false; - if (ha->getFileName()) { - oss << ha->getFileName()->getCString(); - has_file = true; - } - if (ha->getDest() != NULL) dest=ha->getDest()->copy(); - - if (dest && has_file) { - if (!(dest->isPageRef())) page = dest->getPageNum(); - delete dest; - oss << '#' << page; - } - break; - } - case actionURI: - { - LinkURI *ha=(LinkURI *) link; - oss << ha->getURI()->getCString(); - break; - } - case actionLaunch: - { - LinkLaunch *ha = (LinkLaunch *) link; - oss << ha->getFileName()->getCString(); - break; - } - case actionNamed: break; - case actionMovie: break; - case actionRendition: break; - case actionSound: break; - case actionJavaScript: break; - case actionUnknown: break; - default: break; - } - return oss.str(); -} - -void XMLOutputDev::process_link(AnnotLink* link){ - - double _x1, _y1, _x2, _y2; - int x1, y1, x2, y2; - - link->getRect(&_x1, &_y1, &_x2, &_y2); - cvtUserToDev(_x1, _y1, &x1, &y1); - - cvtUserToDev(_x2, _y2, &x2, &y2); - - LinkAction *a = link->getAction(); - if (!a) return; - string dest = get_link_dest(a, this->doc); - if (dest.length() > 0) { - XMLLink *t = new XMLLink((double)x1, (double)y2, (double)x2, (double)y1, - dest.c_str()); - this->current_page->add_link(t); - } -} - - -void XMLOutputDev::endPage() { -#ifdef POPPLER_PRE_20 - Links *slinks = catalog->getPage(current_page->number())->getLinks(catalog); -#else - Links *slinks = catalog->getPage(current_page->number())->getLinks(); -#endif - - for (int i = 0; i < slinks->getNumLinks(); i++) - { - this->process_link(slinks->getLink(i)); - } - delete slinks; - - this->current_page->end(); - vector images = this->images->str(); - for (vector::iterator it = images.begin(); it < images.end(); it++) { - (*this->output) << "\t\t\t" << *(*it) << endl; - if (!(*this->output)) throw ReflowException(strerror(errno)); - delete *it; - } - this->images->clear(); - delete this->current_page; - this->current_page = NULL; -} - - -void XMLOutputDev::drawImageMask(GfxState *state, Object *ref, Stream *str, - int width, int height, GBool invert, - GBool interpolate, GBool inlineImg) { - OutputDev::drawImageMask(state, ref, str, width, height, - invert, interpolate, inlineImg); - //this->images->add_mask(); - cerr << "mask requested" << endl; -} - -void XMLOutputDev::drawImage(GfxState *state, Object *ref, Stream *str, - int width, int height, GfxImageColorMap *colorMap, - GBool interpolate, int *maskColors, GBool inlineImg) { - this->images->add(state, ref, str, - static_cast(width), static_cast(height), - colorMap, interpolate, maskColors, inlineImg); -} - -Reflow::Reflow(char *pdfdata, size_t sz) : - pdfdata(pdfdata), current_font_size(-1), doc(NULL), obj() -{ - int err; - this->obj.initNull(); - if (globalParams == NULL) { - globalParams = new GlobalParams(); - if (!globalParams) - throw ReflowException("Failed to allocate Globalparams"); - } - MemStream *str = new MemStream(pdfdata, 0, sz, &this->obj); - this->doc = new PDFDoc(str, NULL, NULL); - - if (!this->doc->isOk()) { - err = this->doc->getErrorCode(); - ostringstream stm; - if (err == errEncrypted) - stm << "PDF is password protected."; - else { - stm << "Failed to open PDF file"; - stm << " with error code: " << err; - } - delete this->doc; - this->doc = NULL; - throw ReflowException(stm.str().c_str()); - } - -} - -int -Reflow::render(int first_page, int last_page) { - - if (!this->doc->okToCopy()) - cout << "Warning, this document has the copy protection flag set, ignoring." << endl; - - globalParams->setTextEncoding(encoding); - - int doc_pages = doc->getNumPages(); - if (last_page < 1 || last_page > doc_pages) last_page = doc_pages; - if (first_page < 1) first_page = 1; - if (first_page > last_page) first_page = last_page; - - XMLOutputDev *xml_out = new XMLOutputDev(this->doc); - doc->displayPages(xml_out, first_page, last_page, - 96, //hDPI - 96, //vDPI - 0, //rotate - true, //UseMediaBox - true, //Crop - false //Printing - ); - - if (last_page - first_page == doc_pages - 1) - this->dump_outline(); - - delete xml_out; - - return doc_pages; -} - -void Reflow::dump_outline() { - Outline *outline = this->doc->getOutline(); - if (!outline) return; - GooList *items = outline->getItems(); - if ( !items || items->getLength() < 1 ) - return; - - ostringstream *output = new ostringstream(); - (*output) << "" << endl; - this->outline_level(output, items); - (*output) << "" << endl; - ofstream of("outline.xml", ios::trunc); - of << output->str(); - if (!of) throw ReflowException("Error writing outline file"); - of.close(); - delete output; -} - -static inline void outline_tabs(ostringstream *o, int level) { - for (int i = 0; i < level; i++) - (*o) << "\t"; -} - -void Reflow::outline_level(ostringstream *oss, GooList *items, int level) -{ - int num_of_items = items->getLength(); - if (num_of_items > 0) { - outline_tabs(oss, level); - (*oss) << "" << endl; - - for (int i = 0; i < num_of_items; i++) { - OutlineItem* item = (OutlineItem *)items->get(i); - Unicode *u = item->getTitle(); - string title = encode_unicode_chars(u, item->getTitleLength()); - if (title.size() < 1) continue; - outline_tabs(oss, level+1); - (*oss) << "isOpen()?"yes":"no") << "\""; - LinkAction *a = item->getAction(); - if (a != NULL) - (*oss) << " dest=\"" << get_link_dest(a, this->doc) << "\""; - (*oss) << ">" << title << "" << endl; - item->open(); - GooList *children = item->getKids(); - if (children) - outline_level(oss, children, level+1); - } - } -} - -Reflow::~Reflow() { - delete this->doc; -} - -map Reflow::get_info() { - Object info; - map ans; - string val; - globalParams->setTextEncoding(encoding); - - this->doc->getDocInfo(&info); - if (info.isDict()) { - for(size_t i = 0; i < num_info_keys; i++) { - val = this->decode_info_string(info.getDict(), info_keys[i]); - if (val.size() > 0) { - ans[string(info_keys[i])] = string(val); - } - } - } - return ans; -} - -string Reflow::decode_info_string(Dict *info, const char *key) const { - Object obj; - GooString *s1; - bool is_unicode; - Unicode u; - char buf[8]; - int i, n; - ostringstream oss; - char *tmp = new char[strlen(key)+1]; - strncpy(tmp, key, strlen(key)+1); - UnicodeMap *umap; - if (!(umap = globalParams->getTextEncoding())) { - throw ReflowException("Failed to allocate unicode map."); - } - - - if (info->lookup(tmp, &obj)->isString()) { - s1 = obj.getString(); - if ((s1->getChar(0) & 0xff) == 0xfe && - (s1->getChar(1) & 0xff) == 0xff) { - is_unicode = true; - i = 2; - } else { - is_unicode = false; - i = 0; - } - while (i < obj.getString()->getLength()) { - if (is_unicode) { - u = ((s1->getChar(i) & 0xff) << 8) | - (s1->getChar(i+1) & 0xff); - i += 2; - } else { - u = pdfDocEncoding[s1->getChar(i) & 0xff]; - ++i; - } - n = umap->mapUnicode(u, buf, sizeof(buf)); - buf[n] = 0; - oss << buf; - } - } - obj.free(); - delete[] tmp; - return oss.str(); -} - -vector* Reflow::render_first_page(bool use_crop_box, double x_res, - double y_res) { - if (this->numpages() < 1) throw ReflowException("Document has no pages."); - globalParams->setTextEncoding(encoding); - globalParams->setEnableFreeType(yes); - globalParams->setAntialias(yes); - globalParams->setVectorAntialias(yes); - - SplashColor paper_color; - paper_color[0] = 255; - paper_color[1] = 255; - paper_color[2] = 255; - SplashOutputDev *out = new SplashOutputDev(splashModeRGB8, 4, false, paper_color, true, true); - out->setVectorAntialias(true); - if (!out) { - throw ReflowException("Failed to allocate SplashOutputDev"); - } - try { -#ifdef POPPLER_PRE_20 - out->startDoc(doc->getXRef()); -#else - out->startDoc(doc); -#endif - out->startPage(1, NULL); - - double pg_w, pg_h; - int pg = 1; - - if (use_crop_box) { - pg_w = this->doc->getPageCropWidth(pg); - pg_h = this->doc->getPageCropHeight(pg); - } else { - pg_w = this->doc->getPageMediaWidth(pg); - pg_h = this->doc->getPageMediaHeight(pg); - } - - pg_w *= x_res/72.; - pg_h *= y_res/72.; - - int x=0, y=0; - this->doc->displayPageSlice(out, pg, x_res, y_res, 0, - !use_crop_box, false, false, x, y, pg_w, pg_h); - } catch(...) { delete out; throw; } - - SplashBitmap *bmp = out->takeBitmap(); - out->endPage(); - delete out; out = NULL; - PNGMemWriter writer; - vector *buf = new vector(); - try { - writer.init(buf, bmp->getWidth(), bmp->getHeight()); - writer.write_splash_bitmap(bmp); - writer.close(); - } catch(...) { delete buf; delete bmp; throw; } - delete bmp; - return buf; -} - -class MemOutStream : public OutStream { - private: - ostringstream out; - - public: - MemOutStream() :OutStream() {} - ~MemOutStream() {} - void close() {} - int getPos() { return out.tellp(); } - void put(char c) { out.put(c); } - void printf (const char *format, ...) { - vector buf; - size_t written = strlen(format)*5; - va_list ap; - do { - buf.reserve(written + 20); - va_start(ap, format); - written = vsnprintf(&buf[0], buf.capacity(), format, ap); - va_end(ap); - } while (written >= buf.capacity()); - out.write(&buf[0], written); - } -}; - -string Reflow::set_info(map sinfo) { - XRef *xref = this->doc->getXRef(); - if (!xref) throw ReflowException("No XRef table"); - Object *trailer_dict = xref->getTrailerDict(); - if (!trailer_dict || !trailer_dict->isDict()) throw ReflowException("No trailer dictionary"); - Object tmp; - char INFO[5] = "Info"; - Object *info = trailer_dict->dictLookup(INFO, &tmp); - if (!info) { - info = new Object(); - info->initDict(xref); - } - if (!info->isDict()) throw ReflowException("Invalid info object"); - - for (map::iterator it = sinfo.begin(); it != sinfo.end(); it++) { - Object *tmp = new Object(); - tmp->initString(new GooString((*it).second)); - info->dictSet((*it).first, tmp); - } - - trailer_dict->dictSet(INFO, info); - char out[20] = "/t/out.pdf"; - this->doc->saveAs(new GooString(out), writeForceRewrite); - string ans; - return ans; -} - diff --git a/src/calibre/ebooks/pdf/reflow.h b/src/calibre/ebooks/pdf/reflow.h deleted file mode 100644 index c841076ca0..0000000000 --- a/src/calibre/ebooks/pdf/reflow.h +++ /dev/null @@ -1,253 +0,0 @@ -/** - * Copyright 2009 Kovid Goyal - * License: GNU GPL v2+ - * Based on pdftohtml from the poppler project. - */ - -#pragma once -#define UNICODE - -#ifdef _WIN32 -#include -#elif defined(_OSX) -#include -#else -#include -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "fonts.h" -#include "links.h" -#include "images.h" - -using namespace std; - -namespace calibre_reflow { - -#ifdef POPPLER_OLD_LINK_TYPE -#define AnnotLink Link -#endif - -enum UnicodeTextDirection { - text_dir_unknown, - text_dir_left_right, - text_dir_right_left, - text_dir_top_bottom -}; - -class Reflow { - - private: - char *pdfdata; - double current_font_size; - PDFDoc *doc; - Object obj; - - string decode_info_string(Dict *info, const char *key) const; - void outline_level(ostringstream *oss, GooList *items, - int level=1); - - public: - Reflow (char *xpdfdata, size_t sz); - ~Reflow(); - - /* Convert the PDF to XML. All files are output to the current directory */ - int render(int first_page, int last_page); - - /* Get the PDF Info Dictionary */ - map get_info(); - - /* True if the PDF is encrypted */ - bool is_locked() const { return !this->doc || this->doc->isEncrypted(); } - - /* Return the first page of the PDF, rendered as a PNG image */ - vector* render_first_page(bool use_crop_box=true, double x_res=150.0, - double y_res = 150.0); - - /* Dump the PDF outline as the file outline.xml in the current directory */ - void dump_outline(); - - /* Set the info dictionary. Currently broken. */ - string set_info(map info); - - /* Number of pages in the document */ - int numpages() { return this->doc->getNumPages(); } -}; - -class XMLString { - private: - vector *text; // the text - vector *x_right; // right-hand x coord of each char - XMLString *yx_next; // next string in y-major order - XMLString *xy_next; // next string in x-major order - Fonts *fonts; - Fonts::size_type font_idx; - string *xml_text; - XMLLink *link; - - double x_min, x_max; // bounding box x coordinates - double y_min, y_max; // bounding box y coordinates - int col; // starting column - UnicodeTextDirection dir; // direction (left to right/right to left) - - friend class XMLPage; - - public: - XMLString(GfxState *state, GooString *s, double current_font_size, Fonts *fonts); - ~XMLString(); - - bool character_does_not_belong_to_string(GfxState *state, double x1) { - return this->length() > 0 && - fabs(x1 - x_right->at(this->length()-1)) > 0.1 * (y_max - y_min); - } - - void add_char(GfxState *state, double x, double y, - double dx, double dy, Unicode u); - - void end_string(); - inline int length() const { return this->text->size(); } - inline double height() const { return y_max - y_min; } - void encode(); - XMLLink* get_link() { return this->link; } - string str() const; -}; - -class XMLPage { - private: - XMLString *current_string; - unsigned int num; - ofstream *output; - double current_font_size; - XMLString *yx_strings; // strings in y-major order - XMLString *xy_strings; // strings in x-major order - XMLString *yx_cur1, *yx_cur2; // cursors for yxStrings list - Fonts *fonts; - XMLLinks *links; - void coalesce(); - - public: - XMLPage(unsigned int num, GfxState *state, ofstream *output, Fonts* fonts); - ~XMLPage(); - - void update_font(GfxState *state); - - void begin_string(GfxState *state, GooString *s) { - this->current_string = new XMLString(state, s, - this->current_font_size, this->fonts); - } - - void draw_char(GfxState *state, double x, double y, - double dx, double dy, - double originX, double originY, - CharCode code, int nBytes, Unicode *u, int uLen); - - void end_string(); - - void end(); - - void add_link(XMLLink *t) { this->links->push_back(t); } - - unsigned int number() const { return this->num; } -}; - -class XMLOutputDev : public OutputDev { - public: - XMLOutputDev(PDFDoc *doc); - virtual ~XMLOutputDev(); - //---- get info about output device - - // Does this device use upside-down coordinates? - // (Upside-down means (0,0) is the top left corner of the page.) - virtual GBool upsideDown() { return gTrue; } - - // Does this device use drawChar() or drawString()? - virtual GBool useDrawChar() { return gTrue; } - - // Does this device use beginType3Char/endType3Char? Otherwise, - // text in Type 3 fonts will be drawn with drawChar/drawString. - virtual GBool interpretType3Chars() { return gFalse; } - - // Does this device need non-text content? - virtual GBool needNonText() { return gTrue; } - - //----- initialization and control - - virtual GBool checkPageSlice(Page *page, double hDPI, double vDPI, - int rotate, GBool useMediaBox, GBool crop, - int sliceX, int sliceY, int sliceW, int sliceH, - GBool printing, Catalog * catalogA, - GBool (* abortCheckCbk)(void *data) = NULL, - void * abortCheckCbkData = NULL) - { - this->catalog = catalogA; - return gTrue; - } - - - // Start a page. - virtual void startPage(int page_num, GfxState *state) { - this->current_page = new XMLPage(page_num, state, this->output, this->fonts); - } - - - // End a page. - virtual void endPage(); - - //----- update text state - virtual void updateFont(GfxState *state) {current_page->update_font(state);} - - //----- text drawing - virtual void beginString(GfxState *state, GooString *s) { - this->current_page->begin_string(state, s); - } - virtual void endString(GfxState *state) { - this->current_page->end_string(); - } - virtual void drawChar(GfxState *state, double x, double y, - double dx, double dy, - double originX, double originY, - CharCode code, int nBytes, Unicode *u, int uLen) { - this->current_page->draw_char(state, x, y, dx, dy, originX, - originY, code, nBytes, u, uLen); - } - - virtual void drawImageMask(GfxState *state, Object *ref, - Stream *str, - int width, int height, GBool invert, - GBool interpolate, GBool inlineImg); - virtual void drawImage(GfxState *state, Object *ref, Stream *str, - int width, int height, GfxImageColorMap *colorMap, - GBool interpolate, int *maskColors, GBool inlineImg); - - //new feature - virtual int DevType() {return 1234;} - - private: - XMLPage *current_page; - ofstream *output; // xml file - Fonts *fonts; - Catalog *catalog; - XMLImages *images; - PDFDoc *doc; - - void process_link(AnnotLink* link); - -}; -}