diff --git a/setup/build_environment.py b/setup/build_environment.py index e7b40449c3..46661d79a0 100644 --- a/setup/build_environment.py +++ b/setup/build_environment.py @@ -6,7 +6,7 @@ __license__ = 'GPL v3' __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import os, socket, struct, subprocess +import os, socket, struct, subprocess, glob from distutils.spawn import find_executable from PyQt4 import pyqtconfig @@ -128,8 +128,9 @@ if iswindows: elif isosx: fc_inc = '/sw/include/fontconfig' fc_lib = '/sw/lib' + poppler = glob.glob('/sw/build/poppler-*')[-1] poppler_inc_dirs = consolidate('POPPLER_INC_DIR', - '/sw/build/poppler-0.14.5/poppler:/sw/build/poppler-0.14.5') + '{0}/poppler:{0}'.format(poppler)) poppler_lib_dirs = consolidate('POPPLER_LIB_DIR', '/sw/lib') poppler_libs = ['poppler'] diff --git a/setup/installer/linux/freeze2.py b/setup/installer/linux/freeze2.py index 7efae423e9..49ea1868f5 100644 --- a/setup/installer/linux/freeze2.py +++ b/setup/installer/linux/freeze2.py @@ -32,7 +32,7 @@ binary_includes = [ '/lib/libz.so.1', '/usr/lib/libtiff.so.5', '/lib/libbz2.so.1', - '/usr/lib/libpoppler.so.7', + '/usr/lib/libpoppler.so.25', '/usr/lib/libxml2.so.2', '/usr/lib/libopenjpeg.so.2', '/usr/lib/libxslt.so.1', diff --git a/setup/installer/osx/app/main.py b/setup/installer/osx/app/main.py index 3c694b706e..9584c80b67 100644 --- a/setup/installer/osx/app/main.py +++ b/setup/installer/osx/app/main.py @@ -385,7 +385,7 @@ class Py2App(object): @flush def add_poppler(self): info('\nAdding poppler') - for x in ('libpoppler.7.dylib',): + for x in ('libpoppler.25.dylib',): self.install_dylib(os.path.join(SW, 'lib', x)) self.install_dylib(os.path.join(SW, 'bin', 'pdftohtml'), False) diff --git a/setup/installer/windows/notes.rst b/setup/installer/windows/notes.rst index 49fcd95cc1..c2e7dca8eb 100644 --- a/setup/installer/windows/notes.rst +++ b/setup/installer/windows/notes.rst @@ -295,7 +295,7 @@ NOTE: poppler must be built as a static 
library, unless you build the qt4 bindin Now do the same for the pdftohtml project -cp poppler/*.h ~/sw/include/poppler && cp goo/*.h ~/sw/include/poppler/goo && cp splash/*.h ~/sw/include/poppler/splash && cp build/Release/poppler.lib ../../lib/ && cp build/utils/Release/*.exe ../../bin/ +cp poppler/*.h ~/sw/include/poppler && cp goo/*.h ~/sw/include/poppler/goo && cp splash/*.h ~/sw/include/poppler/splash && cp build/Release/poppler.lib ../../lib/ && cp build/utils/Release/pdftohtml.exe ../../bin/ podofo diff --git a/src/calibre/ebooks/pdf/pdftohtml.py b/src/calibre/ebooks/pdf/pdftohtml.py index 4916ecc6c3..7c5e852b24 100644 --- a/src/calibre/ebooks/pdf/pdftohtml.py +++ b/src/calibre/ebooks/pdf/pdftohtml.py @@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal , ' \ '2009, John Schember ' __docformat__ = 'restructuredtext en' -import errno, os, sys, subprocess, shutil +import errno, os, sys, subprocess, shutil, re from functools import partial from calibre.ebooks import ConversionError, DRMError @@ -96,8 +96,36 @@ def pdftohtml(output_dir, pdf_path, no_images): with open(index, 'r+b') as i: raw = i.read() + raw = flip_images(raw) raw = '\n' + raw i.seek(0) i.truncate() - i.write(raw) + # versions of pdftohtml >= 0.20 output self closing <br> tags, this + # breaks the pdf heuristics regexps, so replace them + i.write(raw.replace(b'<br/>', b'<br>')) + +def flip_image(img, flip): + from calibre.utils.magick import Image + im = Image() + im.open(img) + if b'x' in flip: + im.flip(True) + if b'y' in flip: + im.flip() + im.save(img) + +def flip_images(raw): + for match in re.finditer(b'<IMG[^>]+/?>', raw): + img = match.group() + m = re.search(br'class="(x|y|xy)flip"', img) + if m is None: continue + flip = m.group(1) + src = re.search(br'src="([^"]+)"', img) + if src is None: continue + img = src.group(1) + if not os.path.exists(img): continue + print ('Flipping image %s: %s'%(img, flip)) + flip_image(img, flip) + raw = re.sub(br'<STYLE.+?</STYLE>\s*', b'', raw, flags=re.I|re.DOTALL) + return raw diff --git a/src/calibre/manual/conversion.rst b/src/calibre/manual/conversion.rst index f6fe04dd90..5eaca5a469 100644 --- a/src/calibre/manual/conversion.rst +++ b/src/calibre/manual/conversion.rst @@ -669,7 +669,6 @@ Some limitations of PDF input are: * Complex, multi-column, and image based documents are not supported. * Extraction of vector images and tables from within the document is also not supported. * Some PDFs use special glyphs to represent ll or ff or fi, etc. Conversion of these may or may not work depending on just how they are represented internally in the PDF. - * Some PDFs store their images upside down with a rotation instruction, |app| currently doesn't support that instruction, so the images will be rotated in the output as well. * Links and Tables of Contents are not supported * PDFs that use embedded non-unicode fonts to represent non-English characters will result in garbled output for those characters * Some PDFs are made up of photographs of the page with OCRed text behind them.
In such cases |app| uses the OCRed text, which can be very different from what you see when you view the PDF file diff --git a/src/calibre/utils/magick/magick.c b/src/calibre/utils/magick/magick.c index 58954fb903..e14c966282 100644 --- a/src/calibre/utils/magick/magick.c +++ b/src/calibre/utils/magick/magick.c @@ -909,6 +909,24 @@ magick_Image_rotate(magick_Image *self, PyObject *args, PyObject *kwargs) { } // }}} +// Image.flip {{{ + +static PyObject * +magick_Image_flip(magick_Image *self, PyObject *args, PyObject *kwargs) { + PyObject *obj = NULL; + MagickBooleanType ret = 0; + + NULL_CHECK(NULL) + + if (!PyArg_ParseTuple(args, "|O", &obj)) return NULL; + ret = (obj != NULL && PyObject_IsTrue(obj)) ? MagickFlopImage(self->wand) : MagickFlipImage(self->wand); + if (!ret) { PyErr_SetString(PyExc_ValueError, "Failed to flip image"); return NULL; } + + Py_RETURN_NONE; +} +// }}} + + // Image.set_page {{{ static PyObject * @@ -1174,6 +1192,10 @@ static PyMethodDef magick_Image_methods[] = { {"rotate", (PyCFunction)magick_Image_rotate, METH_VARARGS, "rotate(background_pixel_wand, degrees) \n\n Rotate image by specified degrees." }, + {"flip", (PyCFunction)magick_Image_flip, METH_VARARGS, + "flip(horizontal=False) \n\n Flip image about a vertical axis. If horizontal is True, flip about horizontal axis instead." + }, + {"normalize", (PyCFunction)magick_Image_normalize, METH_VARARGS, "normalize() \n\n enhances the contrast of a color image by adjusting the pixels color to span the entire range of colors available."