From 9c74cd945fb3548d97b673116aa2ecbbfa0943f6 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sun, 21 Feb 2010 10:48:24 -0500
Subject: [PATCH 1/5] Implement bug #4971: Support reading of PDF PDB files.

---
 src/calibre/ebooks/oeb/iterator.py     |  2 +-
 src/calibre/ebooks/pdb/__init__.py     |  4 ++-
 src/calibre/ebooks/pdb/pdf/__init__.py |  0
 src/calibre/ebooks/pdb/pdf/reader.py   | 38 ++++++++++++++++++++++++++
 4 files changed, 42 insertions(+), 2 deletions(-)
 create mode 100644 src/calibre/ebooks/pdb/pdf/__init__.py
 create mode 100644 src/calibre/ebooks/pdb/pdf/reader.py

diff --git a/src/calibre/ebooks/oeb/iterator.py b/src/calibre/ebooks/oeb/iterator.py
index 8959d62fac..d09c49ebeb 100644
--- a/src/calibre/ebooks/oeb/iterator.py
+++ b/src/calibre/ebooks/oeb/iterator.py
@@ -177,7 +177,7 @@ class EbookIterator(object):
                 plumber.opts, plumber.input_fmt, self.log,
                 {}, self.base)
 
-        if processed or plumber.input_fmt.lower() in ('pdf', 'rb') and \
+        if processed or plumber.input_fmt.lower() in ('pdb', 'pdf', 'rb') and \
                 not hasattr(self.pathtoopf, 'manifest'):
             self.pathtoopf = create_oebbook(self.log, self.pathtoopf, plumber.opts,
                     plumber.input_plugin)
diff --git a/src/calibre/ebooks/pdb/__init__.py b/src/calibre/ebooks/pdb/__init__.py
index 54f3826470..092c8a21bd 100644
--- a/src/calibre/ebooks/pdb/__init__.py
+++ b/src/calibre/ebooks/pdb/__init__.py
@@ -11,12 +11,14 @@ class PDBError(Exception):
 from calibre.ebooks.pdb.ereader.reader import Reader as ereader_reader
 from calibre.ebooks.pdb.palmdoc.reader import Reader as palmdoc_reader
 from calibre.ebooks.pdb.ztxt.reader import Reader as ztxt_reader
+from calibre.ebooks.pdb.pdf.reader import Reader as pdf_reader
 
 FORMAT_READERS = {
     'PNPdPPrs': ereader_reader,
     'PNRdPPrs': ereader_reader,
     'zTXTGPlm': ztxt_reader,
     'TEXtREAd': palmdoc_reader,
+    '.pdfADBE': pdf_reader,
 }
 
 from calibre.ebooks.pdb.palmdoc.writer import Writer as palmdoc_writer
@@ -34,8 +36,8 @@ IDENTITY_TO_NAME = {
     'PNRdPPrs': 'eReader',
     'zTXTGPlm': 'zTXT',
     'TEXtREAd': 'PalmDOC',
-
     '.pdfADBE': 'Adobe Reader',
+
     'BVokBDIC': 'BDicty',
     'DB99DBOS': 'DB (Database program)',
     'vIMGView': 'FireViewer (ImageViewer)',
diff --git a/src/calibre/ebooks/pdb/pdf/__init__.py b/src/calibre/ebooks/pdb/pdf/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/src/calibre/ebooks/pdb/pdf/reader.py b/src/calibre/ebooks/pdb/pdf/reader.py
new file mode 100644
index 0000000000..913d06f634
--- /dev/null
+++ b/src/calibre/ebooks/pdb/pdf/reader.py
@@ -0,0 +1,38 @@
+# -*- coding: utf-8 -*-
+
+'''
+Read content from a PDF stored inside a pdb file.
+'''
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+import cStringIO
+
+from calibre.ebooks.pdb.formatreader import FormatReader
+from calibre.ptempfile import TemporaryFile
+
+class Reader(FormatReader):
+
+    def __init__(self, header, stream, log, options):
+        self.header = header
+        self.stream = stream
+        self.log = log
+        self.options = options
+        setattr(self.options, 'new_pdf_engine', False)
+        setattr(self.options, 'no_images', False)
+        setattr(self.options, 'unwrap_factor', 0.5)
+
+    def extract_content(self, output_dir):
+        self.log.info('Extracting PDF...')
+
+        with TemporaryFile() as pdf_n:
+            pdf = open(pdf_n, 'rw+b')
+            for x in xrange(self.header.section_count()):
+                pdf.write(self.header.section_data(x))
+
+            from calibre.customize.ui import plugin_for_input_format
+            pdf.seek(0)
+            return plugin_for_input_format('pdf').convert(pdf, self.options,
+                'pdf', self.log, [])

From 0ef1d8f5f764979189bf66e69edc3dd892671771 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sun, 21 Feb 2010 11:03:32 -0500
Subject: [PATCH 2/5] Remove debugging print.

---
 src/calibre/gui2/ui.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/calibre/gui2/ui.py b/src/calibre/gui2/ui.py
index 756ac113dc..b741e18f89 100644
--- a/src/calibre/gui2/ui.py
+++ b/src/calibre/gui2/ui.py
@@ -991,7 +991,6 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
             self.library_view.model().current_changed(current_idx, current_idx)
 
     def __add_filesystem_book(self, paths, allow_device=True):
-        print 222, paths
         if isinstance(paths, basestring):
             paths = [paths]
         books = [path for path in map(os.path.abspath, paths) if os.access(path,

From 5fd1703e30e2fd1681f2be219884879e29d34034 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 21 Feb 2010 11:36:59 -0700
Subject: [PATCH 3/5] FAQ: Explain what to do if CONFIG_SYSFS_DEPRECATED_V2 is set

---
 src/calibre/manual/faq.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/manual/faq.rst b/src/calibre/manual/faq.rst
index bafc13f388..1d44c004fd 100644
--- a/src/calibre/manual/faq.rst
+++ b/src/calibre/manual/faq.rst
@@ -180,7 +180,7 @@ Why is my device not detected in linux?
     
     grep SYSFS_DEPRECATED /boot/config-`uname -r`
 
-You should see something like ``CONFIG_SYSFS_DEPRECATED_V2 is not set``. 
+You should see something like ``CONFIG_SYSFS_DEPRECATED_V2 is not set``. If you don't, you have to either recompile your kernel with the correct setting, or upgrade your linux distro to a more modern version, where this will not be set.
 
 Library Management
 ------------------

From 6b04e572752326ccf840a8263dcfa07236f3f137 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 21 Feb 2010 12:52:41 -0700
Subject: [PATCH 4/5] Fix #4973 (Improvement for chm conversion - remove br tag
 at top of page)

---
 src/calibre/ebooks/chm/input.py | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/src/calibre/ebooks/chm/input.py b/src/calibre/ebooks/chm/input.py
index ecb54dffdb..3b08854532 100644
--- a/src/calibre/ebooks/chm/input.py
+++ b/src/calibre/ebooks/chm/input.py
@@ -4,11 +4,11 @@ __license__ = 'GPL v3'
 __copyright__  = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
                  ' and Alex Bramley <a.bramley at gmail.com>.'
 
-import os, shutil, uuid
+import os, shutil, uuid, re
 from tempfile import mkdtemp
 from mimetypes import guess_type as guess_mimetype
 
-from BeautifulSoup import BeautifulSoup
+from BeautifulSoup import BeautifulSoup, NavigableString
 from lxml import html
 from pychm.chm import CHMFile
 from pychm.chmlib import (
@@ -29,6 +29,17 @@ def match_string(s1, s2_already_lowered):
             return True
     return False
 
+def check_all_prev_empty(tag):
+    if tag is None:
+        return True
+    if tag.__class__ == NavigableString and not check_empty(tag):
+        return False
+    return check_all_prev_empty(tag.previousSibling)
+
+def check_empty(s, rex = re.compile(r'\S')):
+    return rex.search(s) is None
+
+
 def option_parser():
     parser = OptionParser(usage=_('%prog [options] mybook.chm'))
     parser.add_option('--output-dir', '-d', default='.', help=_('Output directory. Defaults to current directory'), dest='output')
@@ -155,6 +166,12 @@ class CHMReader(CHMFile):
         # for some very odd reason each page's content appears to be in a table
         # too. and this table has sub-tables for random asides... grr.
 
+        # remove br at top of page if present after nav bars removed
+        br = soup('br')
+        if br:
+            if check_all_prev_empty(br[0].previousSibling):
+                br[0].extract()
+
         # some images seem to be broken in some chm's :/
         for img in soup('img'):
             try:

From eae6bf3608f7fc9b1eaae2fb0f6009f3313d9a4d Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 21 Feb 2010 13:33:49 -0700
Subject: [PATCH 5/5] MOBI metadata: Do not try to extract embedded metadata
 from MOBI files larger than 4MB

---
 src/calibre/ebooks/mobi/reader.py | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py
index ae175f1493..ed61dbd719 100644
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@@ -10,6 +10,7 @@ import re
 import struct
 import textwrap
 import cStringIO
+import sys
 
 try:
     from PIL import Image as PILImage
@@ -806,13 +807,20 @@ def get_metadata(stream):
             if mh.exth.mi is not None:
                 mi = mh.exth.mi
         else:
-            with TemporaryDirectory('_mobi_meta_reader') as tdir:
-                with CurrentDir(tdir):
-                    mr = MobiReader(stream, log)
-                    parse_cache = {}
-                    mr.extract_content(tdir, parse_cache)
-                    if mr.embedded_mi is not None:
-                        mi = mr.embedded_mi
+            size = sys.maxint
+            if hasattr(stream, 'seek') and hasattr(stream, 'tell'):
+                pos = stream.tell()
+                stream.seek(0, 2)
+                size = stream.tell()
+                stream.seek(pos)
+            if size < 4*1024*1024:
+                with TemporaryDirectory('_mobi_meta_reader') as tdir:
+                    with CurrentDir(tdir):
+                        mr = MobiReader(stream, log)
+                        parse_cache = {}
+                        mr.extract_content(tdir, parse_cache)
+                        if mr.embedded_mi is not None:
+                            mi = mr.embedded_mi
         if hasattr(mh.exth, 'cover_offset'):
             cover_index = mh.first_image_index + mh.exth.cover_offset
             data  = mh.section_data(int(cover_index))