Merge upstream changes

2025-07-09 03:04:10 -04:00 · 2009-01-17 11:17:48 -05:00 · 2009-01-17 11:17:48 -05:00 · 7449870919
commit 7449870919
parent e8b919db81 35fce76007
84 changed files with 28093 additions and 18947 deletions
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@ -2,7 +2,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 __appname__   = 'calibre'
-__version__   = '0.4.126'
+__version__   = '0.4.128'
 __author__    = "Kovid Goyal <kovid@kovidgoyal.net>"
 '''
 Various run time constants.
--- a/src/calibre/debug.py
+++ b/src/calibre/debug.py
@ -43,7 +43,11 @@ def update_module(mod, path):
        zp = os.path.join(os.path.dirname(sys.executable), 'library.zip')
    elif isosx:
        zp = os.path.join(os.path.dirname(getattr(sys, 'frameworks_dir')),
-                            'Resources', 'lib', 'python2.5', 'site-packages.zip')
+                            'Resources', 'lib', 
+                            'python'+'.'.join(map(str, sys.version_info[:2])), 
+                            'site-packages.zip')
+    else:
+        zp = os.path.join(getattr(sys, 'frozen_path'), 'loader.zip')
    if zp is not None:
        update_zipfile(zp, mod, path)
    else:
--- a/src/calibre/devices/cybookg3/driver.py
+++ b/src/calibre/devices/cybookg3/driver.py
@ -9,31 +9,26 @@ import os, fnmatch
 from calibre.devices.usbms.driver import USBMS

 class CYBOOKG3(USBMS):
-    MIME_MAP   = { 
-                'mobi' : 'application/mobi',
-                'prc' : 'application/prc',
-                'html' : 'application/html', 
-                'pdf' : 'application/pdf',  
-                'rtf' : 'application/rtf', 
-                'txt' : 'text/plain',
-              }
    # Ordered list of supported formats
-    FORMATS     = MIME_MAP.keys()
+    # Be sure these have an entry in calibre.devices.mime
+    FORMATS     = ['mobi', 'prc', 'html', 'pdf', 'rtf', 'txt']
    
    VENDOR_ID   = 0x0bda
    PRODUCT_ID  = 0x0703
    BCD         = [0x110, 0x132]
    
    VENDOR_NAME = 'BOOKEEN'
-    PRODUCT_NAME = 'CYBOOK_GEN3'
+    WINDOWS_MAIN_MEM = 'CYBOOK_GEN3__-FD'
+    WINDOWS_CARD_MEM = 'CYBOOK_GEN3__-SD'
    
-    OSX_NAME_MAIN_MEM = 'Bookeen Cybook Gen3 -FD Media'
-    OSX_NAME_CARD_MEM = 'Bookeen Cybook Gen3 -SD Media'
+    OSX_MAIN_MEM = 'Bookeen Cybook Gen3 -FD Media'
+    OSX_CARD_MEM = 'Bookeen Cybook Gen3 -SD Media'
    
    MAIN_MEMORY_VOLUME_LABEL  = 'Cybook Gen 3 Main Memory'
    STORAGE_CARD_VOLUME_LABEL = 'Cybook Gen 3 Storage Card'
    
-    EBOOK_DIR = "eBooks"
+    EBOOK_DIR_MAIN = "eBooks"
+    SUPPORTS_SUB_DIRS = True

    def delete_books(self, paths, end_session=True):
        for path in paths:
@ -52,3 +47,8 @@ class CYBOOKG3(USBMS):
                    for filen in fnmatch.filter(files, filename + "*.t2b"):
                        os.unlink(os.path.join(p, filen))

+                try:
+                    os.removedirs(os.path.dirname(path))
+                except:
+                    pass
+
--- a/src/calibre/devices/interface.py
+++ b/src/calibre/devices/interface.py
@ -41,6 +41,20 @@ class Device(object):
        '''Return the FDI description of this device for HAL on linux.'''
        return ''
    
+    @classmethod
+    def can_handle(cls, device_info):
+        '''
+        Optional method to perform further checks on a device to see if this
+        driver is capable of handling it. If the driver cannot handle the
+        device, this method should return False. It is only called after the
+        vendor id, product id and bcd have matched, so it can do some
+        relatively time intensive checks. The default implementation returns
+        True.
+        
+        :param device_info: On Windows a device ID string. On Unix a tuple of 
+        ``(vendor_id, product_id, bcd)``. 
+        '''
+        return True
+    
    def open(self):
        '''
        Perform any device specific initialization. Called after the device is
@ -109,7 +123,8 @@ class Device(object):
        """    
        raise NotImplementedError()
    
-    def upload_books(self, files, names, on_card=False, end_session=True):
+    def upload_books(self, files, names, on_card=False, end_session=True,
+                     metadata=None):
        '''
        Upload a list of books to the device. If a file already
        exists on the device, it should be replaced.
@ -121,6 +136,10 @@ class Device(object):
        once uploaded to the device. len(names) == len(files)
        @return: A list of 3-element tuples. The list is meant to be passed 
        to L{add_books_to_metadata}.
+        @param metadata: If not None, it is a list of dictionaries. Each dictionary 
+        will have at least the key tags to allow the driver to choose book location
+        based on tags. len(metadata) == len(files). If your device does not support
+        hierarchical ebook folders, you can safely ignore this parameter.
        '''
        raise NotImplementedError()
    
--- a/src/calibre/devices/kindle/driver.py
+++ b/src/calibre/devices/kindle/driver.py
@ -9,24 +9,30 @@ import os, fnmatch
 from calibre.devices.usbms.driver import USBMS

 class KINDLE(USBMS):
-    MIME_MAP   = { 
-                'azw' : 'application/azw',
-                'mobi' : 'application/mobi',
-                'prc' : 'application/prc',
-                'txt' : 'text/plain',
-              }
    # Ordered list of supported formats
-    FORMATS     = MIME_MAP.keys()
+    FORMATS     = ['azw', 'mobi', 'prc', 'txt']
    
    VENDOR_ID   = 0x1949
    PRODUCT_ID  = 0x0001
-    BCD         = 0x399
+    BCD         = [0x399]
    
    VENDOR_NAME = 'AMAZON'
-    PRODUCT_NAME = 'KINDLE'
+    WINDOWS_MAIN_MEM = 'KINDLE'
    
    MAIN_MEMORY_VOLUME_LABEL  = 'Kindle Main Memory'
    STORAGE_CARD_VOLUME_LABEL = 'Kindle Storage Card'
    
-    EBOOK_DIR = "documents"
+    EBOOK_DIR_MAIN = "documents"
+
+    def delete_books(self, paths, end_session=True):
+        '''
+        Delete the books at ``paths`` and, for each one, the ``.mbp``
+        auxiliary file stored next to it when such a file exists. Paths that
+        do not exist are skipped.
+
+        :param paths: Paths of the ebook files to delete.
+        :param end_session: Not used by this implementation.
+        '''
+        for path in paths:
+            if not os.path.exists(path):
+                continue
+            os.unlink(path)
+
+            # Delete the ebook auxiliary file (same base name, .mbp extension)
+            aux_path = os.path.splitext(path)[0] + '.mbp'
+            if os.path.exists(aux_path):
+                os.unlink(aux_path)

--- a/src/calibre/devices/mime.py
+++ b/src/calibre/devices/mime.py
@ -0,0 +1,19 @@
+__license__   = 'GPL v3'
+__copyright__ = '2009, John Schember <john at nachtimwald.com>'
+'''
+Global Mime mapping of ebook types.
+'''
+
+MIME_MAP   = {
+                'azw'  : 'application/azw',
+                'epub' : 'application/epub+zip',
+                'html' : 'text/html',
+                'lrf'  : 'application/x-sony-bbeb',
+                'lrx'  : 'application/x-sony-bbeb',
+                'mobi' : 'application/mobi',
+                'pdf'  : 'application/pdf',
+                'prc'  : 'application/prc',
+                'rtf'  : 'application/rtf',
+                'txt'  : 'text/plain',
+              }
+
--- a/src/calibre/devices/prs500/driver.py
+++ b/src/calibre/devices/prs500/driver.py
@ -841,7 +841,8 @@ class PRS500(Device):
            self.upload_book_list(booklists[1], end_session=False)
    
    @safe
-    def upload_books(self, files, names, on_card=False, end_session=True):
+    def upload_books(self, files, names, on_card=False, end_session=True, 
+                     metadata=None):
        card = self.card(end_session=False)
        prefix = card + '/' + self.CARD_PATH_PREFIX +'/' if on_card else '/Data/media/books/'
        if on_card and not self._exists(prefix)[0]:
--- a/src/calibre/devices/prs505/driver.py
+++ b/src/calibre/devices/prs505/driver.py
@ -407,7 +407,8 @@ class PRS505(Device):
        if not os.path.isdir(path):
            os.utime(path, None)
            
-    def upload_books(self, files, names, on_card=False, end_session=True):
+    def upload_books(self, files, names, on_card=False, end_session=True, 
+                     metadata=None):
        if on_card and not self._card_prefix:
            raise ValueError(_('The reader has no storage card connected.'))
        path = os.path.join(self._card_prefix, self.CARD_PATH_PREFIX) if on_card \
--- a/src/calibre/devices/scanner.py
+++ b/src/calibre/devices/scanner.py
@ -60,15 +60,18 @@ class DeviceScanner(object):
    def is_device_connected(self, device):
        if iswindows:
            vid, pid = 'vid_%4.4x'%device.VENDOR_ID, 'pid_%4.4x'%device.PRODUCT_ID
+            vidd, pidd = 'vid_%i'%device.VENDOR_ID, 'pid_%i'%device.PRODUCT_ID
            for device_id in self.devices:
-                if vid in device_id and pid in device_id:
+                if (vid in device_id or vidd in device_id) and (pid in device_id or pidd in device_id):
                    if self.test_bcd_windows(device_id, getattr(device, 'BCD', None)):
-                        return True
+                        if device.can_handle(device_id):
+                            return True
        else:
            for vendor, product, bcdDevice in self.devices:
                if device.VENDOR_ID == vendor and device.PRODUCT_ID == product:
                    if self.test_bcd(bcdDevice, getattr(device, 'BCD', None)):
-                        return True
+                        if device.can_handle((vendor, product, bcdDevice)):
+                            return True
        return False


--- a/src/calibre/devices/usbms/device.py
+++ b/src/calibre/devices/usbms/device.py
@ -6,7 +6,7 @@ intended to be subclassed with the relevant parts implemented for a particular
 device. This class handles device detection.
 '''

-import os, time
+import os, subprocess, time

 from calibre.devices.interface import Device as _Device
 from calibre.devices.errors import DeviceError
@ -23,11 +23,12 @@ class Device(_Device):
    PRODUCT_ID  = 0x0
    BCD         = None
    
-    VENDOR_NAME = ''
-    PRODUCT_NAME = ''
+    VENDOR_NAME = None
+    WINDOWS_MAIN_MEM = None
+    WINDOWS_CARD_MEM = None
    
-    OSX_NAME_MAIN_MEM = ''
-    OSX_NAME_CARD_MEM = ''
+    OSX_MAIN_MEM = None
+    OSX_CARD_MEM = None
    
    MAIN_MEMORY_VOLUME_LABEL  = ''
    STORAGE_CARD_VOLUME_LABEL = ''
@ -148,43 +149,47 @@ class Device(_Device):
                
        return (msz, 0, csz)

-    @classmethod
-    def windows_match_device(cls, device_id):
-        device_id = device_id.upper()
-        if 'VEN_'+cls.VENDOR_NAME in device_id and \
-               'PROD_'+cls.PRODUCT_NAME in device_id:
-            return True
-        vid, pid = hex(cls.VENDOR_ID)[2:], hex(cls.PRODUCT_ID)[2:]        
-        while len(vid) < 4: vid = '0' + vid
-        while len(pid) < 4: pid = '0' + pid        
-        if 'VID_'+vid in device_id and 'PID_'+pid in device_id:
-            return True
+    def windows_match_device(self, pnp_id, device_id):
+        '''
+        Return True if ``pnp_id`` contains both ``VEN_<VENDOR_NAME>`` and
+        ``PROD_<device_id>``. The comparison is done on upper-cased copies of
+        ``pnp_id`` and ``device_id``.
+
+        :param pnp_id: PNP device id string of a disk drive.
+        :param device_id: Product string to look for, e.g. one of the
+        ``WINDOWS_MAIN_MEM`` / ``WINDOWS_CARD_MEM`` values. May be None.
+        '''
+        # VENDOR_NAME and the WINDOWS_*_MEM ids default to None, so every
+        # value has to be validated before any string operation is applied.
+        if not pnp_id or not device_id or not self.VENDOR_NAME:
+            return False
+
+        pnp_id = pnp_id.upper()
+        device_id = device_id.upper()
+
+        if 'VEN_' + self.VENDOR_NAME in pnp_id and 'PROD_' + device_id in pnp_id:
+            return True
+
        return False

-    # This only supports Windows >= 2000
+    def windows_get_drive_prefix(self, drive):
+        '''
+        Return the ``DeviceID`` of the logical disk on the first partition of
+        ``drive`` with ``os.sep`` appended, or None when either association
+        lookup yields no entries.
+        '''
+        try:
+            first_partition = drive.associators("Win32_DiskDriveToDiskPartition")[0]
+            volume = first_partition.associators('Win32_LogicalDiskToPartition')[0]
+            return volume.DeviceID + os.sep
+        except IndexError:
+            # No partition, or no logical disk on the partition
+            return None
+
    def open_windows(self):
+        '''
+        Find the Windows drives of the main memory and of the storage card
+        and store their prefixes in ``self._main_prefix`` and
+        ``self._card_prefix``. A prefix is None when the corresponding drive
+        was not matched.
+
+        :raises DeviceError: if no disk drive matched at all.
+        '''
-        drives = []
+        drives = {}
        wmi = __import__('wmi', globals(), locals(), [], -1) 
        c = wmi.WMI()
        for drive in c.Win32_DiskDrive():
-            if self.__class__.windows_match_device(str(drive.PNPDeviceID)):
-                if drive.Partitions == 0:
-                    continue
-                try:
-                    partition = drive.associators("Win32_DiskDriveToDiskPartition")[0]
-                    logical_disk = partition.associators('Win32_LogicalDiskToPartition')[0]
-                    prefix = logical_disk.DeviceID+os.sep
-                    drives.append((drive.Index, prefix))
-                except IndexError:
-                    continue
+            pnp_id = str(drive.PNPDeviceID)
+            # The product ids are class attributes, so they must be looked up
+            # through self; a bare name would raise NameError.
+            if self.windows_match_device(pnp_id, self.WINDOWS_MAIN_MEM):
+                drives['main'] = self.windows_get_drive_prefix(drive)
+            elif self.windows_match_device(pnp_id, self.WINDOWS_CARD_MEM):
+                drives['card'] = self.windows_get_drive_prefix(drive)
+                
+            # Stop only once BOTH drives are known. Note that
+            # "'main' and 'card' in drives" would test for 'card' alone.
+            if 'main' in drives and 'card' in drives:
+                break
                
        if not drives:
-            raise DeviceError(_('Unable to detect the %s disk drive. Try rebooting.')%self.__class__.__name__)
-        
-        drives.sort(cmp=lambda a, b: cmp(a[0], b[0]))
-        self._main_prefix = drives[0][1]
-        if len(drives) > 1:
-            self._card_prefix = drives[1][1]
+            raise DeviceError(_('Unable to detect the %s disk drive. Try rebooting.') % self.__class__.__name__)
+            
+        self._main_prefix = drives.get('main')
+        self._card_prefix = drives.get('card')

    @classmethod
    def get_osx_mountpoints(self, raw=None):
@ -207,9 +212,9 @@ class Device(_Device):
                    break
                    
        for i, line in enumerate(lines):
-            if line.strip().endswith('<class IOMedia>') and self.OSX_NAME_MAIN_MEM in line:
+            if self.OSX_MAIN_MEM is not None and line.strip().endswith('<class IOMedia>') and self.OSX_MAIN_MEM in line:
                get_dev_node(lines[i+1:], 'main')
-            if line.strip().endswith('<class IOMedia>') and self.OSX_NAME_CARD_MEM in line:
+            if self.OSX_CARD_MEM is not None and line.strip().endswith('<class IOMedia>') and self.OSX_CARD_MEM in line:
                get_dev_node(lines[i+1:], 'card')
            if len(names.keys()) == 2:
                break
--- a/src/calibre/devices/usbms/driver.py
+++ b/src/calibre/devices/usbms/driver.py
@ -12,11 +12,13 @@ from itertools import cycle
 from calibre.devices.usbms.device import Device
 from calibre.devices.usbms.books import BookList, Book
 from calibre.devices.errors import FreeSpaceError
+from calibre.devices.mime import MIME_MAP

 class USBMS(Device):
-    EBOOK_DIR = ''
-    MIME_MAP = {}
    FORMATS = []
+    EBOOK_DIR_MAIN = ''
+    EBOOK_DIR_CARD = ''
+    SUPPORTS_SUB_DIRS = False

    def __init__(self, key='-1', log_packets=False, report_progress=None):
        pass
@ -35,29 +37,39 @@ class USBMS(Device):
            return bl

        prefix = self._card_prefix if oncard else self._main_prefix
+        ebook_dir = self.EBOOK_DIR_CARD if oncard else self.EBOOK_DIR_MAIN
        
-        # Get all books in all directories under the root EBOOK_DIR directory
-        for path, dirs, files in os.walk(os.path.join(prefix, self.EBOOK_DIR)):
+        # Get all books in all directories under the root ebook_dir directory
+        for path, dirs, files in os.walk(os.path.join(prefix, ebook_dir)):
            # Filter out anything that isn't in the list of supported ebook types
-            for book_type in self.MIME_MAP.keys():
+            for book_type in self.FORMATS:
                for filename in fnmatch.filter(files, '*.%s' % (book_type)):
                    title, author, mime = self.__class__.extract_book_metadata_by_filename(filename)
                    
                    bl.append(Book(os.path.join(path, filename), title, author, mime))
        return bl
    
-    def upload_books(self, files, names, on_card=False, end_session=True):
+    def upload_books(self, files, names, on_card=False, end_session=True, 
+                     metadata=None):
+        '''
+        Copy books to the ebook directory of the main memory or of the
+        storage card.
+
+        :param files: Paths or file-like objects (anything with ``seek`` and
+        ``read``) of the books to upload.
+        :param names: File names to use on the device, one per entry in files.
+        :param on_card: If True upload to the storage card.
+        :param end_session: Not used by this implementation.
+        :param metadata: Optional list of dictionaries, one per file. When the
+        driver sets SUPPORTS_SUB_DIRS, the first tag starting with ``/`` in
+        the ``tags`` entry selects the sub directory for that book. May be
+        None, in which case all books go to the root ebook directory.
+        :return: A list of ``(path, on_card)`` tuples, one per uploaded book.
+        :raises ValueError: if on_card is set and no storage card is connected.
+        :raises FreeSpaceError: if the storage card lacks the required space.
+        '''
        if on_card and not self._card_prefix:
            raise ValueError(_('The reader has no storage card connected.'))
            
        if not on_card:
-            path = os.path.join(self._main_prefix, self.EBOOK_DIR)
+            path = os.path.join(self._main_prefix, self.EBOOK_DIR_MAIN)
        else:
-            path = os.path.join(self._card_prefix, self.EBOOK_DIR)
-            
-        sizes = map(os.path.getsize, files)
+            path = os.path.join(self._card_prefix, self.EBOOK_DIR_CARD)
+
+        def get_size(obj):
+            # File-like objects are measured by seeking to their end; the
+            # position is reset to the start afterwards.
+            if hasattr(obj, 'seek'):
+                obj.seek(0, os.SEEK_END)
+                size = obj.tell()
+                obj.seek(0)
+                return size
+            return os.path.getsize(obj)
+
+        sizes = map(get_size, files)
        size = sum(sizes)
-    
+
        if on_card and size > self.free_space()[2] - 1024*1024: 
            raise FreeSpaceError(_("There is insufficient free space on the storage card"))
        if not on_card and size > self.free_space()[0] - 2*1024*1024: 
@ -65,17 +77,42 @@ class USBMS(Device):

        paths = []
        names = iter(names)
+        # metadata is optional: iter(None) would raise TypeError, so only
+        # build the iterator when the caller actually supplied metadata.
+        metadata = iter(metadata) if metadata is not None else None
        
        for infile in files:
-            filepath = os.path.join(path, names.next())
+            newpath = path
+            
+            if self.SUPPORTS_SUB_DIRS and metadata is not None:
+                mdata = metadata.next()
+                
+                if 'tags' in mdata.keys():
+                    for tag in mdata['tags']:
+                        if tag.startswith('/'):
+                            newpath += tag
+                            newpath = os.path.normpath(newpath)
+                            break
+
+            if not os.path.exists(newpath):
+                os.makedirs(newpath)
+            
+            filepath = os.path.join(newpath, names.next())                
            paths.append(filepath)
            
-            shutil.copy2(infile, filepath)
+            if hasattr(infile, 'read'):
+                infile.seek(0)
+                
+                dest = open(filepath, 'wb')
+                # try/finally so the handle is closed even if the copy fails
+                try:
+                    shutil.copyfileobj(infile, dest, 10*1024*1024)
+                    dest.flush()
+                finally:
+                    dest.close()
+            else:
+                shutil.copy2(infile, filepath)
    
        return zip(paths, cycle([on_card]))
    
    @classmethod
-    def add_books_to_metadata(cls, locations, metadata, booklists):
+    def add_books_to_metadata(cls, locations, metadata, booklists):    
        for location in locations:
            path = location[0]
            on_card = 1 if location[1] else 0
@ -88,6 +125,10 @@ class USBMS(Device):
            if os.path.exists(path):
                # Delete the ebook
                os.unlink(path)
+                try:
+                    os.removedirs(os.path.dirname(path))
+                except:
+                    pass
    
    @classmethod
    def remove_books_from_metadata(cls, paths, booklists):
@ -96,7 +137,6 @@ class USBMS(Device):
                for book in bl:
                    if path.endswith(book.path):
                        bl.remove(book)
-                        break
        
    def sync_booklists(self, booklists, end_session=True):
        # There is no meta data on the device to update. The device is treated
@ -136,10 +176,11 @@ class USBMS(Device):
        else:
            book_title = os.path.splitext(filename)[0].replace('_', ' ')
           
-        fileext = os.path.splitext(filename)[1]
-        if fileext in cls.MIME_MAP.keys():
-            book_mime = cls.MIME_MAP[fileext]
-            
+        fileext = os.path.splitext(filename)[1][1:]
+
+        if fileext in cls.FORMATS:
+            book_mime = MIME_MAP[fileext] if fileext in MIME_MAP.keys() else 'Unknown'
+
        return book_title, book_author, book_mime

 # ls, rm, cp, mkdir, touch, cat
--- a/src/calibre/ebooks/epub/from_any.py
+++ b/src/calibre/ebooks/epub/from_any.py
@ -67,6 +67,7 @@ def txt2opf(path, tdir, opts):
 def pdf2opf(path, tdir, opts):
    from calibre.ebooks.lrf.pdf.convert_from import generate_html
    generate_html(path, tdir)
+    opts.dont_split_on_page_breaks = True
    return os.path.join(tdir, 'metadata.opf')

 def epub2opf(path, tdir, opts):
--- a/src/calibre/ebooks/epub/from_html.py
+++ b/src/calibre/ebooks/epub/from_html.py
@ -77,6 +77,8 @@ def check_links(opf_path, pretty_print):
                html_files.append(os.path.abspath(content(f)))
        
        for path in html_files:
+            if not os.access(path, os.R_OK):
+                continue
            base = os.path.dirname(path)
            root = html.fromstring(open(content(path), 'rb').read(), parser=parser)
            for element, attribute, link, pos in list(root.iterlinks()):
--- a/src/calibre/ebooks/html.py
+++ b/src/calibre/ebooks/html.py
@ -335,7 +335,7 @@ class PreProcessor(object):
    # Fix pdftohtml markup
    PDFTOHTML  = [
                  # Remove <hr> tags
-                  (re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<span style="page-break-after:always"> </span>'),
+                  (re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<br />'),
                  # Remove page numbers
                  (re.compile(r'\d+<br>', re.IGNORECASE), lambda match: ''),
                  # Remove <br> and replace <br><br> with <p>
@ -560,7 +560,7 @@ class Processor(Parser):
                hr = etree.Element('hr')
                if elem.getprevious() is None:
                    elem.getparent()[:0] = [hr]
-                else:
+                elif elem.getparent() is not None:
                    insert = None
                    for i, c in enumerate(elem.getparent()):
                        if c is elem:
@ -796,7 +796,19 @@ class Processor(Parser):
                setting = ''
            face = font.attrib.pop('face', None)
            if face is not None:
-                setting += 'font-face:%s;'%face
+                faces = []
+                for face in face.split(','):
+                    face = face.strip()
+                    if ' ' in face and not (face[0] == face[-1] == '"'):
+                        face = '"%s"' % face.replace('"', r'\"')
+                    faces.append(face)
+                for generic in ('serif', 'sans-serif', 'monospace'):
+                    if generic in faces:
+                        break
+                else:
+                    faces.append('serif')
+                family = ', '.join(faces)
+                setting += 'font-family: %s;' % family
            color = font.attrib.pop('color', None)
            if color is not None:
                setting += 'color:%s'%color
--- a/src/calibre/ebooks/lit/reader.py
+++ b/src/calibre/ebooks/lit/reader.py
@ -7,24 +7,20 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net> ' \
    'and Marshall T. Vandegrift <llasram@gmail.com>'

-import sys, struct, os
+import sys, struct, cStringIO, os
 import functools
 import re
 from urlparse import urldefrag
-from cStringIO import StringIO
-from urllib import unquote as urlunquote
 from lxml import etree
 from calibre.ebooks.lit import LitError
 from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP
 import calibre.ebooks.lit.mssha1 as mssha1
-from calibre.ebooks.oeb.base import XML_PARSER, urlnormalize
+from calibre.ebooks.oeb.base import urlnormalize
 from calibre.ebooks import DRMError
 from calibre import plugins
 lzx, lxzerror = plugins['lzx']
 msdes, msdeserror = plugins['msdes']

-__all__ = ["LitReader"]
-
 XML_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
 """
 OPF_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
@ -112,9 +108,6 @@ def consume_sized_utf8_string(bytes, zpad=False):
        pos += 1
    return u''.join(result), bytes[pos:]

-def encode(string):
-    return unicode(string).encode('ascii', 'xmlcharrefreplace')
-
 class UnBinary(object):
    AMPERSAND_RE = re.compile(
        r'&(?!(?:#[0-9]+|#x[0-9a-fA-F]+|[a-zA-Z_:][a-zA-Z0-9.-_:]+);)')
@ -125,13 +118,13 @@ class UnBinary(object):
    def __init__(self, bin, path, manifest={}, map=HTML_MAP):
        self.manifest = manifest
        self.tag_map, self.attr_map, self.tag_to_attr_map = map
-        self.is_html = map is HTML_MAP
+        self.opf = map is OPF_MAP
+        self.bin = bin
        self.dir = os.path.dirname(path)
-        buf = StringIO()
-        self.binary_to_text(bin, buf)
-        self.raw = buf.getvalue().lstrip()
+        self.buf = cStringIO.StringIO()
+        self.binary_to_text()
+        self.raw = self.buf.getvalue().lstrip().decode('utf-8')
        self.escape_reserved()
-        self._tree = None

    def escape_reserved(self):
        raw = self.raw
@ -158,28 +151,18 @@ class UnBinary(object):
        return '/'.join(relpath)
    
    def __unicode__(self):
-        return self.raw.decode('utf-8')
-
-    def __str__(self):
        return self.raw
-
-    def tree():
-        def fget(self):
-            if not self._tree:
-                self._tree = etree.fromstring(self.raw, parser=XML_PARSER)
-            return self._tree
-        return property(fget=fget)
-    tree = tree()
    
-    def binary_to_text(self, bin, buf, index=0, depth=0):
+    def binary_to_text(self, base=0, depth=0):
        tag_name = current_map = None
        dynamic_tag = errors = 0
        in_censorship = is_goingdown = False
        state = 'text'
+        index = base
        flags = 0
        
-        while index < len(bin):
-            c, index = read_utf8_char(bin, index)
+        while index < len(self.bin):
+            c, index = read_utf8_char(self.bin, index)
            oc = ord(c)
            
            if state == 'text':
@ -192,7 +175,7 @@ class UnBinary(object):
                    c = '>>'
                elif c == '<':
                    c = '<<'
-                buf.write(encode(c))
+                self.buf.write(c.encode('ascii', 'xmlcharrefreplace'))
            
            elif state == 'get flags':
                if oc == 0:
@ -205,7 +188,7 @@ class UnBinary(object):
                state = 'text' if oc == 0 else 'get attr'
                if flags & FLAG_OPENING:
                    tag = oc
-                    buf.write('<')
+                    self.buf.write('<')
                    if not (flags & FLAG_CLOSING):
                        is_goingdown = True
                    if tag == 0x8000:
@ -222,7 +205,7 @@ class UnBinary(object):
                        tag_name = '?'+unichr(tag)+'?'
                        current_map = self.tag_to_attr_map[tag]
                        print 'WARNING: tag %s unknown' % unichr(tag)
-                    buf.write(encode(tag_name))
+                    self.buf.write(unicode(tag_name).encode('utf-8'))
                elif flags & FLAG_CLOSING:
                    if depth == 0:
                        raise LitError('Extra closing tag')
@ -234,14 +217,15 @@ class UnBinary(object):
                    if not is_goingdown:
                        tag_name = None
                        dynamic_tag = 0
-                        buf.write(' />')
+                        self.buf.write(' />')
                    else:
-                        buf.write('>')
-                        index = self.binary_to_text(bin, buf, index, depth+1)
+                        self.buf.write('>')
+                        index = self.binary_to_text(base=index, depth=depth+1)
                        is_goingdown = False
                        if not tag_name:
                            raise LitError('Tag ends before it begins.')
-                        buf.write(encode(u''.join(('</', tag_name, '>'))))
+                        self.buf.write(u''.join(
+                                ('</', tag_name, '>')).encode('utf-8'))
                        dynamic_tag = 0
                        tag_name = None
                    state = 'text'
@ -261,7 +245,7 @@ class UnBinary(object):
                        in_censorship = True
                        state = 'get value length'
                        continue
-                    buf.write(' ' + encode(attr) + '=')
+                    self.buf.write(' ' + unicode(attr).encode('utf-8') + '=')
                    if attr in ['href', 'src']:
                        state = 'get href length'
                    else:
@ -269,39 +253,40 @@ class UnBinary(object):
            
            elif state == 'get value length':
                if not in_censorship:
-                    buf.write('"')
+                    self.buf.write('"')
                count = oc - 1
                if count == 0:
                    if not in_censorship:
-                        buf.write('"')
+                        self.buf.write('"')
                    in_censorship = False
                    state = 'get attr'
                    continue
                state = 'get value'
                if oc == 0xffff:
                    continue
-                if count < 0 or count > (len(bin) - index):
+                if count < 0 or count > (len(self.bin) - index):
                    raise LitError('Invalid character count %d' % count)
            
            elif state == 'get value':
                if count == 0xfffe:
                    if not in_censorship:
-                        buf.write('%s"' % (oc - 1))
+                        self.buf.write('%s"' % (oc - 1))
                    in_censorship = False
                    state = 'get attr'
                elif count > 0:
                    if not in_censorship:
-                        buf.write(encode(c))
+                        self.buf.write(c.encode(
+                            'ascii', 'xmlcharrefreplace'))
                    count -= 1
                if count == 0:
                    if not in_censorship:
-                        buf.write('"')
+                        self.buf.write('"')
                    in_censorship = False
                    state = 'get attr'
            
            elif state == 'get custom length':
                count = oc - 1
-                if count <= 0 or count > len(bin)-index:
+                if count <= 0 or count > len(self.bin)-index:
                    raise LitError('Invalid character count %d' % count)
                dynamic_tag += 1
                state = 'get custom'
@ -311,26 +296,26 @@ class UnBinary(object):
                tag_name += c
                count -= 1
                if count == 0:
-                    buf.write(encode(tag_name))
+                    self.buf.write(unicode(tag_name).encode('utf-8'))
                    state = 'get attr'
            
            elif state == 'get attr length':
                count = oc - 1
-                if count <= 0 or count > (len(bin) - index):
+                if count <= 0 or count > (len(self.bin) - index):
                    raise LitError('Invalid character count %d' % count)
-                buf.write(' ')
+                self.buf.write(' ')
                state = 'get custom attr'
            
            elif state == 'get custom attr':
-                buf.write(encode(c))
+                self.buf.write(unicode(c).encode('utf-8'))
                count -= 1
                if count == 0:
-                    buf.write('=')
+                    self.buf.write('=')
                    state = 'get value length'

            elif state == 'get href length':
                count = oc - 1
-                if count <= 0 or count > (len(bin) - index):
+                if count <= 0 or count > (len(self.bin) - index):
                    raise LitError('Invalid character count %d' % count)
                href = ''
                state = 'get href'
@ -344,11 +329,10 @@ class UnBinary(object):
                    if frag:
                        path = '#'.join((path, frag))
                    path = urlnormalize(path)
-                    buf.write(encode(u'"%s"' % path))
+                    self.buf.write((u'"%s"' % path).encode('utf-8'))
                    state = 'get attr'
        return index
    
-
 class DirectoryEntry(object):
    def __init__(self, name, section, offset, size):
        self.name = name
@ -363,7 +347,6 @@ class DirectoryEntry(object):
    def __str__(self):
        return repr(self)

-
 class ManifestItem(object):
    def __init__(self, original, internal, mime_type, offset, root, state):
        self.original = original
@ -391,87 +374,65 @@ class ManifestItem(object):
            % (self.internal, self.path, self.mime_type, self.offset,
               self.root, self.state)

-
 def preserve(function):
    def wrapper(self, *args, **kwargs):
-        opos = self.stream.tell()
+        opos = self._stream.tell()
        try:
            return function(self, *args, **kwargs)
        finally:
-            self.stream.seek(opos)
+            self._stream.seek(opos)
    functools.update_wrapper(wrapper, function)
    return wrapper
    
-class LitFile(object):
+class LitReader(object):
    PIECE_SIZE = 16
-
-    def __init__(self, filename_or_stream):
-        if hasattr(filename_or_stream, 'read'):
-            self.stream = filename_or_stream
-        else:
-            self.stream = open(filename_or_stream, 'rb')
-        try:
-            self.opf_path = os.path.splitext(
-                os.path.basename(self.stream.name))[0] + '.opf'
-        except AttributeError:
-            self.opf_path = 'content.opf'
-        if self.magic != 'ITOLITLS':
-            raise LitError('Not a valid LIT file')
-        if self.version != 1:
-            raise LitError('Unknown LIT version %d' % (self.version,))
-        self.read_secondary_header()
-        self.read_header_pieces()
-        self.read_section_names()
-        self.read_manifest()
-        self.read_drm()
-
-    def warn(self, msg):
-        print "WARNING: %s" % (msg,)
+    XML_PARSER = etree.XMLParser(
+        recover=True, resolve_entities=False)

    def magic():
        @preserve
        def fget(self):
-            self.stream.seek(0)
-            return self.stream.read(8)
+            self._stream.seek(0)
+            return self._stream.read(8)
        return property(fget=fget)
    magic = magic()
    
    def version():
        def fget(self):
-            self.stream.seek(8)
-            return u32(self.stream.read(4))
+            self._stream.seek(8)
+            return u32(self._stream.read(4))
        return property(fget=fget)
    version = version()
    
    def hdr_len():
        @preserve
        def fget(self):
-            self.stream.seek(12)
-            return int32(self.stream.read(4))
+            self._stream.seek(12)
+            return int32(self._stream.read(4))
        return property(fget=fget)
    hdr_len = hdr_len()
    
    def num_pieces():
        @preserve
        def fget(self):
-            self.stream.seek(16)
-            return int32(self.stream.read(4))
+            self._stream.seek(16)
+            return int32(self._stream.read(4))
        return property(fget=fget)
    num_pieces = num_pieces()
    
    def sec_hdr_len():
        @preserve
        def fget(self):
-            self.stream.seek(20)
-            return int32(self.stream.read(4))
+            self._stream.seek(20)
+            return int32(self._stream.read(4))
        return property(fget=fget)
    sec_hdr_len = sec_hdr_len()
    
    def guid():
        @preserve
        def fget(self):
-            self.stream.seek(24)
-            return self.stream.read(16)
+            self._stream.seek(24)
+            return self._stream.read(16)
        return property(fget=fget)
    guid = guid()
    
@ -481,27 +442,44 @@ class LitFile(object):
            size = self.hdr_len \
                + (self.num_pieces * self.PIECE_SIZE) \
                + self.sec_hdr_len
-            self.stream.seek(0)
-            return self.stream.read(size)
+            self._stream.seek(0)
+            return self._stream.read(size)
        return property(fget=fget)
    header = header()
    
+    def __init__(self, filename_or_stream):
+        """Open a LIT container and parse its headers, directory and metadata.
+
+        `filename_or_stream` is used directly when it has a `read`
+        attribute; otherwise it is treated as a path and opened in binary
+        mode.  Raises LitError when the magic string is not 'ITOLITLS' or
+        the version field is not 1.
+        """
+        if not hasattr(filename_or_stream, 'read'):
+            filename_or_stream = open(filename_or_stream, 'rb')
+        self._stream = filename_or_stream
+        if self.magic != 'ITOLITLS':
+            raise LitError('Not a valid LIT file')
+        if self.version != 1:
+            raise LitError('Unknown LIT version %d' % (self.version,))
+        # The directory reader fills this dict; the section-name, manifest
+        # and metadata readers look their entries up in it afterwards, so
+        # the steps below must run in this order.
+        self.entries = {}
+        parse_steps = (
+            self._read_secondary_header,
+            self._read_header_pieces,
+            self._read_section_names,
+            self._read_manifest,
+            self._read_meta,
+            self._read_drm,
+        )
+        for step in parse_steps:
+            step()
+
    @preserve
    def __len__(self):
-        self.stream.seek(0, 2)
-        return self.stream.tell()
+        self._stream.seek(0, 2)
+        return self._stream.tell()

    @preserve
-    def read_raw(self, offset, size):
-        self.stream.seek(offset)
-        return self.stream.read(size)
+    def _read_raw(self, offset, size):
+        self._stream.seek(offset)
+        return self._stream.read(size)

-    def read_content(self, offset, size):
-        return self.read_raw(self.content_offset + offset, size)
+    def _read_content(self, offset, size):
+        return self._read_raw(self.content_offset + offset, size)
    
-    def read_secondary_header(self):
+    def _read_secondary_header(self):
        offset = self.hdr_len + (self.num_pieces * self.PIECE_SIZE)
-        bytes = self.read_raw(offset, self.sec_hdr_len)
+        bytes = self._read_raw(offset, self.sec_hdr_len)
        offset = int32(bytes[4:])
        while offset < len(bytes):
            blocktype = bytes[offset:offset+4]
@ -529,21 +507,21 @@ class LitFile(object):
        if not hasattr(self, 'content_offset'):
            raise LitError('Could not figure out the content offset')
    
-    def read_header_pieces(self):
+    def _read_header_pieces(self):
        src = self.header[self.hdr_len:]
        for i in xrange(self.num_pieces):
            piece = src[i * self.PIECE_SIZE:(i + 1) * self.PIECE_SIZE]
            if u32(piece[4:]) != 0 or u32(piece[12:]) != 0:
                raise LitError('Piece %s has 64bit value' % repr(piece))
            offset, size = u32(piece), int32(piece[8:])
-            piece = self.read_raw(offset, size)
+            piece = self._read_raw(offset, size)
            if i == 0:
                continue # Dont need this piece
            elif i == 1:
                if u32(piece[8:])  != self.entry_chunklen or \
                   u32(piece[12:]) != self.entry_unknown:
                    raise LitError('Secondary header does not match piece')
-                self.read_directory(piece)
+                self._read_directory(piece)
            elif i == 2:
                if u32(piece[8:])  != self.count_chunklen or \
                   u32(piece[12:]) != self.count_unknown:
@ -554,13 +532,12 @@ class LitFile(object):
            elif i == 4:
                self.piece4_guid = piece
                
-    def read_directory(self, piece):
+    def _read_directory(self, piece):
        if not piece.startswith('IFCM'):
            raise LitError('Header piece #1 is not main directory.')
        chunk_size, num_chunks = int32(piece[8:12]), int32(piece[24:28])
        if (32 + (num_chunks * chunk_size)) != len(piece):
-            raise LitError('IFCM header has incorrect length')
-        self.entries = {}
+            raise LitError('IFCM HEADER has incorrect length')
        for i in xrange(num_chunks):
            offset = 32 + (i * chunk_size)
            chunk = piece[offset:offset + chunk_size]
@ -594,17 +571,17 @@ class LitFile(object):
                entry = DirectoryEntry(name, section, offset, size)
                self.entries[name] = entry

-    def read_section_names(self):
+    def _read_section_names(self):
        if '::DataSpace/NameList' not in self.entries:
            raise LitError('Lit file does not have a valid NameList')
        raw = self.get_file('::DataSpace/NameList')
        if len(raw) < 4:
            raise LitError('Invalid Namelist section')
        pos = 4
-        num_sections = u16(raw[2:pos])
-        self.section_names = [""] * num_sections
-        self.section_data = [None] * num_sections
-        for section in xrange(num_sections):
+        self.num_sections = u16(raw[2:pos])
+        self.section_names = [""]*self.num_sections
+        self.section_data = [None]*self.num_sections
+        for section in xrange(self.num_sections):
            size = u16(raw[pos:pos+2])
            pos += 2
            size = size*2 + 2
@ -614,12 +591,11 @@ class LitFile(object):
                raw[pos:pos+size].decode('utf-16-le').rstrip('\000')
            pos += size

-    def read_manifest(self):
+    def _read_manifest(self):
        if '/manifest' not in self.entries:
            raise LitError('Lit file does not have a valid manifest')
        raw = self.get_file('/manifest')
        self.manifest = {}
-        self.paths = {self.opf_path: None}
        while raw:
            slen, raw = ord(raw[0]), raw[1:]
            if slen == 0: break
@ -658,9 +634,28 @@ class LitFile(object):
        for item in mlist:
            if item.path[0] == '/':
                item.path = os.path.basename(item.path)
-            self.paths[item.path] = item

-    def read_drm(self):
+    def _pretty_print(self, xml):
+        """Return `xml` re-serialized with indentation, prefixed by XML_DECL.
+
+        The markup is encoded to UTF-8, parsed with the class-level
+        XML_PARSER and written back out as ASCII with pretty printing
+        enabled.
+        """
+        source = cStringIO.StringIO(xml.encode('utf-8'))
+        tree = etree.parse(source, parser=self.XML_PARSER)
+        markup = etree.tostring(tree, encoding='ascii', pretty_print=True)
+        return XML_DECL + unicode(markup)
+                
+    def _read_meta(self):
+        """Decode the binary '/meta' entry and store the result in self.meta.
+
+        The decoded markup is prefixed with OPF_DECL.  If decoding raises
+        LitError and the raw data contains 'PENGUIN group', the data is
+        patched once and decoded again; any other LitError propagates.
+        """
+        path = 'content.opf'
+        raw = self.get_file('/meta')
+        try:
+            body = unicode(UnBinary(raw, path, self.manifest, OPF_MAP))
+        except LitError:
+            if 'PENGUIN group' not in raw:
+                raise
+            print "WARNING: attempting PENGUIN malformed OPF fix"
+            # Insert the four bytes in front of the first occurrence only.
+            patched = raw.replace(
+                'PENGUIN group', '\x00\x01\x18\x00PENGUIN group', 1)
+            body = unicode(UnBinary(patched, path, self.manifest, OPF_MAP))
+        self.meta = OPF_DECL + body
+
+    def _read_drm(self):
        self.drmlevel = 0
        if '/DRMStorage/Licenses/EUL' in self.entries:
            self.drmlevel = 5
@ -671,7 +666,7 @@ class LitFile(object):
        else:
            return
        if self.drmlevel < 5:
-            msdes.deskey(self.calculate_deskey(), msdes.DE1)
+            msdes.deskey(self._calculate_deskey(), msdes.DE1)
            bookkey = msdes.des(self.get_file('/DRMStorage/DRMSealed'))
            if bookkey[0] != '\000':
                raise LitError('Unable to decrypt title key!')
@ -679,7 +674,7 @@ class LitFile(object):
        else:
            raise DRMError("Cannot access DRM-protected book")

-    def calculate_deskey(self):
+    def _calculate_deskey(self):
        hashfiles = ['/meta', '/DRMStorage/DRMSource']
        if self.drmlevel == 3:
            hashfiles.append('/DRMStorage/DRMBookplate')
@ -703,18 +698,18 @@ class LitFile(object):
    def get_file(self, name):
        entry = self.entries[name]
        if entry.section == 0:
-            return self.read_content(entry.offset, entry.size)
+            return self._read_content(entry.offset, entry.size)
        section = self.get_section(entry.section)
        return section[entry.offset:entry.offset+entry.size]

    def get_section(self, section):
        data = self.section_data[section]
        if not data:
-            data = self.get_section_uncached(section)
+            data = self._get_section(section)
            self.section_data[section] = data
        return data

-    def get_section_uncached(self, section):
+    def _get_section(self, section):
        name = self.section_names[section]
        path = '::DataSpace/Storage/' + name
        transform = self.get_file(path + '/Transform/List')
@ -726,29 +721,29 @@ class LitFile(object):
                raise LitError("ControlData is too short")
            guid = msguid(transform)
            if guid == DESENCRYPT_GUID:
-                content = self.decrypt(content)
+                content = self._decrypt(content)
                control = control[csize:]
            elif guid == LZXCOMPRESS_GUID:
                reset_table = self.get_file(
                    '/'.join(('::DataSpace/Storage', name, 'Transform',
                              LZXCOMPRESS_GUID, 'InstanceData/ResetTable')))
-                content = self.decompress(content, control, reset_table)
+                content = self._decompress(content, control, reset_table)
                control = control[csize:]
            else:
                raise LitError("Unrecognized transform: %s." % repr(guid))
            transform = transform[16:]
        return content

-    def decrypt(self, content):
+    def _decrypt(self, content):
        length = len(content)
        extra = length & 0x7
        if extra > 0:
-            self.warn("content length not a multiple of block size")
+            self._warn("content length not a multiple of block size")
            content += "\0" * (8 - extra)
        msdes.deskey(self.bookkey, msdes.DE1)
        return msdes.des(content)

-    def decompress(self, content, control, reset_table):
+    def _decompress(self, content, control, reset_table):
        if len(control) < 32 or control[CONTROL_TAG:CONTROL_TAG+4] != "LZXC":
            raise LitError("Invalid ControlData tag value")
        if len(reset_table) < (RESET_INTERVAL + 8):
@ -789,7 +784,7 @@ class LitFile(object):
                        result.append(
                            lzx.decompress(content[base:size], window_bytes))
                    except lzx.LZXError:
-                        self.warn("LZX decompression error; skipping chunk")
+                        self._warn("LZX decompression error; skipping chunk")
                    bytes_remaining -= window_bytes
                    base = size
            accum += int32(reset_table[RESET_INTERVAL:])
@ -799,88 +794,55 @@ class LitFile(object):
            try:
                result.append(lzx.decompress(content[base:], bytes_remaining))
            except lzx.LZXError:
-                self.warn("LZX decompression error; skipping chunk")
+                self._warn("LZX decompression error; skipping chunk")
            bytes_remaining = 0
        if bytes_remaining > 0:
            raise LitError("Failed to completely decompress section")
        return ''.join(result)

-
-class LitReader(object):
-    def __init__(self, filename_or_stream):
-        self._litfile = LitFile(filename_or_stream)
-    
-    def namelist(self):
-        return self._litfile.paths.keys()
-
-    def exists(self, name):
-        return urlunquote(name) in self._litfile.paths
-    
-    def read_xml(self, name):
-        entry = self._litfile.paths[urlunquote(name)] if name else None
-        if entry is None:
-            content = self._read_meta()
-        elif 'spine' in entry.state:
-            internal = '/'.join(('/data', entry.internal, 'content'))
-            raw = self._litfile.get_file(internal)
-            unbin = UnBinary(raw, name, self._litfile.manifest, HTML_MAP)
-            content = unbin.tree
-        else:
-            raise LitError('Requested non-XML content as XML')
-        return content
-    
-    def read(self, name, pretty_print=False):
-        entry = self._litfile.paths[urlunquote(name)] if name else None
-        if entry is None:
-            meta = self._read_meta()
-            content = OPF_DECL + etree.tostring(
-                meta, encoding='ascii', pretty_print=pretty_print)
-        elif 'spine' in entry.state:
-            internal = '/'.join(('/data', entry.internal, 'content'))
-            raw = self._litfile.get_file(internal)
-            unbin = UnBinary(raw, name, self._litfile.manifest, HTML_MAP)
-            content = HTML_DECL
+    def get_entry_content(self, entry, pretty_print=False):
+        if 'spine' in entry.state:
+            name = '/'.join(('/data', entry.internal, 'content'))
+            path = entry.path
+            raw = self.get_file(name)
+            decl, map = (OPF_DECL, OPF_MAP) \
+                if name == '/meta' else (HTML_DECL, HTML_MAP)
+            content = decl + unicode(UnBinary(raw, path, self.manifest, map))
            if pretty_print:
-                content += etree.tostring(unbin.tree,
-                    encoding='ascii', pretty_print=True)
-            else:
-                content += str(unbin)
+                content = self._pretty_print(content)
+            content = content.encode('utf-8')
        else:
-            internal = '/'.join(('/data', entry.internal))
-            content = self._litfile.get_file(internal)
+            name = '/'.join(('/data', entry.internal))
+            content = self.get_file(name)
        return content
-    
-    def meta():
-        def fget(self):
-            return self.read(self._litfile.opf_path)
-        return property(fget=fget)
-    meta = meta()
-    
+                    
+    def extract_content(self, output_dir=None, pretty_print=False):
+        """Write the OPF and every manifest entry below `output_dir`.
+
+        `output_dir` defaults to the current working directory at the time
+        of the call.  The OPF file is named after the underlying stream's
+        `name` (extension replaced by '.opf'), or 'content.opf' when the
+        stream has no `name`.  With `pretty_print` true, the OPF and the
+        markup of spine entries are re-indented before being written.
+        Missing directories are created as needed.
+        """
+        if output_dir is None:
+            # Resolve per call: a default of os.getcwdu() in the signature
+            # would be evaluated only once, when the class is defined.
+            output_dir = os.getcwdu()
+        output_dir = os.path.abspath(output_dir)
+        try:
+            opf_path = os.path.splitext(
+                os.path.basename(self._stream.name))[0] + '.opf'
+        except AttributeError:
+            opf_path = 'content.opf'
+        opf_path = os.path.join(output_dir, opf_path)
+        self._ensure_dir(opf_path)
+        with open(opf_path, 'wb') as f:
+            xml = self.meta
+            if pretty_print:
+                xml = self._pretty_print(xml)
+            f.write(xml.encode('utf-8'))
+        for entry in self.manifest.values():
+            path = os.path.join(output_dir, entry.path)
+            self._ensure_dir(path)
+            with open(path, 'wb') as f:
+                f.write(self.get_entry_content(entry, pretty_print))
+
    def _ensure_dir(self, path):
        dir = os.path.dirname(path)
        if not os.path.isdir(dir):
            os.makedirs(dir)
-    
-    def extract_content(self, output_dir=os.getcwdu(), pretty_print=False):
-        for name in self.namelist():
-            path = os.path.join(output_dir, name)
-            self._ensure_dir(path)
-            with open(path, 'wb') as f:
-                f.write(self.read(name, pretty_print=pretty_print))
-    
-    def _read_meta(self):
-        path = 'content.opf'
-        raw = self._litfile.get_file('/meta')
-        try:
-            unbin = UnBinary(raw, path, self._litfile.manifest, OPF_MAP)
-        except LitError:
-            if 'PENGUIN group' not in raw: raise
-            print "WARNING: attempting PENGUIN malformed OPF fix"
-            raw = raw.replace(
-                'PENGUIN group', '\x00\x01\x18\x00PENGUIN group', 1)
-            unbin = UnBinary(raw, path, self._litfile.manifest, OPF_MAP)
-        return unbin.tree

+    def _warn(self, msg):
+        """Print `msg` to standard output, prefixed with 'WARNING: '."""
+        print "WARNING: %s" % (msg,)

 def option_parser():
    from calibre.utils.config import OptionParser
@ -890,8 +852,7 @@ def option_parser():
        help=_('Output directory. Defaults to current directory.'))
    parser.add_option(
        '-p', '--pretty-print', default=False, action='store_true',
-        help=_('Legibly format extracted markup.' \
-                   ' May modify meaningful whitespace.'))
+        help=_('Legibly format extracted markup. May modify meaningful whitespace.'))
    parser.add_option(
        '--verbose', default=False, action='store_true',
        help=_('Useful for debugging.'))
--- a/src/calibre/ebooks/lit/writer.py
+++ b/src/calibre/ebooks/lit/writer.py
@ -27,11 +27,16 @@ from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES, OEB_CSS_MIME, \
    CSS_MIME, OPF_MIME, XML_NS, XML
 from calibre.ebooks.oeb.base import namespace, barename, prefixname, \
    urlnormalize, xpath
-from calibre.ebooks.oeb.base import FauxLogger, OEBBook
+from calibre.ebooks.oeb.base import Logger, OEBBook
+from calibre.ebooks.oeb.profile import Context
 from calibre.ebooks.oeb.stylizer import Stylizer
+from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
+from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
+from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer
+from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder
+from calibre.ebooks.oeb.transforms.manglecase import CaseMangler
 from calibre.ebooks.lit.lzx import Compressor
 import calibre
-from calibre import LoggingInterface
 from calibre import plugins
 msdes, msdeserror = plugins['msdes']
 import calibre.ebooks.lit.mssha1 as mssha1
@ -138,17 +143,16 @@ def warn(x):
 class ReBinary(object):
    NSRMAP = {'': None, XML_NS: 'xml'}
    
-    def __init__(self, root, path, oeb, map=HTML_MAP, logger=FauxLogger()):
-        self.path = path
-        self.logger = logger
-        self.dir = os.path.dirname(path)
+    def __init__(self, root, item, oeb, map=HTML_MAP):
+        # `item` is the manifest item whose tree is being encoded; callers
+        # pass None for the OPF metadata, which has no manifest item.  The
+        # parameter must be called `item` because the body reads `item`
+        # and `item.href` directly.
+        self.item = item
+        self.logger = oeb.logger
        self.manifest = oeb.manifest
        self.tags, self.tattrs = map
        self.buf = StringIO()
        self.anchors = []
        self.page_breaks = []
        self.is_html  = is_html = map is HTML_MAP
-        self.stylizer = Stylizer(root, path, oeb) if is_html else None
+        self.stylizer = Stylizer(root, item.href, oeb) if is_html else None
        self.tree_to_binary(root)
        self.content = self.buf.getvalue()
        self.ahc = self.build_ahc() if is_html else None
@ -205,6 +209,8 @@ class ReBinary(object):
            if attr in ('href', 'src'):
                value = urlnormalize(value)
                path, frag = urldefrag(value)
+                if self.item:
+                    path = self.item.abshref(path)
                prefix = unichr(3)
                if path in self.manifest.hrefs:
                    prefix = unichr(2)
@ -217,7 +223,7 @@ class ReBinary(object):
            elif attr.startswith('ms--'):
                attr = '%' + attr[4:]
            elif tag == 'link' and attr == 'type' and value in OEB_STYLES:
-                value = OEB_CSS_MIME
+                value = CSS_MIME
            if attr in tattrs:
                self.write(tattrs[attr])
            else:
@ -270,7 +276,7 @@ class ReBinary(object):
    def build_ahc(self):
        if len(self.anchors) > 6:
            self.logger.log_warn("More than six anchors in file %r. " \
-                "Some links may not work properly." % self.path)
+                "Some links may not work properly." % self.item.href)
        data = StringIO()
        data.write(unichr(len(self.anchors)).encode('utf-8'))
        for anchor, offset in self.anchors:
@ -294,10 +300,9 @@ def preserve(function):
    return wrapper
    
 class LitWriter(object):
-    def __init__(self, oeb, logger=FauxLogger()):
-        self._oeb = oeb
-        self._logger = logger
-        self._litize_oeb()
+    def __init__(self):
+        # Wow, no options
+        pass

    def _litize_oeb(self):
        oeb = self._oeb
@ -306,32 +311,27 @@ class LitWriter(object):
        if oeb.metadata.cover:
            id = str(oeb.metadata.cover[0])
            cover = oeb.manifest[id]
-        elif MS_COVER_TYPE in oeb.guide:
-            href = oeb.guide[MS_COVER_TYPE].href
-            cover = oeb.manifest.hrefs[href]
-        elif 'cover' in oeb.guide:
-            href = oeb.guide['cover'].href
-            cover = oeb.manifest.hrefs[href]
-        else:
-            html = oeb.spine[0].data
-            imgs = xpath(html, '//img[position()=1]')
-            href = imgs[0].get('src') if imgs else None
-            cover = oeb.manifest.hrefs[href] if href else None
-        if cover:
-            if not oeb.metadata.cover:
-                oeb.metadata.add('cover', cover.id)
            for type, title in ALL_MS_COVER_TYPES:
                if type not in oeb.guide:
                    oeb.guide.add(type, title, cover.href)
        else:
-            self._logger.log_warn('No suitable cover image found.')
+            self._logger.warn('No suitable cover image found.')

-    def dump(self, stream):
+    def dump(self, oeb, path):
+        """Write `oeb` as a LIT file to `path`.
+
+        `path` may be an object with a `write` attribute, which is used
+        as the output stream directly, or a filesystem path, which is
+        opened in 'w+b' mode and closed when writing finishes.
+        """
+        if not hasattr(path, 'write'):
+            with open(path, 'w+b') as stream:
+                return self._dump_stream(oeb, stream)
+        return self._dump_stream(oeb, path)
+        
+    def _dump_stream(self, oeb, stream):
+        self._oeb = oeb
+        self._logger = oeb.logger
        self._stream = stream
        self._sections = [StringIO() for i in xrange(4)]
        self._directory = []
        self._meta = None
-        self._dump()
+        self._litize_oeb()
+        self._write_content()
        
    def _write(self, *data):
        for datum in data:
@ -345,7 +345,7 @@ class LitWriter(object):
    def _tell(self):
        return self._stream.tell()
        
-    def _dump(self):
+    def _write_content(self):
        # Build content sections
        self._build_sections()

@ -474,8 +474,7 @@ class LitWriter(object):
            secnum = 0
            if not isinstance(data, basestring):
                self._add_folder(name)
-                rebin = ReBinary(data, item.href, self._oeb, map=HTML_MAP,
-                                 logger=self._logger)
+                rebin = ReBinary(data, item, self._oeb, map=HTML_MAP)
                self._add_file(name + '/ahc', rebin.ahc, 0)
                self._add_file(name + '/aht', rebin.aht, 0)
                item.page_breaks = rebin.page_breaks
@ -554,8 +553,7 @@ class LitWriter(object):
        meta.attrib['ms--minimum_level'] = '0'
        meta.attrib['ms--attr5'] = '1'
        meta.attrib['ms--guid'] = '{%s}' % str(uuid.uuid4()).upper()
-        rebin = ReBinary(meta, 'content.opf', self._oeb, map=OPF_MAP,
-                         logger=self._logger)
+        rebin = ReBinary(meta, None, self._oeb, map=OPF_MAP)
        meta = rebin.content
        self._meta = meta
        self._add_file('/meta', meta)
@ -719,19 +717,31 @@ def option_parser():
        help=_('Useful for debugging.'))
    return parser

-def oeb2lit(opts, opfpath):
-    logger = LoggingInterface(logging.getLogger('oeb2lit'))
+def oeb2lit(opts, inpath):
+    logger = Logger(logging.getLogger('oeb2lit'))
    logger.setup_cli_handler(opts.verbose)
-    litpath = opts.output
-    if litpath is None:
-        litpath = os.path.basename(opfpath)
-        litpath = os.path.splitext(litpath)[0] + '.lit'
-    litpath = os.path.abspath(litpath)
-    lit = LitWriter(OEBBook(opfpath, logger=logger), logger=logger)
-    with open(litpath, 'wb') as f:
-        lit.dump(f)
-    run_plugins_on_postprocess(litpath, 'lit')
-    logger.log_info(_('Output written to ')+litpath)
+    outpath = opts.output
+    if outpath is None:
+        outpath = os.path.basename(inpath)
+        outpath = os.path.splitext(outpath)[0] + '.lit'
+    outpath = os.path.abspath(outpath)
+    context = Context('Firefox', 'MSReader')
+    oeb = OEBBook(inpath, logger=logger)
+    tocadder = HTMLTOCAdder()
+    tocadder.transform(oeb, context)
+    mangler = CaseMangler()
+    mangler.transform(oeb, context)
+    fbase = context.dest.fbase
+    flattener = CSSFlattener(fbase=fbase, unfloat=True, untable=True)
+    flattener.transform(oeb, context)
+    rasterizer = SVGRasterizer()
+    rasterizer.transform(oeb, context)
+    trimmer = ManifestTrimmer()
+    trimmer.transform(oeb, context)
+    lit = LitWriter()
+    lit.dump(oeb, outpath)
+    run_plugins_on_postprocess(outpath, 'lit')
+    logger.info(_('Output written to ') + outpath)
    

 def main(argv=sys.argv):
@ -740,8 +750,8 @@ def main(argv=sys.argv):
    if len(args) != 1:
        parser.print_help()
        return 1
-    opfpath = args[0]
-    oeb2lit(opts, opfpath)
+    inpath = args[0]
+    oeb2lit(opts, inpath)
    return 0
    
 if __name__ == '__main__':
--- a/src/calibre/ebooks/lrf/comic/convert_from.py
+++ b/src/calibre/ebooks/lrf/comic/convert_from.py
@ -425,7 +425,7 @@ def do_convert(path_to_file, opts, notification=lambda m, p: p, output_format='l
    thumbnail = None
    if not pages:
        raise ValueError('Could not find any pages in the comic: %s'%source)
-    if not opts.no_process:
+    if not getattr(opts, 'no_process', False):
        pages, failures, tdir2 = process_pages(pages, opts, notification)
        if not pages:
            raise ValueError('Could not find any valid pages in the comic: %s'%source)
@ -443,7 +443,7 @@ def do_convert(path_to_file, opts, notification=lambda m, p: p, output_format='l
    if output_format == 'pdf':
        create_pdf(pages, opts.profile, opts, thumbnail=thumbnail)
    shutil.rmtree(tdir)
-    if not opts.no_process:
+    if not getattr(opts, 'no_process', False):
        shutil.rmtree(tdir2)


@ -457,7 +457,7 @@ def main(args=sys.argv, notification=None, output_format='lrf'):
    
    if not callable(notification):
        pb = ProgressBar(terminal_controller, _('Rendering comic pages...'), 
-                         no_progress_bar=opts.no_progress_bar)
+                         no_progress_bar=opts.no_progress_bar or getattr(opts, 'no_process', False))
        notification = pb.update
    
    source = os.path.abspath(args[1])
--- a/src/calibre/ebooks/lrf/html/convert_from.py
+++ b/src/calibre/ebooks/lrf/html/convert_from.py
@ -109,6 +109,10 @@ class HTMLConverter(object, LoggingInterface):
                        # Remove self closing script tags as they also mess up BeautifulSoup
                        (re.compile(r'(?i)<script[^<>]+?/>'), lambda match: ''),
                        
+                        # BeautifulSoup treats self closing <div> tags as open <div> tags
+                        (re.compile(r'(?i)<\s*div([^>]*)/\s*>'), 
+                         lambda match: '<div%s></div>'%match.group(1))
+                        
                        ]
    # Fix Baen markup
    BAEN = [ 
@ -122,7 +126,7 @@ class HTMLConverter(object, LoggingInterface):
    # Fix pdftohtml markup
    PDFTOHTML  = [
                  # Remove <hr> tags
-                  (re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<span style="page-break-after:always"> </span>'),
+                  (re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<br />'),
                  # Remove page numbers
                  (re.compile(r'\d+<br>', re.IGNORECASE), lambda match: ''),
                  # Remove <br> and replace <br><br> with <p>
@ -576,20 +580,20 @@ class HTMLConverter(object, LoggingInterface):
        if (css.has_key('display') and css['display'].lower() == 'none') or \
           (css.has_key('visibility') and css['visibility'].lower() == 'hidden'):
            return ''
-        text = u''
+        text, alt_text = u'', u''
        for c in tag.contents:
            if limit != None and len(text) > limit:
                break
            if isinstance(c, HTMLConverter.IGNORED_TAGS):
-                return u''
+                continue
            if isinstance(c, NavigableString):
                text += unicode(c)                
            elif isinstance(c, Tag):
                if c.name.lower() == 'img' and c.has_key('alt'):
-                    text += c['alt']
-                    return text
+                    alt_text += c['alt']
+                    continue
                text += self.get_text(c)
-        return text
+        return text if text.strip() else alt_text
    
    def process_links(self):
        def add_toc_entry(text, target):
--- a/src/calibre/ebooks/lrf/objects.py
+++ b/src/calibre/ebooks/lrf/objects.py
@ -700,7 +700,7 @@ class Text(LRFStream):
    def add_text(self, text):
        s = unicode(text, "utf-16-le")
        if s:
-            s = s.translate(self.text_map)            
+            s = s.translate(self.text_map)
            self.content.append(self.entity_pattern.sub(entity_to_unicode, s))
    
    def end_container(self, tag, stream):
@ -799,18 +799,39 @@ class Text(LRFStream):
        length = len(self.stream)
        style = self.style.as_dict()
        current_style = style.copy()
+        text_tags = set(list(TextAttr.tag_map.keys()) + \
+                        list(Text.text_tags.keys()) + \
+                        list(ruby_tags.keys()))
+        text_tags -= set([0xf500+i for i in range(10)])
+        text_tags.add(0xf5cc)
        
        while stream.tell() < length:
        
-            # Is there some text beofre a tag?
-            pos = self.stream.find('\xf5', stream.tell()) - 1
-            if pos > 0:
-                self.add_text(self.stream[stream.tell():pos])
-                stream.seek(pos)
-            elif pos == -2: # No tags in this stream
+            # Is there some text before a tag?
+            def find_first_tag(start):
+                pos = self.stream.find('\xf5', start)
+                if pos == -1:
+                    return -1
+                try:
+                    stream.seek(pos-1)
+                    _t = Tag(stream)
+                    if _t.id in text_tags:
+                        return pos-1
+                    return find_first_tag(pos+1)
+                    
+                    
+                except:
+                    return find_first_tag(pos+1)
+                    
+            start_pos = stream.tell()        
+            tag_pos = find_first_tag(start_pos)
+            if tag_pos >= start_pos:
+                if tag_pos > start_pos:
+                    self.add_text(self.stream[start_pos:tag_pos])
+                stream.seek(tag_pos)
+            else: # No tags in this stream
                self.add_text(self.stream)
                stream.seek(0, 2)
-                print repr(self.stream)
                break
            
            tag = Tag(stream)
@ -1166,7 +1187,8 @@ class TOCObject(LRFStream):
            refpage = struct.unpack("<I", stream.read(4))[0]
            refobj  = struct.unpack("<I", stream.read(4))[0]
            cnt = struct.unpack("<H", stream.read(2))[0]
-            label = unicode(stream.read(cnt), "utf_16")
+            raw = stream.read(cnt)
+            label = raw.decode('utf_16_le')
            self._contents.append(TocLabel(refpage, refobj, label))
            c -= 1
            
--- a/src/calibre/ebooks/metadata/init.py
+++ b/src/calibre/ebooks/metadata/init.py
@ -249,7 +249,7 @@ class MetaInformation(object):
        ans = u''
        ans += u'Title    : ' + unicode(self.title) + u'\n'
        if self.authors:
-            ans += u'Author   : ' + (', '.join(self.authors) if self.authors is not None else u'None')
+            ans += u'Author   : ' + (' & '.join(self.authors) if self.authors is not None else _('Unknown'))
            ans += ((' [' + self.author_sort + ']') if self.author_sort else '') + u'\n'
        if self.publisher:
            ans += u'Publisher: '+ unicode(self.publisher) + u'\n'
--- a/src/calibre/ebooks/mobi/from_any.py
+++ b/src/calibre/ebooks/mobi/from_any.py
@ -0,0 +1,63 @@
+'''
+Convert any ebook format to Mobipocket.
+'''
+
+from __future__ import with_statement
+
+__license__   = 'GPL v3'
+__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net ' \
+    'and Marshall T. Vandegrift <llasram@gmail.com>'
+__docformat__ = 'restructuredtext en'
+
+import sys, os, glob, logging
+
+from calibre.ebooks.epub.from_any import any2epub, formats, USAGE
+from calibre.ebooks.epub import config as common_config
+from calibre.ptempfile import TemporaryDirectory
+from calibre.ebooks.mobi.writer import oeb2mobi, add_mobi_options
+
+def config(defaults=None):
+    return common_config(defaults=defaults, name='mobi')
+
+def option_parser(usage=USAGE):
+    usage = usage % ('Mobipocket', formats())
+    parser = config().option_parser(usage=usage)
+    add_mobi_options(parser)
+    return parser
+
+def any2mobi(opts, path):
+    '''
+    Convert the ebook at `path` to a Mobipocket file.
+
+    The input is first run through any2epub, which unpacks an OEB tree into
+    a temporary directory; oeb2mobi then writes the result to opts.output.
+    When opts.output is None it defaults to the input's base name with a
+    .mobi extension, resolved against the current directory.
+
+    Raises ValueError if `path` has no file extension.
+    '''
+    if not os.path.splitext(path)[1]:
+        raise ValueError('Unknown file type: '+path)
+    
+    if opts.output is None:
+        opts.output = os.path.splitext(os.path.basename(path))[0]+'.mobi'
+    
+    opts.output = os.path.abspath(opts.output)
+    orig_output = opts.output
+    
+    with TemporaryDirectory('_any2mobi') as tdir:
+        oebdir = os.path.join(tdir, 'oeb')
+        os.mkdir(oebdir)
+        # opts.output is only pointed at a scratch path for the any2epub step
+        opts.output = os.path.join(tdir, 'dummy.epub')
+        opts.profile = 'None'
+        try:
+            any2epub(opts, path, create_epub=False, oeb_cover=True, extract_to=oebdir)
+        finally:
+            # Restore the real destination even when the conversion fails,
+            # so the caller's options object is never left at the temp path
+            opts.output = orig_output
+        opf = glob.glob(os.path.join(oebdir, '*.opf'))[0]
+        logging.getLogger('html2epub').info(_('Creating Mobipocket file from EPUB...'))
+        oeb2mobi(opts, opf)
+    
+
+def main(args=sys.argv):
+    parser = option_parser()
+    opts, args = parser.parse_args(args)
+    if len(args) < 2:
+        parser.print_help()
+        print 'No input file specified.'
+        return 1
+    any2mobi(opts, args[1])
+    return 0
+
+if __name__ == '__main__':
+    sys.exit(main())
--- a/src/calibre/ebooks/mobi/mobiml.py
+++ b/src/calibre/ebooks/mobi/mobiml.py
@ -114,10 +114,10 @@ class MobiMLizer(object):
    def mobimlize_measure(self, ptsize):
        if isinstance(ptsize, basestring):
            return ptsize
-        fbase = self.profile.fbase
-        if ptsize < fbase:
+        embase = self.profile.fbase
+        if round(ptsize) < embase:
            return "%dpt" % int(round(ptsize))
-        return "%dem" % int(round(ptsize / fbase))
+        return "%dem" % int(round(ptsize / embase))

    def preize_text(self, text):
        text = unicode(text).replace(u' ', u'\xa0')
@ -171,8 +171,7 @@ class MobiMLizer(object):
                    para = etree.SubElement(para, XHTML('blockquote'))
                    emleft -= 1
            else:
-                ptag = 'p' #tag if tag in HEADER_TAGS else 'p'
-                para = wrapper = etree.SubElement(parent, XHTML(ptag))
+                para = wrapper = etree.SubElement(parent, XHTML('p'))
            bstate.inline = bstate.para = para
            vspace = bstate.vpadding + bstate.vmargin
            bstate.vpadding = bstate.vmargin = 0
@ -213,11 +212,11 @@ class MobiMLizer(object):
                inline = etree.SubElement(inline, XHTML('sup'))
            elif valign == 'sub':
                inline = etree.SubElement(inline, XHTML('sub'))
-            if istate.family == 'monospace':
-                inline = etree.SubElement(inline, XHTML('tt'))
-            if fsize != 3:
+            elif fsize != 3:
                inline = etree.SubElement(inline, XHTML('font'),
                                          size=str(fsize))
+            if istate.family == 'monospace':
+                inline = etree.SubElement(inline, XHTML('tt'))
            if istate.italic:
                inline = etree.SubElement(inline, XHTML('i'))
            if istate.bold:
@ -241,7 +240,8 @@ class MobiMLizer(object):
           or namespace(elem.tag) != XHTML_NS:
            return
        style = stylizer.style(elem)
-        if style['display'] == 'none' \
+        # Pretend <mbp:frame-set/> does not exist: page head/foot content is simply dropped
+        if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
           or style['visibility'] == 'hidden':
            return
        tag = barename(elem.tag)
@ -303,7 +303,7 @@ class MobiMLizer(object):
        else:
            istate.family = 'serif'
        valign = style['vertical-align']
-        if valign in ('super', 'sup') or asfloat(valign) > 0:
+        if valign in ('super', 'text-top') or asfloat(valign) > 0:
            istate.valign = 'super'
        elif valign == 'sub'  or asfloat(valign) < 0:
            istate.valign = 'sub'
--- a/src/calibre/ebooks/mobi/palmdoc.py
+++ b/src/calibre/ebooks/mobi/palmdoc.py
@ -69,15 +69,15 @@ def compress_doc(data):
                out.write(pack('>B', onch ^ 0x80))
                i += 1
                continue
-        if och == 0 or (och >= 9 and och < 0x80):
+        if och == 0 or (och > 8 and och < 0x80):
            out.write(ch)
        else:
            j = i
            binseq = [ch]
-            while j < ldata:
+            while j < ldata and len(binseq) < 8:
                ch = data[j]
                och = ord(ch)
-                if och < 1 or (och > 8 and och < 0x80):
+                if och == 0 or (och > 8 and och < 0x80):
                    break
                binseq.append(ch)
                j += 1
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@ -33,8 +33,7 @@ class EXTHHeader(object):
        self.length, self.num_items = struct.unpack('>LL', raw[4:12])
        raw = raw[12:]
        pos = 0
-        
-        self.mi = MetaInformation('Unknown', ['Unknown'])
+        self.mi = MetaInformation(_('Unknown'), [_('Unknown')])
        self.has_fake_cover = True
        
        for i in range(self.num_items):
@ -49,14 +48,24 @@ class EXTHHeader(object):
                self.cover_offset, = struct.unpack('>L', content)
            elif id == 202:
                self.thumbnail_offset, = struct.unpack('>L', content)
+            #else:
+            #    print 'unknown record', id, repr(content)
        title = re.search(r'\0+([^\0]+)\0+', raw[pos:])
        if title:
-            self.mi.title = title.group(1).decode(codec, 'ignore')
+            title = title.group(1).decode(codec, 'replace')
+            if len(title) > 2:
+                self.mi.title = title
+            else:
+                title = re.search(r'\0+([^\0]+)\0+', ''.join(reversed(raw[pos:])))
+                if title:
+                    self.mi.title = ''.join(reversed(title.group(1).decode(codec, 'replace')))
            
                
    def process_metadata(self, id, content, codec):
        if id == 100:
-            self.mi.authors   = [content.decode(codec, 'ignore').strip()]
+            if self.mi.authors == [_('Unknown')]:
+                self.mi.authors = []
+            self.mi.authors.append(content.decode(codec, 'ignore').strip())
        elif id == 101:
            self.mi.publisher = content.decode(codec, 'ignore').strip()
        elif id == 103:
@ -67,7 +76,8 @@ class EXTHHeader(object):
            if not self.mi.tags:
                self.mi.tags = []
            self.mi.tags.append(content.decode(codec, 'ignore'))
-         
+        #else:
+        #    print 'unhandled metadata record', id, repr(content), codec 
            

 class BookHeader(object):
@ -466,6 +476,10 @@ def get_metadata(stream):
            cover =  os.path.join(tdir, mi.cover)
            if os.access(cover, os.R_OK):
                mi.cover_data = ('JPEG', open(os.path.join(tdir, mi.cover), 'rb').read())
+        else:
+            path = os.path.join(tdir, 'images', '00001.jpg')
+            if os.access(path, os.R_OK):
+                mi.cover_data = ('JPEG', open(path, 'rb').read())
    return mi
        
 def option_parser():
--- a/src/calibre/ebooks/mobi/writer.py
+++ b/src/calibre/ebooks/mobi/writer.py
@ -17,26 +17,30 @@ import re
 from itertools import izip, count
 from collections import defaultdict
 from urlparse import urldefrag
+import logging
 from lxml import etree
 from PIL import Image
 from calibre.ebooks.oeb.base import XML_NS, XHTML, XHTML_NS, OEB_DOCS, \
    OEB_RASTER_IMAGES
 from calibre.ebooks.oeb.base import xpath, barename, namespace, prefixname
-from calibre.ebooks.oeb.base import FauxLogger, OEBBook
+from calibre.ebooks.oeb.base import Logger, OEBBook
 from calibre.ebooks.oeb.profile import Context
 from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
 from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
 from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer
 from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder
+from calibre.ebooks.oeb.transforms.manglecase import CaseMangler
 from calibre.ebooks.mobi.palmdoc import compress_doc
 from calibre.ebooks.mobi.langcodes import iana2mobi
 from calibre.ebooks.mobi.mobiml import MBP_NS, MBP, MobiMLizer
+from calibre.customize.ui import run_plugins_on_postprocess
+from calibre.utils.config import OptionParser
+from optparse import OptionGroup

 # TODO:
 # - Allow override CSS (?)
 # - Generate index records
-# - Generate in-content ToC
-# - Command line options, etc.
+# - Optionally rasterize tables

 EXTH_CODES = {
    'creator': 100,
@ -59,7 +63,8 @@ UNCOMPRESSED = 1
 PALMDOC = 2
 HUFFDIC = 17480

-MAX_IMAGE_SIZE = 63 * 1024
+PALM_MAX_IMAGE_SIZE = 63 * 1024
+OTHER_MAX_IMAGE_SIZE = 10 * 1024 * 1024
 MAX_THUMB_SIZE = 16 * 1024
 MAX_THUMB_DIMEN = (180, 240)

@ -88,7 +93,6 @@ class Serializer(object):
    NSRMAP = {'': None, XML_NS: 'xml', XHTML_NS: '', MBP_NS: 'mbp'}
    
    def __init__(self, oeb, images):
-        oeb.logger.info('Serializing markup content...')
        self.oeb = oeb
        self.images = images
        self.id_offsets = {}
@ -117,10 +121,16 @@ class Serializer(object):
            path, frag = urldefrag(ref.href)
            if hrefs[path].media_type not in OEB_DOCS:
                continue
-            buffer.write('<reference title="%s" type="%s" '
-                         % (ref.title, ref.type))
+            buffer.write('<reference type="')
+            self.serialize_text(ref.type, quot=True)
+            buffer.write('" ')
+            if ref.title is not None:
+                buffer.write('title="')
+                self.serialize_text(ref.title, quot=True)
+                buffer.write('" ')
            self.serialize_href(ref.href)
-            buffer.write('/>')
+            # Space required or won't work, I kid you not
+            buffer.write(' />')
        buffer.write('</guide>')

    def serialize_href(self, href, base=None):
@ -144,6 +154,12 @@ class Serializer(object):
    def serialize_body(self):
        buffer = self.buffer
        buffer.write('<body>')
+        # CybookG3 'Start Reading' link
+        if 'text' in self.oeb.guide:
+            href = self.oeb.guide['text'].href
+            buffer.write('<a ')
+            self.serialize_href(href)
+            buffer.write(' />')
        spine = [item for item in self.oeb.spine if item.linear]
        spine.extend([item for item in self.oeb.spine if not item.linear])
        for item in spine:
@ -185,10 +201,12 @@ class Serializer(object):
                if attr == 'href':
                    if self.serialize_href(val, item):
                        continue
-                elif attr == 'src' and val in hrefs:
-                    index = self.images[val]
-                    buffer.write('recindex="%05d"' % index)
-                    continue
+                elif attr == 'src':
+                    href = item.abshref(val)
+                    if href in hrefs:
+                        index = self.images[href]
+                        buffer.write('recindex="%05d"' % index)
+                        continue
                buffer.write(attr)
                buffer.write('="')
                self.serialize_text(val, quot=True)
@ -223,9 +241,11 @@ class Serializer(object):

    
 class MobiWriter(object):
-    def __init__(self, compression=None, logger=FauxLogger()):
+    COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')
+    
+    def __init__(self, compression=None, imagemax=None):
        self._compression = compression or UNCOMPRESSED
-        self._logger = logger
+        self._imagemax = imagemax or OTHER_MAX_IMAGE_SIZE

    def dump(self, oeb, path):
        if hasattr(path, 'write'):
@ -293,6 +313,7 @@ class MobiWriter(object):
        return data, overlap
                
    def _generate_text(self):
+        self._oeb.logger.info('Serializing markup content...')
        serializer = Serializer(self._oeb, self._images)
        breaks = serializer.breaks
        text = serializer.text
@ -300,6 +321,8 @@ class MobiWriter(object):
        text = StringIO(text)
        nrecords = 0
        offset = 0
+        if self._compression != UNCOMPRESSED:
+            self._oeb.logger.info('Compressing markup content...')
        data, overlap = self._read_text_record(text)
        while len(data) > 0:
            if self._compression == PALMDOC:
@ -335,7 +358,9 @@ class MobiWriter(object):
        format = image.format
        changed = False
        if image.format not in ('JPEG', 'GIF'):
-            format = 'GIF'
+            width, height = image.size
+            area = width * height
+            format = 'GIF' if area <= 40000 else 'JPEG'
            changed = True
        if dimen is not None:
            image.thumbnail(dimen, Image.ANTIALIAS)
@ -368,13 +393,14 @@ class MobiWriter(object):
        return data
        
    def _generate_images(self):
+        '''Append one rescaled image record per entry in self._images, in index order.'''
+        # Routine progress message, so it is logged at info level
+        self._oeb.logger.info('Serializing images...')
        images = [(index, href) for href, index in self._images.items()]
        images.sort()
        metadata = self._oeb.metadata
        coverid = metadata.cover[0] if metadata.cover else None
        for _, href in images:
            item = self._oeb.manifest.hrefs[href]
-            data = self._rescale_image(item.data, MAX_IMAGE_SIZE)
+            data = self._rescale_image(item.data, self._imagemax)
            self._records.append(data)
    
    def _generate_record0(self):
@ -418,7 +444,8 @@ class MobiWriter(object):
            if term not in EXTH_CODES: continue
            code = EXTH_CODES[term]
            for item in oeb.metadata[term]:
-                data = unicode(item).encode('utf-8')
+                data = self.COLLAPSE_RE.sub(' ', unicode(item))
+                data = data.encode('utf-8')
                exth.write(pack('>II', code, len(data) + 8))
                exth.write(data)
                nrecs += 1
@ -467,29 +494,90 @@ class MobiWriter(object):
            self._write(record)


-def main(argv=sys.argv):
-    from calibre.ebooks.oeb.base import DirWriter
-    inpath, outpath = argv[1:]
-    context = Context('Firefox', 'MobiDesktop')
-    oeb = OEBBook(inpath)
-    #writer = MobiWriter(compression=PALMDOC)
-    writer = MobiWriter(compression=UNCOMPRESSED)
-    #writer = DirWriter()
+def add_mobi_options(parser):
+    '''
+    Register the 'Mobipocket' and 'Profiles' option groups on `parser`.
+    '''
+    profile_names = ', '.join(sorted(Context.PROFILES.keys()))
+    
+    # Options controlling how the Mobipocket container itself is written
+    mobi_group = OptionGroup(parser, _('Mobipocket'),
+        _('Mobipocket-specific options.'))
+    mobi_group.add_option(
+        '-c', '--compress', default=False, action='store_true',
+        help=_('Compress file text using PalmDOC compression.'))
+    mobi_group.add_option(
+        '-r', '--rescale-images', default=False, action='store_true',
+        help=_('Modify images to meet Palm device size limitations.'))
+    parser.add_option_group(mobi_group)
+    
+    # Source/destination renderer profiles; the help text lists every
+    # profile name known to Context
+    profile_help = _('Device renderer profiles. '
+        'Affects conversion of default font sizes and rasterization '
+        'resolution.  Valid profiles are: %s.') % profile_names
+    profile_group = OptionGroup(parser, _('Profiles'), profile_help)
+    profile_group.add_option(
+        '--source-profile', default='Browser', metavar='PROFILE',
+        help=_("Source renderer profile. Default is 'Browser'."))
+    profile_group.add_option(
+        '--dest-profile', default='CybookG3', metavar='PROFILE',
+        help=_("Destination renderer profile. Default is 'CybookG3'."))
+    parser.add_option_group(profile_group)
+            
+def option_parser():
+    parser = OptionParser(usage=_('%prog [options] OPFFILE'))
+    parser.add_option(
+        '-o', '--output', default=None, 
+        help=_('Output file. Default is derived from input filename.'))
+    parser.add_option(
+        '-v', '--verbose', default=False, action='store_true',
+        help=_('Useful for debugging.'))
+    add_mobi_options(parser)
+    return parser
+
+def oeb2mobi(opts, inpath):
+    logger = Logger(logging.getLogger('oeb2mobi'))
+    logger.setup_cli_handler(opts.verbose)
+    outpath = opts.output
+    if outpath is None:
+        outpath = os.path.basename(inpath)
+        outpath = os.path.splitext(outpath)[0] + '.mobi'
+    source = opts.source_profile
+    if source not in Context.PROFILES:
+        logger.error(_('Unknown source profile %r') % source)
+        return 1
+    dest = opts.dest_profile
+    if dest not in Context.PROFILES:
+        logger.error(_('Unknown destination profile %r') % dest)
+        return 1
+    compression = PALMDOC if opts.compress else UNCOMPRESSED
+    imagemax = PALM_MAX_IMAGE_SIZE if opts.rescale_images else None
+    context = Context(source, dest)
+    oeb = OEBBook(inpath, logger=logger)
+    tocadder = HTMLTOCAdder()
+    tocadder.transform(oeb, context)
+    mangler = CaseMangler()
+    mangler.transform(oeb, context)
    fbase = context.dest.fbase
    fkey = context.dest.fnums.values()
-    tocadder = HTMLTOCAdder()
    flattener = CSSFlattener(
        fbase=fbase, fkey=fkey, unfloat=True, untable=True)
-    rasterizer = SVGRasterizer()
-    trimmer = ManifestTrimmer()
-    mobimlizer = MobiMLizer()
-    tocadder.transform(oeb, context)
    flattener.transform(oeb, context)
+    rasterizer = SVGRasterizer()
    rasterizer.transform(oeb, context)
-    mobimlizer.transform(oeb, context)
+    trimmer = ManifestTrimmer()
    trimmer.transform(oeb, context)
+    mobimlizer = MobiMLizer()
+    mobimlizer.transform(oeb, context)
+    writer = MobiWriter(compression=compression, imagemax=imagemax)
    writer.dump(oeb, outpath)
-    return 0
+    run_plugins_on_postprocess(outpath, 'mobi')
+    logger.info(_('Output written to ') + outpath)
+    
+def main(argv=sys.argv):
+    parser = option_parser()
+    opts, args = parser.parse_args(argv[1:])
+    if len(args) != 1:
+        parser.print_help()
+        return 1
+    inpath = args[0]
+    retval = oeb2mobi(opts, inpath)
+    return retval

 if __name__ == '__main__':
    sys.exit(main())
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@ -67,11 +67,13 @@ OEB_IMAGES = set([GIF_MIME, JPEG_MIME, PNG_MIME, SVG_MIME])

 MS_COVER_TYPE = 'other.ms-coverimage-standard'

-ENTITYDEFS = dict(htmlentitydefs.entitydefs)
+recode = lambda s: s.decode('iso-8859-1').encode('ascii', 'xmlcharrefreplace')
+ENTITYDEFS = dict((k, recode(v)) for k, v in htmlentitydefs.entitydefs.items())
 del ENTITYDEFS['lt']
 del ENTITYDEFS['gt']
 del ENTITYDEFS['quot']
 del ENTITYDEFS['amp']
+del recode


 def element(parent, *args, **kwargs):
@ -341,16 +343,19 @@ class Manifest(object):
                self._data = None
            return property(fget, fset, fdel)
        data = data()
-
+        
        def __str__(self):
            data = self.data
            if isinstance(data, etree._Element):
                return xml2str(data)
            return str(data)
-
+        
        def __eq__(self, other):
            return id(self) == id(other)
-
+        
+        def __ne__(self, other):
+            return not self.__eq__(other)
+        
        def __cmp__(self, other):
            result = cmp(self.spine_position, other.spine_position)
            if result != 0:
@ -534,52 +539,81 @@ class Spine(object):

 class Guide(object):
    class Reference(object):
+        _TYPES_TITLES = [('cover', 'Cover'), ('title-page', 'Title Page'),
+            ('toc', 'Table of Contents'), ('index', 'Index'),
+            ('glossary', 'Glossary'), ('acknowledgements', 'Acknowledgements'),
+            ('bibliography', 'Bibliography'), ('colophon', 'Colophon'),
+            ('copyright-page', 'Copyright'), ('dedication', 'Dedication'),
+            ('epigraph', 'Epigraph'), ('foreword', 'Foreword'),
+            ('loi', 'List of Illustrations'), ('lot', 'List of Tables'),
+            ('notes', 'Notes'), ('preface', 'Preface'),
+            ('text', 'Main Text')]
+        TYPES = set(t for t, _ in _TYPES_TITLES)
+        TITLES = dict(_TYPES_TITLES)
+        ORDER = dict((t, i) for (t, _), i in izip(_TYPES_TITLES, count(0)))
+        
        def __init__(self, type, title, href):
+            if type.lower() in self.TYPES:
+                type = type.lower()
+            elif type not in self.TYPES and \
+                 not type.startswith('other.'):
+                type = 'other.' + type
+            if not title:
+                title = self.TITLES.get(type, None)
            self.type = type
            self.title = title
            self.href = urlnormalize(href)
-
+        
        def __repr__(self):
            return 'Reference(type=%r, title=%r, href=%r)' \
                % (self.type, self.title, self.href)
+        
+        def _order():
+            def fget(self):
+                return self.ORDER.get(self.type, self.type)
+            return property(fget=fget)
+        _order = _order()
+        
+        def __cmp__(self, other):
+            if not isinstance(other, Guide.Reference):
+                return NotImplemented
+            return cmp(self._order, other._order)
    
    def __init__(self, oeb):
        self.oeb = oeb
        self.refs = {}
-
+    
    def add(self, type, title, href):
        ref = self.Reference(type, title, href)
        self.refs[type] = ref
        return ref
-
-    def by_type(self, type):
-        return self.ref_types[type]
-
+    
    def iterkeys(self):
        for type in self.refs:
            yield type
    __iter__ = iterkeys
-
+    
    def values(self):
-        for ref in self.refs.values():
-            yield ref
-
+        values = list(self.refs.values())
+        values.sort()
+        return values
+    
    def items(self):
        for type, ref in self.refs.items():
            yield type, ref
    
    def __getitem__(self, key):
        return self.refs[key]
-
+    
    def __delitem__(self, key):
        del self.refs[key]
    
    def __contains__(self, key):
        return key in self.refs
-
+    
    def __len__(self):
        return len(self.refs)
-
+    
    def to_opf1(self, parent=None):
        elem = element(parent, 'guide')
        for ref in self.refs.values():
@ -914,11 +948,11 @@ class OEBBook(object):
            cover = self.manifest.hrefs[href]
        elif xpath(html, '//h:img[position()=1]'):
            img = xpath(html, '//h:img[position()=1]')[0]
-            href = img.get('src')
+            href = spine0.abshref(img.get('src'))
            cover = self.manifest.hrefs[href]
        elif xpath(html, '//h:object[position()=1]'):
            object = xpath(html, '//h:object[position()=1]')[0]
-            href = object.get('data')
+            href = spine0.abshref(object.get('data'))
            cover = self.manifest.hrefs[href]
        elif xpath(html, '//svg:svg[position()=1]'):
            svg = copy.deepcopy(xpath(html, '//svg:svg[position()=1]')[0])
--- a/src/calibre/ebooks/oeb/profile.py
+++ b/src/calibre/ebooks/oeb/profile.py
@ -36,26 +36,36 @@ PROFILES = {
                fsizes=[7.5, 9, 10, 12, 15.5, 20, 22, 24]),

    'MSReader':
-        Profile(width=480, height=652, dpi=100.0, fbase=13,
+        Profile(width=480, height=652, dpi=96, fbase=13,
                fsizes=[10, 11, 13, 16, 18, 20, 22, 26]),

    # Not really, but let's pretend
-    'MobiDesktop':
-        Profile(width=280, height=300, dpi=96, fbase=18,
-                fsizes=[14, 14, 16, 18, 20, 22, 22, 24]),
+    'Mobipocket':
+        Profile(width=600, height=800, dpi=96, fbase=18,
+                fsizes=[14, 14, 16, 18, 20, 22, 24, 26]),
    
-    # No clue on usable screen size and DPI
-    'CybookG3':
-        Profile(width=584, height=754, dpi=168.451, fbase=12,
-                fsizes=[9, 10, 11, 12, 14, 17, 20, 24]),
+    # No clue on usable screen size; DPI should be good
+    'HanlinV3':
+        Profile(width=584, height=754, dpi=168.451, fbase=16,
+                fsizes=[12, 12, 14, 16, 18, 20, 22, 24]),

-    'Firefox':
+    'CybookG3':
+        Profile(width=600, height=800, dpi=168.451, fbase=16,
+                fsizes=[12, 12, 14, 16, 18, 20, 22, 24]),
+
+    'Kindle':
+        Profile(width=525, height=640, dpi=168.451, fbase=16,
+                fsizes=[12, 12, 14, 16, 18, 20, 22, 24]),
+    
+    'Browser':
        Profile(width=800, height=600, dpi=100.0, fbase=12,
                fsizes=[5, 7, 9, 12, 13.5, 17, 20, 22, 24])
    }


 class Context(object):
+    PROFILES = PROFILES
+    
    def __init__(self, source, dest):
        if source in PROFILES:
            source = PROFILES[source]
--- a/src/calibre/ebooks/oeb/stylizer.py
+++ b/src/calibre/ebooks/oeb/stylizer.py
@ -23,7 +23,7 @@ from cssutils.css import CSSStyleRule, CSSPageRule, CSSStyleDeclaration, \
 from lxml import etree
 from lxml.cssselect import css_to_xpath, ExpressionError
 from calibre.ebooks.oeb.base import XHTML, XHTML_NS, CSS_MIME, OEB_STYLES
-from calibre.ebooks.oeb.base import barename, urlnormalize
+from calibre.ebooks.oeb.base import XPNSMAP, xpath, barename, urlnormalize
 from calibre.ebooks.oeb.profile import PROFILES
 from calibre.resources import html_css

@ -87,10 +87,6 @@ FONT_SIZE_NAMES = set(['xx-small', 'x-small', 'small', 'medium', 'large',
                       'x-large', 'xx-large'])


-XPNSMAP = {'h': XHTML_NS,}
-def xpath(elem, expr):
-    return elem.xpath(expr, namespaces=XPNSMAP)
-
 class CSSSelector(etree.XPath):
    MIN_SPACE_RE = re.compile(r' *([>~+]) *')
    LOCAL_NAME_RE = re.compile(r"(?<!local-)name[(][)] *= *'[^:]+:")
@ -269,6 +265,7 @@ class Style(object):
        self._fontSize = None
        self._width = None
        self._height = None
+        self._lineHeight = None
        stylizer._styles[element] = self

    def _update_cssdict(self, cssdict):
@ -288,13 +285,13 @@ class Style(object):
        if elem is None:
            return None
        return self._stylizer.style(elem)
-    
+
    def __getitem__(self, name):
        domname = cssproperties._toDOMname(name)
        if hasattr(self, domname):
            return getattr(self, domname)
        return self._unit_convert(self._get(name))
-    
+
    def _get(self, name):
        result = None
        if name in self._style:
@ -324,7 +321,7 @@ class Style(object):
            unit = m.group(2)
            if unit == '%':
                base = base or self.width
-                result = (value/100.0) * base
+                result = (value / 100.0) * base
            elif unit == 'px':
                result = value * 72.0 / self._profile.dpi
            elif unit == 'in':
@ -388,7 +385,7 @@ class Style(object):
    @property
    def width(self):
        if self._width is None:
-            result = None
+            width = None
            base = None
            parent = self._get_parent()
            if parent is not None:
@ -399,9 +396,9 @@ class Style(object):
                width = self._element.attrib['width']
            elif 'width' in self._style:
                width = self._style['width']
-            else:
+            if not width or width == 'auto':
                result = base
-            if not result:
+            else:
                result = self._unit_convert(width, base=base)
            self._width = result
        return self._width
@ -409,7 +406,7 @@ class Style(object):
    @property
    def height(self):
        if self._height is None:
-            result = None
+            height = None
            base = None
            parent = self._get_parent()
            if parent is not None:
@ -420,12 +417,53 @@ class Style(object):
                height = self._element.attrib['height']
            elif 'height' in self._style:
                height = self._style['height']
-            else:
+            if not height or height == 'auto':
                result = base
-            if not result:
+            else:
                result = self._unit_convert(height, base=base)
            self._height = result
        return self._height
+
+    @property
+    def lineHeight(self):
+        '''
+        The line height of this element, computed once and cached in
+        `_lineHeight`.
+
+        * An explicit `line-height` that parses as a bare number is treated
+          as a multiple of this element's font size.
+        * Any other explicit `line-height` is handed to `_unit_convert`
+          with the font size as base.
+        * With no explicit value, the parent's `lineHeight` is used.
+        * With no explicit value and no parent, the result is 1.2 times
+          the font size.
+        '''
+        if self._lineHeight is None:
+            if 'line-height' in self._style:
+                lineh = self._style['line-height']
+                try:
+                    factor = float(lineh)
+                except ValueError:
+                    # Not a bare number, so it carries a unit or keyword
+                    result = self._unit_convert(lineh, base=self.fontSize)
+                else:
+                    result = factor * self.fontSize
+            else:
+                # Same accessor the width and height properties use
+                parent = self._get_parent()
+                if parent is not None:
+                    # TODO: proper inheritance
+                    result = parent.lineHeight
+                else:
+                    result = 1.2 * self.fontSize
+            self._lineHeight = result
+        return self._lineHeight
+    
+    @property
+    def marginTop(self):
+        '''
+        The `margin-top` value, passed through `_unit_convert` with this
+        element's height as the base.
+        '''
+        raw = self._get('margin-top')
+        return self._unit_convert(raw, base=self.height)
+    
+    @property
+    def marginBottom(self):
+        '''
+        The `margin-bottom` value, passed through `_unit_convert` with this
+        element's height as the base.
+        '''
+        raw = self._get('margin-bottom')
+        return self._unit_convert(raw, base=self.height)
+    
+    @property
+    def paddingTop(self):
+        '''
+        The `padding-top` value, passed through `_unit_convert` with this
+        element's height as the base.
+        '''
+        raw = self._get('padding-top')
+        return self._unit_convert(raw, base=self.height)
+    
+    @property
+    def paddingBottom(self):
+        '''
+        The `padding-bottom` value, passed through `_unit_convert` with this
+        element's height as the base.
+        '''
+        raw = self._get('padding-bottom')
+        return self._unit_convert(raw, base=self.height)
    
    def __str__(self):
        items = self._style.items()
--- a/src/calibre/ebooks/oeb/transforms/flatcss.py
+++ b/src/calibre/ebooks/oeb/transforms/flatcss.py
@ -33,12 +33,13 @@ class KeyMapper(object):
    def relate(size, base):
        size = float(size)
        base = float(base)
-        if size == base: return 0
+        if abs(size - base) < 0.1: return 0
        sign = -1 if size < base else 1
        endp = 0 if size < base else 36
        diff = (abs(base - size) * 3) + ((36 - size) / 100)
        logb = abs(base - endp) 
-        return sign * math.log(diff, logb)
+        result = sign * math.log(diff, logb)
+        return result
        
    def __getitem__(self, ssize):
        if ssize in self.cache:
@ -122,6 +123,8 @@ class CSSFlattener(object):
            fsize = self.context.source.fbase
            self.baseline_node(body, stylizer, sizes, fsize)
        sbase = max(sizes.items(), key=operator.itemgetter(1))[0]
+        self.oeb.logger.info(
+            "Source base font size is %0.05fpt" % sbase)
        return sbase

    def clean_edges(self, cssdict, style, fsize):
@ -154,14 +157,14 @@ class CSSFlattener(object):
        if node.tag == XHTML('font'):
            node.tag = XHTML('span')
            if 'size' in node.attrib:
-                size = node.attrib['size']
-                if size.startswith('+'):
-                    cssdict['font-size'] = 'larger'
-                elif size.startswith('-'):
-                    cssdict['font-size'] = 'smaller'
-                else:
+                size = node.attrib['size'].strip()
+                if size:
                    fnums = self.context.source.fnums
-                    cssdict['font-size'] = fnums[int(size)]
+                    if size[0] in ('+', '-'):
+                        # Oh, the warcrimes
+                        cssdict['font-size'] = fnums[3+int(size)]
+                    else:
+                        cssdict['font-size'] = fnums[int(size)]
                del node.attrib['size']
        if 'color' in node.attrib:
            cssdict['color'] = node.attrib['color']
@ -182,10 +185,11 @@ class CSSFlattener(object):
                percent = (margin - style['text-indent']) / style['width']
                cssdict['margin-left'] = "%d%%" % (percent * 100)
                left -= style['text-indent']
+            if 'display' in cssdict and cssdict['display'] == 'in-line':
+                cssdict['display'] = 'inline'
            if self.unfloat and 'float' in cssdict \
-               and tag not in ('img', 'object') \
               and cssdict.get('display', 'none') != 'none':
-                    del cssdict['display']
+                del cssdict['display']
            if self.untable and 'display' in cssdict \
               and cssdict['display'].startswith('table'):
                display = cssdict['display']
@ -218,7 +222,9 @@ class CSSFlattener(object):
        for child in node:
            self.flatten_node(child, stylizer, names, styles, psize, left)

-    def flatten_head(self, head, stylizer, href):
+    def flatten_head(self, item, stylizer, href):
+        html = item.data
+        head = html.find(XHTML('head'))
        for node in head:
            if node.tag == XHTML('link') \
               and node.get('rel', 'stylesheet') == 'stylesheet' \
@ -227,6 +233,7 @@ class CSSFlattener(object):
            elif node.tag == XHTML('style') \
                 and node.get('type', CSS_MIME) in OEB_STYLES:
                head.remove(node)
+        href = item.relhref(href)
        etree.SubElement(head, XHTML('link'),
            rel='stylesheet', type=CSS_MIME, href=href)
        if stylizer.page_rule:
@ -259,7 +266,5 @@ class CSSFlattener(object):
        css = ''.join(".%s {\n%s;\n}\n\n" % (key, val) for key, val in items)
        href = self.replace_css(css)
        for item in self.oeb.spine:
-            html = item.data
            stylizer = self.stylizers[item]
-            head = html.find(XHTML('head'))
-            self.flatten_head(head, stylizer, href)
+            self.flatten_head(item, stylizer, href)
--- a/src/calibre/ebooks/oeb/transforms/htmltoc.py
+++ b/src/calibre/ebooks/oeb/transforms/htmltoc.py
@ -0,0 +1,87 @@
+'''
+HTML-TOC-adding transform.
+'''
+from __future__ import with_statement
+
+__license__   = 'GPL v3'
+__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
+
+import sys
+import os
+from lxml import etree
+from calibre.ebooks.oeb.base import XML, XHTML, XHTML_NS
+from calibre.ebooks.oeb.base import XHTML_MIME, CSS_MIME
+from calibre.ebooks.oeb.base import element
+
+# Stylesheets for the generated table-of-contents page, keyed by style name.
+# HTMLTOCAdder.transform() looks its `style` up here and falls back to
+# 'nested' when the name is not a key.  The class names used in the CSS are
+# the ones HTMLTOCAdder puts on the elements it creates.
+STYLE_CSS = {
+    'nested': """
+.calibre_toc_header {
+  text-align: center;
+}
+.calibre_toc_block {
+  margin-left: 1.2em;
+  text-indent: -1.2em;
+}
+.calibre_toc_block .calibre_toc_block {
+  margin-left: 2.4em;
+}
+.calibre_toc_block .calibre_toc_block .calibre_toc_block {
+  margin-left: 3.6em;
+}
+""",
+   
+    'centered': """
+.calibre_toc_header {
+  text-align: center;
+}
+.calibre_toc_block {
+  text-align: center;
+}
+body > .calibre_toc_block {
+  margin-top: 1.2em;
+}
+"""
+    }
+
+class HTMLTOCAdder(object):
+    '''
+    Transform which adds an HTML table-of-contents page to a book whose
+    guide has no 'toc' entry yet.
+    '''
+
+    def __init__(self, style='nested'):
+        # Key into STYLE_CSS; it is only checked when transform() runs.
+        self.style = style
+
+    def transform(self, oeb, context):
+        '''
+        Generate the TOC page for `oeb` unless its guide already has a 'toc'
+        entry.
+
+        Adds a stylesheet and a 'contents.xhtml' document to the manifest,
+        appends the document to the spine as a non-linear item and points
+        the guide's 'toc' entry at it.  `context` is not used.
+        '''
+        if 'toc' in oeb.guide:
+            return
+        oeb.logger.info('Generating in-line TOC...')
+        chosen = self.style
+        if chosen not in STYLE_CSS:
+            oeb.logger.error('Unknown TOC style %r' % chosen)
+            chosen = 'nested'
+        css_id, css_href = oeb.manifest.generate('tocstyle', 'tocstyle.css')
+        oeb.manifest.add(css_id, css_href, CSS_MIME, data=STYLE_CSS[chosen])
+        page = self._build_page(oeb, css_href)
+        page_id, page_href = oeb.manifest.generate('contents', 'contents.xhtml')
+        item = oeb.manifest.add(page_id, page_href, XHTML_MIME, data=page)
+        oeb.spine.add(item, linear=False)
+        oeb.guide.add('toc', 'Table of Contents', page_href)
+
+    def _build_page(self, oeb, css_href):
+        '''
+        Return the root element of the TOC page: a head with a title and a
+        link to `css_href`, and a body with a heading followed by one block
+        per entry of `oeb.toc`.
+        '''
+        lang = str(oeb.metadata.language[0])
+        root = element(None, XHTML('html'), nsmap={None: XHTML_NS},
+                       attrib={XML('lang'): lang})
+        head = element(root, XHTML('head'))
+        page_title = element(head, XHTML('title'))
+        page_title.text = 'Table of Contents'
+        element(head, XHTML('link'), rel='stylesheet', type=CSS_MIME,
+                href=css_href)
+        body = element(root, XHTML('body'), attrib={'class': 'calibre_toc'})
+        heading = element(body, XHTML('h1'),
+                          attrib={'class': 'calibre_toc_header'})
+        heading.text = 'Table of Contents'
+        self.add_toc_level(body, oeb.toc)
+        return root
+
+    def add_toc_level(self, elem, toc):
+        '''
+        Append one block per node of `toc` to `elem`, each holding a link to
+        the node, then recurse into the node so its children nest inside
+        that block.
+        '''
+        for node in toc:
+            entry = element(elem, XHTML('div'),
+                            attrib={'class': 'calibre_toc_block'})
+            link_attrib = {'href': node.href, 'class': 'calibre_toc_line'}
+            link = element(entry, XHTML('a'), attrib=link_attrib)
+            link.text = node.title
+            self.add_toc_level(entry, node)
--- a/src/calibre/ebooks/oeb/transforms/manglecase.py
+++ b/src/calibre/ebooks/oeb/transforms/manglecase.py
@ -0,0 +1,112 @@
+'''
+CSS case-mangling transform.
+'''
+from __future__ import with_statement
+
+__license__   = 'GPL v3'
+__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
+
+import sys
+import os
+import re
+import operator
+import math
+from itertools import chain
+from collections import defaultdict
+from lxml import etree
+from calibre.ebooks.oeb.base import XHTML, XHTML_NS
+from calibre.ebooks.oeb.base import CSS_MIME
+from calibre.ebooks.oeb.base import namespace
+from calibre.ebooks.oeb.stylizer import Stylizer
+
+# Stylesheet added to the manifest by CaseMangler.mangle_spine().  The
+# `calibre_lowercase` class is the one CaseMangler.smallcaps_elem() puts on
+# the spans it creates.
+CASE_MANGLER_CSS = """
+.calibre_lowercase {
+    font-variant: normal;
+    font-size: 0.65em;
+}
+"""
+
+# The `text-transform` values CaseMangler.mangle_elem() acts on; any other
+# value leaves the text untouched.
+TEXT_TRANSFORMS = set(['capitalize', 'uppercase', 'lowercase'])
+
+class CaseMangler(object):
+    '''
+    Transform which applies CSS `text-transform` values directly to the
+    text of each spine document and replaces `font-variant: small-caps`
+    text with upper-cased spans carrying the `calibre_lowercase` class.
+    '''
+
+    # Matches the first character of each word.  A character directly after
+    # an apostrophe is not a word start, so "it's" counts as one word.
+    _WORD_START = re.compile(r"(?u)(?<![\w'])\w")
+
+    def transform(self, oeb, context):
+        '''
+        Mangle every spine document of `oeb`.  `context.source` is kept as
+        the profile handed to each Stylizer.
+        '''
+        oeb.logger.info('Applying case-transforming CSS...')
+        self.oeb = oeb
+        self.profile = context.source
+        self.mangle_spine()
+
+    def mangle_spine(self):
+        '''
+        Add the helper stylesheet to the manifest, link it from the head of
+        each spine document and mangle each document's body.
+        '''
+        id, href = self.oeb.manifest.generate('manglecase', 'manglecase.css')
+        self.oeb.manifest.add(id, href, CSS_MIME, data=CASE_MANGLER_CSS)
+        for item in self.oeb.spine:
+            html = item.data
+            relhref = item.relhref(href)
+            etree.SubElement(html.find(XHTML('head')), XHTML('link'),
+                             rel='stylesheet', href=relhref, type=CSS_MIME)
+            stylizer = Stylizer(html, item.href, self.oeb, self.profile)
+            self.mangle_elem(html.find(XHTML('body')), stylizer)
+
+    def text_transform(self, transform, text):
+        '''
+        Return `text` with the CSS `text-transform` value `transform`
+        applied.  Unrecognized values return `text` unchanged.
+        '''
+        if transform == 'capitalize':
+            # CSS only upper-cases the first character of each word and
+            # leaves the rest alone; str.title() would also lower-case the
+            # remainder and capitalize after apostrophes.
+            return self._WORD_START.sub(
+                lambda match: match.group(0).upper(), text)
+        elif transform == 'uppercase':
+            return text.upper()
+        elif transform == 'lowercase':
+            return text.lower()
+        return text
+
+    def split_text(self, text):
+        '''
+        Split `text` into consecutive runs whose characters all give the
+        same answer to `isupper()`.  Empty text yields an empty list.
+        '''
+        if not text:
+            return []
+        results = ['']
+        isupper = text[0].isupper()
+        for char in text:
+            if char.isupper() == isupper:
+                results[-1] += char
+            else:
+                isupper = not isupper
+                results.append(char)
+        return results
+
+    def smallcaps_elem(self, elem, attr):
+        '''
+        Replace the `attr` text ('text' or 'tail') of `elem` with a sequence
+        of runs.  Upper-case runs stay plain text; every other run is
+        upper-cased and wrapped in a span with class `calibre_lowercase`.
+        '''
+        texts = self.split_text(getattr(elem, attr))
+        setattr(elem, attr, None)
+        # `last` is the node whose tail receives the next plain-text run.
+        # None means that run still belongs in elem.text.
+        last = elem if attr == 'tail' else None
+        attrib = {'class': 'calibre_lowercase'}
+        for text in texts:
+            if text.isupper():
+                if last is None:
+                    elem.text = text
+                else:
+                    last.tail = text
+            else:
+                child = etree.Element(XHTML('span'), attrib=attrib)
+                child.text = text.upper()
+                if last is None:
+                    elem.insert(0, child)
+                else:
+                    # addnext() moves the tail for some reason
+                    tail = last.tail
+                    last.addnext(child)
+                    last.tail = tail
+                    child.tail = None
+                last = child
+
+    def mangle_elem(self, elem, stylizer):
+        '''
+        Apply the text-transform and small-caps handling to the text of
+        `elem` and the tails of its children, recursing into each child.
+        Elements whose tag is not a string or not in the XHTML namespace
+        are skipped.
+        '''
+        if not isinstance(elem.tag, basestring) or \
+           namespace(elem.tag) != XHTML_NS:
+            return
+        # Snapshot the children first: smallcaps_elem() inserts new spans,
+        # which must not be visited.
+        children = list(elem)
+        style = stylizer.style(elem)
+        transform = style['text-transform']
+        variant = style['font-variant']
+        if elem.text:
+            if transform in TEXT_TRANSFORMS:
+                elem.text = self.text_transform(transform, elem.text)
+            if variant == 'small-caps':
+                self.smallcaps_elem(elem, 'text')
+        for child in children:
+            self.mangle_elem(child, stylizer)
+            # A child's tail is styled by its parent, i.e. by `elem`
+            if child.tail:
+                if transform in TEXT_TRANSFORMS:
+                    child.tail = self.text_transform(transform, child.tail)
+                if variant == 'small-caps':
+                    self.smallcaps_elem(child, 'tail')
--- a/src/calibre/ebooks/oeb/transforms/rasterize.py
+++ b/src/calibre/ebooks/oeb/transforms/rasterize.py
@ -21,11 +21,12 @@ from PyQt4.QtGui import QPainter
 from PyQt4.QtSvg import QSvgRenderer
 from PyQt4.QtGui import QApplication
 from calibre.ebooks.oeb.base import XHTML_NS, XHTML, SVG_NS, SVG, XLINK
-from calibre.ebooks.oeb.base import SVG_MIME, PNG_MIME
+from calibre.ebooks.oeb.base import SVG_MIME, PNG_MIME, JPEG_MIME
 from calibre.ebooks.oeb.base import xml2str, xpath, namespace, barename
 from calibre.ebooks.oeb.stylizer import Stylizer

 IMAGE_TAGS = set([XHTML('img'), XHTML('object')])
+KEEP_ATTRS = set(['class', 'style', 'width', 'height', 'align'])

 class SVGRasterizer(object):
    def __init__(self):
@ -41,7 +42,7 @@ class SVGRasterizer(object):
        self.rasterize_spine()
        self.rasterize_cover()

-    def rasterize_svg(self, elem, width=0, height=0):
+    def rasterize_svg(self, elem, width=0, height=0, format='PNG'):
        data = QByteArray(xml2str(elem))
        svg = QSvgRenderer(data)
        size = svg.defaultSize()
@ -52,6 +53,9 @@ class SVGRasterizer(object):
            size.setHeight(box[3] - box[1])
        if width or height:
            size.scale(width, height, Qt.KeepAspectRatio)
+        logger = self.oeb.logger
+        logger.info('Rasterizing %r to %dx%d'
+                    % (elem, size.width(), size.height()))
        image = QImage(size, QImage.Format_ARGB32_Premultiplied)
        image.fill(QColor("white").rgb())
        painter = QPainter(image)
@ -60,7 +64,7 @@ class SVGRasterizer(object):
        array = QByteArray()
        buffer = QBuffer(array)
        buffer.open(QIODevice.WriteOnly)
-        image.save(buffer, 'PNG')
+        image.save(buffer, format)
        return str(array)

    def dataize_manifest(self):
@ -113,11 +117,7 @@ class SVGRasterizer(object):

    def rasterize_inline(self, elem, style, item):
        width = style['width']
-        if width == 'auto':
-            width = self.profile.width
        height = style['height']
-        if height == 'auto':
-            height = self.profile.height
        width = (width / 72) * self.profile.dpi
        height = (height / 72) * self.profile.dpi
        elem = self.dataize_svg(item, elem)
@ -134,11 +134,7 @@ class SVGRasterizer(object):

    def rasterize_external(self, elem, style, item, svgitem):
        width = style['width']
-        if width == 'auto':
-            width = self.profile.width
        height = style['height']
-        if height == 'auto':
-            height = self.profile.height
        width = (width / 72) * self.profile.dpi
        height = (height / 72) * self.profile.dpi
        data = QByteArray(str(svgitem))
@ -168,11 +164,16 @@ class SVGRasterizer(object):
            manifest.add(id, href, PNG_MIME, data=data)
            self.images[key] = href
        elem.tag = XHTML('img')
+        for attr in elem.attrib:
+            if attr not in KEEP_ATTRS:
+                del elem.attrib[attr]
        elem.attrib['src'] = item.relhref(href)
-        elem.text = None
+        if elem.text:
+            elem.attrib['alt'] = elem.text
+            elem.text = None
        for child in elem:
            elem.remove(child)
-            
+    
    def rasterize_cover(self):
        covers = self.oeb.metadata.cover
        if not covers:
@ -180,9 +181,9 @@ class SVGRasterizer(object):
        cover = self.oeb.manifest.ids[str(covers[0])]
        if not cover.media_type == SVG_MIME:
            return
-        logger = self.oeb.logger
-        logger.info('Rasterizing %r to %dx%d' % (cover.href, 600, 800))
-        data = self.rasterize_svg(cover.data, 600, 800)
+        width = (self.profile.width / 72) * self.profile.dpi
+        height = (self.profile.height / 72) * self.profile.dpi
+        data = self.rasterize_svg(cover.data, width, height)
        href = os.path.splitext(cover.href)[0] + '.png'
        id, href = self.oeb.manifest.generate(cover.id, href)
        self.oeb.manifest.add(id, href, PNG_MIME, data=data)
--- a/src/calibre/ebooks/oeb/transforms/trimmanifest.py
+++ b/src/calibre/ebooks/oeb/transforms/trimmanifest.py
@ -9,6 +9,7 @@ __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
 import sys
 import os
 from itertools import chain
+from urlparse import urldefrag
 from lxml import etree
 import cssutils
 from calibre.ebooks.oeb.base import XPNSMAP, CSS_MIME, OEB_DOCS
@ -29,6 +30,11 @@ class ManifestTrimmer(object):
                    used.add(oeb.manifest.hrefs[item.value])
                elif item.value in oeb.manifest.ids:
                    used.add(oeb.manifest.ids[item.value])
+        for ref in oeb.guide.values():
+            path, _ = urldefrag(ref.href)
+            if path in oeb.manifest.hrefs:
+                used.add(oeb.manifest.hrefs[path])
+        # TOC items are required to be in the spine
        for item in oeb.spine:
            used.add(item)
        unchecked = used
@ -56,7 +62,6 @@ class ManifestTrimmer(object):
                    cssutils.replaceUrls(sheet, replacer)
            used.update(new)
            unchecked = new
-        # All guide and TOC items are required to be in the spine
        for item in oeb.manifest.values():
            if item not in used:
                oeb.logger.info('Trimming %r from manifest' % item.href)
--- a/src/calibre/ebooks/pdf/pdftrim.py
+++ b/src/calibre/ebooks/pdf/pdftrim.py
@ -29,7 +29,7 @@ def config(defaults=None):
    c.add_opt('top_right_y', [ '-w', '--righty'], default=default_crop,
          help=_('Number of pixels to crop from the right most y (default is %d)')%default_crop )
    c.add_opt('bounding', ['-b', '--bounding'],
-          help=_('A file generated by ghostscript which allows each page to be individually cropped'))
+          help=_('A file generated by ghostscript which allows each page to be individually cropped [gs -dSAFER -dNOPAUSE -dBATCH -sDEVICE=bbox > bounding] '))
    return c


@ -38,14 +38,28 @@ def option_parser():
    return c.option_parser(usage=_('''\
 	%prog [options] file.pdf

-	Crop a pdf. 
+	Crops a pdf. 
 	'''))

 def main(args=sys.argv):
    parser = option_parser()
    opts, args = parser.parse_args(args)
-    source = os.path.abspath(args[1])
-    input_pdf = PdfFileReader(file(source, "rb"))
+    try:
+        source = os.path.abspath(args[1])
+        input_pdf = PdfFileReader(file(source, "rb"))
+    except:
+        print "Unable to read input"
+        return 2
+    title   = _('Unknown')
+    author  = _('Unknown')
+    try:
+        info = input_pdf.getDocumentInfo()
+        if info.title:
+            title   = info.title
+        if info.author:
+            author  = info.author
+    except:
+        pass
    if opts.bounding != None:
        try:
            bounding = open( opts.bounding , 'r' )
@ -53,7 +67,7 @@ def main(args=sys.argv):
        except:
            print 'Error opening %s' % opts.bounding 
            return 1
-    output_pdf = PdfFileWriter()
+    output_pdf = PdfFileWriter(title=title,author=author)
    for page_number in range (0, input_pdf.getNumPages() ):
        page = input_pdf.getPage(page_number)
        if opts.bounding != None:
--- a/src/calibre/gui2/device.py
+++ b/src/calibre/gui2/device.py
@ -136,16 +136,18 @@ class DeviceManager(Thread):
        return self.create_job(self._sync_booklists, done, args=[booklists],
                        description=_('Send metadata to device'))
    
-    def _upload_books(self, files, names, on_card=False):
+    def _upload_books(self, files, names, on_card=False, metadata=None):
        '''Upload books to device: '''
-        return self.device.upload_books(files, names, on_card, end_session=False)
+        return self.device.upload_books(files, names, on_card, 
+                                        metadata=metadata, end_session=False)
        
-    def upload_books(self, done, files, names, on_card=False, titles=None):
+    def upload_books(self, done, files, names, on_card=False, titles=None, 
+                     metadata=None):
        desc = _('Upload %d books to device')%len(names)
        if titles:
            desc += u':' + u', '.join(titles)
        return self.create_job(self._upload_books, done, args=[files, names], 
-                kwargs={'on_card':on_card}, description=desc)
+                kwargs={'on_card':on_card,'metadata':metadata}, description=desc)
        
    def add_books_to_metadata(self, locations, metadata, booklists):
        self.device.add_books_to_metadata(locations, metadata, booklists)
--- a/src/calibre/gui2/dialogs/job_view.ui
+++ b/src/calibre/gui2/dialogs/job_view.ui
@ -28,9 +28,6 @@
     <property name="readOnly" >
      <bool>true</bool>
     </property>
-     <property name="maximumBlockCount" >
-      <number>400</number>
-     </property>
    </widget>
   </item>
  </layout>
--- a/src/calibre/gui2/dialogs/scheduler.py
+++ b/src/calibre/gui2/dialogs/scheduler.py
@ -75,7 +75,13 @@ def save_recipes(recipes):
    
 def load_recipes():
    config.refresh()
-    return [Recipe().unpickle(r) for r in config.get('scheduled_recipes', [])]
+    recipes = []
+    for r in config.get('scheduled_recipes', []):
+        r = Recipe().unpickle(r)
+        if r.builtin and not str(r.id).startswith('recipe_'):
+            continue
+        recipes.append(r)
+    return recipes

 class RecipeModel(QAbstractListModel, SearchQueryParser):
    
@ -438,7 +444,7 @@ class Scheduler(QObject):
            self.lock.unlock()

 def main(args=sys.argv):
-    app = QApplication([])
+    QApplication([])
    from calibre.library.database2 import LibraryDatabase2
    d = SchedulerDialog(LibraryDatabase2('/home/kovid/documents/library'))
    d.exec_()
--- a/src/calibre/gui2/images/news/telepolis.png
+++ b/src/calibre/gui2/images/news/telepolis.png
--- a/src/calibre/gui2/images/news/tomshardware_de.png
+++ b/src/calibre/gui2/images/news/tomshardware_de.png
--- a/src/calibre/gui2/library.py
+++ b/src/calibre/gui2/library.py
@ -385,13 +385,35 @@ class BooksModel(QAbstractTableModel):
            metadata.append(mi)
        return metadata

+    def get_preferred_formats_from_ids(self, ids, all_formats, mode='r+b'):
+        '''
+        Return one entry per id in `ids`, in order.
+
+        For each id, the first format in `all_formats` (compared
+        case-insensitively) that the database lists for that book is
+        fetched with `self.db.format(..., as_file=True, mode=mode)`.  The
+        entry is None when the book has none of the formats.
+        '''
+        ans = []
+        for book_id in ids:
+            listed = self.db.formats(book_id, index_is_id=True) or ''
+            available = set(listed.lower().split(','))
+            for candidate in all_formats:
+                fmt = candidate.lower()
+                if fmt in available:
+                    entry = self.db.format(book_id, fmt, index_is_id=True,
+                                           as_file=True, mode=mode)
+                    break
+            else:
+                # No format in all_formats is available for this book
+                entry = None
+            ans.append(entry)
+        return ans
+                     
+            
+    
    def get_preferred_formats(self, rows, formats, paths=False):
        ans = []
        for row in (row.row() for row in rows):
            format = None
            fmts = self.db.formats(row)
            if not fmts:
-                return []
+                fmts = ''
            db_formats = set(fmts.lower().split(','))
            available_formats = set([f.lower() for f in formats]) 
            u = available_formats.intersection(db_formats)
--- a/src/calibre/gui2/lrf_renderer/document.py
+++ b/src/calibre/gui2/lrf_renderer/document.py
@ -406,7 +406,8 @@ class Document(QGraphicsScene):
        for font in lrf.font_map:
            fdata = QByteArray(lrf.font_map[font].data)
            id = QFontDatabase.addApplicationFontFromData(fdata)
-            font_map[font] = [str(i) for i in QFontDatabase.applicationFontFamilies(id)][0]
+            if id != -1:
+                font_map[font] = [str(i) for i in QFontDatabase.applicationFontFamilies(id)][0]
        
        if load_substitutions:
            from calibre.ebooks.lrf.fonts.liberation import LiberationMono_BoldItalic
--- a/src/calibre/gui2/main.py
+++ b/src/calibre/gui2/main.py
@ -745,8 +745,8 @@ class Main(MainWindow, Ui_MainWindow):
        '''
        titles = [i['title'] for i in metadata]
        job = self.device_manager.upload_books(Dispatcher(self.books_uploaded),
-                                        files, names, on_card=on_card,
-                                        titles=titles
+                                        files, names, on_card=on_card, 
+                                        metadata=metadata, titles=titles
                                        )
        self.upload_memory[job] = (metadata, on_card, memory, files)
    
@ -887,8 +887,12 @@ class Main(MainWindow, Ui_MainWindow):
        if self.device_connected:
            ids = list(dynamic.get('news_to_be_synced', set([])))
            ids = [id for id in ids if self.library_view.model().db.has_id(id)]
-            files = [self.library_view.model().db.format(id, prefs['output_format'], index_is_id=True, as_file=True) for id in ids]
+            files = self.library_view.model().get_preferred_formats_from_ids(
+                                ids, self.device_manager.device_class.FORMATS)
            files = [f for f in files if f is not None]
+            if not files:
+                dynamic.set('news_to_be_synced', set([]))
+                return
            metadata = self.library_view.model().get_metadata(ids, rows_are_ids=True)
            names = []
            for mi in metadata: 
@ -919,7 +923,7 @@ class Main(MainWindow, Ui_MainWindow):
            if cdata:
                mi['cover'] = self.cover_to_thumbnail(cdata)
        metadata = iter(metadata)
-        _files = self.library_view.model().get_preferred_formats(rows,
+        _files   = self.library_view.model().get_preferred_formats(rows,
                                    self.device_manager.device_class.FORMATS, paths=True)
        files = [getattr(f, 'name', None) for f in _files]
        bad, good, gf, names = [], [], [], []
@ -1479,8 +1483,9 @@ in which you want to store your books files. Any existing books will be automati
        return True

    
-    def shutdown(self):
-        self.write_settings()
+    def shutdown(self, write_settings=True):
+        if write_settings:
+            self.write_settings()
        self.job_manager.terminate_all_jobs()
        self.device_manager.keep_going = False
        self.cover_cache.stop()
@ -1500,6 +1505,7 @@ in which you want to store your books files. Any existing books will be automati

    
    def closeEvent(self, e):
+        self.write_settings()
        if self.system_tray_icon.isVisible():
            if not dynamic['systray_msg'] and not isosx:
                info_dialog(self, 'calibre', 'calibre '+_('will keep running in the system tray. To close it, choose <b>Quit</b> in the context menu of the system tray.')).exec_()
@ -1509,7 +1515,7 @@ in which you want to store your books files. Any existing books will be automati
        else:
            if self.confirm_quit():
                try:
-                    self.shutdown()
+                    self.shutdown(write_settings=False)
                except:
                    pass
                e.accept()
--- a/src/calibre/library/database.py
+++ b/src/calibre/library/database.py
@ -1551,9 +1551,6 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
            


-    def has_book(self, mi):
-        return bool(self.conn.get('SELECT id FROM books where title=?', (mi.title,), all=False))
-
    def has_id(self, id):
        return self.conn.get('SELECT id FROM books where id=?', (id,), all=False) is not None

--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@ -217,7 +217,11 @@ class ResultCache(SearchQueryParser):
        return self.index(id)
    
    def has_id(self, id):
-        return self._data[id] is not None
+        try:
+            return self._data[id] is not None
+        except IndexError:
+            pass
+        return False
    
    def refresh_ids(self, conn, ids):
        for id in ids:
@ -557,7 +561,15 @@ class LibraryDatabase2(LibraryDatabase):
                img.loadFromData(f.read())
                return img
            return f if as_file else f.read()
-        
+    
+    def has_book(self, mi):
+        '''
+        Return True if the books table has a row whose title equals
+        `mi.title`.  A missing or empty title gives False without querying.
+        '''
+        title = mi.title
+        if not title:
+            return False
+        if not isinstance(title, unicode):
+            # Decode byte strings before using them as a query parameter
+            title = title.decode(preferred_encoding, 'replace')
+        match = self.conn.get('SELECT id FROM books where title=?', (title,),
+                              all=False)
+        return bool(match)
+    
    def has_cover(self, index, index_is_id=False):
        id = index if  index_is_id else self.id(index)
        path = os.path.join(self.library_path, self.path(id, index_is_id=True), 'cover.jpg')
--- a/src/calibre/library/static/gui.js
+++ b/src/calibre/library/static/gui.js
@ -7,8 +7,8 @@ var column_titles = {
    'rating'   : 'Rating',
    'date'     : 'Date',
    'tags'     : 'Tags',
-    'series'   : 'Series',
-}
+    'series'   : 'Series'
+};

 String.prototype.format = function() {
    var pattern = /\{\d+\}/g;
@ -47,7 +47,7 @@ function render_book(book) {
    // Render title cell
    var title = '<i>{0}</i>'.format(book.attr("title")) + '<br /><span class="subtitle">';
    var id    = book.attr("id");
-    var comments = $.trim(book.text()).replace(/\n\n/, '<br/>'); 
+    var comments = $.trim(book.text()).replace(/\n\n/, '<br/>');
    var formats = new Array();
    var size = (parseFloat(book.attr('size'))/(1024*1024)).toFixed(1);
    var tags = book.attr('tags').replace(/,/g, ', ');
@ -70,22 +70,22 @@ function render_book(book) {
        authors += jQuery.trim(_authors[i]).replace(/ /g, '&nbsp;')+'<br />';
    }
    if (authors) { authors = authors.slice(0, authors.length-6); }
-    
+
    // Render rating cell
    var _rating = parseFloat(book.attr('rating'))/2.;
    var rating = '';
    for (i = 0; i < _rating; i++) { rating += '&#9733;'}
-    
+
    // Render date cell
    var _date = Date.parseExact(book.attr('timestamp'), 'yyyy/MM/dd HH:mm:ss');
    var date = _date.toString('d MMM yyyy').replace(/ /g, '&nbsp;');
-    
+
    // Render series cell
    var series = book.attr("series")
    if (series) {
        series += '&nbsp;[{0}]'.format(book.attr('series_index'));
    }
-    
+
    var cells = {
        'title'   : title,
        'authors' : authors,
@ -93,12 +93,12 @@ function render_book(book) {
        'date'    : date,
        'series'  : series
    };
-    
+
    var row = '';
    for (i = 0; i < cmap.length; i++) {
        row += '<td class="{0}">{1}</td>'.format(cmap[i], cells[cmap[i]]);
    }
-    return '<tr id="{0}">{1}</tr>'.format(id, row);    
+    return '<tr id="{0}">{1}</tr>'.format(id, row);
 }

 function fetch_library_books(start, num, timeout, sort, order, search) {
@ -112,15 +112,15 @@ function fetch_library_books(start, num, timeout, sort, order, search) {
    last_search = search;
    last_sort = sort;
    last_sort_order = order;
-    
+
    if (current_library_request != null) {
        current_library_request.abort();
        current_library_request = null;
    }
-    
+
    $('#cover_pane').css('visibility', 'hidden');
    $('#loading').css('visibility', 'visible');
-    
+
    current_library_request = $.ajax({
      type: "GET",
      url: "library",
@ -128,18 +128,18 @@ function fetch_library_books(start, num, timeout, sort, order, search) {
      cache: false,
      timeout: timeout, //milliseconds
      dataType: "xml",
-      
+
      error : function(XMLHttpRequest, textStatus, errorThrown) {
-          alert('Error: '+textStatus+'\n\n'+errorThrown);       
+          alert('Error: '+textStatus+'\n\n'+errorThrown);
      },
-      
+
      success : function(xml, textStatus) {
          var library = $(xml).find('library');
          total = parseInt(library.attr('total'));
          var num   = parseInt(library.attr('num'));
          var start = parseInt(library.attr('start'));
          update_count_bar(start, num, total);
-          var display = ''; 
+          var display = '';
          library.find('book').each( function() {
              var book = $(this);
              var row = render_book(book);
@ -170,18 +170,18 @@ function fetch_library_books(start, num, timeout, sort, order, search) {
                  $('#cover_pane').css('visibility', 'visible');
              }
          });
-          
-          
+
+
          layout();
          $('#book_list tbody tr:even()').css('background-color', '#eeeeee');
      },
-      
+
      complete : function(XMLHttpRequest, textStatus) {
          current_library_request = null;
          document.getElementById('main').scrollTop = 0;
          $('#loading').css('visibility', 'hidden');
      }
-    
+
    });

 }
@ -196,7 +196,7 @@ function update_count_bar(start, num, total) {
    left.css('opacity', (start <= 0) ? 0.3 : 1);
    var right = cb.find('#right');
    right.css('opacity', (start + num >= total) ? 0.3 : 1);
-    
+
 }

 function setup_count_bar() {
@ -205,7 +205,7 @@ function setup_count_bar() {
            fetch_library_books(0, last_num, LIBRARY_FETCH_TIMEOUT, last_sort, last_sort_order, last_search);
        }
    });
-    
+
    $('#count_bar * img:eq(1)').click(function(){
        if (last_start > 0) {
            var new_start = last_start - last_num;
@ -215,14 +215,14 @@ function setup_count_bar() {
            fetch_library_books(new_start, last_num, LIBRARY_FETCH_TIMEOUT, last_sort, last_sort_order, last_search);
        }
    });
-    
+
    $('#count_bar * img:eq(2)').click(function(){
        if (last_start + last_num < total) {
            var new_start = last_start + last_num;
            fetch_library_books(new_start, last_num, LIBRARY_FETCH_TIMEOUT, last_sort, last_sort_order, last_search);
        }
    });
-    
+
    $('#count_bar * img:eq(3)').click(function(){
        if (total - last_num > 0) {
            fetch_library_books(total - last_num, last_num, LIBRARY_FETCH_TIMEOUT, last_sort, last_sort_order, last_search);
@ -234,7 +234,7 @@ function setup_count_bar() {

 function search() {
    var search = $.trim($('#search_box * #s').val());
-    fetch_library_books(0, last_num, LIBRARY_FETCH_TIMEOUT, 
+    fetch_library_books(0, last_num, LIBRARY_FETCH_TIMEOUT,
                        last_sort, last_sort_order, search);
 }

@ -245,11 +245,11 @@ function setup_sorting() {
    $('table#book_list  thead tr td').mouseover(function() {
        this.style.backgroundColor = "#fff2a8";
    });
-    
+
    $('table#book_list  thead tr td').mouseout(function() {
        this.style.backgroundColor = "inherit";
    });
-    
+
    for (i = 0; i < cmap.length; i++) {
        $('table#book_list span#{0}_sort'.format(cmap[i])).parent().click(function() {
            var sort_indicator = $($(this).find('span'));
@ -258,7 +258,7 @@ function setup_sorting() {
            var col = id.slice(0, id.indexOf("_"));
            var order = 'ascending';
            var html = '↑';
-            
+
            if (sort_indicator.html() == '↑') {
                order = 'descending'; html = '↓';
            }
@ -291,13 +291,13 @@ function layout() {
 $(function() {
 	// document is ready
    create_table_headers();
-    
+
    // Setup widgets
    setup_sorting();
    setup_count_bar();
    $('#search_box * #s').val('');
    $(window).resize(layout);
-    
+
    $($('#book_list * span#date_sort').parent()).click();

 });
--- a/src/calibre/linux.py
+++ b/src/calibre/linux.py
@ -48,12 +48,14 @@ entry_points = {
                             'any2lrf   = calibre.ebooks.lrf.any.convert_from:main',
                             'any2epub  = calibre.ebooks.epub.from_any:main',
                             'any2lit   = calibre.ebooks.lit.from_any:main',
+                             'any2mobi  = calibre.ebooks.mobi.from_any:main',
                             'lrf2lrs   = calibre.ebooks.lrf.lrfparser:main',
                             'lrs2lrf   = calibre.ebooks.lrf.lrs.convert_from:main',
                             'pdfreflow = calibre.ebooks.lrf.pdf.reflow:main',
                             'isbndb    = calibre.ebooks.metadata.isbndb:main',
                             'librarything = calibre.ebooks.metadata.library_thing:main',
                             'mobi2oeb  = calibre.ebooks.mobi.reader:main',
+                             'oeb2mobi  = calibre.ebooks.mobi.writer:main',
                             'lrf2html  = calibre.ebooks.lrf.html.convert_to:main',
                             'lit2oeb   = calibre.ebooks.lit.reader:main',
                             'oeb2lit   = calibre.ebooks.lit.writer:main',
--- a/src/calibre/manual/faq.rst
+++ b/src/calibre/manual/faq.rst
@ -102,7 +102,7 @@ Device Integration

 What devices does |app| support?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-At the moment |app| has full support for the SONY PRS 500/505/700 as well as the iPhone. In addition, using the :guilabel:`Save to disk` function you can use it with any ebook reader that exports itself as a USB disk.
+At the moment |app| has full support for the SONY PRS 500/505/700 and the Cybook Gen 3, as well as the iPhone. In addition, using the :guilabel:`Save to disk` function you can use it with any ebook reader that exports itself as a USB disk.

 I used |app| to transfer some books to my reader, and now the SONY software hangs every time I connect the reader?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--- a/src/calibre/parallel.py
+++ b/src/calibre/parallel.py
@ -286,7 +286,7 @@ def write(socket, msg, timeout=5):
 def read(socket, timeout=5):
    '''
    Read a message from `socket`. The message must have been sent with the :function:`write`
-    function. Raises a `RuntimeError` if the message is corrpted. Can return an
+    function. Raises a `RuntimeError` if the message is corrupted. Can return an
    empty string.
    '''
    if isworker:
@ -299,7 +299,12 @@ def read(socket, timeout=5):
            if not msg:
                break
            if length is None:
-                length, msg = int(msg[:12]), msg[12:]
+                try:
+                    length, msg = int(msg[:12]), msg[12:]
+                except ValueError:
+                    if DEBUG:
+                        print >>sys.__stdout__, 'read(%s):'%('worker' if isworker else 'overseer'), 'no length in', msg
+                    return ''
            buf.write(msg)
            if buf.tell() >= length:
                break
--- a/src/calibre/trac/donations/server.py
+++ b/src/calibre/trac/donations/server.py
@ -217,8 +217,7 @@ class Server(object):
                    pos = pos.replace(month = 1)
                else:
                    pos = pos.replace(month = pos.month + 1)
-
-        _months = list(months(self.earliest, self.latest))[:-1][:12]
+        _months = list(months(self.earliest, self.latest))[:-1][-12:]
        _months = [range_for_month(*m) for m in _months]
        _months = [self.get_slice(*m) for m in _months]
        x = [m.min for m in _months]
--- a/src/calibre/trac/plugins/download.py
+++ b/src/calibre/trac/plugins/download.py
@ -35,7 +35,7 @@ class Distribution(object):
        ('xdg-utils', '1.0.2', 'xdg-utils', 'xdg-utils', 'xdg-utils'),
        ('dbus-python', '0.82.2', 'dbus-python', 'python-dbus', 'dbus-python'),
        ('lxml', '2.0.5', 'lxml', 'python-lxml', 'python-lxml'),
-        ('BeautifulSoup', '3.0.5', 'beautifulsoup', 'python-beautifulsoup', 'python-beautifulsoup'),
+        ('BeautifulSoup', '3.0.5', 'beautifulsoup', 'python-beautifulsoup', 'python-BeautifulSoup'),
        ('help2man', '1.36.4', 'help2man', 'help2man', 'help2man'),
        ]
    
@ -205,23 +205,7 @@ select Install.</li>
 <ol>
 <li>Before trying to use the command line tools, you must run the app at least once. This will ask you for you password and then setup the symbolic links for the command line tools.</li>
 <li>The app cannot be run from within the dmg. You must drag it to a folder on your filesystem (The Desktop, Applications, wherever).</li> 
-<li>In order for the conversion of RTF to LRF to support WMF images (common in older RTF files) you need to install ImageMagick.</li>
-<li>In order for localization of the user interface in your language you must create the file <code>~/.MacOSX/environment.plist</code> as shown below:
-<pre class="wiki">
-&lt;?xml version="1.0" encoding="UTF-8"?&gt;
-&lt;!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"&gt;
-&lt;plist version="1.0"&gt;
-&lt;dict&gt;
-        &lt;key&gt;LANG&lt;/key&gt;
-        &lt;string&gt;de_DE&lt;/string&gt;
-&lt;/dict&gt;
-&lt;/plist&gt;
-</pre>
-The example above is for the German language. Substitute the language code you need. 
-After creating the file you need to log out and log in again for the changes to become
-active. Of course, this will only work if calibre has been translated for your language.
-If not, head over to <a href="http://calibre.kovidgoyal.net/wiki/Development#Translations">Translations</a> to see how you can translate it.
-</li>
+<li>In order for localization of the user interface in your language, select your language in the configuration dialog (by clicking the hammer icon next to the search bar) and select your language.</li>
 </ol>
 '''))
        return 'binary.html', data, None
--- a/src/calibre/translations/bg.po
+++ b/src/calibre/translations/bg.po
--- a/src/calibre/translations/ca.po
+++ b/src/calibre/translations/ca.po
--- a/src/calibre/translations/cs.po
+++ b/src/calibre/translations/cs.po
--- a/src/calibre/translations/de.po
+++ b/src/calibre/translations/de.po
--- a/src/calibre/translations/el.po
+++ b/src/calibre/translations/el.po
--- a/src/calibre/translations/es.po
+++ b/src/calibre/translations/es.po
--- a/src/calibre/translations/fr.po
+++ b/src/calibre/translations/fr.po
--- a/src/calibre/translations/gl.po
+++ b/src/calibre/translations/gl.po
--- a/src/calibre/translations/it.po
+++ b/src/calibre/translations/it.po
--- a/src/calibre/translations/nb.po
+++ b/src/calibre/translations/nb.po
--- a/src/calibre/translations/nds.po
+++ b/src/calibre/translations/nds.po
--- a/src/calibre/translations/nl.po
+++ b/src/calibre/translations/nl.po
--- a/src/calibre/translations/pl.po
+++ b/src/calibre/translations/pl.po
--- a/src/calibre/translations/pt.po
+++ b/src/calibre/translations/pt.po
--- a/src/calibre/translations/ro.po
+++ b/src/calibre/translations/ro.po
--- a/src/calibre/translations/ru.po
+++ b/src/calibre/translations/ru.po
--- a/src/calibre/translations/sk.po
+++ b/src/calibre/translations/sk.po
--- a/src/calibre/translations/sl.po
+++ b/src/calibre/translations/sl.po
--- a/src/calibre/translations/sv.po
+++ b/src/calibre/translations/sv.po
--- a/src/calibre/translations/te.po
+++ b/src/calibre/translations/te.po
--- a/src/calibre/translations/zh.po
+++ b/src/calibre/translations/zh.po
--- a/src/calibre/utils/pyparsing.py
+++ b/src/calibre/utils/pyparsing.py
--- a/src/calibre/utils/zipfile.py
+++ b/src/calibre/utils/zipfile.py
@ -338,7 +338,7 @@ class ZipInfo (object):
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
-            except UnicodeEncodeError:
+            except:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits
--- a/src/calibre/web/feeds/news.py
+++ b/src/calibre/web/feeds/news.py
@ -765,6 +765,8 @@ class BasicNewsRecipe(object, LoggingInterface):
            self.log_debug(traceback.format_exc())
        if cu is not None:
            ext = cu.rpartition('.')[-1]
+            if '?' in ext:
+                ext = ''
            ext = ext.lower() if ext else 'jpg'
            self.report_progress(1, _('Downloading cover from %s')%cu)
            cpath = os.path.join(self.output_dir, 'cover.'+ext)
--- a/src/calibre/web/feeds/recipes/init.py
+++ b/src/calibre/web/feeds/recipes/init.py
@ -21,7 +21,8 @@ recipe_modules = ['recipe_' + r for r in (
           'linux_magazine', 'telegraph_uk', 'utne', 'sciencedaily', 'forbes',
           'time_magazine', 'endgadget', 'fudzilla', 'nspm_int', 'nspm', 'pescanik',
           'spiegel_int', 'themarketticker', 'tomshardware', 'xkcd', 'ftd', 'zdnet',
-           'joelonsoftware',
+           'joelonsoftware', 'telepolis', 'common_dreams', 'nin', 'tomshardware_de',
+           
          )]

 import re, imp, inspect, time, os
--- a/src/calibre/web/feeds/recipes/recipe_chr_mon.py
+++ b/src/calibre/web/feeds/recipes/recipe_chr_mon.py
@ -42,3 +42,9 @@ class ChristianScienceMonitor(BasicNewsRecipe):
                    feeds[-1][1].append(art)
        return feeds
      
+    def postprocess_html(self, soup, first_fetch):
+        # Return only the document's <html> element instead of the whole
+        # soup. `first_fetch` is accepted but not used by this override.
+        html = soup.find('html')
+        if html is None:
+            # No <html> element was found: hand the soup back unchanged.
+            return soup
+        # Detach the element from the surrounding tree before returning it.
+        html.extract()
+        return html
--- a/src/calibre/web/feeds/recipes/recipe_common_dreams.py
+++ b/src/calibre/web/feeds/recipes/recipe_common_dreams.py
@ -0,0 +1,16 @@
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class CommonDreams(BasicNewsRecipe):
+    # Recipe for commondreams.org; it only declares metadata and feeds and
+    # relies on BasicNewsRecipe for everything else.
+    title          = u'Common Dreams'
+    description    = u'Progressive news and views'
+    __author__     = u'XanthanGum'
+    # NOTE(review): presumably the maximum article age in days -- confirm
+    # against BasicNewsRecipe.
+    oldest_article = 7
+    max_articles_per_feed = 100
+    
+    # (feed title, feed URL) pairs: headlines, views and newswire.
+    feeds          = [
+                       (u'Common Dreams Headlines', 
+                       u'http://www.commondreams.org/feed/headlines_rss'), 
+                       (u'Common Dreams Views', u'http://www.commondreams.org/feed/views_rss'), 
+                       (u'Common Dreams Newswire', u'http://www.commondreams.org/feed/newswire_rss')
+                       ]
--- a/src/calibre/web/feeds/recipes/recipe_economist.py
+++ b/src/calibre/web/feeds/recipes/recipe_economist.py
@ -49,8 +49,10 @@ class Economist(BasicNewsRecipe):
                if not index_started:
                    continue
                text = string.capwords(text)
-                feeds[text] = []
-                ans.append(text)
+                if text not in feeds.keys():
+                    feeds[text] = []
+                if text not in ans:
+                    ans.append(text)
                key = text
                continue
            if key is None:
--- a/src/calibre/web/feeds/recipes/recipe_nin.py
+++ b/src/calibre/web/feeds/recipes/recipe_nin.py
@ -0,0 +1,55 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
+'''
+nin.co.yu
+'''
+
+import re, urllib
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Nin(BasicNewsRecipe):
+    title                  = 'NIN online'
+    __author__             = 'Darko Miletic'
+    description            = 'Nedeljne informativne novine'
+    no_stylesheets         = True
+    oldest_article         = 15
+    simultaneous_downloads = 1
+    delay                  = 1
+    encoding               = 'utf8'
+    needs_subscription     = True
+    PREFIX                 = 'http://www.nin.co.yu'
+    INDEX                  = PREFIX + '/?change_lang=ls'
+    LOGIN                  = PREFIX + '/?logout=true'
+    html2lrf_options = [
+                          '--comment'       , description
+                        , '--category'      , 'news, politics, Serbia'
+                        , '--publisher'     , 'NIN'
+                        ]
+                          
+    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
+    
+    def get_browser(self):
+        br = BasicNewsRecipe.get_browser()
+        br.open(self.INDEX)
+        if self.username is not None and self.password is not None:
+            data = urllib.urlencode({ 'login_name':self.username
+                                     ,'login_password':self.password
+                                     ,'imageField.x':'32'
+                                     ,'imageField.y':'15'                                 
+                                   })
+            br.open(self.LOGIN,data)
+        return br
+
+    keep_only_tags    =[dict(name='td', attrs={'width':'520'})]
+    remove_tags_after =dict(name='html')
+    feeds             =[(u'NIN', u'http://www.nin.co.yu/misc/rss.php?feed=RSS2.0')]
+    
+    def get_cover_url(self):
+        cover_url = None
+        soup = self.index_to_soup(self.INDEX)
+        link_item = soup.find('img',attrs={'width':'100','height':'137','border':'0'})
+        if link_item:
+           cover_url = self.PREFIX + link_item['src']
+        return cover_url
--- a/src/calibre/web/feeds/recipes/recipe_telepolis.py
+++ b/src/calibre/web/feeds/recipes/recipe_telepolis.py
@ -0,0 +1,34 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
+'''
+www.heise.de/tp
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Telepolis(BasicNewsRecipe):
+    # Recipe for Telepolis (www.heise.de/tp); purely declarative, all
+    # fetching behaviour comes from BasicNewsRecipe.
+    title                 = 'Telepolis'
+    __author__            = 'Darko Miletic'
+    description           = 'News from Germany in German'
+    oldest_article        = 2
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    use_embedded_content  = False
+    encoding              = 'utf-8'
+    
+    html2lrf_options = [  '--comment'       , description
+                        , '--category'      , 'blog,news'
+                       ]
+
+    # Tag selectors for the two table classes that hold article content.
+    keep_only_tags = [
+                       dict(name='table', attrs={'class':'inhalt-table'})
+                      ,dict(name='table', attrs={'class':'blogtable'   })
+                     ]
+    # Tag selectors for image tables and the external-link icon.
+    remove_tags = [
+                     dict(name='table', attrs={'class':'img'    })
+                    ,dict(name='img'  , attrs={'src':'/tp/r4/icons/inline/extlink.gif'})
+                  ]
+
+    # Single (feed title, feed URL) pair.
+    feeds       = [(u'Telepolis Newsfeed', u'http://www.heise.de/tp/news.rdf')]
--- a/src/calibre/web/feeds/recipes/recipe_times_online.py
+++ b/src/calibre/web/feeds/recipes/recipe_times_online.py
@ -33,6 +33,7 @@ class TimesOnline(BasicNewsRecipe):
                        ('Sports News', 'http://www.timesonline.co.uk/tol/feeds/rss/sport.xml'),
                        ('Film News', 'http://www.timesonline.co.uk/tol/feeds/rss/film.xml'),
                        ('Tech news', 'http://www.timesonline.co.uk/tol/feeds/rss/tech.xml'),
+                        ('Literary Supplement', 'http://www.timesonline.co.uk/tol/feeds/rss/thetls.xml'),
                     ]

    def print_version(self, url):
--- a/src/calibre/web/feeds/recipes/recipe_tomshardware.py
+++ b/src/calibre/web/feeds/recipes/recipe_tomshardware.py
@ -6,7 +6,6 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
 tomshardware.com
 '''

-from calibre.ebooks.BeautifulSoup import BeautifulSoup
 from calibre.web.feeds.recipes import BasicNewsRecipe

 class Tomshardware(BasicNewsRecipe):
@ -50,7 +49,7 @@ class Tomshardware(BasicNewsRecipe):
        rmain, rsep, article_id = main.rpartition(',')
        tmain, tsep, trest = rmain.rpartition('/reviews/')
        if tsep:
-           return 'http://www.tomshardware.com/review_print.php?p1=' + article_id
+            return 'http://www.tomshardware.com/review_print.php?p1=' + article_id
        return 'http://www.tomshardware.com/news_print.php?p1=' + article_id        

    def preprocess_html(self, soup):
--- a/src/calibre/web/feeds/recipes/recipe_tomshardware_de.py
+++ b/src/calibre/web/feeds/recipes/recipe_tomshardware_de.py
@ -0,0 +1,54 @@
+__license__   = 'GPL v3'
+__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
+
+'''
+Fetch tomshardware.
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+
+class TomsHardwareDe(BasicNewsRecipe):
+    
+    title = 'Tom\'s Hardware German'
+    description = 'Computer news in german'
+    __author__ = 'Oliver Niesner'
+    use_embedded_content   = False
+    timefmt = ' [%d %b %Y]'
+    max_articles_per_feed = 50
+    no_stylesheets = True
+    encoding = 'utf-8'
+
+    #preprocess_regexps = \
+#	[(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
+#		[
+#		(r'<84>', lambda match: ''),
+#		(r'<93>', lambda match: ''),
+#		]
+#	]
+    
+    remove_tags = [dict(id='outside-advert'),
+		   dict(id='advertRightWhite'),
+		   dict(id='header-advert'),
+		   dict(id='header-banner'),
+		   dict(id='header-menu'),
+		   dict(id='header-top'),
+		   dict(id='header-tools'),
+		   dict(id='nbComment'),
+		   dict(id='internalSidebar'),
+		   dict(id='header-news-infos'),
+		   dict(id='breadcrumbs'),
+		   dict(id=''),
+		   dict(name='div', attrs={'class':'pyjama'}),
+		   dict(name='href', attrs={'class':'comment'}),
+		   dict(name='div', attrs={'class':'greyBoxR clearfix'}),
+		   dict(name='div', attrs={'class':'greyBoxL clearfix'}),
+		   dict(name='div', attrs={'class':'greyBox clearfix'}),
+		   dict(id='')]
+    #remove_tags_before = [dict(id='header-news-title')]
+    remove_tags_after = [dict(name='div', attrs={'class':'news-elm'})]
+    #remove_tags_after = [dict(name='div', attrs={'class':'intelliTXT'})]
+    
+    feeds =  [ ('tomshardware', 'http://www.tomshardware.com/de/feeds/rss2/tom-s-hardware-de,12-1.xml') ] 
+    
--- a/src/pyPdf/pdf.py
+++ b/src/pyPdf/pdf.py
@ -55,7 +55,7 @@ from utils import readNonWhitespace, readUntilWhitespace, ConvertFunctionsToVirt
 # This class supports writing PDF files out, given pages produced by another
 # class (typically {@link #PdfFileReader PdfFileReader}).
 class PdfFileWriter(object):
-    def __init__(self):
+    def __init__(self,title=u"Unknown",author=u"Unknown"):
        self._header = "%PDF-1.3"
        self._objects = []  # array of indirect objects

@ -71,7 +71,9 @@ class PdfFileWriter(object):
        # info object
        info = DictionaryObject()
        info.update({
-                NameObject("/Producer"): createStringObject(u"Python PDF Library - http://pybrary.net/pyPdf/")
+                NameObject("/Producer"): createStringObject(u"Python PDF Library - http://pybrary.net/pyPdf/"),
+                NameObject("/Author"): createStringObject(author),
+                NameObject("/Title"): createStringObject(title),
                })
        self._info = self._addObject(info)