Merge from trunk

2025-07-09 03:04:10 -04:00 · 2011-02-25 09:24:36 +00:00 · 2011-02-25 09:24:36 +00:00 · 0ace21eb7e
commit 0ace21eb7e
parent 2b24a487fa c626b60a29
29 changed files with 864 additions and 269 deletions
--- a/resources/recipes/espn.recipe
+++ b/resources/recipes/espn.recipe
@ -41,7 +41,8 @@ class ESPN(BasicNewsRecipe):
                '''


-    feeds = [('Top Headlines', 'http://sports.espn.go.com/espn/rss/news'),
+    feeds = [
+            ('Top Headlines', 'http://sports.espn.go.com/espn/rss/news'),
             'http://sports.espn.go.com/espn/rss/nfl/news',
             'http://sports.espn.go.com/espn/rss/nba/news',
             'http://sports.espn.go.com/espn/rss/mlb/news',
@ -107,10 +108,11 @@ class ESPN(BasicNewsRecipe):
        if match and 'soccernet'  not in url and 'bassmaster' not in url:
            return 'http://sports.espn.go.com/espn/print?'+match.group(1)+'&type=story'
        else:
-            if match and 'soccernet' in url:
-                splitlist = url.split("&", 5)
-                newurl =  'http://soccernet.espn.go.com/print?'+match.group(1)+'&type=story' + '&' + str(splitlist[2] )
-                return newurl
+            if 'soccernet' in url:
+                match = re.search(r'/id/(\d+)/', url)
+                if match:
+                    return \
+                        'http://soccernet.espn.go.com/print?id=%s&type=story' % match.group(1)
            #else:
            #    if 'bassmaster' in url:
            #        return url
--- a/resources/recipes/flickr.recipe
+++ b/resources/recipes/flickr.recipe
@ -0,0 +1,48 @@
+__license__   = 'GPL v3'
+__author__    = 'Ricardo Jurado'
+__copyright__ = 'Ricardo Jurado'
+__version__     = 'v0.1'
+__date__        = '22 February 2011'
+
+'''
+http://blog.flickr.net/
+'''
+
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1297031650(BasicNewsRecipe):
+
+    title          = u'Flickr Blog'
+    masthead_url   = 'http://flickrtheblog.files.wordpress.com/2008/11/flickblog_logo.gif'
+    cover_url      = 'http://flickrtheblog.files.wordpress.com/2008/11/flickblog_logo.gif'
+    publisher      = u''
+
+    __author__            = 'Ricardo Jurado'
+    description           = 'Pictures Blog'
+    category              = 'Blog,Pictures'
+
+    oldest_article = 120
+    max_articles_per_feed = 10
+    no_stylesheets = True
+    use_embedded_content = False
+    encoding = 'UTF-8'
+    remove_javascript = True
+    language = 'en'
+
+    extra_css             = """
+                               p{text-align: justify; font-size: 100%}
+                               body{ text-align: left; font-size:100% }
+                               h2{font-family: sans-serif; font-size:130%; font-weight:bold; text-align: justify; }
+                               .published{font-family:Arial,Helvetica,sans-serif; font-size:80%; }
+                               .posted{font-family:Arial,Helvetica,sans-serif; font-size:80%; }
+                                 """
+
+    keep_only_tags = [
+                 dict(name='div', attrs={'class':'entry'})
+                 ]
+
+    feeds          = [
+                     (u'BLOG', u'http://feeds.feedburner.com/Flickrblog'),
+                     #(u'BLOG', u'http://blog.flickr.net/es/feed/atom/')
+                     ]
--- a/resources/recipes/flickr_es.recipe
+++ b/resources/recipes/flickr_es.recipe
@ -0,0 +1,47 @@
+__license__   = 'GPL v3'
+__author__    = 'Ricardo Jurado'
+__copyright__ = 'Ricardo Jurado'
+__version__     = 'v0.1'
+__date__        = '22 February 2011'
+
+'''
+http://blog.flickr.net/
+'''
+
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1297031650(BasicNewsRecipe):
+
+    title          = u'Flickr Blog'
+    masthead_url   = 'http://flickrtheblog.files.wordpress.com/2008/11/flickblog_logo.gif'
+    cover_url      = 'http://flickrtheblog.files.wordpress.com/2008/11/flickblog_logo.gif'
+    publisher      = u''
+
+    __author__            = 'Ricardo Jurado'
+    description           = 'Pictures Blog'
+    category              = 'Blog,Pictures'
+
+    oldest_article = 120
+    max_articles_per_feed = 10
+    no_stylesheets = True
+    use_embedded_content = False
+    encoding = 'UTF-8'
+    remove_javascript = True
+    language = 'es'
+
+    extra_css             = """
+                               p{text-align: justify; font-size: 100%}
+                               body{ text-align: left; font-size:100% }
+                               h2{font-family: sans-serif; font-size:130%; font-weight:bold; text-align: justify; }
+                               .published{font-family:Arial,Helvetica,sans-serif; font-size:80%; }
+                               .posted{font-family:Arial,Helvetica,sans-serif; font-size:80%; }
+                                 """
+
+    keep_only_tags = [
+                 dict(name='div', attrs={'class':'entry'})
+                 ]
+
+    feeds          = [
+                     (u'BLOG', u'http://blog.flickr.net/es/feed/atom/')
+                     ]
--- a/resources/recipes/gizmodo.recipe
+++ b/resources/recipes/gizmodo.recipe
@ -17,10 +17,9 @@ class Gizmodo(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets        = True
    encoding              = 'utf-8'
-    use_embedded_content  = False
+    use_embedded_content  = True
    language              = 'en'
    masthead_url          = 'http://cache.gawkerassets.com/assets/gizmodo.com/img/logo.png'
-    extra_css             = ' body{font-family: "Lucida Grande",Helvetica,Arial,sans-serif} img{margin-bottom: 1em} '

    conversion_options = {
                          'comment'   : description
@ -29,13 +28,12 @@ class Gizmodo(BasicNewsRecipe):
                        , 'language'  : language
                        }

-    remove_attributes  = ['width','height']
-    keep_only_tags     = [dict(attrs={'class':'content permalink'})]
-    remove_tags_before = dict(name='h1')
-    remove_tags        = [dict(attrs={'class':'contactinfo'})]
-    remove_tags_after  = dict(attrs={'class':'contactinfo'})
+    feeds = [(u'Articles', u'http://feeds.gawker.com/gizmodo/vip?format=xml')]
+
+    remove_tags = [
+            {'class': 'feedflare'},
+    ]

-    feeds = [(u'Articles', u'http://feeds.gawker.com/gizmodo/full')]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)
--- a/resources/recipes/lifehacker.recipe
+++ b/resources/recipes/lifehacker.recipe
@ -16,15 +16,9 @@ class Lifehacker(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets        = True
    encoding              = 'utf-8'
-    use_embedded_content  = False
+    use_embedded_content  = True
    language              = 'en'
    masthead_url          = 'http://cache.gawkerassets.com/assets/lifehacker.com/img/logo.png'
-    extra_css             = '''
-	body{font-family: "Lucida Grande",Helvetica,Arial,sans-serif}
-	img{margin-bottom: 1em}
-	h1{font-family :Arial,Helvetica,sans-serif; font-size:large}
-	h2{font-family :Arial,Helvetica,sans-serif; font-size:x-small}
-	              '''
    conversion_options = {
                          'comment'   : description
                        , 'tags'      : category
@ -32,20 +26,12 @@ class Lifehacker(BasicNewsRecipe):
                        , 'language'  : language
                        }

-    remove_attributes  = ['width', 'height', 'style']
-    remove_tags_before = dict(name='h1')
-    keep_only_tags = [dict(id='container')]
-    remove_tags_after  = dict(attrs={'class':'post-body'})
    remove_tags = [
-            dict(id="sharemenu"),
-            {'class': 'related'},
+            {'class': 'feedflare'},
    ]

-    feeds = [(u'Articles', u'http://feeds.gawker.com/lifehacker/full')]
+    feeds = [(u'Articles', u'http://feeds.gawker.com/lifehacker/vip?format=xml')]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)

-    def print_version(self, url):
-        return url.replace('#!', '?_escaped_fragment_=')
-
--- a/resources/templates/html.css
+++ b/resources/templates/html.css
@ -391,11 +391,6 @@ noembed, param, link {
   display: none;
 }

-/* Page breaks at body tags, to help out with LIT-generation */
-body {
-  page-break-before: always;
-}
-
 /* Explicit line-breaks are blocks, sure... */
 br {
  display: block;
--- a/setup/installer/linux/freeze2.py
+++ b/setup/installer/linux/freeze2.py
@ -7,7 +7,7 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

 import sys, os, shutil, platform, subprocess, stat, py_compile, glob, \
-        textwrap, tarfile
+        textwrap, tarfile, re

 from setup import Command, modules, basenames, functions, __version__, \
    __appname__
@ -19,7 +19,7 @@ SITE_PACKAGES = ['IPython', 'PIL', 'dateutil', 'dns', 'PyQt4', 'mechanize',

 QTDIR          = '/usr/lib/qt4'
 QTDLLS         = ('QtCore', 'QtGui', 'QtNetwork', 'QtSvg', 'QtXml', 'QtWebKit', 'QtDBus')
-
+MAGICK_PREFIX = '/usr'
 binary_includes = [
                '/usr/bin/pdftohtml',
                '/usr/lib/libwmflite-0.2.so.7',
@ -41,8 +41,8 @@ binary_includes = [
                '/usr/lib/libgthread-2.0.so.0',
                '/usr/lib/libpng14.so.14',
                '/usr/lib/libexslt.so.0',
-                '/usr/lib/libMagickWand.so.4',
-                '/usr/lib/libMagickCore.so.4',
+                MAGICK_PREFIX+'/lib/libMagickWand.so.4',
+                MAGICK_PREFIX+'/lib/libMagickCore.so.4',
                '/usr/lib/libgcrypt.so.11',
                '/usr/lib/libgpg-error.so.0',
                '/usr/lib/libphonon.so.4',
@ -116,9 +116,25 @@ class LinuxFreeze(Command):
            if x not in ('designer', 'sqldrivers', 'codecs'):
                shutil.copytree(y, self.j(dest, x))

-        im = glob.glob('/usr/lib/ImageMagick-*')[0]
-        dest = self.j(self.lib_dir, 'ImageMagick')
+        im = glob.glob(MAGICK_PREFIX + '/lib/ImageMagick-*')[-1]
+        self.magick_base = os.path.basename(im)
+        dest = self.j(self.lib_dir, self.magick_base)
        shutil.copytree(im, dest, ignore=shutil.ignore_patterns('*.a'))
+        from calibre import walk
+        for x in walk(dest):
+            if x.endswith('.la'):
+                raw = open(x).read()
+                raw = re.sub('libdir=.*', '', raw)
+                open(x, 'wb').write(raw)
+
+        dest = self.j(dest, 'config')
+        src = self.j(MAGICK_PREFIX, 'share', self.magick_base, 'config')
+        for x in glob.glob(src+'/*'):
+            d = self.j(dest, os.path.basename(x))
+            if os.path.isdir(x):
+                shutil.copytree(x, d)
+            else:
+                shutil.copyfile(x, d)

    def compile_mount_helper(self):
        self.info('Compiling mount helper...')
@ -278,9 +294,10 @@ class LinuxFreeze(Command):
                base=`dirname $path`
                lib=$base/lib
                export LD_LIBRARY_PATH=$lib:$LD_LIBRARY_PATH
-                export MAGICK_CONFIGURE_PATH=$lib/ImageMagick/config
-                export MAGICK_CODER_MODULE_PATH=$lib/ImageMagick/modules-Q16/coders
-                export MAGICK_CODER_FILTER_PATH=$lib/ImageMagick/modules-Q16/filters
+                export MAGICK_HOME=$base
+                export MAGICK_CONFIGURE_PATH=$lib/{1}/config
+                export MAGICK_CODER_MODULE_PATH=$lib/{1}/modules-Q16/coders
+                export MAGICK_CODER_FILTER_PATH=$lib/{1}/modules-Q16/filters
                $base/bin/{0} "$@"
                ''')

@ -292,7 +309,7 @@ class LinuxFreeze(Command):
                exe = self.j(self.bin_dir, bname)
                sh = self.j(self.base, bname)
                with open(sh, 'wb') as f:
-                    f.write(launcher.format(bname))
+                    f.write(launcher.format(bname, self.magick_base))
                os.chmod(sh,
                    stat.S_IREAD|stat.S_IEXEC|stat.S_IWRITE|stat.S_IRGRP|stat.S_IXGRP|stat.S_IROTH|stat.S_IXOTH)

--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -571,7 +571,7 @@ from calibre.devices.binatone.driver import README
 from calibre.devices.hanvon.driver import N516, EB511, ALEX, AZBOOKA, THEBOOK
 from calibre.devices.edge.driver import EDGE
 from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS, \
-        SOVOS, PICO, SUNSTECH_EB700, ARCHOS7O, STASH
+        SOVOS, PICO, SUNSTECH_EB700, ARCHOS7O, STASH, WEXLER
 from calibre.devices.sne.driver import SNE
 from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, \
        GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, LUMIREAD, ALURATEK_COLOR, \
@ -679,7 +679,7 @@ plugins += [
    ELONEX,
    TECLAST_K3,
    NEWSMY,
-    PICO, SUNSTECH_EB700, ARCHOS7O, SOVOS, STASH,
+    PICO, SUNSTECH_EB700, ARCHOS7O, SOVOS, STASH, WEXLER,
    IPAPYRUS,
    EDGE,
    SNE,
--- a/src/calibre/devices/kobo/driver.py
+++ b/src/calibre/devices/kobo/driver.py
@ -78,9 +78,13 @@ class KOBO(USBMS):
                 else self._main_prefix

        # Determine the firmware version
-        f = open(self.normalize_path(self._main_prefix + '.kobo/version'), 'r')
-        self.fwversion = f.readline().split(',')[2]
-        f.close()
+        try:
+            with open(self.normalize_path(self._main_prefix + '.kobo/version'),
+                    'rb') as f:
+                self.fwversion = f.readline().split(',')[2]
+        except:
+            self.fwversion = 'unknown'
+
        if self.fwversion != '1.0' and self.fwversion != '1.4':
            self.has_kepubs = True
        debug_print('Version of firmware: ', self.fwversion, 'Has kepubs:', self.has_kepubs)
@ -161,7 +165,7 @@ class KOBO(USBMS):
            return changed

        connection = sqlite.connect(self.normalize_path(self._main_prefix + '.kobo/KoboReader.sqlite'))
-        
+
        # return bytestrings if the content cannot be decoded as unicode
        connection.text_factory = lambda x: unicode(x, "utf-8", "ignore")

@ -234,7 +238,7 @@ class KOBO(USBMS):

        debug_print('delete_via_sql: ContentID: ', ContentID, 'ContentType: ', ContentType)
        connection = sqlite.connect(self.normalize_path(self._main_prefix + '.kobo/KoboReader.sqlite'))
-        
+
        # return bytestrings if the content cannot be decoded as unicode
        connection.text_factory = lambda x: unicode(x, "utf-8", "ignore")

@ -511,7 +515,7 @@ class KOBO(USBMS):
        # the last book from the collection the list of books is empty
        # and the removal of the last book would not occur
        connection = sqlite.connect(self.normalize_path(self._main_prefix + '.kobo/KoboReader.sqlite'))
-        
+
        # return bytestrings if the content cannot be decoded as unicode
        connection.text_factory = lambda x: unicode(x, "utf-8", "ignore")

--- a/src/calibre/devices/misc.py
+++ b/src/calibre/devices/misc.py
@ -9,6 +9,8 @@ __docformat__ = 'restructuredtext en'
 import os

 from calibre.devices.usbms.driver import USBMS
+from calibre import prints
+prints

 class PALMPRE(USBMS):

@ -268,5 +270,36 @@ class NEXTBOOK(USBMS):
    EBOOK_DIR_MAIN = ''

    VENDOR_NAME = 'NEXT2'
-    WINDOWS_MAIN_MEM = '1.0.14'
+    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = '1.0.14'
+    SUPPORTS_SUB_DIRS = True
+
+    '''
+    def upload_cover(self, path, filename, metadata, filepath):
+        if metadata.thumbnail and metadata.thumbnail[-1]:
+            path = path.replace('/', os.sep)
+            is_main = path.startswith(self._main_prefix)
+            prefix = None
+            if is_main:
+                prefix = self._main_prefix
+            else:
+                if self._card_a_prefix and \
+                    path.startswith(self._card_a_prefix):
+                    prefix = self._card_a_prefix
+                elif self._card_b_prefix and \
+                        path.startswith(self._card_b_prefix):
+                    prefix = self._card_b_prefix
+            if prefix is None:
+                prints('WARNING: Failed to find prefix for:', filepath)
+                return
+            thumbnail_dir = os.path.join(prefix, '.Cover')
+
+            relpath = os.path.relpath(filepath, prefix)
+            if relpath.startswith('..\\'):
+                relpath = relpath[3:]
+            thumbnail_dir = os.path.join(thumbnail_dir, relpath)
+            if not os.path.exists(thumbnail_dir):
+                os.makedirs(thumbnail_dir)
+            with open(os.path.join(thumbnail_dir, filename+'.jpg'), 'wb') as f:
+                f.write(metadata.thumbnail[-1])
+    '''

--- a/src/calibre/devices/teclast/driver.py
+++ b/src/calibre/devices/teclast/driver.py
@ -104,3 +104,14 @@ class STASH(TECLAST_K3):
    VENDOR_NAME = 'STASH'
    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'W950'

+class WEXLER(TECLAST_K3):
+
+    name = 'Wexler device interface'
+    gui_name = 'Wexler'
+    description    = _('Communicate with the Wexler reader.')
+
+    FORMATS = ['epub', 'fb2', 'pdf', 'txt']
+
+    VENDOR_NAME = 'WEXLER'
+    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'T7001'
+
--- a/src/calibre/ebooks/comic/input.py
+++ b/src/calibre/ebooks/comic/input.py
@ -304,6 +304,10 @@ class ComicInput(InputFormatPlugin):
            help=_('Specify the image size as widthxheight pixels. Normally,'
                ' an image size is automatically calculated from the output '
                'profile, this option overrides it.')),
+        OptionRecommendation(name='dont_add_comic_pages_to_toc', recommended_value=False,
+            help=_('When converting a CBC do not add links to each page to'
+                ' the TOC. Note this only applies if the TOC has more than one'
+                ' section')),
        ])

    recommendations = set([
@ -449,10 +453,11 @@ class ComicInput(InputFormatPlugin):
                wrappers = comic[2]
                stoc = toc.add_item(href(wrappers[0]),
                        None, comic[0], play_order=po)
-                for i, x in enumerate(wrappers):
-                    stoc.add_item(href(x), None,
-                            _('Page')+' %d'%(i+1), play_order=po)
-                    po += 1
+                if not opts.dont_add_comic_pages_to_toc:
+                    for i, x in enumerate(wrappers):
+                        stoc.add_item(href(x), None,
+                                _('Page')+' %d'%(i+1), play_order=po)
+                        po += 1
        opf.set_toc(toc)
        m, n = open('metadata.opf', 'wb'), open('toc.ncx', 'wb')
        opf.render(m, n, 'toc.ncx')
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@ -984,7 +984,9 @@ OptionRecommendation(name='sr3_replace',
        flattener = CSSFlattener(fbase=fbase, fkey=fkey,
                lineh=line_height,
                untable=self.output_plugin.file_type in ('mobi','lit'),
-                unfloat=self.output_plugin.file_type in ('mobi', 'lit'))
+                unfloat=self.output_plugin.file_type in ('mobi', 'lit'),
+                page_break_on_body=self.output_plugin.file_type in ('mobi',
+                    'lit'))
        flattener(self.oeb, self.opts)
        self.opts.insert_blank_line = oibl
        self.opts.remove_paragraph_spacing = orps
--- a/src/calibre/ebooks/lit/output.py
+++ b/src/calibre/ebooks/lit/output.py
@ -22,7 +22,8 @@ class LITOutput(OutputFormatPlugin):
        from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder
        from calibre.ebooks.lit.writer import LitWriter
        from calibre.ebooks.oeb.transforms.split import Split
-        split = Split(split_on_page_breaks=True, max_flow_size=0)
+        split = Split(split_on_page_breaks=True, max_flow_size=0,
+                remove_css_pagebreaks=False)
        split(self.oeb, self.opts)


--- a/src/calibre/ebooks/mobi/writer.py
+++ b/src/calibre/ebooks/mobi/writer.py
@ -2256,22 +2256,22 @@ class MobiWriter(object):
        return sectionIndices, sectionParents

    def _generate_section_article_indices(self, i, section, entries, sectionIndices, sectionParents):
-                sectionArticles = list(section.iter())[1:]
-                # Iterate over the section's articles
+        sectionArticles = list(section.iter())[1:]
+        # Iterate over the section's articles

-                for (j, article) in enumerate(sectionArticles):
-                    # Recompute offset and length for each article
-                    offset, length = self._compute_offset_length(i, article, entries)
-                    if self.opts.verbose > 2 :
-                        self._oeb.logger.info( "article %02d: offset = 0x%06X length = 0x%06X" % (j, offset, length) )
+        for (j, article) in enumerate(sectionArticles):
+            # Recompute offset and length for each article
+            offset, length = self._compute_offset_length(i, article, entries)
+            if self.opts.verbose > 2 :
+                self._oeb.logger.info( "article %02d: offset = 0x%06X length = 0x%06X" % (j, offset, length) )

-                    ctoc_map_index = i + j + 1
+            ctoc_map_index = i + j + 1

-                    #hasAuthor = self._ctoc_map[ctoc_map_index].get('authorOffset')
-                    #hasDescription = self._ctoc_map[ctoc_map_index].get('descriptionOffset')
-                    mySectionParent = sectionParents[sectionIndices[i-1]]
-                    myNewArticle = MobiArticle(mySectionParent, offset, length, ctoc_map_index )
-                    mySectionParent.addArticle( myNewArticle )
+            #hasAuthor = self._ctoc_map[ctoc_map_index].get('authorOffset')
+            #hasDescription = self._ctoc_map[ctoc_map_index].get('descriptionOffset')
+            mySectionParent = sectionParents[sectionIndices[i-1]]
+            myNewArticle = MobiArticle(mySectionParent, offset, length, ctoc_map_index )
+            mySectionParent.addArticle( myNewArticle )

    def _add_book_chapters(self, myDoc, indxt, indices):
        chapterCount = myDoc.documentStructure.chapterCount()
--- a/src/calibre/ebooks/oeb/output.py
+++ b/src/calibre/ebooks/oeb/output.py
@ -32,6 +32,12 @@ class OEBOutput(OutputFormatPlugin):
            for key in (OPF_MIME, NCX_MIME, PAGE_MAP_MIME):
                href, root = results.pop(key, [None, None])
                if root is not None:
+                    if key == OPF_MIME:
+                        try:
+                            self.workaround_nook_cover_bug(root)
+                        except:
+                            self.log.exception('Something went wrong while trying to'
+                                    ' workaround Nook cover bug, ignoring')
                    raw = etree.tostring(root, pretty_print=True,
                            encoding='utf-8', xml_declaration=True)
                    if key == OPF_MIME:
@ -49,3 +55,24 @@ class OEBOutput(OutputFormatPlugin):
                with open(path, 'wb') as f:
                    f.write(str(item))
                item.unload_data_from_memory(memory=path)
+
+    def workaround_nook_cover_bug(self, root): # {{{
+        cov = root.xpath('//*[local-name() = "meta" and @name="cover" and'
+                ' @content != "cover"]')
+        if len(cov) == 1:
+            manpath = ('//*[local-name() = "manifest"]/*[local-name() = "item" '
+                ' and @id="%s" and @media-type]')
+            cov = cov[0]
+            covid = cov.get('content')
+            manifest_item = root.xpath(manpath%covid)
+            has_cover = root.xpath(manpath%'cover')
+            if len(manifest_item) == 1 and not has_cover and \
+                    manifest_item[0].get('media-type',
+                            '').startswith('image/'):
+                self.log.warn('The cover image has an id != "cover". Renaming'
+                        ' to work around Nook Color bug')
+                manifest_item = manifest_item[0]
+                manifest_item.set('id', 'cover')
+                cov.set('content', 'cover')
+    # }}}
+
--- a/src/calibre/ebooks/oeb/transforms/flatcss.py
+++ b/src/calibre/ebooks/oeb/transforms/flatcss.py
@ -100,12 +100,13 @@ def FontMapper(sbase=None, dbase=None, dkey=None):

 class CSSFlattener(object):
    def __init__(self, fbase=None, fkey=None, lineh=None, unfloat=False,
-                 untable=False):
+                 untable=False, page_break_on_body=False):
        self.fbase = fbase
        self.fkey = fkey
        self.lineh = lineh
        self.unfloat = unfloat
        self.untable = untable
+        self.page_break_on_body = page_break_on_body

    @classmethod
    def config(cls, cfg):
@ -139,6 +140,8 @@ class CSSFlattener(object):
            bs.append('margin-right : %fpt'%\
                    float(self.context.margin_right))
            bs.extend(['padding-left: 0pt', 'padding-right: 0pt'])
+            if self.page_break_on_body:
+                bs.extend(['page-break-before: always'])
            if self.context.change_justification != 'original':
                bs.append('text-align: '+ self.context.change_justification)
            body.set('style', '; '.join(bs))
--- a/src/calibre/ebooks/oeb/transforms/split.py
+++ b/src/calibre/ebooks/oeb/transforms/split.py
@ -38,11 +38,12 @@ class SplitError(ValueError):
 class Split(object):

    def __init__(self, split_on_page_breaks=True, page_breaks_xpath=None,
-            max_flow_size=0):
+            max_flow_size=0, remove_css_pagebreaks=True):
        self.split_on_page_breaks = split_on_page_breaks
        self.page_breaks_xpath = page_breaks_xpath
        self.max_flow_size = max_flow_size
        self.page_break_selectors = None
+        self.remove_css_pagebreaks = remove_css_pagebreaks
        if self.page_breaks_xpath is not None:
            self.page_break_selectors = [(XPath(self.page_breaks_xpath), False)]

@ -83,12 +84,16 @@ class Split(object):
                    if before and before != 'avoid':
                        self.page_break_selectors.add((CSSSelector(rule.selectorText),
                            True))
+                        if self.remove_css_pagebreaks:
+                            rule.style.removeProperty('page-break-before')
                except:
                    pass
                try:
                    if after and after != 'avoid':
                        self.page_break_selectors.add((CSSSelector(rule.selectorText),
                            False))
+                        if self.remove_css_pagebreaks:
+                            rule.style.removeProperty('page-break-after')
                except:
                    pass
        page_breaks = set([])
--- a/src/calibre/gui2/convert/comic_input.py
+++ b/src/calibre/gui2/convert/comic_input.py
@ -22,7 +22,8 @@ class PluginWidget(Widget, Ui_Form):
                ['colors', 'dont_normalize', 'keep_aspect_ratio', 'right2left',
                    'despeckle', 'no_sort', 'no_process', 'landscape',
                    'dont_sharpen', 'disable_trim', 'wide', 'output_format',
-                    'dont_grayscale', 'comic_image_size']
+                    'dont_grayscale', 'comic_image_size',
+                    'dont_add_comic_pages_to_toc']
                )
        self.db, self.book_id = db, book_id
        for x in get_option('output_format').option.choices:
--- a/src/calibre/gui2/convert/comic_input.ui
+++ b/src/calibre/gui2/convert/comic_input.ui
@ -14,7 +14,7 @@
   <string>Form</string>
  </property>
  <layout class="QGridLayout" name="gridLayout">
-   <item row="2" column="0">
+   <item row="3" column="0">
    <widget class="QLabel" name="label_3">
     <property name="text">
      <string>&amp;Number of Colors:</string>
@ -24,7 +24,7 @@
     </property>
    </widget>
   </item>
-   <item row="2" column="1">
+   <item row="3" column="1">
    <widget class="QSpinBox" name="opt_colors">
     <property name="minimum">
      <number>8</number>
@ -37,70 +37,70 @@
     </property>
    </widget>
   </item>
-   <item row="4" column="0">
+   <item row="5" column="0">
    <widget class="QCheckBox" name="opt_dont_normalize">
     <property name="text">
      <string>Disable &amp;normalize</string>
     </property>
    </widget>
   </item>
-   <item row="5" column="0">
+   <item row="6" column="0">
    <widget class="QCheckBox" name="opt_keep_aspect_ratio">
     <property name="text">
      <string>Keep &amp;aspect ratio</string>
     </property>
    </widget>
   </item>
-   <item row="6" column="0">
+   <item row="7" column="0">
    <widget class="QCheckBox" name="opt_dont_sharpen">
     <property name="text">
      <string>Disable &amp;Sharpening</string>
     </property>
    </widget>
   </item>
-   <item row="7" column="0">
+   <item row="8" column="0">
    <widget class="QCheckBox" name="opt_disable_trim">
     <property name="text">
      <string>Disable &amp;Trimming</string>
     </property>
    </widget>
   </item>
-   <item row="8" column="0">
+   <item row="9" column="0">
    <widget class="QCheckBox" name="opt_wide">
     <property name="text">
      <string>&amp;Wide</string>
     </property>
    </widget>
   </item>
-   <item row="9" column="0">
+   <item row="10" column="0">
    <widget class="QCheckBox" name="opt_landscape">
     <property name="text">
      <string>&amp;Landscape</string>
     </property>
    </widget>
   </item>
-   <item row="10" column="0">
+   <item row="11" column="0">
    <widget class="QCheckBox" name="opt_right2left">
     <property name="text">
      <string>&amp;Right to left</string>
     </property>
    </widget>
   </item>
-   <item row="11" column="0">
+   <item row="12" column="0">
    <widget class="QCheckBox" name="opt_no_sort">
     <property name="text">
      <string>Don't so&amp;rt</string>
     </property>
    </widget>
   </item>
-   <item row="12" column="0">
+   <item row="13" column="0">
    <widget class="QCheckBox" name="opt_despeckle">
     <property name="text">
      <string>De&amp;speckle</string>
     </property>
    </widget>
   </item>
-   <item row="14" column="0">
+   <item row="15" column="0">
    <spacer name="verticalSpacer">
     <property name="orientation">
      <enum>Qt::Vertical</enum>
@ -120,7 +120,7 @@
     </property>
    </widget>
   </item>
-   <item row="13" column="0">
+   <item row="14" column="0">
    <widget class="QLabel" name="label">
     <property name="text">
      <string>&amp;Output format:</string>
@ -130,7 +130,7 @@
     </property>
    </widget>
   </item>
-   <item row="13" column="1">
+   <item row="14" column="1">
    <widget class="QComboBox" name="opt_output_format"/>
   </item>
   <item row="1" column="0">
@ -140,7 +140,7 @@
     </property>
    </widget>
   </item>
-   <item row="3" column="0">
+   <item row="4" column="0">
    <widget class="QLabel" name="label_2">
     <property name="text">
      <string>Override image  &amp;size:</string>
@ -150,9 +150,16 @@
     </property>
    </widget>
   </item>
-   <item row="3" column="1">
+   <item row="4" column="1">
    <widget class="QLineEdit" name="opt_comic_image_size"/>
   </item>
+   <item row="2" column="0" colspan="2">
+    <widget class="QCheckBox" name="opt_dont_add_comic_pages_to_toc">
+     <property name="text">
+      <string>Don't add links to &amp;pages to the Table of Contents for CBC files</string>
+     </property>
+    </widget>
+   </item>
  </layout>
 </widget>
 <resources/>
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@ -2459,7 +2459,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
        stream.seek(0)
        mi = get_metadata(stream, format, use_libprs_metadata=False)
        stream.seek(0)
-        mi.series_index = 1.0
+        if not mi.series_index:
+            mi.series_index = 1.0
        mi.tags = [_('News')]
        if arg['add_title_tag']:
            mi.tags += [arg['title']]
--- a/src/calibre/web/feeds/templates.py
+++ b/src/calibre/web/feeds/templates.py
@ -136,7 +136,7 @@ class FeedTemplate(Template):
            head.append(STYLE(style, type='text/css'))
        if extra_css:
            head.append(STYLE(extra_css, type='text/css'))
-        body = BODY(style='page-break-before:always')
+        body = BODY()
        body.append(self.get_navbar(f, feeds))

        div = DIV(
@ -322,7 +322,7 @@ class TouchscreenFeedTemplate(Template):
            head.append(STYLE(style, type='text/css'))
        if extra_css:
            head.append(STYLE(extra_css, type='text/css'))
-        body = BODY(style='page-break-before:always')
+        body = BODY()
        div = DIV(
                top_navbar,
                H2(feed.title, CLASS('feed_title'))
--- a/src/odf/attrconverters.py
+++ b/src/odf/attrconverters.py
@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-# Copyright (C) 2006-2008 Søren Roug, European Environment Agency
+# Copyright (C) 2006-2010 Søren Roug, European Environment Agency
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@ -40,6 +40,9 @@ def cnv_boolean(attribute, arg, element):

 # Potentially accept color values
 def cnv_color(attribute, arg, element):
+    """ A RGB color in conformance with §5.9.11 of [XSL], that is a RGB color in notation “#rrggbb”, where
+        rr, gg and bb are 8-bit hexadecimal digits.
+    """
    return str(arg)

 def cnv_configtype(attribute, arg, element):
@ -55,9 +58,15 @@ def cnv_data_source_has_labels(attribute, arg, element):

 # Understand different date formats
 def cnv_date(attribute, arg, element):
+    """ A dateOrDateTime value is either an [xmlschema-2] date value or an [xmlschema-2] dateTime
+        value.
+    """
    return str(arg)

 def cnv_dateTime(attribute, arg, element):
+    """ A dateOrDateTime value is either an [xmlschema-2] date value or an [xmlschema-2] dateTime
+        value.
+    """
    return str(arg)

 def cnv_double(attribute, arg, element):
@ -67,11 +76,31 @@ def cnv_duration(attribute, arg, element):
    return str(arg)

 def cnv_family(attribute, arg, element):
+    """ A style family """
    if str(arg) not in ("text", "paragraph", "section", "ruby", "table", "table-column", "table-row", "table-cell",
      "graphic", "presentation", "drawing-page", "chart"):
        raise ValueError, "'%s' not allowed" % str(arg)
    return str(arg)

+def __save_prefix(attribute, arg, element):
+    """ Return arg converted to unicode.
+        If arg has the form "prefix:rest" and element.get_knownns() resolves
+        the prefix to a namespace, element.get_nsprefix() is called for that
+        namespace first (presumably so the element registers the prefix -
+        confirm against Element.get_nsprefix).
+        Values without a colon, or with an unknown prefix, pass through.
+    """
+    head, colon, _rest = arg.partition(':')
+    if colon:
+        known = element.get_knownns(head)
+        # An unknown prefix is tolerated instead of raising ValueError
+        if known is not None:
+            element.get_nsprefix(known)
+    return unicode(arg)
+
+def cnv_formula(attribute, arg, element):
+    """ Convert a formula attribute value.
+        A formula has no predefined syntax, but the string should start with
+        a namespace prefix followed by a “:” (COLON, U+003A) and then the
+        formula text; the namespace bound to the prefix defines the syntax
+        and semantics. The prefix handling is delegated to __save_prefix.
+    """
+    converted = __save_prefix(attribute, arg, element)
+    return converted
+
 def cnv_ID(attribute, arg, element):
    return str(arg)

@ -89,6 +118,9 @@ def cnv_legend_position(attribute, arg, element):
 pattern_length = re.compile(r'-?([0-9]+(\.[0-9]*)?|\.[0-9]+)((cm)|(mm)|(in)|(pt)|(pc)|(px))')

 def cnv_length(attribute, arg, element):
+    """ A (positive or negative) physical length, consisting of magnitude and unit, in conformance with the
+        Units of Measure defined in §5.9.13 of [XSL].
+    """
    global pattern_length
    if not pattern_length.match(arg):
        raise ValueError, "'%s' is not a valid length" % arg
@ -120,12 +152,12 @@ def cnv_namespacedToken(attribute, arg, element):

    if not pattern_namespacedToken.match(arg):
        raise ValueError, "'%s' is not a valid namespaced token" % arg
-    return arg
+    return __save_prefix(attribute, arg, element)

-# Must accept string as argument
-# NCName is defined in http://www.w3.org/TR/REC-xml-names/#NT-NCName
-# Essentially an XML name minus ':'
 def cnv_NCName(attribute, arg, element):
+    """ NCName is defined in http://www.w3.org/TR/REC-xml-names/#NT-NCName
+        Essentially an XML name minus ':'
+    """
    if type(arg) in types.StringTypes:
        return make_NCName(arg)
    else:
@ -226,6 +258,7 @@ attrconverters = {
 	((ANIMNS,u'name'), None): cnv_string,
 	((ANIMNS,u'sub-item'), None): cnv_string,
 	((ANIMNS,u'value'), None): cnv_string,
+#	((DBNS,u'type'), None): cnv_namespacedToken,
 	((CHARTNS,u'attached-axis'), None): cnv_string,
 	((CHARTNS,u'class'), (CHARTNS,u'grid')): cnv_major_minor,
 	((CHARTNS,u'class'), None): cnv_namespacedToken,
@ -288,7 +321,7 @@ attrconverters = {
 	((CHARTNS,u'values-cell-range-address'), None): cnv_string,
 	((CHARTNS,u'vertical'), None): cnv_boolean,
 	((CHARTNS,u'visible'), None): cnv_boolean,
-	((CONFIGNS,u'name'), None): cnv_string,
+	((CONFIGNS,u'name'), None): cnv_formula,
 	((CONFIGNS,u'type'), None): cnv_configtype,
 	((DR3DNS,u'ambient-color'), None): cnv_string,
 	((DR3DNS,u'back-scale'), None): cnv_string,
@ -369,11 +402,11 @@ attrconverters = {
 	((DRAWNS,u'decimal-places'), None): cnv_string,
 	((DRAWNS,u'display'), None): cnv_string,
 	((DRAWNS,u'display-name'), None): cnv_string,
-	((DRAWNS,u'distance'), None): cnv_string,
+	((DRAWNS,u'distance'), None): cnv_lengthorpercent,
 	((DRAWNS,u'dots1'), None): cnv_integer,
-	((DRAWNS,u'dots1-length'), None): cnv_length,
+	((DRAWNS,u'dots1-length'), None): cnv_lengthorpercent,
 	((DRAWNS,u'dots2'), None): cnv_integer,
-	((DRAWNS,u'dots2-length'), None): cnv_length,
+	((DRAWNS,u'dots2-length'), None): cnv_lengthorpercent,
 	((DRAWNS,u'end-angle'), None): cnv_double,
 	((DRAWNS,u'end'), None): cnv_string,
 	((DRAWNS,u'end-color'), None): cnv_string,
@ -383,7 +416,7 @@ attrconverters = {
 	((DRAWNS,u'end-line-spacing-horizontal'), None): cnv_string,
 	((DRAWNS,u'end-line-spacing-vertical'), None): cnv_string,
 	((DRAWNS,u'end-shape'), None): cnv_IDREF,
-	((DRAWNS,u'engine'), None): cnv_string,
+	((DRAWNS,u'engine'), None): cnv_namespacedToken,
 	((DRAWNS,u'enhanced-path'), None): cnv_string,
 	((DRAWNS,u'escape-direction'), None): cnv_string,
 	((DRAWNS,u'extrusion-allowed'), None): cnv_boolean,
@ -604,7 +637,7 @@ attrconverters = {
 	((FORMNS,u'button-type'), None): cnv_string,
 	((FORMNS,u'command'), None): cnv_string,
 	((FORMNS,u'command-type'), None): cnv_string,
-	((FORMNS,u'control-implementation'), None): cnv_string,
+	((FORMNS,u'control-implementation'), None): cnv_namespacedToken,
 	((FORMNS,u'convert-empty-to-null'), None): cnv_boolean,
 	((FORMNS,u'current-selected'), None): cnv_boolean,
 	((FORMNS,u'current-state'), None): cnv_string,
@ -800,8 +833,8 @@ attrconverters = {
 	((PRESENTATIONNS,u'user-transformed'), None): cnv_boolean,
 	((PRESENTATIONNS,u'verb'), None): cnv_nonNegativeInteger,
 	((PRESENTATIONNS,u'visibility'), None): cnv_string,
-	((SCRIPTNS,u'event-name'), None): cnv_string,
-	((SCRIPTNS,u'language'), None): cnv_string,
+	((SCRIPTNS,u'event-name'), None): cnv_formula,
+	((SCRIPTNS,u'language'), None): cnv_formula,
 	((SCRIPTNS,u'macro-name'), None): cnv_string,
 	((SMILNS,u'accelerate'), None): cnv_double,
 	((SMILNS,u'accumulate'), None): cnv_string,
@ -1087,7 +1120,7 @@ attrconverters = {
 	((SVGNS,u'y2'), None): cnv_lengthorpercent,
 	((TABLENS,u'acceptance-state'), None): cnv_string,
 	((TABLENS,u'add-empty-lines'), None): cnv_boolean,
-	((TABLENS,u'algorithm'), None): cnv_string,
+	((TABLENS,u'algorithm'), None): cnv_formula,
 	((TABLENS,u'align'), None): cnv_string,
 	((TABLENS,u'allow-empty-cell'), None): cnv_boolean,
 	((TABLENS,u'application-data'), None): cnv_string,
@ -1106,7 +1139,7 @@ attrconverters = {
 	((TABLENS,u'cell-range'), None): cnv_string,
 	((TABLENS,u'column'), None): cnv_integer,
 	((TABLENS,u'comment'), None): cnv_string,
-	((TABLENS,u'condition'), None): cnv_string,
+	((TABLENS,u'condition'), None): cnv_formula,
 	((TABLENS,u'condition-source'), None): cnv_string,
 	((TABLENS,u'condition-source-range-address'), None): cnv_string,
 	((TABLENS,u'contains-error'), None): cnv_boolean,
@ -1144,13 +1177,13 @@ attrconverters = {
 	((TABLENS,u'end-x'), None): cnv_length,
 	((TABLENS,u'end-y'), None): cnv_length,
 	((TABLENS,u'execute'), None): cnv_boolean,
-	((TABLENS,u'expression'), None): cnv_string,
+	((TABLENS,u'expression'), None): cnv_formula,
 	((TABLENS,u'field-name'), None): cnv_string,
 	((TABLENS,u'field-number'), None): cnv_nonNegativeInteger,
 	((TABLENS,u'field-number'), None): cnv_string,
 	((TABLENS,u'filter-name'), None): cnv_string,
 	((TABLENS,u'filter-options'), None): cnv_string,
-	((TABLENS,u'formula'), None): cnv_string,
+	((TABLENS,u'formula'), None): cnv_formula,
 	((TABLENS,u'function'), None): cnv_string,
 	((TABLENS,u'function'), None): cnv_string,
 	((TABLENS,u'grand-total'), None): cnv_string,
@ -1290,7 +1323,7 @@ attrconverters = {
 	((TEXTNS,u'combine-entries-with-pp'), None): cnv_boolean,
 	((TEXTNS,u'comma-separated'), None): cnv_boolean,
 	((TEXTNS,u'cond-style-name'), None): cnv_StyleNameRef,
-	((TEXTNS,u'condition'), None): cnv_string,
+	((TEXTNS,u'condition'), None): cnv_formula,
 	((TEXTNS,u'connection-name'), None): cnv_string,
 	((TEXTNS,u'consecutive-numbering'), None): cnv_boolean,
 	((TEXTNS,u'continue-numbering'), None): cnv_boolean,
@ -1321,7 +1354,7 @@ attrconverters = {
 	((TEXTNS,u'first-row-start-column'), None): cnv_string,
 	((TEXTNS,u'fixed'), None): cnv_boolean,
 	((TEXTNS,u'footnotes-position'), None): cnv_string,
-	((TEXTNS,u'formula'), None): cnv_string,
+	((TEXTNS,u'formula'), None): cnv_formula,
 	((TEXTNS,u'global'), None): cnv_boolean,
 	((TEXTNS,u'howpublished'), None): cnv_string,
 	((TEXTNS,u'id'), None): cnv_ID,
@ -1437,7 +1470,10 @@ attrconverters = {

 class AttrConverters:
    def convert(self, attribute, value, element):
-        conversion = attrconverters.get((attribute,element), None)
+        """ Based on the element, figures out how to check/convert the attribute value
+            All values are converted to string
+        """
+        conversion = attrconverters.get((attribute, element.qname), None)
        if conversion is not None:
            return conversion(attribute, value, element)
        else:
--- a/src/odf/element.py
+++ b/src/odf/element.py
@ -1,6 +1,6 @@
 #!/usr/bin/python
 # -*- coding: utf-8 -*-
-# Copyright (C) 2007-2008 Søren Roug, European Environment Agency
+# Copyright (C) 2007-2010 Søren Roug, European Environment Agency
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@ -112,6 +112,9 @@ class Node(xml.dom.Node):
            return self.childNodes[-1]

    def insertBefore(self, newChild, refChild):
+        """ Inserts the node newChild before the existing child node refChild.
+            If refChild is null, insert newChild at the end of the list of children.
+        """
        if newChild.nodeType not in self._child_node_types:
            raise IllegalChild, "%s cannot be child of %s" % (newChild.tagName, self.tagName)
        if newChild.parentNode is not None:
@ -135,21 +138,26 @@ class Node(xml.dom.Node):
            newChild.parentNode = self
        return newChild

-    def appendChild(self, node):
-        if node.nodeType == self.DOCUMENT_FRAGMENT_NODE:
-            for c in tuple(node.childNodes):
+    def appendChild(self, newChild):
+        """ Adds the node newChild to the end of the list of children of this node.
+            If the newChild is already in the tree, it is first removed.
+        """
+        if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
+            for c in tuple(newChild.childNodes):
                self.appendChild(c)
            ### The DOM does not clearly specify what to return in this case
-            return node
-        if node.nodeType not in self._child_node_types:
-            raise IllegalChild, "<%s> is not allowed in %s" % ( node.tagName, self.tagName)
-        if node.parentNode is not None:
-            node.parentNode.removeChild(node)
-        _append_child(self, node)
-        node.nextSibling = None
-        return node
+            return newChild
+        if newChild.nodeType not in self._child_node_types:
+            raise IllegalChild, "<%s> is not allowed in %s" % ( newChild.tagName, self.tagName)
+        if newChild.parentNode is not None:
+            newChild.parentNode.removeChild(newChild)
+        _append_child(self, newChild)
+        newChild.nextSibling = None
+        return newChild

    def removeChild(self, oldChild):
+        """ Removes the child node indicated by oldChild from the list of children, and returns it.
+        """
        #FIXME: update ownerDocument.element_dict or find other solution
        try:
            self.childNodes.remove(oldChild)
@ -191,8 +199,8 @@ def _append_child(self, node):
    node.__dict__["parentNode"] = self

 class Childless:
-    """Mixin that makes childless-ness easy to implement and avoids
-    the complexity of the Node methods that deal with children.
+    """ Mixin that makes childless-ness easy to implement and avoids
+        the complexity of the Node methods that deal with children.
    """

    attributes = None
@ -207,6 +215,7 @@ class Childless:
        return None

    def appendChild(self, node):
+        """ Raises an error """
        raise xml.dom.HierarchyRequestErr(
            self.tagName + " nodes cannot have children")

@ -214,14 +223,17 @@ class Childless:
        return False

    def insertBefore(self, newChild, refChild):
+        """ Raises an error """
        raise xml.dom.HierarchyRequestErr(
            self.tagName + " nodes do not have children")

    def removeChild(self, oldChild):
+        """ Raises an error """
        raise xml.dom.NotFoundErr(
            self.tagName + " nodes do not have children")

    def replaceChild(self, newChild, oldChild):
+        """ Raises an error """
        raise xml.dom.HierarchyRequestErr(
            self.tagName + " nodes do not have children")

@ -247,8 +259,12 @@ class CDATASection(Childless, Text):
    nodeType = Node.CDATA_SECTION_NODE

    def toXml(self,level,f):
+        """ Generate XML output of the node. If the text contains "]]>", then
+            escape it by going out of CDATA mode (]]>), then write the string
+            and then go into CDATA mode again. (<![CDATA[)
+        """
        if self.data:
-            f.write('<![CDATA[%s]]>' % self.data)
+            f.write('<![CDATA[%s]]>' % self.data.replace(']]>',']]>]]><![CDATA['))

 class Element(Node):
    """ Creates a arbitrary element and is intended to be subclassed not used on its own.
@ -310,7 +326,19 @@ class Element(Node):
                if self.getAttrNS(r[0],r[1]) is None:
                    raise AttributeError, "Required attribute missing: %s in <%s>" % (r[1].lower().replace('-',''), self.tagName)

+    def get_knownns(self, prefix):
+        """ Reverse lookup in odfpy's table of known namespaces: return the
+            namespace whose registered prefix equals the given prefix, or
+            None when no known namespace uses it.
+        """
+        for namespace, known_prefix in nsdict.items():
+            if known_prefix == prefix:
+                return namespace
+        return None
+        
    def get_nsprefix(self, namespace):
+        """ Odfpy maintains a list of known namespaces. In some cases we have a namespace URL,
+            and need to look up or assign the prefix for it.
+        """
        if namespace is None: namespace = ""
        prefix = _nsassign(namespace)
        if not self.namespaces.has_key(namespace):
@ -339,6 +367,9 @@ class Element(Node):
            self.ownerDocument.rebuild_caches(element)

    def addText(self, text, check_grammar=True):
+        """ Adds text to an element
+            Setting check_grammar=False turns off grammar checking
+        """
        if check_grammar and self.qname not in grammar.allows_text:
            raise IllegalText, "The <%s> element does not allow text" % self.tagName
        else:
@ -346,6 +377,9 @@ class Element(Node):
                self.appendChild(Text(text))

    def addCDATA(self, cdata, check_grammar=True):
+        """ Adds CDATA to an element
+            Setting check_grammar=False turns off grammar checking
+        """
        if check_grammar and self.qname not in grammar.allows_text:
            raise IllegalText, "The <%s> element does not allow text" % self.tagName
        else:
@ -403,17 +437,18 @@ class Element(Node):
 #       if allowed_attrs and (namespace, localpart) not in allowed_attrs:
 #           raise AttributeError, "Attribute %s:%s is not allowed in element <%s>" % ( prefix, localpart, self.tagName)
        c = AttrConverters()
-        self.attributes[prefix + ":" + localpart] = c.convert((namespace, localpart), value, self.qname)
+        self.attributes[(namespace, localpart)] = c.convert((namespace, localpart), value, self)

    def getAttrNS(self, namespace, localpart):
        prefix = self.get_nsprefix(namespace)
-        return self.attributes.get(prefix + ":" + localpart)
+        return self.attributes.get((namespace, localpart))

    def removeAttrNS(self, namespace, localpart):
-        prefix = self.get_nsprefix(namespace)
-        del self.attributes[prefix + ":" + localpart]
+        del self.attributes[(namespace, localpart)]

    def getAttribute(self, attr):
+        """ Get an attribute value. The method knows which namespace the attribute is in
+        """
        allowed_attrs = self.allowed_attributes()
        if allowed_attrs is None:
            if type(attr) == type(()):
@ -432,8 +467,9 @@ class Element(Node):
        if level == 0:
            for namespace, prefix in self.namespaces.items():
                f.write(' xmlns:' + prefix + '="'+ _escape(str(namespace))+'"')
-        for attkey in self.attributes.keys():
-            f.write(' '+_escape(str(attkey))+'='+_quoteattr(unicode(self.attributes[attkey]).encode('utf-8')))
+        for qname in self.attributes.keys():
+            prefix = self.get_nsprefix(qname[0])
+            f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(unicode(self.attributes[qname]).encode('utf-8')))
        f.write('>')

    def write_close_tag(self, level, f):
@ -445,8 +481,9 @@ class Element(Node):
        if level == 0:
            for namespace, prefix in self.namespaces.items():
                f.write(' xmlns:' + prefix + '="'+ _escape(str(namespace))+'"')
-        for attkey in self.attributes.keys():
-            f.write(' '+_escape(str(attkey))+'='+_quoteattr(unicode(self.attributes[attkey]).encode('utf-8')))
+        for qname in self.attributes.keys():
+            prefix = self.get_nsprefix(qname[0])
+            f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(unicode(self.attributes[qname]).encode('utf-8')))
        if self.childNodes:
            f.write('>')
            for element in self.childNodes:
@ -464,6 +501,7 @@ class Element(Node):
        return accumulator

    def getElementsByType(self, element):
+        """ Gets elements based on the type, which is a function from text.py, draw.py etc. """
        obj = element(check_grammar=False)
        return self._getElementsByObj(obj,[])

--- a/src/odf/grammar.py
+++ b/src/odf/grammar.py
@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-# Copyright (C) 2006-2009 Søren Roug, European Environment Agency
+# Copyright (C) 2006-2010 Søren Roug, European Environment Agency
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
--- a/src/odf/load.py
+++ b/src/odf/load.py
@ -63,8 +63,8 @@ class LoadParser(handler.ContentHandler):

        self.level = self.level + 1
        # Add any accumulated text content
-        content = ''.join(self.data).strip()
-        if len(content) > 0:
+        content = ''.join(self.data)
+        if len(content.strip()) > 0:
            self.parent.addText(content, check_grammar=False)
            self.data = []
        # Create the element
--- a/src/odf/namespaces.py
+++ b/src/odf/namespaces.py
@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-# Copyright (C) 2006-2009 Søren Roug, European Environment Agency
+# Copyright (C) 2006-2010 Søren Roug, European Environment Agency
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@ -17,7 +17,7 @@
 #
 # Contributor(s):
 #
-TOOLSVERSION = u"ODFPY/0.9.2dev"
+TOOLSVERSION = u"ODFPY/0.9.4dev"

 ANIMNS         = u"urn:oasis:names:tc:opendocument:xmlns:animation:1.0"
 DBNS           = u"urn:oasis:names:tc:opendocument:xmlns:database:1.0"
@ -28,19 +28,23 @@ DCNS           = u"http://purl.org/dc/elements/1.1/"
 DOMNS          = u"http://www.w3.org/2001/xml-events"
 DR3DNS         = u"urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0"
 DRAWNS         = u"urn:oasis:names:tc:opendocument:xmlns:drawing:1.0"
+FIELDNS        = u"urn:openoffice:names:experimental:ooo-ms-interop:xmlns:field:1.0"
 FONS           = u"urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0"
 FORMNS         = u"urn:oasis:names:tc:opendocument:xmlns:form:1.0"
+GRDDLNS        = u"http://www.w3.org/2003/g/data-view#"
 KOFFICENS      = u"http://www.koffice.org/2005/"
 MANIFESTNS     = u"urn:oasis:names:tc:opendocument:xmlns:manifest:1.0"
 MATHNS         = u"http://www.w3.org/1998/Math/MathML"
 METANS         = u"urn:oasis:names:tc:opendocument:xmlns:meta:1.0"
 NUMBERNS       = u"urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0"
 OFFICENS       = u"urn:oasis:names:tc:opendocument:xmlns:office:1.0"
+OFNS           = u"urn:oasis:names:tc:opendocument:xmlns:of:1.2"
 OOONS          = u"http://openoffice.org/2004/office"
 OOOWNS         = u"http://openoffice.org/2004/writer"
 OOOCNS         = u"http://openoffice.org/2004/calc"
 PRESENTATIONNS = u"urn:oasis:names:tc:opendocument:xmlns:presentation:1.0"
 RDFANS         = u"http://docs.oasis-open.org/opendocument/meta/rdfa#"
+RPTNS          = u"http://openoffice.org/2005/report"
 SCRIPTNS       = u"urn:oasis:names:tc:opendocument:xmlns:script:1.0"
 SMILNS         = u"urn:oasis:names:tc:opendocument:xmlns:smil-compatible:1.0"
 STYLENS        = u"urn:oasis:names:tc:opendocument:xmlns:style:1.0"
@ -50,7 +54,8 @@ TEXTNS         = u"urn:oasis:names:tc:opendocument:xmlns:text:1.0"
 XFORMSNS       = u"http://www.w3.org/2002/xforms"
 XLINKNS        = u"http://www.w3.org/1999/xlink"
 XMLNS          = u"http://www.w3.org/XML/1998/namespace"
-
+XSDNS          = u"http://www.w3.org/2001/XMLSchema"
+XSINS          = u"http://www.w3.org/2001/XMLSchema-instance"

 nsdict = {
   ANIMNS: u'anim',
@ -61,19 +66,23 @@ nsdict = {
   DOMNS: u'dom',
   DR3DNS: u'dr3d',
   DRAWNS: u'draw',
+   FIELDNS: u'field',
   FONS: u'fo',
   FORMNS: u'form',
+   GRDDLNS: u'grddl',
   KOFFICENS: u'koffice',
   MANIFESTNS: u'manifest',
   MATHNS: u'math',
   METANS: u'meta',
   NUMBERNS: u'number',
   OFFICENS: u'office',
+   OFNS: u'of',
   OOONS: u'ooo',
   OOOWNS: u'ooow',
   OOOCNS: u'oooc',
   PRESENTATIONNS: u'presentation',
   RDFANS: u'rdfa',
+   RPTNS:  u'rpt',
   SCRIPTNS: u'script',
   SMILNS: u'smil',
   STYLENS: u'style',
@ -83,4 +92,6 @@ nsdict = {
   XFORMSNS: u'xforms',
   XLINKNS: u'xlink',
   XMLNS: u'xml',
+   XSDNS: u'xsd',
+   XSINS: u'xsi',
 }
--- a/src/odf/odf2xhtml.py
+++ b/src/odf/odf2xhtml.py
@ -1,6 +1,6 @@
 #!/usr/bin/python
 # -*- coding: utf-8 -*-
-# Copyright (C) 2006-2007 Søren Roug, European Environment Agency
+# Copyright (C) 2006-2010 Søren Roug, European Environment Agency
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@ -20,15 +20,18 @@
 #
 #import pdb
 #pdb.set_trace()
-import zipfile
-from xml.sax import handler, expatreader
-from xml.sax.xmlreader import InputSource
+from xml.sax import handler
 from xml.sax.saxutils import escape, quoteattr
-from cStringIO import StringIO
+from xml.dom import Node

-from namespaces import DCNS, DRAWNS, FONS, \
-  METANS, NUMBERNS, OFFICENS, PRESENTATIONNS, \
-  STYLENS, SVGNS, TABLENS, TEXTNS, XLINKNS
+from opendocument import load
+
+from namespaces import ANIMNS, CHARTNS, CONFIGNS, DCNS, DR3DNS, DRAWNS, FONS, \
+  FORMNS, MATHNS, METANS, NUMBERNS, OFFICENS, PRESENTATIONNS, SCRIPTNS, \
+  SMILNS, STYLENS, SVGNS, TABLENS, TEXTNS, XLINKNS
+
+if False: # Added by Kovid
+    DR3DNS, MATHNS, CHARTNS, CONFIGNS, ANIMNS, FORMNS, SMILNS, SCRIPTNS

 # Handling of styles
 #
@ -72,8 +75,8 @@ class StyleToCSS:
            (FONS,u"border-left"): self.c_fo,
            (FONS,u"border-right"): self.c_fo,
            (FONS,u"border-top"): self.c_fo,
-            (FONS,u"break-after"): self.c_break,
-            (FONS,u"break-before"): self.c_break,
+            (FONS,u"break-after"): self.c_break, # Added by Kovid
+            (FONS,u"break-before"): self.c_break,# Added by Kovid
            (FONS,u"color"): self.c_fo,
            (FONS,u"font-family"): self.c_fo,
            (FONS,u"font-size"): self.c_fo,
@ -136,7 +139,7 @@ class StyleToCSS:
        selector = rule[1]
        sdict[selector] = val

-    def c_break(self, ruleset, sdict, rule, val):
+    def c_break(self, ruleset, sdict, rule, val): # Added by Kovid
        property = 'page-' + rule[1]
        values = {'auto': 'auto', 'column': 'always', 'page': 'always',
                  'even-page': 'left', 'odd-page': 'right',
@ -346,13 +349,16 @@ class ODF2XHTML(handler.ContentHandler):
        self.elements = {
        (DCNS, 'title'): (self.s_processcont, self.e_dc_title),
        (DCNS, 'language'): (self.s_processcont, self.e_dc_contentlanguage),
-        (DCNS, 'creator'): (self.s_processcont, self.e_dc_metatag),
+        (DCNS, 'creator'): (self.s_processcont, self.e_dc_creator),
        (DCNS, 'description'): (self.s_processcont, self.e_dc_metatag),
        (DCNS, 'date'): (self.s_processcont, self.e_dc_metatag),
+        (DRAWNS, 'custom-shape'): (self.s_custom_shape, self.e_custom_shape),
        (DRAWNS, 'frame'): (self.s_draw_frame, self.e_draw_frame),
        (DRAWNS, 'image'): (self.s_draw_image, None),
        (DRAWNS, 'fill-image'): (self.s_draw_fill_image, None),
        (DRAWNS, "layer-set"):(self.s_ignorexml, None),
+        (DRAWNS, 'object'): (self.s_draw_object, None),
+        (DRAWNS, 'object-ole'): (self.s_draw_object_ole, None),
        (DRAWNS, 'page'): (self.s_draw_page, self.e_draw_page),
        (DRAWNS, 'text-box'): (self.s_draw_textbox, self.e_draw_textbox),
        (METANS, 'creation-date'):(self.s_processcont, self.e_dc_metatag),
@ -364,7 +370,9 @@ class ODF2XHTML(handler.ContentHandler):
        (NUMBERNS, "date-style"):(self.s_ignorexml, None),
        (NUMBERNS, "number-style"):(self.s_ignorexml, None),
        (NUMBERNS, "text-style"):(self.s_ignorexml, None),
+        (OFFICENS, "annotation"):(self.s_ignorexml, None),
        (OFFICENS, "automatic-styles"):(self.s_office_automatic_styles, None),
+        (OFFICENS, "document"):(self.s_office_document_content, self.e_office_document_content),
        (OFFICENS, "document-content"):(self.s_office_document_content, self.e_office_document_content),
        (OFFICENS, "forms"):(self.s_ignorexml, None),
        (OFFICENS, "master-styles"):(self.s_office_master_styles, None),
@ -374,6 +382,7 @@ class ODF2XHTML(handler.ContentHandler):
        (OFFICENS, "styles"):(self.s_office_styles, None),
        (OFFICENS, "text"):(self.s_office_text, self.e_office_text),
        (OFFICENS, "scripts"):(self.s_ignorexml, None),
+        (OFFICENS, "settings"):(self.s_ignorexml, None),
        (PRESENTATIONNS, "notes"):(self.s_ignorexml, None),
 #       (STYLENS, "default-page-layout"):(self.s_style_default_page_layout, self.e_style_page_layout),
        (STYLENS, "default-page-layout"):(self.s_ignorexml, None),
@ -389,8 +398,8 @@ class ODF2XHTML(handler.ContentHandler):
 #       (STYLENS, "header-style"):(self.s_style_header_style, None),
        (STYLENS, "master-page"):(self.s_style_master_page, None),
        (STYLENS, "page-layout-properties"):(self.s_style_handle_properties, None),
-#       (STYLENS, "page-layout"):(self.s_style_page_layout, self.e_style_page_layout),
-        (STYLENS, "page-layout"):(self.s_ignorexml, None),
+        (STYLENS, "page-layout"):(self.s_style_page_layout, self.e_style_page_layout),
+#       (STYLENS, "page-layout"):(self.s_ignorexml, None),
        (STYLENS, "paragraph-properties"):(self.s_style_handle_properties, None),
        (STYLENS, "style"):(self.s_style_style, self.e_style_style),
        (STYLENS, "table-cell-properties"):(self.s_style_handle_properties, None),
@ -407,6 +416,10 @@ class ODF2XHTML(handler.ContentHandler):
        (TEXTNS, "alphabetical-index-source"):(self.s_text_x_source, self.e_text_x_source),
        (TEXTNS, "bibliography-configuration"):(self.s_ignorexml, None),
        (TEXTNS, "bibliography-source"):(self.s_text_x_source, self.e_text_x_source),
+        (TEXTNS, 'bookmark'): (self.s_text_bookmark, None),
+        (TEXTNS, 'bookmark-start'): (self.s_text_bookmark, None),
+        (TEXTNS, 'bookmark-ref'): (self.s_text_bookmark_ref, self.e_text_a),
+        (TEXTNS, 'bookmark-ref-start'): (self.s_text_bookmark_ref, None),
        (TEXTNS, 'h'): (self.s_text_h, self.e_text_h),
        (TEXTNS, "illustration-index-source"):(self.s_text_x_source, self.e_text_x_source),
        (TEXTNS, 'line-break'):(self.s_text_line_break, None),
@ -430,10 +443,66 @@ class ODF2XHTML(handler.ContentHandler):
        (TEXTNS, "user-index-source"):(self.s_text_x_source, self.e_text_x_source),
        }
        if embedable:
-            self.elements[(OFFICENS, u"text")] = (None,None)
-            self.elements[(OFFICENS, u"spreadsheet")] = (None,None)
-            self.elements[(OFFICENS, u"presentation")] = (None,None)
-            self.elements[(OFFICENS, u"document-content")] = (None,None)
+            self.make_embedable()
+        self._resetobject()
+
+    def set_plain(self):
+        """ Tell the parser to not generate CSS.
+            Only sets the generate_css flag to False.
+        """
+        # NOTE(review): _resetobject() does not initialise generate_css;
+        # confirm the constructor sets it before anything reads the flag.
+        self.generate_css = False
+
+    def set_embedable(self):
+        """ Tells the converter to only output the parts inside the <body>.
+            This is done by replacing the handler pairs registered for
+            office:text, office:spreadsheet, office:presentation and
+            office:document-content with (None, None).
+        """
+        for wrapper in (u"text", u"spreadsheet", u"presentation", u"document-content"):
+            self.elements[(OFFICENS, wrapper)] = (None,None)
+
+    # The constructor calls self.make_embedable() when embedable is true;
+    # without this alias that call raises AttributeError.
+    make_embedable = set_embedable
+
+
+    def add_style_file(self, stylefilename, media=None):
+        """ Add a link to an external style file.
+            Also turns off the embedding of styles in the HTML by setting
+            use_internal_css to False. When media is given it is written
+            as the media attribute of the generated <link> tag.
+        """
+        self.use_internal_css = False
+        self.stylefilename = stylefilename
+        if not media:
+            link = '<link rel="stylesheet" type="text/css" href="%s"/>\n' % (stylefilename)
+        else:
+            link = '<link rel="stylesheet" type="text/css" href="%s" media="%s"/>\n' % (stylefilename,media)
+        self.metatags.append(link)
+
+    def _resetfootnotes(self):
+        """ Reset the footnote and endnote bookkeeping: no notes recorded,
+            note counter at zero, empty note body.
+        """
+        self.notebody = ''
+        self.currentnote = 0
+        self.notedict = {}
+
+    def _resetobject(self):
+        """ Put the converter back into its initial state: empty output
+            lines, blank document strings, fresh parse stacks and style
+            bookkeeping, reset footnotes and no meta tags. Internal CSS is
+            switched back on.
+        """
+        # Output lines and the current write function
+        self.lines = []
+        self._wfunc = self._wlines
+        # Document strings
+        self.xmlfile = self.title = self.language = self.creator = ''
+        # Parse state
+        self.data = []
+        self.tagstack = TagStack()
+        self.htmlstack = []
+        self.pstack = []
+        self.processelem = self.processcont = True
+        self.listtypes = {}
+        self.headinglevels = [0] * 11 # level 0 to 10
+        self.use_internal_css = True
+        self.cs = StyleToCSS()
+        self.anchors = {}
+
+        # Style declarations
+        self.stylestack = []
+        self.styledict = {}
+        self.currentstyle = None
+
+        self._resetfootnotes()
+
+        # Tags from meta.xml
+        self.metatags = []


    def writeout(self, s):
@ -447,6 +516,7 @@ class ODF2XHTML(handler.ContentHandler):

    def opentag(self, tag, attrs={}, block=False):
        """ Create an open HTML tag """
+        self.htmlstack.append((tag,attrs,block))
        a = []
        for key,val in attrs.items():
            a.append('''%s=%s''' % (key, quoteattr(val)))
@ -458,6 +528,8 @@ class ODF2XHTML(handler.ContentHandler):
            self.writeout("\n")

    def closetag(self, tag, block=True):
+        """ Close an open HTML tag """
+        self.htmlstack.pop()
        self.writeout("</%s>" % tag)
        if block == True:
            self.writeout("\n")
@ -468,17 +540,13 @@ class ODF2XHTML(handler.ContentHandler):
            a.append('''%s=%s''' % (key, quoteattr(val)))
        self.writeout("<%s %s/>\n" % (tag, " ".join(a)))

+#--------------------------------------------------
+# Interface to parser
 #--------------------------------------------------
    def characters(self, data):
        if self.processelem and self.processcont:
            self.data.append(data)

-    def handle_starttag(self, tag, method, attrs):
-        method(tag,attrs)
-
-    def handle_endtag(self, tag, attrs, method):
-        method(tag, attrs)
-
    def startElementNS(self, tag, qname, attrs):
        self.pstack.append( (self.processelem, self.processcont) )
        if self.processelem:
@ -499,6 +567,13 @@ class ODF2XHTML(handler.ContentHandler):
                self.unknown_endtag(tag, attrs)
        self.processelem, self.processcont = self.pstack.pop()

+#--------------------------------------------------
+    def handle_starttag(self, tag, method, attrs):
+        method(tag,attrs)
+
+    def handle_endtag(self, tag, attrs, method):
+        method(tag, attrs)
+
    def unknown_starttag(self, tag, attrs):
        pass

@ -512,18 +587,21 @@ class ODF2XHTML(handler.ContentHandler):
        self.processelem = False

    def s_ignorecont(self, tag, attrs):
+        """ Stop processing the text nodes """
        self.processcont = False

    def s_processcont(self, tag, attrs):
+        """ Start processing the text nodes """
        self.processcont = True

    def classname(self, attrs):
        """ Generate a class name from a style name """
-        c = attrs[(TEXTNS,'style-name')]
+        c = attrs.get((TEXTNS,'style-name'),'')
        c = c.replace(".","_")
        return c

    def get_anchor(self, name):
+        """ Create a unique anchor id for a href name """
        if not self.anchors.has_key(name):
            # Changed by Kovid
            self.anchors[name] = "anchor%d" % (len(self.anchors) + 1)
@ -543,8 +621,8 @@ class ODF2XHTML(handler.ContentHandler):
    def e_dc_title(self, tag, attrs):
        """ Get the title from the meta data and create a HTML <title>
        """
-        self.metatags.append('<title>%s</title>\n' % escape(''.join(self.data)))
        self.title = ''.join(self.data)
+        #self.metatags.append('<title>%s</title>\n' % escape(self.title))
        self.data = []

    def e_dc_metatag(self, tag, attrs):
@ -556,13 +634,57 @@ class ODF2XHTML(handler.ContentHandler):
    def e_dc_contentlanguage(self, tag, attrs):
        """ Set the content language. Identifies the targeted audience
        """
-        self.metatags.append('<meta http-equiv="content-language" content="%s"/>\n' % ''.join(self.data))
+        self.language = ''.join(self.data)
+        self.metatags.append('<meta http-equiv="content-language" content="%s"/>\n' % escape(self.language))
        self.data = []

+    def e_dc_creator(self, tag, attrs):
+        """ Record the content creator and add it to the meta tags
+        """
+        self.creator = ''.join(self.data)
+        self.metatags.append('<meta http-equiv="creator" content="%s"/>\n' % escape(self.creator))
+        self.data = []
+
+    def s_custom_shape(self, tag, attrs):
+        """ A <draw:custom-shape> is made into a <div> in HTML which is then styled
+        """
+        anchor_type = attrs.get((TEXTNS,'anchor-type'),'notfound')
+        htmltag = 'div'
+        name = "G-" + attrs.get( (DRAWNS,'style-name'), "")
+        if name == 'G-':
+            name = "PR-" + attrs.get( (PRESENTATIONNS,'style-name'), "")
+        name = name.replace(".","_")
+        if anchor_type == "paragraph":
+            style = 'position:absolute;'
+        elif anchor_type == 'char':
+            style = "position:absolute;"
+        elif anchor_type == 'as-char':
+            htmltag = 'div'
+            style = ''
+        else:
+            style = "position: absolute;"
+        if attrs.has_key( (SVGNS,"width") ):
+            style = style + "width:" + attrs[(SVGNS,"width")] + ";"
+        if attrs.has_key( (SVGNS,"height") ):
+            style = style + "height:" +  attrs[(SVGNS,"height")] + ";"
+        if attrs.has_key( (SVGNS,"x") ):
+            style = style + "left:" +  attrs[(SVGNS,"x")] + ";"
+        if attrs.has_key( (SVGNS,"y") ):
+            style = style + "top:" +  attrs[(SVGNS,"y")] + ";"
+        if self.generate_css:
+            self.opentag(htmltag, {'class': name, 'style': style})
+        else:
+            self.opentag(htmltag)
+
+    def e_custom_shape(self, tag, attrs):
+        """ End the <draw:custom-shape>
+        """
+        self.closetag('div')
+
    def s_draw_frame(self, tag, attrs):
        """ A <draw:frame> is made into a <div> in HTML which is then styled
        """
-        anchor_type = attrs.get((TEXTNS,'anchor-type'),'char')
+        anchor_type = attrs.get((TEXTNS,'anchor-type'),'notfound')
        htmltag = 'div'
        name = "G-" + attrs.get( (DRAWNS,'style-name'), "")
        if name == 'G-':
@ -576,7 +698,7 @@ class ODF2XHTML(handler.ContentHandler):
            htmltag = 'div'
            style = ''
        else:
-            style = "position: absolute;"
+            style = "position:absolute;"
        if attrs.has_key( (SVGNS,"width") ):
            style = style + "width:" + attrs[(SVGNS,"width")] + ";"
        if attrs.has_key( (SVGNS,"height") ):
@ -620,6 +742,30 @@ class ODF2XHTML(handler.ContentHandler):
                htmlattrs['style'] = "display: block;"
        self.emptytag('img', htmlattrs)

+    def s_draw_object(self, tag, attrs):
+        """ A <draw:object> is an embedded object in the document (e.g. a spreadsheet in a presentation).
+        """
+        return # Added by Kovid
+        objhref = attrs[(XLINKNS,"href")]
+        # Remove leading "./": from "./Object 1" to "Object 1"
+#       objhref = objhref [2:]
+
+        # Not using os.path.join since it fails to find the file on Windows.
+#       objcontentpath = '/'.join([objhref, 'content.xml'])
+
+        for c in self.document.childnodes:
+            if c.folder == objhref:
+                self._walknode(c.topnode)
+
+    def s_draw_object_ole(self, tag, attrs):
+        """ A <draw:object-ole> is an embedded OLE object in the document (e.g. MS Graph).
+        """
+        class_id = attrs[(DRAWNS,"class-id")]
+        if class_id and class_id.lower() == "00020803-0000-0000-c000-000000000046": ## Microsoft Graph 97 Chart
+            tagattrs = { 'name':'object_ole_graph', 'class':'ole-graph' }
+            self.opentag('a', tagattrs)
+            self.closetag('a', tagattrs)
+
    def s_draw_page(self, tag, attrs):
        """ A <draw:page> is a slide in a presentation. We use a <fieldset> element in HTML.
            Therefore if you convert a ODP file, you get a series of <fieldset>s.
@ -655,13 +801,9 @@ class ODF2XHTML(handler.ContentHandler):

    def html_body(self, tag, attrs):
        self.writedata()
-        if self.generate_css:
+        if self.generate_css and self.use_internal_css:
            self.opentag('style', {'type':"text/css"}, True)
            self.writeout('/*<![CDATA[*/\n')
-            self.writeout('\nimg { width: 100%; height: 100%; }\n')
-            self.writeout('* { padding: 0; margin: 0;  background-color:white; }\n')
-            self.writeout('body { margin: 0 1em; }\n')
-            self.writeout('ol, ul { padding-left: 2em; }\n')
            self.generate_stylesheet()
            self.writeout('/*]]>*/\n')
            self.closetag('style')
@ -669,6 +811,16 @@ class ODF2XHTML(handler.ContentHandler):
        self.closetag('head')
        self.opentag('body', block=True)

+    # background-color: white removed by Kovid for #9118
+    # Specifying an explicit bg color prevents ebook readers
+    # from successfully inverting colors
+    default_styles = """
+img { width: 100%; height: 100%; }
+* { padding: 0; margin: 0; }
+body { margin: 0 1em; }
+ol, ul { padding-left: 2em; }
+"""
+
    def generate_stylesheet(self):
        for name in self.stylestack:
            styles = self.styledict.get(name)
@ -688,6 +840,7 @@ class ODF2XHTML(handler.ContentHandler):
                styles = parentstyle
            self.styledict[name] = styles
        # Write the styles to HTML
+        self.writeout(self.default_styles)
        for name in self.stylestack:
            styles = self.styledict.get(name)
            css2 = self.cs.convert_styles(styles)
@ -729,6 +882,7 @@ class ODF2XHTML(handler.ContentHandler):
        self.emptytag('meta', { 'http-equiv':"Content-Type", 'content':"text/html;charset=UTF-8"})
        for metaline in self.metatags:
            self.writeout(metaline)
+        self.writeout('<title>%s</title>\n' % escape(self.title))

    def e_office_document_content(self, tag, attrs):
        """ Last tag """
@ -773,7 +927,7 @@ class ODF2XHTML(handler.ContentHandler):
        """ Copy all attributes to a struct.
            We will later convert them to CSS2
        """
-        if self.currentstyle is None:
+        if self.currentstyle is None: # Added by Kovid
            return
        for key,attr in attrs.items():
            self.styledict[self.currentstyle][key] = attr
@ -799,7 +953,7 @@ class ODF2XHTML(handler.ContentHandler):
    def s_style_font_face(self, tag, attrs):
        """ It is possible that the HTML browser doesn't know how to
            show a particular font. Luckily ODF provides generic fallbacks
-            Unluckily they are not the same as CSS2.
+            Unfortunately they are not the same as CSS2.
            CSS2: serif, sans-serif, cursive, fantasy, monospace
            ODF: roman, swiss, modern, decorative, script, system
        """
@ -850,7 +1004,7 @@ class ODF2XHTML(handler.ContentHandler):
        """
        name = attrs[(STYLENS,'name')]
        name = name.replace(".","_")
-        self.currentstyle = "@page " + name
+        self.currentstyle = ".PL-" + name
        self.stylestack.append(self.currentstyle)
        self.styledict[self.currentstyle] = {}

@ -881,7 +1035,7 @@ class ODF2XHTML(handler.ContentHandler):
        self.s_ignorexml(tag, attrs)

    # Short prefixes for class selectors
-    familyshort = {'drawing-page':'DP', 'paragraph':'P', 'presentation':'PR',
+    _familyshort = {'drawing-page':'DP', 'paragraph':'P', 'presentation':'PR',
        'text':'S', 'section':'D',
         'table':'T', 'table-cell':'TD', 'table-column':'TC',
         'table-row':'TR', 'graphic':'G' }
@ -897,7 +1051,7 @@ class ODF2XHTML(handler.ContentHandler):
        name = name.replace(".","_")
        family = attrs[(STYLENS,'family')]
        htmlfamily = self.familymap.get(family,'unknown')
-        sfamily = self.familyshort.get(family,'X')
+        sfamily = self._familyshort.get(family,'X')
        name = "%s%s-%s" % (self.autoprefix, sfamily, name)
        parent = attrs.get( (STYLENS,'parent-style-name') )
        self.currentstyle = special_styles.get(name,"."+name)
@ -942,6 +1096,7 @@ class ODF2XHTML(handler.ContentHandler):
        self.purgedata()

    def s_table_table_cell(self, tag, attrs):
+        """ Start a table cell """
        #FIXME: number-columns-repeated § 8.1.3
        #repeated = int(attrs.get( (TABLENS,'number-columns-repeated'), 1))
        htmlattrs = {}
@ -959,11 +1114,13 @@ class ODF2XHTML(handler.ContentHandler):
        self.purgedata()

    def e_table_table_cell(self, tag, attrs):
+        """ End a table cell """
        self.writedata()
        self.closetag('td')
        self.purgedata()

    def s_table_table_column(self, tag, attrs):
+        """ Start a table column """
        c = attrs.get( (TABLENS,'style-name'), None)
        repeated = int(attrs.get( (TABLENS,'number-columns-repeated'), 1))
        htmlattrs = {}
@ -974,6 +1131,7 @@ class ODF2XHTML(handler.ContentHandler):
        self.purgedata()

    def s_table_table_row(self, tag, attrs):
+        """ Start a table row """
        #FIXME: table:number-rows-repeated
        c = attrs.get( (TABLENS,'style-name'), None)
        htmlattrs = {}
@ -983,6 +1141,7 @@ class ODF2XHTML(handler.ContentHandler):
        self.purgedata()

    def e_table_table_row(self, tag, attrs):
+        """ End a table row """
        self.writedata()
        self.closetag('tr')
        self.purgedata()
@ -997,10 +1156,28 @@ class ODF2XHTML(handler.ContentHandler):
        self.purgedata()

    def e_text_a(self, tag, attrs):
+        """ End an anchor or bookmark reference """
        self.writedata()
        self.closetag('a', False)
        self.purgedata()

+    def s_text_bookmark(self, tag, attrs):
+        """ Bookmark definition """
+        name = attrs[(TEXTNS,'name')]
+        html_id = self.get_anchor(name)
+        self.writedata()
+        self.opentag('span', {'id':html_id})
+        self.closetag('span', False)
+        self.purgedata()
+
+    def s_text_bookmark_ref(self, tag, attrs):
+        """ Bookmark reference """
+        name = attrs[(TEXTNS,'ref-name')]
+        html_id = "#" + self.get_anchor(name)
+        self.writedata()
+        self.opentag('a', {'href':html_id})
+        self.purgedata()
+
    def s_text_h(self, tag, attrs):
        """ Headings start """
        level = int(attrs[(TEXTNS,'outline-level')])
@ -1018,13 +1195,19 @@ class ODF2XHTML(handler.ContentHandler):
        self.purgedata()

    def e_text_h(self, tag, attrs):
-        """ Headings end """
+        """ Headings end
+            Side-effect: If there is no title in the metadata, then it is taken
+            from the first heading of any level.
+        """
        self.writedata()
        level = int(attrs[(TEXTNS,'outline-level')])
        if level > 6: level = 6 # Heading levels go only to 6 in XHTML
        if level < 1: level = 1
        lev = self.headinglevels[1:level+1]
        outline = '.'.join(map(str,lev) )
+        heading = ''.join(self.data)
+        if self.title == '': self.title = heading
+        # Changed by Kovid
        tail = ''.join(self.data)
        anchor = self.get_anchor("%s.%s" % ( outline, tail))
        anchor2 = self.get_anchor(tail) # Added by kovid to fix #7506
@ -1036,12 +1219,14 @@ class ODF2XHTML(handler.ContentHandler):
        self.purgedata()

    def s_text_line_break(self, tag, attrs):
+        """ Force a line break (<br/>) """
        self.writedata()
        self.emptytag('br')
        self.purgedata()

    def s_text_list(self, tag, attrs):
-        """ To know which level we're at, we have to count the number
+        """ Start a list (<ul> or <ol>)
+            To know which level we're at, we have to count the number
            of <text:list> elements on the tagstack.
        """
        name = attrs.get( (TEXTNS,'style-name') )
@ -1055,12 +1240,13 @@ class ODF2XHTML(handler.ContentHandler):
            name = self.tagstack.rfindattr( (TEXTNS,'style-name') )
        list_class = "%s_%d" % (name, level)
        if self.generate_css:
-            self.opentag('%s' % self.listtypes.get(list_class,'UL'), {'class': list_class })
+            self.opentag('%s' % self.listtypes.get(list_class,'ul'), {'class': list_class })
        else:
-            self.opentag('%s' % self.listtypes.get(list_class,'UL'))
+            self.opentag('%s' % self.listtypes.get(list_class,'ul'))
        self.purgedata()

    def e_text_list(self, tag, attrs):
+        """ End a list """
        self.writedata()
        name = attrs.get( (TEXTNS,'style-name') )
        level = self.tagstack.count_tags(tag) + 1
@ -1072,14 +1258,16 @@ class ODF2XHTML(handler.ContentHandler):
            # textbox itself may be nested within another list.
            name = self.tagstack.rfindattr( (TEXTNS,'style-name') )
        list_class = "%s_%d" % (name, level)
-        self.closetag(self.listtypes.get(list_class,'UL'))
+        self.closetag(self.listtypes.get(list_class,'ul'))
        self.purgedata()

    def s_text_list_item(self, tag, attrs):
+        """ Start list item """
        self.opentag('li')
        self.purgedata()

    def e_text_list_item(self, tag, attrs):
+        """ End list item """
        self.writedata()
        self.closetag('li')
        self.purgedata()
@ -1191,7 +1379,7 @@ class ODF2XHTML(handler.ContentHandler):
            if specialtag is None:
                specialtag = 'p'
        self.writedata()
-        if not self.data:
+        if not self.data: # Added by Kovid
            # Give substance to empty paragraphs, as rendered by OOo
            self.writeout('&#160;')
        self.closetag(specialtag)
@ -1254,55 +1442,30 @@ class ODF2XHTML(handler.ContentHandler):
 #-----------------------------------------------------------------------------

    def load(self, odffile):
-        self._odffile = odffile
+        """ Loads a document into the parser and parses it.
+            The argument can either be a filename or a document in memory.
+        """
+        self.lines = []
+        self._wfunc = self._wlines
+        if isinstance(odffile, basestring) \
+                or hasattr(odffile, 'read'): # Added by Kovid
+            self.document = load(odffile)
+        else:
+            self.document = odffile
+        self._walknode(self.document.topnode)

-    def parseodf(self):
-        self.xmlfile = ''
-        self.title = ''
-        self.data = []
-        self.tagstack = TagStack()
-        self.pstack = []
-        self.processelem = True
-        self.processcont = True
-        self.listtypes = {}
-        self.headinglevels = [0, 0,0,0,0,0, 0,0,0,0,0] # level 0 to 10
-        self.cs = StyleToCSS()
-        self.anchors = {}
+    def _walknode(self, node):
+        if node.nodeType == Node.ELEMENT_NODE:
+            self.startElementNS(node.qname, node.tagName, node.attributes)
+            for c in node.childNodes:
+                self._walknode(c)
+            self.endElementNS(node.qname, node.tagName)
+        if node.nodeType == Node.TEXT_NODE or node.nodeType == Node.CDATA_SECTION_NODE:
+            self.characters(unicode(node))

-        # Style declarations
-        self.stylestack = []
-        self.styledict = {}
-        self.currentstyle = None
-
-        # Footnotes and endnotes
-        self.notedict = {}
-        self.currentnote = 0
-        self.notebody = ''
-
-        # Tags from meta.xml
-        self.metatags = []
-
-        # Extract the interesting files
-        z = zipfile.ZipFile(self._odffile)
-
-        # For some reason Trac has trouble when xml.sax.make_parser() is used.
-        # Could it be because PyXML is installed, and therefore a different parser
-        # might be chosen? By calling expatreader directly we avoid this issue
-        parser = expatreader.create_parser()
-        parser.setFeature(handler.feature_namespaces, 1)
-        parser.setContentHandler(self)
-        parser.setErrorHandler(handler.ErrorHandler())
-        inpsrc = InputSource()
-
-        for xmlfile in ('meta.xml', 'styles.xml', 'content.xml'):
-            self.xmlfile = xmlfile
-            content = z.read(xmlfile)
-            inpsrc.setByteStream(StringIO(content))
-            parser.parse(inpsrc)
-        z.close()

    def odf2xhtml(self, odffile):
-        """ Load a file and return XHTML
+        """ Load a file and return the XHTML
        """
        self.load(odffile)
        return self.xhtml()
@ -1311,9 +1474,8 @@ class ODF2XHTML(handler.ContentHandler):
        if s != '': self.lines.append(s)

    def xhtml(self):
-        self.lines = []
-        self._wfunc = self._wlines
-        self.parseodf()
+        """ Returns the xhtml
+        """
        return ''.join(self.lines)

    def _writecss(self, s):
@ -1323,11 +1485,127 @@ class ODF2XHTML(handler.ContentHandler):
        pass

    def css(self):
-        self._wfunc = self._writenothing
-        self.parseodf()
+        """ Returns the CSS content """
        self._csslines = []
        self._wfunc = self._writecss
        self.generate_stylesheet()
        res = ''.join(self._csslines)
+        self._wfunc = self._wlines
        del self._csslines
        return res
+
+    def save(self, outputfile, addsuffix=False):
+        """ Save the HTML under the filename.
+            If the filename is '-' then save to stdout
+            We have the last style filename in self.stylefilename
+        """
+        if outputfile == '-':
+            import sys # Added by Kovid
+            outputfp = sys.stdout
+        else:
+            if addsuffix:
+                outputfile = outputfile + ".html"
+            outputfp = file(outputfile, "w")
+        outputfp.write(self.xhtml().encode('us-ascii','xmlcharrefreplace'))
+        outputfp.close()
+
+
+class ODF2XHTMLembedded(ODF2XHTML):
+    """ Variant of ODF2XHTML that writes its XHTML output into a caller-supplied list of lines"""
+
+    def __init__(self, lines, generate_css=True, embedable=False):
+        self._resetobject()
+        self.lines = lines
+
+        # Tags
+        self.generate_css = generate_css
+        self.elements = {
+#        (DCNS, 'title'): (self.s_processcont, self.e_dc_title),
+#        (DCNS, 'language'): (self.s_processcont, self.e_dc_contentlanguage),
+#        (DCNS, 'creator'): (self.s_processcont, self.e_dc_metatag),
+#        (DCNS, 'description'): (self.s_processcont, self.e_dc_metatag),
+#        (DCNS, 'date'): (self.s_processcont, self.e_dc_metatag),
+        (DRAWNS, 'frame'): (self.s_draw_frame, self.e_draw_frame),
+        (DRAWNS, 'image'): (self.s_draw_image, None),
+        (DRAWNS, 'fill-image'): (self.s_draw_fill_image, None),
+        (DRAWNS, "layer-set"):(self.s_ignorexml, None),
+        (DRAWNS, 'page'): (self.s_draw_page, self.e_draw_page),
+        (DRAWNS, 'object'): (self.s_draw_object, None),
+        (DRAWNS, 'object-ole'): (self.s_draw_object_ole, None),
+        (DRAWNS, 'text-box'): (self.s_draw_textbox, self.e_draw_textbox),
+#        (METANS, 'creation-date'):(self.s_processcont, self.e_dc_metatag),
+#        (METANS, 'generator'):(self.s_processcont, self.e_dc_metatag),
+#        (METANS, 'initial-creator'): (self.s_processcont, self.e_dc_metatag),
+#        (METANS, 'keyword'): (self.s_processcont, self.e_dc_metatag),
+        (NUMBERNS, "boolean-style"):(self.s_ignorexml, None),
+        (NUMBERNS, "currency-style"):(self.s_ignorexml, None),
+        (NUMBERNS, "date-style"):(self.s_ignorexml, None),
+        (NUMBERNS, "number-style"):(self.s_ignorexml, None),
+        (NUMBERNS, "text-style"):(self.s_ignorexml, None),
+#        (OFFICENS, "automatic-styles"):(self.s_office_automatic_styles, None),
+#        (OFFICENS, "document-content"):(self.s_office_document_content, self.e_office_document_content),
+        (OFFICENS, "forms"):(self.s_ignorexml, None),
+#        (OFFICENS, "master-styles"):(self.s_office_master_styles, None),
+        (OFFICENS, "meta"):(self.s_ignorecont, None),
+#        (OFFICENS, "presentation"):(self.s_office_presentation, self.e_office_presentation),
+#        (OFFICENS, "spreadsheet"):(self.s_office_spreadsheet, self.e_office_spreadsheet),
+#        (OFFICENS, "styles"):(self.s_office_styles, None),
+#        (OFFICENS, "text"):(self.s_office_text, self.e_office_text),
+        (OFFICENS, "scripts"):(self.s_ignorexml, None),
+        (PRESENTATIONNS, "notes"):(self.s_ignorexml, None),
+##       (STYLENS, "default-page-layout"):(self.s_style_default_page_layout, self.e_style_page_layout),
+#        (STYLENS, "default-page-layout"):(self.s_ignorexml, None),
+#        (STYLENS, "default-style"):(self.s_style_default_style, self.e_style_default_style),
+#        (STYLENS, "drawing-page-properties"):(self.s_style_handle_properties, None),
+#        (STYLENS, "font-face"):(self.s_style_font_face, None),
+##       (STYLENS, "footer"):(self.s_style_footer, self.e_style_footer),
+##       (STYLENS, "footer-style"):(self.s_style_footer_style, None),
+#        (STYLENS, "graphic-properties"):(self.s_style_handle_properties, None),
+#        (STYLENS, "handout-master"):(self.s_ignorexml, None),
+##       (STYLENS, "header"):(self.s_style_header, self.e_style_header),
+##       (STYLENS, "header-footer-properties"):(self.s_style_handle_properties, None),
+##       (STYLENS, "header-style"):(self.s_style_header_style, None),
+#        (STYLENS, "master-page"):(self.s_style_master_page, None),
+#        (STYLENS, "page-layout-properties"):(self.s_style_handle_properties, None),
+##       (STYLENS, "page-layout"):(self.s_style_page_layout, self.e_style_page_layout),
+#        (STYLENS, "page-layout"):(self.s_ignorexml, None),
+#        (STYLENS, "paragraph-properties"):(self.s_style_handle_properties, None),
+#        (STYLENS, "style"):(self.s_style_style, self.e_style_style),
+#        (STYLENS, "table-cell-properties"):(self.s_style_handle_properties, None),
+#        (STYLENS, "table-column-properties"):(self.s_style_handle_properties, None),
+#        (STYLENS, "table-properties"):(self.s_style_handle_properties, None),
+#        (STYLENS, "text-properties"):(self.s_style_handle_properties, None),
+        (SVGNS, 'desc'): (self.s_ignorexml, None),
+        (TABLENS, 'covered-table-cell'): (self.s_ignorexml, None),
+        (TABLENS, 'table-cell'): (self.s_table_table_cell, self.e_table_table_cell),
+        (TABLENS, 'table-column'): (self.s_table_table_column, None),
+        (TABLENS, 'table-row'): (self.s_table_table_row, self.e_table_table_row),
+        (TABLENS, 'table'): (self.s_table_table, self.e_table_table),
+        (TEXTNS, 'a'): (self.s_text_a, self.e_text_a),
+        (TEXTNS, "alphabetical-index-source"):(self.s_text_x_source, self.e_text_x_source),
+        (TEXTNS, "bibliography-configuration"):(self.s_ignorexml, None),
+        (TEXTNS, "bibliography-source"):(self.s_text_x_source, self.e_text_x_source),
+        (TEXTNS, 'h'): (self.s_text_h, self.e_text_h),
+        (TEXTNS, "illustration-index-source"):(self.s_text_x_source, self.e_text_x_source),
+        (TEXTNS, 'line-break'):(self.s_text_line_break, None),
+        (TEXTNS, "linenumbering-configuration"):(self.s_ignorexml, None),
+        (TEXTNS, "list"):(self.s_text_list, self.e_text_list),
+        (TEXTNS, "list-item"):(self.s_text_list_item, self.e_text_list_item),
+        (TEXTNS, "list-level-style-bullet"):(self.s_text_list_level_style_bullet, self.e_text_list_level_style_bullet),
+        (TEXTNS, "list-level-style-number"):(self.s_text_list_level_style_number, self.e_text_list_level_style_number),
+        (TEXTNS, "list-style"):(None, None),
+        (TEXTNS, "note"):(self.s_text_note, None),
+        (TEXTNS, "note-body"):(self.s_text_note_body, self.e_text_note_body),
+        (TEXTNS, "note-citation"):(None, self.e_text_note_citation),
+        (TEXTNS, "notes-configuration"):(self.s_ignorexml, None),
+        (TEXTNS, "object-index-source"):(self.s_text_x_source, self.e_text_x_source),
+        (TEXTNS, 'p'): (self.s_text_p, self.e_text_p),
+        (TEXTNS, 's'): (self.s_text_s, None),
+        (TEXTNS, 'span'): (self.s_text_span, self.e_text_span),
+        (TEXTNS, 'tab'): (self.s_text_tab, None),
+        (TEXTNS, "table-index-source"):(self.s_text_x_source, self.e_text_x_source),
+        (TEXTNS, "table-of-content-source"):(self.s_text_x_source, self.e_text_x_source),
+        (TEXTNS, "user-index-source"):(self.s_text_x_source, self.e_text_x_source),
+        (TEXTNS, "page-number"):(None, None),
+        }
+
--- a/src/odf/opendocument.py
+++ b/src/odf/opendocument.py
@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-# Copyright (C) 2006-2009 Søren Roug, European Environment Agency
+# Copyright (C) 2006-2010 Søren Roug, European Environment Agency
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@ -41,7 +41,7 @@ IS_IMAGE = 1
 # We need at least Python 2.2
 assert sys.version_info[0]>=2 and sys.version_info[1] >= 2

-sys.setrecursionlimit=50
+#sys.setrecursionlimit(100)
 #The recursion limit is set conservative so mistakes like
 # s=content() s.addElement(s) won't eat up too much processor time.

@ -128,12 +128,12 @@ class OpenDocument:
            self.element_dict[element.qname] = []
        self.element_dict[element.qname].append(element)
        if element.qname == (STYLENS, u'style'):
-            self._register_stylename(element) # Add to style dictionary
+            self.__register_stylename(element) # Add to style dictionary
        styleref = element.getAttrNS(TEXTNS,u'style-name')
        if styleref is not None and self._styles_ooo_fix.has_key(styleref):
            element.setAttrNS(TEXTNS,u'style-name', self._styles_ooo_fix[styleref])

-    def _register_stylename(self, element):
+    def __register_stylename(self, element):
        ''' Register a style. But there are three style dictionaries:
            office:styles, office:automatic-styles and office:master-styles
            Chapter 14
@ -165,7 +165,7 @@ class OpenDocument:
        """ Generates the full document as an XML file
            Always written as a bytestream in UTF-8 encoding
        """
-        self._replaceGenerator()
+        self.__replaceGenerator()
        xml=StringIO()
        xml.write(_XMLPROLOGUE)
        self.topnode.toXml(0, xml)
@ -197,8 +197,10 @@ class OpenDocument:
        x.write_close_tag(0, xml)
        return xml.getvalue()

-    def manifestxml(self):
-        """ Generates the manifest.xml file """
+    def __manifestxml(self):
+        """ Generates the manifest.xml file
+            The self.manifest isn't available unless the document is being saved
+        """
        xml=StringIO()
        xml.write(_XMLPROLOGUE)
        self.manifest.toXml(0,xml)
@ -206,7 +208,7 @@ class OpenDocument:

    def metaxml(self):
        """ Generates the meta.xml file """
-        self._replaceGenerator()
+        self.__replaceGenerator()
        x = DocumentMeta()
        x.addElement(self.meta)
        xml=StringIO()
@ -344,7 +346,7 @@ class OpenDocument:
            self.thumbnail = filecontent

    def addObject(self, document, objectname=None):
-        """ Add an object. The object must be an OpenDocument class
+        """ Adds an object (subdocument). The object must be an OpenDocument class
            The return value will be the folder in the zipfile the object is stored in
        """
        self.childobjects.append(document)
@ -367,15 +369,16 @@ class OpenDocument:
                zi.compress_type = zipfile.ZIP_STORED
                zi.external_attr = UNIXPERMS
                self._z.writestr(zi, fileobj)
-        if hasPictures:
-            self.manifest.addElement(manifest.FileEntry(fullpath="%sPictures/" % folder,mediatype=""))
+        # According to section 17.7.3 in ODF 1.1, the pictures folder should not have a manifest entry
+#       if hasPictures:
+#           self.manifest.addElement(manifest.FileEntry(fullpath="%sPictures/" % folder, mediatype=""))
        # Look in subobjects
        subobjectnum = 1
        for subobject in object.childobjects:
            self._savePictures(subobject,'%sObject %d/' % (folder, subobjectnum))
            subobjectnum += 1

-    def _replaceGenerator(self):
+    def __replaceGenerator(self):
        """ Section 3.1.1: The application MUST NOT export the original identifier
            belonging to the application that created the document.
        """
@ -385,22 +388,29 @@ class OpenDocument:
        self.meta.addElement(meta.Generator(text=TOOLSVERSION))

    def save(self, outputfile, addsuffix=False):
-        """ Save the document under the filename """
+        """ Save the document as a ZIP package under the filename.
+            If the filename is '-' then save to stdout.
+            If addsuffix is true (and the filename is not '-'), the suffix
+            looked up in odmimetypes for self.mimetype is appended to the
+            filename; '.xxx' is used when the mimetype is not listed.
+        """
        if outputfile == '-':
            outputfp = zipfile.ZipFile(sys.stdout,"w")
        else:
            if addsuffix:
                outputfile = outputfile + odmimetypes.get(self.mimetype,'.xxx')
            outputfp = zipfile.ZipFile(outputfile, "w")
-        self._zipwrite(outputfp)
+        self.__zipwrite(outputfp)
        outputfp.close()

    def write(self, outputfp):
+        """ User API to write the ODF file to an open file descriptor
+            Writes the ZIP format.
+            The ZIP archive object is closed explicitly so that its central
+            directory is written out before this method returns, instead of
+            relying on garbage collection. outputfp is not closed here; the
+            caller remains responsible for it.
+        """
        zipoutputfp = zipfile.ZipFile(outputfp,"w")
-        self._zipwrite(zipoutputfp)
+        self.__zipwrite(zipoutputfp)
+        zipoutputfp.close()

-    def _zipwrite(self, outputfp):
-        """ Write the document to an open file pointer """
+    def __zipwrite(self, outputfp):
+        """ Write the document to an open file pointer
+            This is where the real work is done
+        """
        self._z = outputfp
        self._now = time.localtime()[:6]
        self.manifest = manifest.Manifest()
@ -438,7 +448,7 @@ class OpenDocument:
        zi = zipfile.ZipInfo("META-INF/manifest.xml", self._now)
        zi.compress_type = zipfile.ZIP_DEFLATED
        zi.external_attr = UNIXPERMS
-        self._z.writestr(zi, self.manifestxml() )
+        self._z.writestr(zi, self.__manifestxml() )
        del self._z
        del self._now
        del self.manifest
@ -464,8 +474,8 @@ class OpenDocument:
        self._z.writestr(zi, object.contentxml() )

        # Write settings
-        if self == object and self.settings.hasChildNodes():
-            self.manifest.addElement(manifest.FileEntry(fullpath="settings.xml",mediatype="text/xml"))
+        if object.settings.hasChildNodes():
+            self.manifest.addElement(manifest.FileEntry(fullpath="%ssettings.xml" % folder, mediatype="text/xml"))
            zi = zipfile.ZipInfo("%ssettings.xml" % folder, self._now)
            zi.compress_type = zipfile.ZIP_DEFLATED
            zi.external_attr = UNIXPERMS
@ -473,7 +483,7 @@ class OpenDocument:

        # Write meta
        if self == object:
-            self.manifest.addElement(manifest.FileEntry(fullpath="meta.xml",mediatype="text/xml"))
+            self.manifest.addElement(manifest.FileEntry(fullpath="meta.xml", mediatype="text/xml"))
            zi = zipfile.ZipInfo("meta.xml", self._now)
            zi.compress_type = zipfile.ZIP_DEFLATED
            zi.external_attr = UNIXPERMS
@ -497,6 +507,7 @@ class OpenDocument:
        return element.Text(data)

    def createCDATASection(self, data):
+        """ Method to create a CDATA section
+            Returns an element.CDATASection built from data
+        """
-        return element.CDATASection(cdata)
+        return element.CDATASection(data)

    def getMediaType(self):
@ -504,12 +515,14 @@ class OpenDocument:
        return self.mimetype

    def getStyleByName(self, name):
+        """ Finds a style object based on the name
+            The name is converted with make_NCName before the lookup, and
+            the caches are rebuilt first if the styles dictionary is empty.
+            Returns None when no style is stored under that name.
+        """
        ncname = make_NCName(name)
        if self._styles_dict == {}:
            self.rebuild_caches()
        return self._styles_dict.get(ncname, None)

    def getElementsByType(self, element):
+        """ Gets elements based on the type, which is a function from text.py, draw.py etc. """
        obj = element(check_grammar=False)
        if self.element_dict == {}:
            self.rebuild_caches()
@ -517,53 +530,59 @@ class OpenDocument:

 # Convenience functions
 def OpenDocumentChart():
+    """ Creates a chart document
+        A new Chart element is added to the document body and is also
+        reachable as doc.chart. Returns the document.
+    """
    doc = OpenDocument('application/vnd.oasis.opendocument.chart')
    doc.chart = Chart()
    doc.body.addElement(doc.chart)
    return doc

 def OpenDocumentDrawing():
+    """ Creates a drawing document
+        A new Drawing element is added to the document body and is also
+        reachable as doc.drawing. Returns the document.
+    """
    doc = OpenDocument('application/vnd.oasis.opendocument.graphics')
    doc.drawing = Drawing()
    doc.body.addElement(doc.drawing)
    return doc

 def OpenDocumentImage():
+    """ Creates an image document
+        A new Image element is added to the document body and is also
+        reachable as doc.image. Returns the document.
+    """
    doc = OpenDocument('application/vnd.oasis.opendocument.image')
    doc.image = Image()
    doc.body.addElement(doc.image)
    return doc

 def OpenDocumentPresentation():
+    """ Creates a presentation document
+        A new Presentation element is added to the document body and is
+        also reachable as doc.presentation. Returns the document.
+    """
    doc = OpenDocument('application/vnd.oasis.opendocument.presentation')
    doc.presentation = Presentation()
    doc.body.addElement(doc.presentation)
    return doc

 def OpenDocumentSpreadsheet():
+    """ Creates a spreadsheet document
+        A new Spreadsheet element is added to the document body and is
+        also reachable as doc.spreadsheet. Returns the document.
+    """
    doc = OpenDocument('application/vnd.oasis.opendocument.spreadsheet')
    doc.spreadsheet = Spreadsheet()
    doc.body.addElement(doc.spreadsheet)
    return doc

 def OpenDocumentText():
+    """ Creates a text document
+        A new Text element is added to the document body and is also
+        reachable as doc.text. Returns the document.
+    """
    doc = OpenDocument('application/vnd.oasis.opendocument.text')
    doc.text = Text()
    doc.body.addElement(doc.text)
    return doc

+def OpenDocumentTextMaster():
+    """ Creates a text master document
+        A new Text element is added to the document body and is also
+        reachable as doc.text. Returns the document.
+    """
+    doc = OpenDocument('application/vnd.oasis.opendocument.text-master')
+    doc.text = Text()
+    doc.body.addElement(doc.text)
+    return doc

-def load(odffile):
+def __loadxmlparts(z, manifest, doc, objectpath):
    from load import LoadParser
    from xml.sax import make_parser, handler
-    z = zipfile.ZipFile(odffile)
-    mimetype = z.read('mimetype')
-    doc = OpenDocument(mimetype, add_generator=False)

-    # Look in the manifest file to see if which of the four files there are
-    manifestpart = z.read('META-INF/manifest.xml')
-    manifest =  manifestlist(manifestpart)
-    for xmlfile in ('settings.xml', 'meta.xml', 'content.xml', 'styles.xml'):
+    for xmlfile in (objectpath+'settings.xml', objectpath+'meta.xml', objectpath+'content.xml', objectpath+'styles.xml'):
        if not manifest.has_key(xmlfile):
            continue
        try:
@ -580,7 +599,19 @@ def load(odffile):
            parser.parse(inpsrc)
            del doc._parsing
        except KeyError, v: pass
-    # FIXME: Add subobjects correctly here
+
+def load(odffile):
+    """ Load an ODF file into memory
+        Returns a reference to the structure
+    """
+    z = zipfile.ZipFile(odffile)
+    mimetype = z.read('mimetype')
+    doc = OpenDocument(mimetype, add_generator=False)
+
+    # Look in the manifest file to see which of the four XML files are present
+    manifestpart = z.read('META-INF/manifest.xml')
+    manifest =  manifestlist(manifestpart)
+    __loadxmlparts(z, manifest, doc, '')
    for mentry,mvalue in manifest.items():
        if mentry[:9] == "Pictures/" and len(mentry) > 9:
            doc.addPicture(mvalue['full-path'], mvalue['media-type'], z.read(mentry))
@ -588,6 +619,13 @@ def load(odffile):
            doc.addThumbnail(z.read(mentry))
        elif mentry in ('settings.xml', 'meta.xml', 'content.xml', 'styles.xml'):
            pass
+        # Load subobjects into structure
+        elif mentry[:7] == "Object " and len(mentry) < 11 and mentry[-1] == "/":
+            subdoc = OpenDocument(mvalue['media-type'], add_generator=False)
+            doc.addObject(subdoc, "/" + mentry[:-1])
+            __loadxmlparts(z, manifest, subdoc, mentry)
+        elif mentry[:7] == "Object ":
+            pass # Don't load subobjects as opaque objects
        else:
            if mvalue['full-path'][-1] == '/':
                doc._extra.append(OpaqueObject(mvalue['full-path'], mvalue['media-type'], None))
@ -612,4 +650,5 @@ def load(odffile):
    elif mimetype[:42] == 'application/vnd.oasis.opendocument.formula':
        doc.formula = b[0].firstChild
    return doc
+
 # vim: set expandtab sw=4 :