GRiker 2013-03-03 08:46:49 -08:00
commit 61084c2392
13 changed files with 332 additions and 159 deletions

View File

@@ -37,7 +37,7 @@ nbproject/
 calibre_plugins/
 recipes/.git
 recipes/.gitignore
-recipes/README
+recipes/README.md
 recipes/katalog_egazeciarz.recipe
 recipes/tv_axnscifi.recipe
 recipes/tv_comedycentral.recipe

View File

@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 __license__ = 'GPL v3'
-__copyright__ = u'2010-2012, Tomasz Dlugosz <tomek3d@gmail.com>'
+__copyright__ = u'2010-2013, Tomasz Dlugosz <tomek3d@gmail.com>'
 '''
 fronda.pl
 '''

@@ -68,7 +68,8 @@ class Fronda(BasicNewsRecipe):
                 article_url = 'http://www.fronda.pl' + article_a['href']
                 article_title = self.tag_to_string(article_a)
                 articles[genName].append( { 'title' : article_title, 'url' : article_url, 'date' : article_date })
-            feeds.append((genName, articles[genName]))
+            if articles[genName]:
+                feeds.append((genName, articles[genName]))
         return feeds

     keep_only_tags = [

@@ -82,8 +83,10 @@ class Fronda(BasicNewsRecipe):
         dict(name='h3', attrs={'class':'block-header article comments'}),
         dict(name='ul', attrs={'class':'comment-list'}),
         dict(name='ul', attrs={'class':'category'}),
+        dict(name='ul', attrs={'class':'tag-list'}),
         dict(name='p', attrs={'id':'comments-disclaimer'}),
         dict(name='div', attrs={'style':'text-align: left; margin-bottom: 15px;'}),
-        dict(name='div', attrs={'style':'text-align: left; margin-top: 15px;'}),
+        dict(name='div', attrs={'style':'text-align: left; margin-top: 15px; margin-bottom: 30px;'}),
+        dict(name='div', attrs={'class':'related-articles content'}),
         dict(name='div', attrs={'id':'comment-form'})
     ]

View File

@@ -2,7 +2,8 @@
 #!/usr/bin/env python
 __license__ = 'GPL v3'
-__copyright__ = '2011, Piotr Kontek, piotr.kontek@gmail.com'
+__copyright__ = '2011, Piotr Kontek, piotr.kontek@gmail.com \
+                2013, Tomasz Długosz, tomek3d@gmail.com'

 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ptempfile import PersistentTemporaryFile

@@ -12,9 +13,9 @@ import re

 class GN(BasicNewsRecipe):
     EDITION = 0

-    __author__ = 'Piotr Kontek'
-    title = u'Gość niedzielny'
-    description = 'Weekly magazine'
+    __author__ = 'Piotr Kontek, Tomasz Długosz'
+    title = u'Gość Niedzielny'
+    description = 'Ogólnopolski tygodnik katolicki'
     encoding = 'utf-8'
     no_stylesheets = True
     language = 'pl'

@@ -38,17 +39,25 @@ class GN(BasicNewsRecipe):
         first = True
         for p in main_section.findAll('p', attrs={'class':None}, recursive=False):
             if first and p.find('img') != None:
-                article = article + '<p>'
-                article = article + str(p.find('img')).replace('src="/files/','src="http://www.gosc.pl/files/')
-                article = article + '<font size="-2">'
+                article += '<p>'
+                article += str(p.find('img')).replace('src="/files/','src="http://www.gosc.pl/files/')
+                article += '<font size="-2">'
                 for s in p.findAll('span'):
-                    article = article + self.tag_to_string(s)
-                article = article + '</font></p>'
+                    article += self.tag_to_string(s)
+                article += '</font></p>'
             else:
-                article = article + str(p).replace('src="/files/','src="http://www.gosc.pl/files/')
+                article += str(p).replace('src="/files/','src="http://www.gosc.pl/files/')
             first = False
+        limiter = main_section.find('p', attrs={'class' : 'limiter'})
+        if limiter:
+            article += str(limiter)

-        html = unicode(title) + unicode(authors) + unicode(article)
+        html = unicode(title)
+        #sometimes authors are not filled in:
+        if authors:
+            html += unicode(authors) + unicode(article)
+        else:
+            html += unicode(article)

         self.temp_files.append(PersistentTemporaryFile('_temparse.html'))
         self.temp_files[-1].write(html)

@@ -65,7 +74,8 @@ class GN(BasicNewsRecipe):
             if img != None:
                 a = img.parent
                 self.EDITION = a['href']
-                self.title = img['alt']
+                #this was preventing kindles from moving old issues to 'Back Issues' category:
+                #self.title = img['alt']
                 self.cover_url = 'http://www.gosc.pl' + img['src']
                 if year != date.today().year or not first:
                     break

View File

@@ -15,26 +15,31 @@ class tvn24(BasicNewsRecipe):
     remove_javascript = True
     no_stylesheets = True
     keep_only_tags=[
 #        dict(name='h1', attrs={'class':'size38 mt20 pb20'}),
         dict(name='div', attrs={'class':'mainContainer'}),
 #        dict(name='p'),
 #        dict(attrs={'class':['size18 mt10 mb15', 'bold topicSize1', 'fromUsers content', 'textArticleDefault']})
         ]
     remove_tags=[
         dict(attrs={'class':['commentsInfo', 'textSize', 'related newsNews align-right', 'box', 'watchMaterial text', 'related galleryGallery align-center', 'advert block-alignment-right', 'userActions', 'socialBookmarks', 'im yourArticle fl', 'dynamicButton addComment fl', 'innerArticleModule onRight cols externalContent', 'thumbsGallery', 'relatedObject customBlockquote align-right', 'lead', 'mainRightColumn', 'articleDateContainer borderGreyBottom', 'socialMediaContainer onRight loaded', 'quizContent', 'twitter', 'facebook', 'googlePlus', 'share', 'voteResult', 'reportTitleBar bgBlue_v4 mb15', 'innerVideoModule center']}),
         dict(name='article', attrs={'class':['singleArtPhotoCenter', 'singleArtPhotoRight', 'singleArtPhotoLeft']}),
         dict(name='section', attrs={'id':['forum', 'innerArticle', 'quiz toCenter', 'mb20']}),
         dict(name='div', attrs={'class':'socialMediaContainer big p20 mb20 borderGrey loaded'})
         ]
     remove_tags_after=[dict(name='li', attrs={'class':'share'})]

     feeds = [(u'Najnowsze', u'http://www.tvn24.pl/najnowsze.xml'), ]
     #(u'Polska', u'www.tvn24.pl/polska.xml'), (u'\u015awiat', u'http://www.tvn24.pl/swiat.xml'), (u'Sport', u'http://www.tvn24.pl/sport.xml'), (u'Biznes', u'http://www.tvn24.pl/biznes.xml'), (u'Meteo', u'http://www.tvn24.pl/meteo.xml'), (u'Micha\u0142ki', u'http://www.tvn24.pl/michalki.xml'), (u'Kultura', u'http://www.tvn24.pl/kultura.xml')]

+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
+
     def preprocess_html(self, soup):
         for alink in soup.findAll('a'):
             if alink.string is not None:
                 tstr = alink.string
                 alink.replaceWith(tstr)
         return soup

     def postprocess_html(self, soup, first):
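Note: the new file now contains two preprocess_html definitions; since a later def statement rebinds the method name, only the second (link-flattening) version will ever run, and the inline-style stripping above it is dead code. A possible merged version, purely as a sketch:

    def preprocess_html(self, soup):
        # strip inline styles, then replace each link with its bare text
        for item in soup.findAll(style=True):
            del item['style']
        for alink in soup.findAll('a'):
            if alink.string is not None:
                alink.replaceWith(alink.string)
        return soup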

View File

@@ -217,6 +217,8 @@ class Cache(object):
                 field.series_field = self.fields[name[:-len('_index')]]
             elif name == 'series_index':
                 field.series_field = self.fields['series']
+            elif name == 'authors':
+                field.author_sort_field = self.fields['author_sort']

     @read_api
     def field_for(self, name, book_id, default_value=None):

View File

@@ -402,6 +402,13 @@ class AuthorsField(ManyToManyField):
     def category_sort_value(self, item_id, book_ids, lang_map):
         return self.table.asort_map[item_id]

+    def db_author_sort_for_book(self, book_id):
+        return self.author_sort_field.for_book(book_id)
+
+    def author_sort_for_book(self, book_id):
+        return ' & '.join(self.table.asort_map[k] for k in
+                          self.table.book_col_map[book_id])
+
 class FormatsField(ManyToManyField):

     def for_book(self, book_id, default_value=None):
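The two new helpers answer different questions: db_author_sort_for_book reads the author_sort value currently stored for the book, while author_sort_for_book recomputes what it should be from the per-author sort map. Roughly, with hypothetical data:

    # hypothetical maps, for illustration only
    asort_map = {7: 'Goyal, Kovid', 8: 'Layog, Divok'}
    book_col_map = {3: (7, 8)}
    ' & '.join(asort_map[k] for k in book_col_map[3])
    # -> 'Goyal, Kovid & Layog, Divok'

The many_many() writer added below compares the two with strcmp to detect case-only author renames.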

View File

@@ -168,7 +168,7 @@ class AuthorsTable(ManyToManyTable):
         self.asort_map = {}
         for row in db.conn.execute(
                 'SELECT id, name, sort, link FROM authors'):
-            self.id_map[row[0]] = row[1]
+            self.id_map[row[0]] = self.unserialize(row[1])
             self.asort_map[row[0]] = (row[2] if row[2] else
                                       author_to_author_sort(row[1]))
             self.alink_map[row[0]] = row[3]
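unserialize is presumably the inverse of the storage convention used when inserting authors (see the val.replace(',', '|') calls added in write.py below): commas are not allowed in stored author names, so a name like 'An, Author' goes into sqlite as 'An| Author' and has to be mapped back on read. An assumed sketch of that behaviour:

    # assumption: unserialize undoes the comma -> pipe encoding
    def unserialize(self, val):
        return val.replace('|', ',')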

View File

@@ -203,10 +203,63 @@ class WritingTest(BaseTest):
     # }}}

+    def test_many_many_basic(self): # {{{
+        'Test the different code paths for writing to a many-many field'
+        cl = self.cloned_library
+        cache = self.init_cache(cl)
+        ae, af, sf = self.assertEqual, self.assertFalse, cache.set_field
+
+        # Tags
+        ae(sf('#tags', {1:cache.field_for('tags', 1), 2:cache.field_for('tags', 2)}),
+           {1, 2})
+        for name in ('tags', '#tags'):
+            f = cache.fields[name]
+            af(sf(name, {1:('tag one', 'News')}, allow_case_change=False))
+            ae(sf(name, {1:'tag one, News'}), {1, 2})
+            ae(sf(name, {3:('tag two', 'sep,sep2')}), {2, 3})
+            ae(len(f.table.id_map), 4)
+            ae(sf(name, {1:None}), set([1]))
+            cache2 = self.init_cache(cl)
+            for c in (cache, cache2):
+                ae(c.field_for(name, 3), ('tag two', 'sep;sep2'))
+                ae(len(c.fields[name].table.id_map), 3)
+                ae(len(c.fields[name].table.id_map), 3)
+                ae(c.field_for(name, 1), ())
+                ae(c.field_for(name, 2), ('tag one', 'tag two'))
+            del cache2
+
+        # Authors
+        ae(sf('#authors', {k:cache.field_for('authors', k) for k in (1,2,3)}),
+           {1,2,3})
+        for name in ('authors', '#authors'):
+            f = cache.fields[name]
+            ae(len(f.table.id_map), 3)
+            af(cache.set_field(name, {3:None if name == 'authors' else 'Unknown'}))
+            ae(cache.set_field(name, {3:'Kovid Goyal & Divok Layog'}), set([3]))
+            ae(cache.set_field(name, {1:'', 2:'An, Author'}), {1,2})
+            cache2 = self.init_cache(cl)
+            for c in (cache, cache2):
+                ae(len(c.fields[name].table.id_map), 4 if name =='authors' else 3)
+                ae(c.field_for(name, 3), ('Kovid Goyal', 'Divok Layog'))
+                ae(c.field_for(name, 2), ('An, Author',))
+                ae(c.field_for(name, 1), ('Unknown',) if name=='authors' else ())
+                ae(c.field_for('author_sort', 1), 'Unknown')
+                ae(c.field_for('author_sort', 2), 'An, Author')
+                ae(c.field_for('author_sort', 3), 'Goyal, Kovid & Layog, Divok')
+            del cache2
+        ae(cache.set_field('authors', {1:'KoviD GoyaL'}), {1, 3})
+        ae(cache.field_for('author_sort', 1), 'GoyaL, KoviD')
+        ae(cache.field_for('author_sort', 3), 'GoyaL, KoviD & Layog, Divok')
+        # TODO: identifiers, languages
+    # }}}
+
 def tests():
-    return unittest.TestLoader().loadTestsFromTestCase(WritingTest)
+    tl = unittest.TestLoader()
+    # return tl.loadTestsFromName('writing.WritingTest.test_many_many_basic')
+    return tl.loadTestsFromTestCase(WritingTest)

 def run():
     unittest.TextTestRunner(verbosity=2).run(tests())
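To iterate on just the new test, the commented line can be swapped in for the loadTestsFromTestCase call (module path as given in that comment):

    def tests():
        tl = unittest.TestLoader()
        return tl.loadTestsFromName('writing.WritingTest.test_many_many_basic')

run() will then execute only test_many_many_basic with the same verbose runner.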

View File

@@ -12,8 +12,11 @@ from functools import partial
 from datetime import datetime

 from calibre.constants import preferred_encoding, ispy3
+from calibre.ebooks.metadata import author_to_author_sort
 from calibre.utils.date import (parse_only_date, parse_date, UNDEFINED_DATE,
                                 isoformat)
+from calibre.utils.icu import strcmp

 if ispy3:
     unicode = str
@@ -45,15 +48,20 @@ def get_series_values(val):
         pass
     return (val, None)

-def multiple_text(sep, x):
-    if x is None:
+def multiple_text(sep, ui_sep, x):
+    if not x:
         return ()
     if isinstance(x, bytes):
         x = x.decode(preferred_encoding, 'replace')
     if isinstance(x, unicode):
         x = x.split(sep)
-    x = (y.strip() for y in x if y.strip())
-    return (' '.join(y.split()) for y in x if y)
+    else:
+        x = (y.decode(preferred_encoding, 'replace') if isinstance(y, bytes)
+             else y for y in x)
+    ui_sep = ui_sep.strip()
+    repsep = ',' if ui_sep == ';' else ';'
+    x = (y.strip().replace(ui_sep, repsep) for y in x if y.strip())
+    return tuple(' '.join(y.split()) for y in x if y)

 def adapt_datetime(x):
     if isinstance(x, (unicode, bytes)):
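multiple_text now accepts either a separator-delimited string or an iterable of values, and rewrites any embedded UI separator inside an item so it cannot be confused with the delimiter on display. Illustrative calls, using the tags configuration where ui_to_list is ',' and list_to_ui is ', ' (as exercised by the new test in writing.py):

    multiple_text(',', ', ', 'tag one, News')          # -> ('tag one', 'News')
    multiple_text(',', ', ', ('tag two', 'sep,sep2'))  # -> ('tag two', 'sep;sep2')
    multiple_text(',', ', ', None)                     # -> ()

Returning a tuple instead of a generator also means the result can be safely reused.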
@@ -92,7 +100,8 @@ def get_adapter(name, metadata):
     dt = metadata['datatype']
     if dt == 'text':
         if metadata['is_multiple']:
-            ans = partial(multiple_text, metadata['is_multiple']['ui_to_list'])
+            m = metadata['is_multiple']
+            ans = partial(multiple_text, m['ui_to_list'], m['list_to_ui'])
         else:
             ans = single_text
     elif dt == 'series':
@@ -132,7 +141,7 @@ def get_adapter(name, metadata):

 def one_one_in_books(book_id_val_map, db, field, *args):
     'Set a one-one field in the books table'
     if book_id_val_map:
-        sequence = tuple((sqlite_datetime(v), k) for k, v in book_id_val_map.iteritems())
+        sequence = ((sqlite_datetime(v), k) for k, v in book_id_val_map.iteritems())
         db.conn.executemany(
             'UPDATE books SET %s=? WHERE id=?'%field.metadata['column'], sequence)
         field.table.book_col_map.update(book_id_val_map)

@@ -150,7 +159,7 @@ def one_one_in_other(book_id_val_map, db, field, *args):
     if updated:
         db.conn.executemany('INSERT OR REPLACE INTO %s(book,%s) VALUES (?,?)'%(
             field.metadata['table'], field.metadata['column']),
-            tuple((k, sqlite_datetime(v)) for k, v in updated.iteritems()))
+            ((k, sqlite_datetime(v)) for k, v in updated.iteritems()))
         field.table.book_col_map.update(updated)
     return set(book_id_val_map)
@@ -178,6 +187,44 @@ def safe_lower(x):
     except (TypeError, ValueError, KeyError, AttributeError):
         return x

+def get_db_id(val, db, m, table, kmap, rid_map, allow_case_change,
+              case_changes, val_map, is_authors=False):
+    ''' Get the db id for the value val. If val does not exist in the db it is
+    inserted into the db. '''
+    kval = kmap(val)
+    item_id = rid_map.get(kval, None)
+    if item_id is None:
+        if is_authors:
+            aus = author_to_author_sort(val)
+            db.conn.execute('INSERT INTO authors(name,sort) VALUES (?,?)',
+                            (val.replace(',', '|'), aus))
+        else:
+            db.conn.execute('INSERT INTO %s(%s) VALUES (?)'%(
+                m['table'], m['column']), (val,))
+        item_id = rid_map[kval] = db.conn.last_insert_rowid()
+        table.id_map[item_id] = val
+        table.col_book_map[item_id] = set()
+        if is_authors:
+            table.asort_map[item_id] = aus
+            table.alink_map[item_id] = ''
+    elif allow_case_change and val != table.id_map[item_id]:
+        case_changes[item_id] = val
+    val_map[val] = item_id
+
+def change_case(case_changes, dirtied, db, table, m, is_authors=False):
+    if is_authors:
+        vals = ((val.replace(',', '|'), item_id) for item_id, val in
+                case_changes.iteritems())
+    else:
+        vals = ((val, item_id) for item_id, val in case_changes.iteritems())
+    db.conn.executemany(
+        'UPDATE %s SET %s=? WHERE id=?'%(m['table'], m['column']), vals)
+    for item_id, val in case_changes.iteritems():
+        table.id_map[item_id] = val
+        dirtied.update(table.col_book_map[item_id])
+        if is_authors:
+            table.asort_map[item_id] = author_to_author_sort(val)
+
 def many_one(book_id_val_map, db, field, allow_case_change, *args):
     dirtied = set()
     m = field.metadata
@@ -185,108 +232,61 @@ def many_one(book_id_val_map, db, field, allow_case_change, *args):
     dt = m['datatype']
     is_custom_series = dt == 'series' and table.name.startswith('#')

-    # Map values to their canonical form for later comparison
+    # Map values to db ids, including any new values
     kmap = safe_lower if dt in {'text', 'series'} else lambda x:x
+    rid_map = {kmap(item):item_id for item_id, item in table.id_map.iteritems()}
+    val_map = {None:None}
+    case_changes = {}
+    for val in book_id_val_map.itervalues():
+        if val is not None:
+            get_db_id(val, db, m, table, kmap, rid_map, allow_case_change,
+                      case_changes, val_map)
+    if case_changes:
+        change_case(case_changes, dirtied, db, table, m)
+    book_id_item_id_map = {k:val_map[v] for k, v in book_id_val_map.iteritems()}

     # Ignore those items whose value is the same as the current value
-    no_changes = {k:nval for k, nval in book_id_val_map.iteritems() if
-                  kmap(nval) == kmap(field.for_book(k, default_value=None))}
-    for book_id in no_changes:
-        del book_id_val_map[book_id]
+    book_id_item_id_map = {k:v for k, v in book_id_item_id_map.iteritems()
+                           if v != table.book_col_map.get(k, None)}
+    dirtied |= set(book_id_item_id_map)

-    # If we are allowed case changes check that none of the ignored items are
-    # case changes. If they are, update the item's case in the db.
-    if allow_case_change:
-        for book_id, nval in no_changes.iteritems():
-            if nval is not None and nval != field.for_book(
-                    book_id, default_value=None):
-                # Change of case
-                item_id = table.book_col_map[book_id]
-                db.conn.execute('UPDATE %s SET %s=? WHERE id=?'%(
-                    m['table'], m['column']), (nval, item_id))
-                table.id_map[item_id] = nval
-                dirtied |= table.col_book_map[item_id]
+    # Update the book->col and col->book maps
+    deleted = set()
+    updated = {}
+    for book_id, item_id in book_id_item_id_map.iteritems():
+        old_item_id = table.book_col_map.get(book_id, None)
+        if old_item_id is not None:
+            table.col_book_map[old_item_id].discard(book_id)
+        if item_id is None:
+            table.book_col_map.pop(book_id, None)
+            deleted.add(book_id)
+        else:
+            table.book_col_map[book_id] = item_id
+            table.col_book_map[item_id].add(book_id)
+            updated[book_id] = item_id

-    deleted = {k:v for k, v in book_id_val_map.iteritems() if v is None}
-    updated = {k:v for k, v in book_id_val_map.iteritems() if v is not None}
-
-    link_table = table.link_table
-
+    # Update the db link table
     if deleted:
-        db.conn.executemany('DELETE FROM %s WHERE book=?'%link_table,
-            tuple((book_id,) for book_id in deleted))
-        for book_id in deleted:
-            item_id = table.book_col_map.pop(book_id, None)
-            if item_id is not None:
-                table.col_book_map[item_id].discard(book_id)
-        dirtied |= set(deleted)
-
+        db.conn.executemany('DELETE FROM %s WHERE book=?'%table.link_table,
+                            ((k,) for k in deleted))
     if updated:
-        rid_map = {kmap(v):k for k, v in table.id_map.iteritems()}
-        book_id_item_id_map = {k:rid_map.get(kmap(v), None) for k, v in
-                               book_id_val_map.iteritems()}
-
-        # items that dont yet exist
-        new_items = {k:v for k, v in updated.iteritems() if
-                     book_id_item_id_map[k] is None}
-        # items that already exist
-        changed_items = {k:book_id_item_id_map[k] for k in updated if
-                         book_id_item_id_map[k] is not None}
-
-        def sql_update(imap):
-            sql = (
-                'DELETE FROM {0} WHERE book=?; INSERT INTO {0}(book,{1},extra) VALUES(?, ?, 1.0)'
-                if is_custom_series else
-                'DELETE FROM {0} WHERE book=?; INSERT INTO {0}(book,{1}) VALUES(?, ?)'
-            )
-            db.conn.executemany(sql.format(link_table, m['link_column']),
-                tuple((book_id, book_id, item_id) for book_id, item_id in
-                      imap.iteritems()))
-
-        if new_items:
-            item_ids = {}
-            val_map = {}
-            for val in set(new_items.itervalues()):
-                lval = kmap(val)
-                if lval in val_map:
-                    item_id = val_map[lval]
-                else:
-                    db.conn.execute('INSERT INTO %s(%s) VALUES (?)'%(
-                        m['table'], m['column']), (val,))
-                    item_id = val_map[lval] = db.conn.last_insert_rowid()
-                item_ids[val] = item_id
-                table.id_map[item_id] = val
-            imap = {}
-            for book_id, val in new_items.iteritems():
-                item_id = item_ids[val]
-                old_item_id = table.book_col_map.get(book_id, None)
-                if old_item_id is not None:
-                    table.col_book_map[old_item_id].discard(book_id)
-                if item_id not in table.col_book_map:
-                    table.col_book_map[item_id] = set()
-                table.col_book_map[item_id].add(book_id)
-                table.book_col_map[book_id] = imap[book_id] = item_id
-            sql_update(imap)
-            dirtied |= set(imap)
-
-        if changed_items:
-            imap = {}
-            sql_update(changed_items)
-            for book_id, item_id in changed_items.iteritems():
-                old_item_id = table.book_col_map.get(book_id, None)
-                if old_item_id != item_id:
-                    table.book_col_map[book_id] = item_id
-                    table.col_book_map[item_id].add(book_id)
-                    if old_item_id is not None:
-                        table.col_book_map[old_item_id].discard(book_id)
-                    imap[book_id] = item_id
-            sql_update(imap)
-            dirtied |= set(imap)
+        sql = (
+            'DELETE FROM {0} WHERE book=?; INSERT INTO {0}(book,{1},extra) VALUES(?, ?, 1.0)'
+            if is_custom_series else
+            'DELETE FROM {0} WHERE book=?; INSERT INTO {0}(book,{1}) VALUES(?, ?)'
+        )
+        db.conn.executemany(sql.format(table.link_table, m['link_column']),
+            ((book_id, book_id, item_id) for book_id, item_id in
+             updated.iteritems()))

     # Remove no longer used items
     remove = {item_id for item_id in table.id_map if not
               table.col_book_map.get(item_id, False)}
     if remove:
         db.conn.executemany('DELETE FROM %s WHERE id=?'%m['table'],
-            tuple((item_id,) for item_id in remove))
+            ((item_id,) for item_id in remove))
         for item_id in remove:
             del table.id_map[item_id]
             table.col_book_map.pop(item_id, None)
@@ -294,6 +294,96 @@ def many_one(book_id_val_map, db, field, allow_case_change, *args):
     return dirtied
 # }}}

+# Many-Many fields {{{
+
+def many_many(book_id_val_map, db, field, allow_case_change, *args):
+    dirtied = set()
+    m = field.metadata
+    table = field.table
+    dt = m['datatype']
+    is_authors = field.name == 'authors'
+
+    # Map values to db ids, including any new values
+    kmap = safe_lower if dt == 'text' else lambda x:x
+    rid_map = {kmap(item):item_id for item_id, item in table.id_map.iteritems()}
+    val_map = {}
+    case_changes = {}
+    for vals in book_id_val_map.itervalues():
+        for val in vals:
+            get_db_id(val, db, m, table, kmap, rid_map, allow_case_change,
+                      case_changes, val_map, is_authors=is_authors)
+    if case_changes:
+        change_case(case_changes, dirtied, db, table, m, is_authors=is_authors)
+        if is_authors:
+            for item_id, val in case_changes.iteritems():
+                for book_id in table.col_book_map[item_id]:
+                    current_sort = field.db_author_sort_for_book(book_id)
+                    new_sort = field.author_sort_for_book(book_id)
+                    if strcmp(current_sort, new_sort) == 0:
+                        # The sort strings differ only by case, update the db
+                        # sort
+                        field.author_sort_field.writer.set_books({book_id:new_sort}, db)
+    book_id_item_id_map = {k:tuple(val_map[v] for v in vals)
+                           for k, vals in book_id_val_map.iteritems()}

+    # Ignore those items whose value is the same as the current value
+    book_id_item_id_map = {k:v for k, v in book_id_item_id_map.iteritems()
+                           if v != table.book_col_map.get(k, None)}
+    dirtied |= set(book_id_item_id_map)
+
+    # Update the book->col and col->book maps
+    deleted = set()
+    updated = {}
+    for book_id, item_ids in book_id_item_id_map.iteritems():
+        old_item_ids = table.book_col_map.get(book_id, None)
+        if old_item_ids:
+            for old_item_id in old_item_ids:
+                table.col_book_map[old_item_id].discard(book_id)
+        if item_ids:
+            table.book_col_map[book_id] = item_ids
+            for item_id in item_ids:
+                table.col_book_map[item_id].add(book_id)
+            updated[book_id] = item_ids
+        else:
+            table.book_col_map.pop(book_id, None)
+            deleted.add(book_id)
+
+    # Update the db link table
+    if deleted:
+        db.conn.executemany('DELETE FROM %s WHERE book=?'%table.link_table,
+                            ((k,) for k in deleted))
+    if updated:
+        vals = (
+            (book_id, val) for book_id, vals in updated.iteritems()
+            for val in vals
+        )
+        db.conn.executemany('DELETE FROM %s WHERE book=?'%table.link_table,
+                            ((k,) for k in updated))
+        db.conn.executemany('INSERT INTO {0}(book,{1}) VALUES(?, ?)'.format(
+            table.link_table, m['link_column']), vals)
+        if is_authors:
+            aus_map = {book_id:field.author_sort_for_book(book_id) for book_id
+                       in updated}
+            field.author_sort_field.writer.set_books(aus_map, db)
+
+    # Remove no longer used items
+    remove = {item_id for item_id in table.id_map if not
+              table.col_book_map.get(item_id, False)}
+    if remove:
+        db.conn.executemany('DELETE FROM %s WHERE id=?'%m['table'],
+            ((item_id,) for item_id in remove))
+        for item_id in remove:
+            del table.id_map[item_id]
+            table.col_book_map.pop(item_id, None)
+            if is_authors:
+                table.asort_map.pop(item_id, None)
+                table.alink_map.pop(item_id, None)
+
+    return dirtied
+# }}}
+
 def dummy(book_id_val_map, *args):
     return set()
@@ -311,9 +401,7 @@ class Writer(object):
         elif self.name[0] == '#' and self.name.endswith('_index'):
             self.set_books_func = custom_series_index
         elif field.is_many_many:
-            # TODO: Implement this
-            pass
-            # TODO: Remember to change commas to | when writing authors to sqlite
+            self.set_books_func = many_many
         elif field.is_many:
             self.set_books_func = (self.set_books_for_enum if dt ==
                                    'enumeration' else many_one)
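With set_books_func wired up, multi-valued fields flow through Cache.set_field like the one-one and many-one cases. The round trip, using values from test_many_many_basic above:

    # set_field returns the set of book ids whose metadata changed
    cache.set_field('authors', {3: 'Kovid Goyal & Divok Layog'})  # -> {3}
    cache.field_for('author_sort', 3)   # -> 'Goyal, Kovid & Layog, Divok'
    # a case-only rename dirties every book sharing that author:
    cache.set_field('authors', {1: 'KoviD GoyaL'})                # -> {1, 3}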

View File

@@ -7,9 +7,10 @@ __docformat__ = 'restructuredtext en'

 import cStringIO, ctypes, datetime, os, platform, re, shutil, sys, tempfile, time

-from calibre.constants import __appname__, __version__, cache_dir, DEBUG as CALIBRE_DEBUG
 from calibre import fit_image, confirm_config_name, strftime as _strftime
-from calibre.constants import isosx, iswindows, cache_dir as _cache_dir
+from calibre.constants import (
+    __appname__, __version__, DEBUG as CALIBRE_DEBUG, isosx, iswindows,
+    cache_dir as _cache_dir)
 from calibre.devices.errors import OpenFeedback, UserFeedback
 from calibre.devices.usbms.deviceconfig import DeviceConfig
 from calibre.devices.interface import DevicePlugin

@@ -290,8 +291,6 @@ class ITUNES(DriverBase):
     # Properties
     cached_books = {}
-    cache_dir = os.path.join(_cache_dir(), 'itunes')
-    archive_path = os.path.join(cache_dir, "thumbs.zip")
     calibre_library_path = prefs['library_path']
     description_prefix = "added by calibre"
     ejected = False

@@ -312,7 +311,7 @@ class ITUNES(DriverBase):

     @property
     def cache_dir(self):
-        return os.path.join(cache_dir(), 'itunes')
+        return os.path.join(_cache_dir(), 'itunes')

     @property
     def archive_path(self):
View File

@@ -88,7 +88,7 @@ class Container(object):
                 self.mime_map[name] = guess_type('a.opf')

         if not hasattr(self, 'opf_name'):
-            raise InvalidBook('Book has no OPF file')
+            raise InvalidBook('Could not locate opf file: %r'%opfpath)

         # Update mime map with data from the OPF
         for item in self.opf_xpath('//opf:manifest/opf:item[@href and @media-type]'):

View File

@@ -10,6 +10,7 @@ assumes a prior call to the flatcss transform.
 '''

 import os, math, functools, collections, re, copy
+from collections import OrderedDict

 from lxml.etree import XPath as _XPath
 from lxml import etree

@@ -106,8 +107,7 @@ class Split(object):
                     continue
                 for elem in selector(body[0]):
                     if elem not in body:
-                        if before:
-                            elem.set('pb_before', '1')
+                        elem.set('pb_before', '1' if before else '0')
                         page_breaks.add(elem)

         for i, elem in enumerate(item.data.iter()):

@@ -134,14 +134,12 @@ class Split(object):
             id = 'calibre_pb_%d'%i
             x.set('id', id)
             xp = XPath('//*[@id=%r]'%id)
-            page_breaks_.append((xp,
-                x.get('pb_before', False)))
+            page_breaks_.append((xp, x.get('pb_before', '0') == '1'))
             page_break_ids.append(id)

         for elem in item.data.iter():
             elem.attrib.pop('pb_order', False)
-            if elem.get('pb_before', False):
-                elem.attrib.pop('pb_before')
+            elem.attrib.pop('pb_before', False)

         return page_breaks_, page_break_ids
@@ -223,22 +221,27 @@ class FlowSplitter(object):
             self.commit()

     def split_on_page_breaks(self, orig_tree):
-        ordered_ids = []
-        for elem in orig_tree.xpath('//*[@id]'):
-            id = elem.get('id')
-            if id in self.page_break_ids:
-                ordered_ids.append(self.page_breaks[self.page_break_ids.index(id)])
+        ordered_ids = OrderedDict()
+        all_page_break_ids = frozenset(self.page_break_ids)
+        for elem_id in orig_tree.xpath('//*/@id'):
+            if elem_id in all_page_break_ids:
+                ordered_ids[elem_id] = self.page_breaks[
+                    self.page_break_ids.index(elem_id)]

-        self.trees = []
-        tree = orig_tree
-        for pattern, before in ordered_ids:
-            elem = pattern(tree)
-            if elem:
-                self.log.debug('\t\tSplitting on page-break at %s'%
-                        elem[0].get('id'))
-                before, after = self.do_split(tree, elem[0], before)
-                self.trees.append(before)
-                tree = after
-        self.trees.append(tree)
+        self.trees = [orig_tree]
+        while ordered_ids:
+            pb_id, (pattern, before) = ordered_ids.iteritems().next()
+            del ordered_ids[pb_id]
+            for i in xrange(len(self.trees)-1, -1, -1):
+                tree = self.trees[i]
+                elem = pattern(tree)
+                if elem:
+                    self.log.debug('\t\tSplitting on page-break at id=%s'%
+                                   elem[0].get('id'))
+                    before_tree, after_tree = self.do_split(tree, elem[0], before)
+                    self.trees[i:i+1] = [before_tree, after_tree]
+                    break

         trees, ids = [], set([])
         for tree in self.trees:
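The rewrite leans on OrderedDict preserving insertion (that is, document) order: ordered_ids.iteritems().next() always hands back the earliest remaining page break, and walking self.trees from the back finds whichever split piece currently contains it, so breaks that land in already-split fragments are no longer lost. The FIFO-pop idiom in isolation (Python 2):

    from collections import OrderedDict
    d = OrderedDict([('pb1', 'first'), ('pb2', 'second')])
    while d:
        key, val = d.iteritems().next()  # earliest remaining entry
        del d[key]
        print key, val
    # prints: pb1 first, then pb2 second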
@@ -289,7 +292,6 @@ class FlowSplitter(object):
             if self.opts.verbose > 3 and npath != path:
                 self.log.debug('\t\t\tMoved split point %s to %s'%(path, npath))
-
         return npath

     def do_split(self, tree, split_point, before):

@@ -304,7 +306,11 @@ class FlowSplitter(object):
         root = tree.getroot()
         root2 = tree2.getroot()
         body, body2 = map(self.get_body, (root, root2))
-        path = self.adjust_split_point(root, path)
+        if before:
+            # We cannot adjust for after since moving an after split point to a
+            # parent will cause breakage if the parent contains any content
+            # after the original split point
+            path = self.adjust_split_point(root, path)
         split_point = root.xpath(path)[0]
         split_point2 = root2.xpath(path)[0]

View File

@@ -171,7 +171,7 @@ class ZshCompleter(object): # {{{
             arg = ''
             if opt.takes_value():
                 arg = ':"%s":'%h
-                if opt.dest in {'debug_pipeline', 'to_dir', 'outbox', 'with_library', 'library_path'}:
+                if opt.dest in {'extract_to', 'debug_pipeline', 'to_dir', 'outbox', 'with_library', 'library_path'}:
                     arg += "'_path_files -/'"
                 elif opt.choices:
                     arg += "(%s)"%'|'.join(opt.choices)