GRiker 2013-03-03 08:46:49 -08:00
commit 61084c2392
13 changed files with 332 additions and 159 deletions

View File

@@ -37,7 +37,7 @@ nbproject/
calibre_plugins/
recipes/.git
recipes/.gitignore
recipes/README
recipes/README.md
recipes/katalog_egazeciarz.recipe
recipes/tv_axnscifi.recipe
recipes/tv_comedycentral.recipe

View File

@@ -1,7 +1,7 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2010-2012, Tomasz Dlugosz <tomek3d@gmail.com>'
__copyright__ = u'2010-2013, Tomasz Dlugosz <tomek3d@gmail.com>'
'''
fronda.pl
'''
@@ -68,6 +68,7 @@ class Fronda(BasicNewsRecipe):
article_url = 'http://www.fronda.pl' + article_a['href']
article_title = self.tag_to_string(article_a)
articles[genName].append( { 'title' : article_title, 'url' : article_url, 'date' : article_date })
if articles[genName]:
feeds.append((genName, articles[genName]))
return feeds
@@ -82,8 +83,10 @@ class Fronda(BasicNewsRecipe):
dict(name='h3', attrs={'class':'block-header article comments'}),
dict(name='ul', attrs={'class':'comment-list'}),
dict(name='ul', attrs={'class':'category'}),
dict(name='ul', attrs={'class':'tag-list'}),
dict(name='p', attrs={'id':'comments-disclaimer'}),
dict(name='div', attrs={'style':'text-align: left; margin-bottom: 15px;'}),
dict(name='div', attrs={'style':'text-align: left; margin-top: 15px;'}),
dict(name='div', attrs={'style':'text-align: left; margin-top: 15px; margin-bottom: 30px;'}),
dict(name='div', attrs={'class':'related-articles content'}),
dict(name='div', attrs={'id':'comment-form'})
]
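The new 'if articles[genName]:' guard keeps categories that yielded no articles out of the feed list, so empty sections never reach the generated e-book. A minimal standalone sketch of the same pattern, with made-up category names and article data:

# Sketch: only register a category once it actually collected articles.
articles = {'Polityka': [{'title': 'A', 'url': 'http://example.com/a', 'date': ''}],
            'Kultura': []}  # nothing was scraped for this category
feeds = []
for genName in ('Polityka', 'Kultura'):
    if articles[genName]:
        feeds.append((genName, articles[genName]))
assert [name for name, _ in feeds] == ['Polityka']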

View File

@@ -2,7 +2,8 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2011, Piotr Kontek, piotr.kontek@gmail.com'
__copyright__ = '2011, Piotr Kontek, piotr.kontek@gmail.com \
2013, Tomasz Długosz, tomek3d@gmail.com'
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile
@@ -12,9 +13,9 @@ import re
class GN(BasicNewsRecipe):
EDITION = 0
__author__ = 'Piotr Kontek'
title = u'Gość niedzielny'
description = 'Weekly magazine'
__author__ = 'Piotr Kontek, Tomasz Długosz'
title = u'Gość Niedzielny'
description = 'Ogólnopolski tygodnik katolicki'
encoding = 'utf-8'
no_stylesheets = True
language = 'pl'
@@ -38,17 +39,25 @@ class GN(BasicNewsRecipe):
first = True
for p in main_section.findAll('p', attrs={'class':None}, recursive=False):
if first and p.find('img') != None:
article = article + '<p>'
article = article + str(p.find('img')).replace('src="/files/','src="http://www.gosc.pl/files/')
article = article + '<font size="-2">'
article += '<p>'
article += str(p.find('img')).replace('src="/files/','src="http://www.gosc.pl/files/')
article += '<font size="-2">'
for s in p.findAll('span'):
article = article + self.tag_to_string(s)
article = article + '</font></p>'
article += self.tag_to_string(s)
article += '</font></p>'
else:
article = article + str(p).replace('src="/files/','src="http://www.gosc.pl/files/')
article += str(p).replace('src="/files/','src="http://www.gosc.pl/files/')
first = False
limiter = main_section.find('p', attrs={'class' : 'limiter'})
if limiter:
article += str(limiter)
html = unicode(title) + unicode(authors) + unicode(article)
html = unicode(title)
# Sometimes authors are not filled in:
if authors:
html += unicode(authors) + unicode(article)
else:
html += unicode(article)
self.temp_files.append(PersistentTemporaryFile('_temparse.html'))
self.temp_files[-1].write(html)
@@ -65,7 +74,8 @@ class GN(BasicNewsRecipe):
if img != None:
a = img.parent
self.EDITION = a['href']
self.title = img['alt']
# This was preventing Kindles from moving old issues to the 'Back Issues' category:
#self.title = img['alt']
self.cover_url = 'http://www.gosc.pl' + img['src']
if year != date.today().year or not first:
break
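The rewritten block above guards against pages without an author line. Under Python 2, unicode(None) yields the literal text u'None', so the old unconditional concatenation could embed 'None' in the generated article. A tiny demonstration (the markup strings are placeholders):

# Hypothetical: the scrape found no author element, so authors is None.
authors = None
html = u'<h1>Title</h1>'
if authors:  # the guard added above; without it, unicode(None) == u'None'
    html += unicode(authors)
assert u'None' not in html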

View File

@@ -30,6 +30,11 @@ class tvn24(BasicNewsRecipe):
feeds = [(u'Najnowsze', u'http://www.tvn24.pl/najnowsze.xml'), ]
#(u'Polska', u'www.tvn24.pl/polska.xml'), (u'\u015awiat', u'http://www.tvn24.pl/swiat.xml'), (u'Sport', u'http://www.tvn24.pl/sport.xml'), (u'Biznes', u'http://www.tvn24.pl/biznes.xml'), (u'Meteo', u'http://www.tvn24.pl/meteo.xml'), (u'Micha\u0142ki', u'http://www.tvn24.pl/michalki.xml'), (u'Kultura', u'http://www.tvn24.pl/kultura.xml')]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup
def preprocess_html(self, soup):
for alink in soup.findAll('a'):
if alink.string is not None:

View File

@@ -217,6 +217,8 @@ class Cache(object):
field.series_field = self.fields[name[:-len('_index')]]
elif name == 'series_index':
field.series_field = self.fields['series']
elif name == 'authors':
field.author_sort_field = self.fields['author_sort']
@read_api
def field_for(self, name, book_id, default_value=None):

View File

@@ -402,6 +402,13 @@ class AuthorsField(ManyToManyField):
def category_sort_value(self, item_id, book_ids, lang_map):
return self.table.asort_map[item_id]
def db_author_sort_for_book(self, book_id):
return self.author_sort_field.for_book(book_id)
def author_sort_for_book(self, book_id):
return ' & '.join(self.table.asort_map[k] for k in
self.table.book_col_map[book_id])
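author_sort_for_book joins the per-author sort strings with ' & ', calibre's convention for multi-author books; the writing tests later in this commit assert exactly this format. A toy illustration, with stand-ins for the table maps:

# Toy stand-ins for table.asort_map and table.book_col_map.
asort_map = {10: 'Goyal, Kovid', 11: 'Layog, Divok'}
book_col_map = {3: (10, 11)}
author_sort = ' & '.join(asort_map[k] for k in book_col_map[3])
assert author_sort == 'Goyal, Kovid & Layog, Divok'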
class FormatsField(ManyToManyField):
def for_book(self, book_id, default_value=None):

View File

@@ -168,7 +168,7 @@ class AuthorsTable(ManyToManyTable):
self.asort_map = {}
for row in db.conn.execute(
'SELECT id, name, sort, link FROM authors'):
self.id_map[row[0]] = row[1]
self.id_map[row[0]] = self.unserialize(row[1])
self.asort_map[row[0]] = (row[2] if row[2] else
author_to_author_sort(row[1]))
self.alink_map[row[0]] = row[3]
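The new unserialize call decodes author names as they are loaded: calibre cannot store raw commas in the authors table because ',' separates authors at the UI level, so names go into SQLite with commas replaced by '|' (the INSERT in write.py further down does val.replace(',', '|')). A sketch of the round trip, assuming unserialize is the simple inverse:

# Assumed round trip between UI form and stored form of an author name.
def serialize(name):   # what the INSERT in write.py effectively does
    return name.replace(',', '|')
def unserialize(raw):  # assumed inverse applied while loading the table
    return raw.replace('|', ',')
assert unserialize(serialize('An, Author')) == 'An, Author'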

View File

@@ -203,10 +203,63 @@ class WritingTest(BaseTest):
# }}}
def test_many_many_basic(self): # {{{
'Test the different code paths for writing to a many-many field'
cl = self.cloned_library
cache = self.init_cache(cl)
ae, af, sf = self.assertEqual, self.assertFalse, cache.set_field
# Tags
ae(sf('#tags', {1:cache.field_for('tags', 1), 2:cache.field_for('tags', 2)}),
{1, 2})
for name in ('tags', '#tags'):
f = cache.fields[name]
af(sf(name, {1:('tag one', 'News')}, allow_case_change=False))
ae(sf(name, {1:'tag one, News'}), {1, 2})
ae(sf(name, {3:('tag two', 'sep,sep2')}), {2, 3})
ae(len(f.table.id_map), 4)
ae(sf(name, {1:None}), set([1]))
cache2 = self.init_cache(cl)
for c in (cache, cache2):
ae(c.field_for(name, 3), ('tag two', 'sep;sep2'))
ae(len(c.fields[name].table.id_map), 3)
ae(c.field_for(name, 1), ())
ae(c.field_for(name, 2), ('tag one', 'tag two'))
del cache2
# Authors
ae(sf('#authors', {k:cache.field_for('authors', k) for k in (1,2,3)}),
{1,2,3})
for name in ('authors', '#authors'):
f = cache.fields[name]
ae(len(f.table.id_map), 3)
af(cache.set_field(name, {3:None if name == 'authors' else 'Unknown'}))
ae(cache.set_field(name, {3:'Kovid Goyal & Divok Layog'}), set([3]))
ae(cache.set_field(name, {1:'', 2:'An, Author'}), {1,2})
cache2 = self.init_cache(cl)
for c in (cache, cache2):
ae(len(c.fields[name].table.id_map), 4 if name =='authors' else 3)
ae(c.field_for(name, 3), ('Kovid Goyal', 'Divok Layog'))
ae(c.field_for(name, 2), ('An, Author',))
ae(c.field_for(name, 1), ('Unknown',) if name=='authors' else ())
ae(c.field_for('author_sort', 1), 'Unknown')
ae(c.field_for('author_sort', 2), 'An, Author')
ae(c.field_for('author_sort', 3), 'Goyal, Kovid & Layog, Divok')
del cache2
ae(cache.set_field('authors', {1:'KoviD GoyaL'}), {1, 3})
ae(cache.field_for('author_sort', 1), 'GoyaL, KoviD')
ae(cache.field_for('author_sort', 3), 'GoyaL, KoviD & Layog, Divok')
# TODO: identifiers, languages
# }}}
def tests():
return unittest.TestLoader().loadTestsFromTestCase(WritingTest)
tl = unittest.TestLoader()
# return tl.loadTestsFromName('writing.WritingTest.test_many_many_basic')
return tl.loadTestsFromTestCase(WritingTest)
def run():
unittest.TextTestRunner(verbosity=2).run(tests())
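The commented loadTestsFromName line is the hook for running a single test while iterating on it. A usage sketch (the dotted name assumes the module is importable as plain 'writing', as in the comment above):

# Run just the many-many test instead of the full WritingTest suite.
import unittest
tl = unittest.TestLoader()
suite = tl.loadTestsFromName('writing.WritingTest.test_many_many_basic')
unittest.TextTestRunner(verbosity=2).run(suite)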

View File

@@ -12,8 +12,11 @@ from functools import partial
from datetime import datetime
from calibre.constants import preferred_encoding, ispy3
from calibre.ebooks.metadata import author_to_author_sort
from calibre.utils.date import (parse_only_date, parse_date, UNDEFINED_DATE,
isoformat)
from calibre.utils.icu import strcmp
if ispy3:
unicode = str
@@ -45,15 +48,20 @@ def get_series_values(val):
pass
return (val, None)
def multiple_text(sep, x):
if x is None:
def multiple_text(sep, ui_sep, x):
if not x:
return ()
if isinstance(x, bytes):
x = x.decode(preferred_encoding, 'replace')
if isinstance(x, unicode):
x = x.split(sep)
x = (y.strip() for y in x if y.strip())
return (' '.join(y.split()) for y in x if y)
else:
x = (y.decode(preferred_encoding, 'replace') if isinstance(y, bytes)
else y for y in x)
ui_sep = ui_sep.strip()
repsep = ',' if ui_sep == ';' else ';'
x = (y.strip().replace(ui_sep, repsep) for y in x if y.strip())
return tuple(' '.join(y.split()) for y in x if y)
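multiple_text now receives both separators: a plain string is split on the ui_to_list separator, while an already-split collection has any embedded list_to_ui separator rewritten to ';' (or to ',' when the UI separator is itself ';') so one item cannot later be mistaken for two. The writing test above pins this down ('sep,sep2' comes back as 'sep;sep2'). A usage sketch with the tag separators, assuming the function above is in scope:

# Tags use ui_to_list=',' and list_to_ui=', '.
assert tuple(multiple_text(',', ', ', 'tag one, News')) == ('tag one', 'News')
assert tuple(multiple_text(',', ', ', ('tag two', 'sep,sep2'))) == ('tag two', 'sep;sep2')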
def adapt_datetime(x):
if isinstance(x, (unicode, bytes)):
@@ -92,7 +100,8 @@ def get_adapter(name, metadata):
dt = metadata['datatype']
if dt == 'text':
if metadata['is_multiple']:
ans = partial(multiple_text, metadata['is_multiple']['ui_to_list'])
m = metadata['is_multiple']
ans = partial(multiple_text, m['ui_to_list'], m['list_to_ui'])
else:
ans = single_text
elif dt == 'series':
@@ -132,7 +141,7 @@ def get_adapter(name, metadata):
def one_one_in_books(book_id_val_map, db, field, *args):
'Set a one-one field in the books table'
if book_id_val_map:
sequence = tuple((sqlite_datetime(v), k) for k, v in book_id_val_map.iteritems())
sequence = ((sqlite_datetime(v), k) for k, v in book_id_val_map.iteritems())
db.conn.executemany(
'UPDATE books SET %s=? WHERE id=?'%field.metadata['column'], sequence)
field.table.book_col_map.update(book_id_val_map)
@@ -150,7 +159,7 @@ def one_one_in_other(book_id_val_map, db, field, *args):
if updated:
db.conn.executemany('INSERT OR REPLACE INTO %s(book,%s) VALUES (?,?)'%(
field.metadata['table'], field.metadata['column']),
tuple((k, sqlite_datetime(v)) for k, v in updated.iteritems()))
((k, sqlite_datetime(v)) for k, v in updated.iteritems()))
field.table.book_col_map.update(updated)
return set(book_id_val_map)
@@ -178,6 +187,44 @@ def safe_lower(x):
except (TypeError, ValueError, KeyError, AttributeError):
return x
def get_db_id(val, db, m, table, kmap, rid_map, allow_case_change,
case_changes, val_map, is_authors=False):
''' Get the db id for the value val, inserting val into the db if it is
not already present. '''
kval = kmap(val)
item_id = rid_map.get(kval, None)
if item_id is None:
if is_authors:
aus = author_to_author_sort(val)
db.conn.execute('INSERT INTO authors(name,sort) VALUES (?,?)',
(val.replace(',', '|'), aus))
else:
db.conn.execute('INSERT INTO %s(%s) VALUES (?)'%(
m['table'], m['column']), (val,))
item_id = rid_map[kval] = db.conn.last_insert_rowid()
table.id_map[item_id] = val
table.col_book_map[item_id] = set()
if is_authors:
table.asort_map[item_id] = aus
table.alink_map[item_id] = ''
elif allow_case_change and val != table.id_map[item_id]:
case_changes[item_id] = val
val_map[val] = item_id
def change_case(case_changes, dirtied, db, table, m, is_authors=False):
if is_authors:
vals = ((val.replace(',', '|'), item_id) for item_id, val in
case_changes.iteritems())
else:
vals = ((val, item_id) for item_id, val in case_changes.iteritems())
db.conn.executemany(
'UPDATE %s SET %s=? WHERE id=?'%(m['table'], m['column']), vals)
for item_id, val in case_changes.iteritems():
table.id_map[item_id] = val
dirtied.update(table.col_book_map[item_id])
if is_authors:
table.asort_map[item_id] = author_to_author_sort(val)
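get_db_id and change_case together implement case-insensitive matching with optional case correction: a value that differs from an existing item only in case reuses that item's id and, when allow_case_change is set, renames the stored item, dirtying every book that references it. A toy model of the rename step:

# One shared item renamed once in id_map; all referencing books are dirtied.
id_map = {7: 'Kovid Goyal'}
col_book_map = {7: {1, 3}}
case_changes = {7: 'KoviD GoyaL'}
dirtied = set()
for item_id, val in case_changes.items():
    id_map[item_id] = val
    dirtied.update(col_book_map[item_id])
assert dirtied == {1, 3}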
def many_one(book_id_val_map, db, field, allow_case_change, *args):
dirtied = set()
m = field.metadata
@@ -185,108 +232,61 @@ def many_one(book_id_val_map, db, field, allow_case_change, *args):
dt = m['datatype']
is_custom_series = dt == 'series' and table.name.startswith('#')
# Map values to their canonical form for later comparison
# Map values to db ids, including any new values
kmap = safe_lower if dt in {'text', 'series'} else lambda x:x
rid_map = {kmap(item):item_id for item_id, item in table.id_map.iteritems()}
val_map = {None:None}
case_changes = {}
for val in book_id_val_map.itervalues():
if val is not None:
get_db_id(val, db, m, table, kmap, rid_map, allow_case_change,
case_changes, val_map)
if case_changes:
change_case(case_changes, dirtied, db, table, m)
book_id_item_id_map = {k:val_map[v] for k, v in book_id_val_map.iteritems()}
# Ignore those items whose value is the same as the current value
no_changes = {k:nval for k, nval in book_id_val_map.iteritems() if
kmap(nval) == kmap(field.for_book(k, default_value=None))}
for book_id in no_changes:
del book_id_val_map[book_id]
book_id_item_id_map = {k:v for k, v in book_id_item_id_map.iteritems()
if v != table.book_col_map.get(k, None)}
dirtied |= set(book_id_item_id_map)
# If we are allowed case changes check that none of the ignored items are
# case changes. If they are, update the item's case in the db.
if allow_case_change:
for book_id, nval in no_changes.iteritems():
if nval is not None and nval != field.for_book(
book_id, default_value=None):
# Change of case
item_id = table.book_col_map[book_id]
db.conn.execute('UPDATE %s SET %s=? WHERE id=?'%(
m['table'], m['column']), (nval, item_id))
table.id_map[item_id] = nval
dirtied |= table.col_book_map[item_id]
deleted = {k:v for k, v in book_id_val_map.iteritems() if v is None}
updated = {k:v for k, v in book_id_val_map.iteritems() if v is not None}
link_table = table.link_table
# Update the book->col and col->book maps
deleted = set()
updated = {}
for book_id, item_id in book_id_item_id_map.iteritems():
old_item_id = table.book_col_map.get(book_id, None)
if old_item_id is not None:
table.col_book_map[old_item_id].discard(book_id)
if item_id is None:
table.book_col_map.pop(book_id, None)
deleted.add(book_id)
else:
table.book_col_map[book_id] = item_id
table.col_book_map[item_id].add(book_id)
updated[book_id] = item_id
# Update the db link table
if deleted:
db.conn.executemany('DELETE FROM %s WHERE book=?'%link_table,
tuple((book_id,) for book_id in deleted))
for book_id in deleted:
item_id = table.book_col_map.pop(book_id, None)
if item_id is not None:
table.col_book_map[item_id].discard(book_id)
dirtied |= set(deleted)
db.conn.executemany('DELETE FROM %s WHERE book=?'%table.link_table,
((k,) for k in deleted))
if updated:
rid_map = {kmap(v):k for k, v in table.id_map.iteritems()}
book_id_item_id_map = {k:rid_map.get(kmap(v), None) for k, v in
book_id_val_map.iteritems()}
# items that don't yet exist
new_items = {k:v for k, v in updated.iteritems() if
book_id_item_id_map[k] is None}
# items that already exist
changed_items = {k:book_id_item_id_map[k] for k in updated if
book_id_item_id_map[k] is not None}
def sql_update(imap):
sql = (
'DELETE FROM {0} WHERE book=?; INSERT INTO {0}(book,{1},extra) VALUES(?, ?, 1.0)'
if is_custom_series else
'DELETE FROM {0} WHERE book=?; INSERT INTO {0}(book,{1}) VALUES(?, ?)'
)
db.conn.executemany(sql.format(link_table, m['link_column']),
tuple((book_id, book_id, item_id) for book_id, item_id in
imap.iteritems()))
if new_items:
item_ids = {}
val_map = {}
for val in set(new_items.itervalues()):
lval = kmap(val)
if lval in val_map:
item_id = val_map[lval]
else:
db.conn.execute('INSERT INTO %s(%s) VALUES (?)'%(
m['table'], m['column']), (val,))
item_id = val_map[lval] = db.conn.last_insert_rowid()
item_ids[val] = item_id
table.id_map[item_id] = val
imap = {}
for book_id, val in new_items.iteritems():
item_id = item_ids[val]
old_item_id = table.book_col_map.get(book_id, None)
if old_item_id is not None:
table.col_book_map[old_item_id].discard(book_id)
if item_id not in table.col_book_map:
table.col_book_map[item_id] = set()
table.col_book_map[item_id].add(book_id)
table.book_col_map[book_id] = imap[book_id] = item_id
sql_update(imap)
dirtied |= set(imap)
if changed_items:
imap = {}
sql_update(changed_items)
for book_id, item_id in changed_items.iteritems():
old_item_id = table.book_col_map.get(book_id, None)
if old_item_id != item_id:
table.book_col_map[book_id] = item_id
table.col_book_map[item_id].add(book_id)
if old_item_id is not None:
table.col_book_map[old_item_id].discard(book_id)
imap[book_id] = item_id
sql_update(imap)
dirtied |= set(imap)
db.conn.executemany(sql.format(table.link_table, m['link_column']),
((book_id, book_id, item_id) for book_id, item_id in
updated.iteritems()))
# Remove no longer used items
remove = {item_id for item_id in table.id_map if not
table.col_book_map.get(item_id, False)}
if remove:
db.conn.executemany('DELETE FROM %s WHERE id=?'%m['table'],
tuple((item_id,) for item_id in remove))
((item_id,) for item_id in remove))
for item_id in remove:
del table.id_map[item_id]
table.col_book_map.pop(item_id, None)
@@ -294,6 +294,96 @@ def many_one(book_id_val_map, db, field, allow_case_change, *args):
return dirtied
# }}}
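many_one above and the new many_many below share the same three-phase shape: resolve each incoming value to an item id (creating rows as needed), drop books whose mapping is unchanged, then rewrite the link table and prune orphaned items. The unchanged-value filter is the part worth seeing in isolation:

# Phase two in isolation: only books whose resolved item id differs from
# the stored mapping survive, so no-op writes never touch the database.
book_id_item_id_map = {1: 7, 2: 8, 3: 9}
book_col_map = {1: 7, 2: 5}  # book 1 already has item 7
changed = {k: v for k, v in book_id_item_id_map.items()
           if v != book_col_map.get(k, None)}
assert changed == {2: 8, 3: 9}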
# Many-Many fields {{{
def many_many(book_id_val_map, db, field, allow_case_change, *args):
dirtied = set()
m = field.metadata
table = field.table
dt = m['datatype']
is_authors = field.name == 'authors'
# Map values to db ids, including any new values
kmap = safe_lower if dt == 'text' else lambda x:x
rid_map = {kmap(item):item_id for item_id, item in table.id_map.iteritems()}
val_map = {}
case_changes = {}
for vals in book_id_val_map.itervalues():
for val in vals:
get_db_id(val, db, m, table, kmap, rid_map, allow_case_change,
case_changes, val_map, is_authors=is_authors)
if case_changes:
change_case(case_changes, dirtied, db, table, m, is_authors=is_authors)
if is_authors:
for item_id, val in case_changes.iteritems():
for book_id in table.col_book_map[item_id]:
current_sort = field.db_author_sort_for_book(book_id)
new_sort = field.author_sort_for_book(book_id)
if strcmp(current_sort, new_sort) == 0:
# The sort strings differ only by case; update the db sort
field.author_sort_field.writer.set_books({book_id:new_sort}, db)
book_id_item_id_map = {k:tuple(val_map[v] for v in vals)
for k, vals in book_id_val_map.iteritems()}
# Ignore those items whose value is the same as the current value
book_id_item_id_map = {k:v for k, v in book_id_item_id_map.iteritems()
if v != table.book_col_map.get(k, None)}
dirtied |= set(book_id_item_id_map)
# Update the book->col and col->book maps
deleted = set()
updated = {}
for book_id, item_ids in book_id_item_id_map.iteritems():
old_item_ids = table.book_col_map.get(book_id, None)
if old_item_ids:
for old_item_id in old_item_ids:
table.col_book_map[old_item_id].discard(book_id)
if item_ids:
table.book_col_map[book_id] = item_ids
for item_id in item_ids:
table.col_book_map[item_id].add(book_id)
updated[book_id] = item_ids
else:
table.book_col_map.pop(book_id, None)
deleted.add(book_id)
# Update the db link table
if deleted:
db.conn.executemany('DELETE FROM %s WHERE book=?'%table.link_table,
((k,) for k in deleted))
if updated:
vals = (
(book_id, val) for book_id, vals in updated.iteritems()
for val in vals
)
db.conn.executemany('DELETE FROM %s WHERE book=?'%table.link_table,
((k,) for k in updated))
db.conn.executemany('INSERT INTO {0}(book,{1}) VALUES(?, ?)'.format(
table.link_table, m['link_column']), vals)
if is_authors:
aus_map = {book_id:field.author_sort_for_book(book_id) for book_id
in updated}
field.author_sort_field.writer.set_books(aus_map, db)
# Remove no longer used items
remove = {item_id for item_id in table.id_map if not
table.col_book_map.get(item_id, False)}
if remove:
db.conn.executemany('DELETE FROM %s WHERE id=?'%m['table'],
((item_id,) for item_id in remove))
for item_id in remove:
del table.id_map[item_id]
table.col_book_map.pop(item_id, None)
if is_authors:
table.asort_map.pop(item_id, None)
table.alink_map.pop(item_id, None)
return dirtied
# }}}
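Whenever an authors write changes a book, many_many rebuilds that book's author_sort through the author_sort field's own writer, which is what makes the author_sort assertions in the tests above hold. A toy recomputation using the real helper (book_col_map here stands in for the table's name tuples):

from calibre.ebooks.metadata import author_to_author_sort
book_col_map = {3: ('Kovid Goyal', 'Divok Layog')}  # toy stand-in
aus_map = {book_id: ' & '.join(author_to_author_sort(a) for a in names)
           for book_id, names in book_col_map.items()}
assert aus_map[3] == 'Goyal, Kovid & Layog, Divok'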
def dummy(book_id_val_map, *args):
return set()
@@ -311,9 +401,7 @@ class Writer(object):
elif self.name[0] == '#' and self.name.endswith('_index'):
self.set_books_func = custom_series_index
elif field.is_many_many:
# TODO: Implement this
pass
# TODO: Remember to change commas to | when writing authors to sqlite
self.set_books_func = many_many
elif field.is_many:
self.set_books_func = (self.set_books_for_enum if dt ==
'enumeration' else many_one)

View File

@@ -7,9 +7,10 @@ __docformat__ = 'restructuredtext en'
import cStringIO, ctypes, datetime, os, platform, re, shutil, sys, tempfile, time
from calibre.constants import __appname__, __version__, cache_dir, DEBUG as CALIBRE_DEBUG
from calibre import fit_image, confirm_config_name, strftime as _strftime
from calibre.constants import isosx, iswindows, cache_dir as _cache_dir
from calibre.constants import (
__appname__, __version__, DEBUG as CALIBRE_DEBUG, isosx, iswindows,
cache_dir as _cache_dir)
from calibre.devices.errors import OpenFeedback, UserFeedback
from calibre.devices.usbms.deviceconfig import DeviceConfig
from calibre.devices.interface import DevicePlugin
@@ -290,8 +291,6 @@ class ITUNES(DriverBase):
# Properties
cached_books = {}
cache_dir = os.path.join(_cache_dir(), 'itunes')
archive_path = os.path.join(cache_dir, "thumbs.zip")
calibre_library_path = prefs['library_path']
description_prefix = "added by calibre"
ejected = False
@@ -312,7 +311,7 @@
@property
def cache_dir(self):
return os.path.join(cache_dir(), 'itunes')
return os.path.join(_cache_dir(), 'itunes')
@property
def archive_path(self):
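The cache_dir fix above is a name-shadowing repair: the class defines its own cache_dir (formerly an attribute, now a property), so the module-level cache_dir() function had to be imported as _cache_dir to stay reachable. A self-contained illustration of the trap, with toy names:

def cache_dir():  # stands in for calibre.constants.cache_dir
    return '/tmp/calibre-cache'

class Driver(object):
    cache_dir = cache_dir() + '/itunes'  # the RHS still sees the function...
    # ...but from here on, cache_dir inside this class body is the string,
    # so a second cache_dir() call would raise TypeError. Aliasing the
    # import as _cache_dir keeps the function reachable everywhere.

print(Driver.cache_dir)  # /tmp/calibre-cache/itunes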

View File

@@ -88,7 +88,7 @@ class Container(object):
self.mime_map[name] = guess_type('a.opf')
if not hasattr(self, 'opf_name'):
raise InvalidBook('Book has no OPF file')
raise InvalidBook('Could not locate opf file: %r'%opfpath)
# Update mime map with data from the OPF
for item in self.opf_xpath('//opf:manifest/opf:item[@href and @media-type]'):

View File

@@ -10,6 +10,7 @@ assumes a prior call to the flatcss transform.
'''
import os, math, functools, collections, re, copy
from collections import OrderedDict
from lxml.etree import XPath as _XPath
from lxml import etree
@@ -106,8 +107,7 @@ class Split(object):
continue
for elem in selector(body[0]):
if elem not in body:
if before:
elem.set('pb_before', '1')
elem.set('pb_before', '1' if before else '0')
page_breaks.add(elem)
for i, elem in enumerate(item.data.iter()):
@@ -134,14 +134,12 @@ class Split(object):
id = 'calibre_pb_%d'%i
x.set('id', id)
xp = XPath('//*[@id=%r]'%id)
page_breaks_.append((xp,
x.get('pb_before', False)))
page_breaks_.append((xp, x.get('pb_before', '0') == '1'))
page_break_ids.append(id)
for elem in item.data.iter():
elem.attrib.pop('pb_order', False)
if elem.get('pb_before', False):
elem.attrib.pop('pb_before')
elem.attrib.pop('pb_before', False)
return page_breaks_, page_break_ids
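The pb_before handling changed because lxml attribute values are always strings: now that '0' is written as well as '1', reading the flag back with a plain truthiness test would be wrong, since bool('0') is True in Python. Hence the explicit == '1' comparison. In miniature:

# Non-empty strings are truthy, so '0' would read as 'break before'.
from lxml import etree
elem = etree.Element('div')
elem.set('pb_before', '0')  # lxml stores attributes as strings
assert bool(elem.get('pb_before', False)) is True    # the trap
assert (elem.get('pb_before', '0') == '1') is False  # the fix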
@@ -223,22 +221,27 @@ class FlowSplitter(object):
self.commit()
def split_on_page_breaks(self, orig_tree):
ordered_ids = []
for elem in orig_tree.xpath('//*[@id]'):
id = elem.get('id')
if id in self.page_break_ids:
ordered_ids.append(self.page_breaks[self.page_break_ids.index(id)])
ordered_ids = OrderedDict()
all_page_break_ids = frozenset(self.page_break_ids)
for elem_id in orig_tree.xpath('//*/@id'):
if elem_id in all_page_break_ids:
ordered_ids[elem_id] = self.page_breaks[
self.page_break_ids.index(elem_id)]
self.trees = []
tree = orig_tree
for pattern, before in ordered_ids:
self.trees = [orig_tree]
while ordered_ids:
pb_id, (pattern, before) = ordered_ids.iteritems().next()
del ordered_ids[pb_id]
for i in xrange(len(self.trees)-1, -1, -1):
tree = self.trees[i]
elem = pattern(tree)
if elem:
self.log.debug('\t\tSplitting on page-break at %s'%
self.log.debug('\t\tSplitting on page-break at id=%s'%
elem[0].get('id'))
before, after = self.do_split(tree, elem[0], before)
self.trees.append(before)
tree = after
before_tree, after_tree = self.do_split(tree, elem[0], before)
self.trees[i:i+1] = [before_tree, after_tree]
break
self.trees.append(tree)
trees, ids = [], set([])
for tree in self.trees:
@@ -289,7 +292,6 @@ class FlowSplitter(object):
if self.opts.verbose > 3 and npath != path:
self.log.debug('\t\t\tMoved split point %s to %s'%(path, npath))
return npath
def do_split(self, tree, split_point, before):
@@ -304,6 +306,10 @@
root = tree.getroot()
root2 = tree2.getroot()
body, body2 = map(self.get_body, (root, root2))
if before:
# We cannot adjust for after since moving an after split point to a
# parent will cause breakage if the parent contains any content
# after the original split point
path = self.adjust_split_point(root, path)
split_point = root.xpath(path)[0]
split_point2 = root2.xpath(path)[0]

View File

@@ -171,7 +171,7 @@ class ZshCompleter(object): # {{{
arg = ''
if opt.takes_value():
arg = ':"%s":'%h
if opt.dest in {'debug_pipeline', 'to_dir', 'outbox', 'with_library', 'library_path'}:
if opt.dest in {'extract_to', 'debug_pipeline', 'to_dir', 'outbox', 'with_library', 'library_path'}:
arg += "'_path_files -/'"
elif opt.choices:
arg += "(%s)"%'|'.join(opt.choices)