diff --git a/.bzrignore b/.bzrignore index f14ff947f6..6197e46ef1 100644 --- a/.bzrignore +++ b/.bzrignore @@ -37,7 +37,7 @@ nbproject/ calibre_plugins/ recipes/.git recipes/.gitignore -recipes/README +recipes/README.md recipes/katalog_egazeciarz.recipe recipes/tv_axnscifi.recipe recipes/tv_comedycentral.recipe diff --git a/recipes/fronda.recipe b/recipes/fronda.recipe index d0177b998e..6ed5d052a3 100644 --- a/recipes/fronda.recipe +++ b/recipes/fronda.recipe @@ -1,7 +1,7 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = u'2010-2012, Tomasz Dlugosz ' +__copyright__ = u'2010-2013, Tomasz Dlugosz ' ''' fronda.pl ''' @@ -68,7 +68,8 @@ class Fronda(BasicNewsRecipe): article_url = 'http://www.fronda.pl' + article_a['href'] article_title = self.tag_to_string(article_a) articles[genName].append( { 'title' : article_title, 'url' : article_url, 'date' : article_date }) - feeds.append((genName, articles[genName])) + if articles[genName]: + feeds.append((genName, articles[genName])) return feeds keep_only_tags = [ @@ -82,8 +83,10 @@ class Fronda(BasicNewsRecipe): dict(name='h3', attrs={'class':'block-header article comments'}), dict(name='ul', attrs={'class':'comment-list'}), dict(name='ul', attrs={'class':'category'}), + dict(name='ul', attrs={'class':'tag-list'}), dict(name='p', attrs={'id':'comments-disclaimer'}), dict(name='div', attrs={'style':'text-align: left; margin-bottom: 15px;'}), - dict(name='div', attrs={'style':'text-align: left; margin-top: 15px;'}), + dict(name='div', attrs={'style':'text-align: left; margin-top: 15px; margin-bottom: 30px;'}), + dict(name='div', attrs={'class':'related-articles content'}), dict(name='div', attrs={'id':'comment-form'}) ] diff --git a/recipes/gosc_niedzielny.recipe b/recipes/gosc_niedzielny.recipe index 59c8fc2f26..11beb076f5 100644 --- a/recipes/gosc_niedzielny.recipe +++ b/recipes/gosc_niedzielny.recipe @@ -2,7 +2,8 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2011, Piotr Kontek, piotr.kontek@gmail.com' +__copyright__ = '2011, Piotr Kontek, piotr.kontek@gmail.com \ + 2013, Tomasz Długosz, tomek3d@gmail.com' from calibre.web.feeds.news import BasicNewsRecipe from calibre.ptempfile import PersistentTemporaryFile @@ -12,9 +13,9 @@ import re class GN(BasicNewsRecipe): EDITION = 0 - __author__ = 'Piotr Kontek' - title = u'Gość niedzielny' - description = 'Weekly magazine' + __author__ = 'Piotr Kontek, Tomasz Długosz' + title = u'Gość Niedzielny' + description = 'Ogólnopolski tygodnik katolicki' encoding = 'utf-8' no_stylesheets = True language = 'pl' @@ -38,17 +39,25 @@ class GN(BasicNewsRecipe): first = True for p in main_section.findAll('p', attrs={'class':None}, recursive=False): if first and p.find('img') != None: - article = article + '

' - article = article + str(p.find('img')).replace('src="/files/','src="http://www.gosc.pl/files/') - article = article + '' + article += '

' + article += str(p.find('img')).replace('src="/files/','src="http://www.gosc.pl/files/') + article += '' for s in p.findAll('span'): - article = article + self.tag_to_string(s) - article = article + '

' + article += self.tag_to_string(s) + article += '

' else: - article = article + str(p).replace('src="/files/','src="http://www.gosc.pl/files/') + article += str(p).replace('src="/files/','src="http://www.gosc.pl/files/') first = False + limiter = main_section.find('p', attrs={'class' : 'limiter'}) + if limiter: + article += str(limiter) - html = unicode(title) + unicode(authors) + unicode(article) + html = unicode(title) + #sometimes authors are not filled in: + if authors: + html += unicode(authors) + unicode(article) + else: + html += unicode(article) self.temp_files.append(PersistentTemporaryFile('_temparse.html')) self.temp_files[-1].write(html) @@ -65,7 +74,8 @@ class GN(BasicNewsRecipe): if img != None: a = img.parent self.EDITION = a['href'] - self.title = img['alt'] + #this was preventing kindles from moving old issues to 'Back Issues' category: + #self.title = img['alt'] self.cover_url = 'http://www.gosc.pl' + img['src'] if year != date.today().year or not first: break diff --git a/recipes/tvn24.recipe b/recipes/tvn24.recipe index a5f5111770..ed0eae574f 100644 --- a/recipes/tvn24.recipe +++ b/recipes/tvn24.recipe @@ -15,26 +15,31 @@ class tvn24(BasicNewsRecipe): remove_javascript = True no_stylesheets = True keep_only_tags=[ -# dict(name='h1', attrs={'class':'size38 mt20 pb20'}), - dict(name='div', attrs={'class':'mainContainer'}), -# dict(name='p'), -# dict(attrs={'class':['size18 mt10 mb15', 'bold topicSize1', 'fromUsers content', 'textArticleDefault']}) +# dict(name='h1', attrs={'class':'size38 mt20 pb20'}), + dict(name='div', attrs={'class':'mainContainer'}), +# dict(name='p'), +# dict(attrs={'class':['size18 mt10 mb15', 'bold topicSize1', 'fromUsers content', 'textArticleDefault']}) ] remove_tags=[ - dict(attrs={'class':['commentsInfo', 'textSize', 'related newsNews align-right', 'box', 'watchMaterial text', 'related galleryGallery align-center', 'advert block-alignment-right', 'userActions', 'socialBookmarks', 'im yourArticle fl', 'dynamicButton addComment fl', 'innerArticleModule onRight cols externalContent', 'thumbsGallery', 'relatedObject customBlockquote align-right', 'lead', 'mainRightColumn', 'articleDateContainer borderGreyBottom', 'socialMediaContainer onRight loaded', 'quizContent', 'twitter', 'facebook', 'googlePlus', 'share', 'voteResult', 'reportTitleBar bgBlue_v4 mb15', 'innerVideoModule center']}), - dict(name='article', attrs={'class':['singleArtPhotoCenter', 'singleArtPhotoRight', 'singleArtPhotoLeft']}), - dict(name='section', attrs={'id':['forum', 'innerArticle', 'quiz toCenter', 'mb20']}), - dict(name='div', attrs={'class':'socialMediaContainer big p20 mb20 borderGrey loaded'}) - ] + dict(attrs={'class':['commentsInfo', 'textSize', 'related newsNews align-right', 'box', 'watchMaterial text', 'related galleryGallery align-center', 'advert block-alignment-right', 'userActions', 'socialBookmarks', 'im yourArticle fl', 'dynamicButton addComment fl', 'innerArticleModule onRight cols externalContent', 'thumbsGallery', 'relatedObject customBlockquote align-right', 'lead', 'mainRightColumn', 'articleDateContainer borderGreyBottom', 'socialMediaContainer onRight loaded', 'quizContent', 'twitter', 'facebook', 'googlePlus', 'share', 'voteResult', 'reportTitleBar bgBlue_v4 mb15', 'innerVideoModule center']}), + dict(name='article', attrs={'class':['singleArtPhotoCenter', 'singleArtPhotoRight', 'singleArtPhotoLeft']}), + dict(name='section', attrs={'id':['forum', 'innerArticle', 'quiz toCenter', 'mb20']}), + dict(name='div', attrs={'class':'socialMediaContainer big p20 mb20 borderGrey loaded'}) + ] remove_tags_after=[dict(name='li', attrs={'class':'share'})] feeds = [(u'Najnowsze', u'http://www.tvn24.pl/najnowsze.xml'), ] - #(u'Polska', u'www.tvn24.pl/polska.xml'), (u'\u015awiat', u'http://www.tvn24.pl/swiat.xml'), (u'Sport', u'http://www.tvn24.pl/sport.xml'), (u'Biznes', u'http://www.tvn24.pl/biznes.xml'), (u'Meteo', u'http://www.tvn24.pl/meteo.xml'), (u'Micha\u0142ki', u'http://www.tvn24.pl/michalki.xml'), (u'Kultura', u'http://www.tvn24.pl/kultura.xml')] + #(u'Polska', u'www.tvn24.pl/polska.xml'), (u'\u015awiat', u'http://www.tvn24.pl/swiat.xml'), (u'Sport', u'http://www.tvn24.pl/sport.xml'), (u'Biznes', u'http://www.tvn24.pl/biznes.xml'), (u'Meteo', u'http://www.tvn24.pl/meteo.xml'), (u'Micha\u0142ki', u'http://www.tvn24.pl/michalki.xml'), (u'Kultura', u'http://www.tvn24.pl/kultura.xml')] + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup def preprocess_html(self, soup): for alink in soup.findAll('a'): if alink.string is not None: - tstr = alink.string - alink.replaceWith(tstr) + tstr = alink.string + alink.replaceWith(tstr) return soup def postprocess_html(self, soup, first): diff --git a/src/calibre/db/cache.py b/src/calibre/db/cache.py index dd4bd11c6b..564c4f53ed 100644 --- a/src/calibre/db/cache.py +++ b/src/calibre/db/cache.py @@ -217,6 +217,8 @@ class Cache(object): field.series_field = self.fields[name[:-len('_index')]] elif name == 'series_index': field.series_field = self.fields['series'] + elif name == 'authors': + field.author_sort_field = self.fields['author_sort'] @read_api def field_for(self, name, book_id, default_value=None): diff --git a/src/calibre/db/fields.py b/src/calibre/db/fields.py index 0c3d6eb19a..2ae1fa2ecd 100644 --- a/src/calibre/db/fields.py +++ b/src/calibre/db/fields.py @@ -402,6 +402,13 @@ class AuthorsField(ManyToManyField): def category_sort_value(self, item_id, book_ids, lang_map): return self.table.asort_map[item_id] + def db_author_sort_for_book(self, book_id): + return self.author_sort_field.for_book(book_id) + + def author_sort_for_book(self, book_id): + return ' & '.join(self.table.asort_map[k] for k in + self.table.book_col_map[book_id]) + class FormatsField(ManyToManyField): def for_book(self, book_id, default_value=None): diff --git a/src/calibre/db/tables.py b/src/calibre/db/tables.py index 37a189bfb1..6f4c78272a 100644 --- a/src/calibre/db/tables.py +++ b/src/calibre/db/tables.py @@ -168,7 +168,7 @@ class AuthorsTable(ManyToManyTable): self.asort_map = {} for row in db.conn.execute( 'SELECT id, name, sort, link FROM authors'): - self.id_map[row[0]] = row[1] + self.id_map[row[0]] = self.unserialize(row[1]) self.asort_map[row[0]] = (row[2] if row[2] else author_to_author_sort(row[1])) self.alink_map[row[0]] = row[3] diff --git a/src/calibre/db/tests/writing.py b/src/calibre/db/tests/writing.py index 9a1f40c3ac..875329558b 100644 --- a/src/calibre/db/tests/writing.py +++ b/src/calibre/db/tests/writing.py @@ -203,10 +203,63 @@ class WritingTest(BaseTest): # }}} + def test_many_many_basic(self): # {{{ + 'Test the different code paths for writing to a many-many field' + cl = self.cloned_library + cache = self.init_cache(cl) + ae, af, sf = self.assertEqual, self.assertFalse, cache.set_field + # Tags + ae(sf('#tags', {1:cache.field_for('tags', 1), 2:cache.field_for('tags', 2)}), + {1, 2}) + for name in ('tags', '#tags'): + f = cache.fields[name] + af(sf(name, {1:('tag one', 'News')}, allow_case_change=False)) + ae(sf(name, {1:'tag one, News'}), {1, 2}) + ae(sf(name, {3:('tag two', 'sep,sep2')}), {2, 3}) + ae(len(f.table.id_map), 4) + ae(sf(name, {1:None}), set([1])) + cache2 = self.init_cache(cl) + for c in (cache, cache2): + ae(c.field_for(name, 3), ('tag two', 'sep;sep2')) + ae(len(c.fields[name].table.id_map), 3) + ae(len(c.fields[name].table.id_map), 3) + ae(c.field_for(name, 1), ()) + ae(c.field_for(name, 2), ('tag one', 'tag two')) + del cache2 + + # Authors + ae(sf('#authors', {k:cache.field_for('authors', k) for k in (1,2,3)}), + {1,2,3}) + + for name in ('authors', '#authors'): + f = cache.fields[name] + ae(len(f.table.id_map), 3) + af(cache.set_field(name, {3:None if name == 'authors' else 'Unknown'})) + ae(cache.set_field(name, {3:'Kovid Goyal & Divok Layog'}), set([3])) + ae(cache.set_field(name, {1:'', 2:'An, Author'}), {1,2}) + cache2 = self.init_cache(cl) + for c in (cache, cache2): + ae(len(c.fields[name].table.id_map), 4 if name =='authors' else 3) + ae(c.field_for(name, 3), ('Kovid Goyal', 'Divok Layog')) + ae(c.field_for(name, 2), ('An, Author',)) + ae(c.field_for(name, 1), ('Unknown',) if name=='authors' else ()) + ae(c.field_for('author_sort', 1), 'Unknown') + ae(c.field_for('author_sort', 2), 'An, Author') + ae(c.field_for('author_sort', 3), 'Goyal, Kovid & Layog, Divok') + del cache2 + ae(cache.set_field('authors', {1:'KoviD GoyaL'}), {1, 3}) + ae(cache.field_for('author_sort', 1), 'GoyaL, KoviD') + ae(cache.field_for('author_sort', 3), 'GoyaL, KoviD & Layog, Divok') + + # TODO: identifiers, languages + + # }}} def tests(): - return unittest.TestLoader().loadTestsFromTestCase(WritingTest) + tl = unittest.TestLoader() + # return tl.loadTestsFromName('writing.WritingTest.test_many_many_basic') + return tl.loadTestsFromTestCase(WritingTest) def run(): unittest.TextTestRunner(verbosity=2).run(tests()) diff --git a/src/calibre/db/write.py b/src/calibre/db/write.py index 7f2ba5baee..e558c95fe5 100644 --- a/src/calibre/db/write.py +++ b/src/calibre/db/write.py @@ -12,8 +12,11 @@ from functools import partial from datetime import datetime from calibre.constants import preferred_encoding, ispy3 +from calibre.ebooks.metadata import author_to_author_sort from calibre.utils.date import (parse_only_date, parse_date, UNDEFINED_DATE, isoformat) +from calibre.utils.icu import strcmp + if ispy3: unicode = str @@ -45,15 +48,20 @@ def get_series_values(val): pass return (val, None) -def multiple_text(sep, x): - if x is None: +def multiple_text(sep, ui_sep, x): + if not x: return () if isinstance(x, bytes): x = x.decode(preferred_encoding, 'replce') if isinstance(x, unicode): x = x.split(sep) - x = (y.strip() for y in x if y.strip()) - return (' '.join(y.split()) for y in x if y) + else: + x = (y.decode(preferred_encoding, 'replace') if isinstance(y, bytes) + else y for y in x) + ui_sep = ui_sep.strip() + repsep = ',' if ui_sep == ';' else ';' + x = (y.strip().replace(ui_sep, repsep) for y in x if y.strip()) + return tuple(' '.join(y.split()) for y in x if y) def adapt_datetime(x): if isinstance(x, (unicode, bytes)): @@ -92,7 +100,8 @@ def get_adapter(name, metadata): dt = metadata['datatype'] if dt == 'text': if metadata['is_multiple']: - ans = partial(multiple_text, metadata['is_multiple']['ui_to_list']) + m = metadata['is_multiple'] + ans = partial(multiple_text, m['ui_to_list'], m['list_to_ui']) else: ans = single_text elif dt == 'series': @@ -132,7 +141,7 @@ def get_adapter(name, metadata): def one_one_in_books(book_id_val_map, db, field, *args): 'Set a one-one field in the books table' if book_id_val_map: - sequence = tuple((sqlite_datetime(v), k) for k, v in book_id_val_map.iteritems()) + sequence = ((sqlite_datetime(v), k) for k, v in book_id_val_map.iteritems()) db.conn.executemany( 'UPDATE books SET %s=? WHERE id=?'%field.metadata['column'], sequence) field.table.book_col_map.update(book_id_val_map) @@ -150,7 +159,7 @@ def one_one_in_other(book_id_val_map, db, field, *args): if updated: db.conn.executemany('INSERT OR REPLACE INTO %s(book,%s) VALUES (?,?)'%( field.metadata['table'], field.metadata['column']), - tuple((k, sqlite_datetime(v)) for k, v in updated.iteritems())) + ((k, sqlite_datetime(v)) for k, v in updated.iteritems())) field.table.book_col_map.update(updated) return set(book_id_val_map) @@ -178,6 +187,44 @@ def safe_lower(x): except (TypeError, ValueError, KeyError, AttributeError): return x +def get_db_id(val, db, m, table, kmap, rid_map, allow_case_change, + case_changes, val_map, is_authors=False): + ''' Get the db id for the value val. If val does not exist in the db it is + inserted into the db. ''' + kval = kmap(val) + item_id = rid_map.get(kval, None) + if item_id is None: + if is_authors: + aus = author_to_author_sort(val) + db.conn.execute('INSERT INTO authors(name,sort) VALUES (?,?)', + (val.replace(',', '|'), aus)) + else: + db.conn.execute('INSERT INTO %s(%s) VALUES (?)'%( + m['table'], m['column']), (val,)) + item_id = rid_map[kval] = db.conn.last_insert_rowid() + table.id_map[item_id] = val + table.col_book_map[item_id] = set() + if is_authors: + table.asort_map[item_id] = aus + table.alink_map[item_id] = '' + elif allow_case_change and val != table.id_map[item_id]: + case_changes[item_id] = val + val_map[val] = item_id + +def change_case(case_changes, dirtied, db, table, m, is_authors=False): + if is_authors: + vals = ((val.replace(',', '|'), item_id) for item_id, val in + case_changes.iteritems()) + else: + vals = ((val, item_id) for item_id, val in case_changes.iteritems()) + db.conn.executemany( + 'UPDATE %s SET %s=? WHERE id=?'%(m['table'], m['column']), vals) + for item_id, val in case_changes.iteritems(): + table.id_map[item_id] = val + dirtied.update(table.col_book_map[item_id]) + if is_authors: + table.asort_map[item_id] = author_to_author_sort(val) + def many_one(book_id_val_map, db, field, allow_case_change, *args): dirtied = set() m = field.metadata @@ -185,108 +232,61 @@ def many_one(book_id_val_map, db, field, allow_case_change, *args): dt = m['datatype'] is_custom_series = dt == 'series' and table.name.startswith('#') - # Map values to their canonical form for later comparison + # Map values to db ids, including any new values kmap = safe_lower if dt in {'text', 'series'} else lambda x:x + rid_map = {kmap(item):item_id for item_id, item in table.id_map.iteritems()} + val_map = {None:None} + case_changes = {} + for val in book_id_val_map.itervalues(): + if val is not None: + get_db_id(val, db, m, table, kmap, rid_map, allow_case_change, + case_changes, val_map) + + if case_changes: + change_case(case_changes, dirtied, db, table, m) + + book_id_item_id_map = {k:val_map[v] for k, v in book_id_val_map.iteritems()} # Ignore those items whose value is the same as the current value - no_changes = {k:nval for k, nval in book_id_val_map.iteritems() if - kmap(nval) == kmap(field.for_book(k, default_value=None))} - for book_id in no_changes: - del book_id_val_map[book_id] + book_id_item_id_map = {k:v for k, v in book_id_item_id_map.iteritems() + if v != table.book_col_map.get(k, None)} + dirtied |= set(book_id_item_id_map) - # If we are allowed case changes check that none of the ignored items are - # case changes. If they are, update the item's case in the db. - if allow_case_change: - for book_id, nval in no_changes.iteritems(): - if nval is not None and nval != field.for_book( - book_id, default_value=None): - # Change of case - item_id = table.book_col_map[book_id] - db.conn.execute('UPDATE %s SET %s=? WHERE id=?'%( - m['table'], m['column']), (nval, item_id)) - table.id_map[item_id] = nval - dirtied |= table.col_book_map[item_id] - - deleted = {k:v for k, v in book_id_val_map.iteritems() if v is None} - updated = {k:v for k, v in book_id_val_map.iteritems() if v is not None} - link_table = table.link_table + # Update the book->col and col->book maps + deleted = set() + updated = {} + for book_id, item_id in book_id_item_id_map.iteritems(): + old_item_id = table.book_col_map.get(book_id, None) + if old_item_id is not None: + table.col_book_map[old_item_id].discard(book_id) + if item_id is None: + table.book_col_map.pop(book_id, None) + deleted.add(book_id) + else: + table.book_col_map[book_id] = item_id + table.col_book_map[item_id].add(book_id) + updated[book_id] = item_id + # Update the db link table if deleted: - db.conn.executemany('DELETE FROM %s WHERE book=?'%link_table, - tuple((book_id,) for book_id in deleted)) - for book_id in deleted: - item_id = table.book_col_map.pop(book_id, None) - if item_id is not None: - table.col_book_map[item_id].discard(book_id) - dirtied |= set(deleted) - + db.conn.executemany('DELETE FROM %s WHERE book=?'%table.link_table, + ((k,) for k in deleted)) if updated: - rid_map = {kmap(v):k for k, v in table.id_map.iteritems()} - book_id_item_id_map = {k:rid_map.get(kmap(v), None) for k, v in - book_id_val_map.iteritems()} - - # items that dont yet exist - new_items = {k:v for k, v in updated.iteritems() if - book_id_item_id_map[k] is None} - # items that already exist - changed_items = {k:book_id_item_id_map[k] for k in updated if - book_id_item_id_map[k] is not None} - def sql_update(imap): - sql = ( - 'DELETE FROM {0} WHERE book=?; INSERT INTO {0}(book,{1},extra) VALUES(?, ?, 1.0)' - if is_custom_series else - 'DELETE FROM {0} WHERE book=?; INSERT INTO {0}(book,{1}) VALUES(?, ?)' - ) - db.conn.executemany(sql.format(link_table, m['link_column']), - tuple((book_id, book_id, item_id) for book_id, item_id in - imap.iteritems())) - - if new_items: - item_ids = {} - val_map = {} - for val in set(new_items.itervalues()): - lval = kmap(val) - if lval in val_map: - item_id = val_map[lval] - else: - db.conn.execute('INSERT INTO %s(%s) VALUES (?)'%( - m['table'], m['column']), (val,)) - item_id = val_map[lval] = db.conn.last_insert_rowid() - item_ids[val] = item_id - table.id_map[item_id] = val - imap = {} - for book_id, val in new_items.iteritems(): - item_id = item_ids[val] - old_item_id = table.book_col_map.get(book_id, None) - if old_item_id is not None: - table.col_book_map[old_item_id].discard(book_id) - if item_id not in table.col_book_map: - table.col_book_map[item_id] = set() - table.col_book_map[item_id].add(book_id) - table.book_col_map[book_id] = imap[book_id] = item_id - sql_update(imap) - dirtied |= set(imap) - - if changed_items: - imap = {} - sql_update(changed_items) - for book_id, item_id in changed_items.iteritems(): - old_item_id = table.book_col_map.get(book_id, None) - if old_item_id != item_id: - table.book_col_map[book_id] = item_id - table.col_book_map[item_id].add(book_id) - if old_item_id is not None: - table.col_book_map[old_item_id].discard(book_id) - imap[book_id] = item_id - sql_update(imap) - dirtied |= set(imap) + sql = ( + 'DELETE FROM {0} WHERE book=?; INSERT INTO {0}(book,{1},extra) VALUES(?, ?, 1.0)' + if is_custom_series else + 'DELETE FROM {0} WHERE book=?; INSERT INTO {0}(book,{1}) VALUES(?, ?)' + ) + db.conn.executemany(sql.format(table.link_table, m['link_column']), + ((book_id, book_id, item_id) for book_id, item_id in + updated.iteritems())) # Remove no longer used items remove = {item_id for item_id in table.id_map if not table.col_book_map.get(item_id, False)} if remove: db.conn.executemany('DELETE FROM %s WHERE id=?'%m['table'], - tuple((item_id,) for item_id in remove)) + ((item_id,) for item_id in remove)) for item_id in remove: del table.id_map[item_id] table.col_book_map.pop(item_id, None) @@ -294,6 +294,96 @@ def many_one(book_id_val_map, db, field, allow_case_change, *args): return dirtied # }}} +# Many-Many fields {{{ +def many_many(book_id_val_map, db, field, allow_case_change, *args): + dirtied = set() + m = field.metadata + table = field.table + dt = m['datatype'] + is_authors = field.name == 'authors' + + # Map values to db ids, including any new values + kmap = safe_lower if dt == 'text' else lambda x:x + rid_map = {kmap(item):item_id for item_id, item in table.id_map.iteritems()} + val_map = {} + case_changes = {} + for vals in book_id_val_map.itervalues(): + for val in vals: + get_db_id(val, db, m, table, kmap, rid_map, allow_case_change, + case_changes, val_map, is_authors=is_authors) + + if case_changes: + change_case(case_changes, dirtied, db, table, m, is_authors=is_authors) + if is_authors: + for item_id, val in case_changes.iteritems(): + for book_id in table.col_book_map[item_id]: + current_sort = field.db_author_sort_for_book(book_id) + new_sort = field.author_sort_for_book(book_id) + if strcmp(current_sort, new_sort) == 0: + # The sort strings differ only by case, update the db + # sort + field.author_sort_field.writer.set_books({book_id:new_sort}, db) + + book_id_item_id_map = {k:tuple(val_map[v] for v in vals) + for k, vals in book_id_val_map.iteritems()} + + # Ignore those items whose value is the same as the current value + book_id_item_id_map = {k:v for k, v in book_id_item_id_map.iteritems() + if v != table.book_col_map.get(k, None)} + dirtied |= set(book_id_item_id_map) + + # Update the book->col and col->book maps + deleted = set() + updated = {} + for book_id, item_ids in book_id_item_id_map.iteritems(): + old_item_ids = table.book_col_map.get(book_id, None) + if old_item_ids: + for old_item_id in old_item_ids: + table.col_book_map[old_item_id].discard(book_id) + if item_ids: + table.book_col_map[book_id] = item_ids + for item_id in item_ids: + table.col_book_map[item_id].add(book_id) + updated[book_id] = item_ids + else: + table.book_col_map.pop(book_id, None) + deleted.add(book_id) + + # Update the db link table + if deleted: + db.conn.executemany('DELETE FROM %s WHERE book=?'%table.link_table, + ((k,) for k in deleted)) + if updated: + vals = ( + (book_id, val) for book_id, vals in updated.iteritems() + for val in vals + ) + db.conn.executemany('DELETE FROM %s WHERE book=?'%table.link_table, + ((k,) for k in updated)) + db.conn.executemany('INSERT INTO {0}(book,{1}) VALUES(?, ?)'.format( + table.link_table, m['link_column']), vals) + if is_authors: + aus_map = {book_id:field.author_sort_for_book(book_id) for book_id + in updated} + field.author_sort_field.writer.set_books(aus_map, db) + + # Remove no longer used items + remove = {item_id for item_id in table.id_map if not + table.col_book_map.get(item_id, False)} + if remove: + db.conn.executemany('DELETE FROM %s WHERE id=?'%m['table'], + ((item_id,) for item_id in remove)) + for item_id in remove: + del table.id_map[item_id] + table.col_book_map.pop(item_id, None) + if is_authors: + table.asort_map.pop(item_id, None) + table.alink_map.pop(item_id, None) + + return dirtied + +# }}} + def dummy(book_id_val_map, *args): return set() @@ -311,9 +401,7 @@ class Writer(object): elif self.name[0] == '#' and self.name.endswith('_index'): self.set_books_func = custom_series_index elif field.is_many_many: - # TODO: Implement this - pass - # TODO: Remember to change commas to | when writing authors to sqlite + self.set_books_func = many_many elif field.is_many: self.set_books_func = (self.set_books_for_enum if dt == 'enumeration' else many_one) diff --git a/src/calibre/devices/apple/driver.py b/src/calibre/devices/apple/driver.py index 9bb8fa014e..5251e701b5 100644 --- a/src/calibre/devices/apple/driver.py +++ b/src/calibre/devices/apple/driver.py @@ -7,9 +7,10 @@ __docformat__ = 'restructuredtext en' import cStringIO, ctypes, datetime, os, platform, re, shutil, sys, tempfile, time -from calibre.constants import __appname__, __version__, cache_dir, DEBUG as CALIBRE_DEBUG from calibre import fit_image, confirm_config_name, strftime as _strftime -from calibre.constants import isosx, iswindows, cache_dir as _cache_dir +from calibre.constants import ( + __appname__, __version__, DEBUG as CALIBRE_DEBUG, isosx, iswindows, + cache_dir as _cache_dir) from calibre.devices.errors import OpenFeedback, UserFeedback from calibre.devices.usbms.deviceconfig import DeviceConfig from calibre.devices.interface import DevicePlugin @@ -290,8 +291,6 @@ class ITUNES(DriverBase): # Properties cached_books = {} - cache_dir = os.path.join(_cache_dir(), 'itunes') - archive_path = os.path.join(cache_dir, "thumbs.zip") calibre_library_path = prefs['library_path'] description_prefix = "added by calibre" ejected = False @@ -312,7 +311,7 @@ class ITUNES(DriverBase): @property def cache_dir(self): - return os.path.join(cache_dir(), 'itunes') + return os.path.join(_cache_dir(), 'itunes') @property def archive_path(self): diff --git a/src/calibre/ebooks/oeb/polish/container.py b/src/calibre/ebooks/oeb/polish/container.py index 34ef72a9bf..db55f9579d 100644 --- a/src/calibre/ebooks/oeb/polish/container.py +++ b/src/calibre/ebooks/oeb/polish/container.py @@ -88,7 +88,7 @@ class Container(object): self.mime_map[name] = guess_type('a.opf') if not hasattr(self, 'opf_name'): - raise InvalidBook('Book has no OPF file') + raise InvalidBook('Could not locate opf file: %r'%opfpath) # Update mime map with data from the OPF for item in self.opf_xpath('//opf:manifest/opf:item[@href and @media-type]'): diff --git a/src/calibre/ebooks/oeb/transforms/split.py b/src/calibre/ebooks/oeb/transforms/split.py index f2a3a0d203..91d5a3feac 100644 --- a/src/calibre/ebooks/oeb/transforms/split.py +++ b/src/calibre/ebooks/oeb/transforms/split.py @@ -10,6 +10,7 @@ assumes a prior call to the flatcss transform. ''' import os, math, functools, collections, re, copy +from collections import OrderedDict from lxml.etree import XPath as _XPath from lxml import etree @@ -106,8 +107,7 @@ class Split(object): continue for elem in selector(body[0]): if elem not in body: - if before: - elem.set('pb_before', '1') + elem.set('pb_before', '1' if before else '0') page_breaks.add(elem) for i, elem in enumerate(item.data.iter()): @@ -134,14 +134,12 @@ class Split(object): id = 'calibre_pb_%d'%i x.set('id', id) xp = XPath('//*[@id=%r]'%id) - page_breaks_.append((xp, - x.get('pb_before', False))) + page_breaks_.append((xp, x.get('pb_before', '0') == '1')) page_break_ids.append(id) for elem in item.data.iter(): elem.attrib.pop('pb_order', False) - if elem.get('pb_before', False): - elem.attrib.pop('pb_before') + elem.attrib.pop('pb_before', False) return page_breaks_, page_break_ids @@ -223,22 +221,27 @@ class FlowSplitter(object): self.commit() def split_on_page_breaks(self, orig_tree): - ordered_ids = [] - for elem in orig_tree.xpath('//*[@id]'): - id = elem.get('id') - if id in self.page_break_ids: - ordered_ids.append(self.page_breaks[self.page_break_ids.index(id)]) + ordered_ids = OrderedDict() + all_page_break_ids = frozenset(self.page_break_ids) + for elem_id in orig_tree.xpath('//*/@id'): + if elem_id in all_page_break_ids: + ordered_ids[elem_id] = self.page_breaks[ + self.page_break_ids.index(elem_id)] + + self.trees = [orig_tree] + while ordered_ids: + pb_id, (pattern, before) = ordered_ids.iteritems().next() + del ordered_ids[pb_id] + for i in xrange(len(self.trees)-1, -1, -1): + tree = self.trees[i] + elem = pattern(tree) + if elem: + self.log.debug('\t\tSplitting on page-break at id=%s'% + elem[0].get('id')) + before_tree, after_tree = self.do_split(tree, elem[0], before) + self.trees[i:i+1] = [before_tree, after_tree] + break - self.trees = [] - tree = orig_tree - for pattern, before in ordered_ids: - elem = pattern(tree) - if elem: - self.log.debug('\t\tSplitting on page-break at %s'% - elem[0].get('id')) - before, after = self.do_split(tree, elem[0], before) - self.trees.append(before) - tree = after self.trees.append(tree) trees, ids = [], set([]) for tree in self.trees: @@ -289,7 +292,6 @@ class FlowSplitter(object): if self.opts.verbose > 3 and npath != path: self.log.debug('\t\t\tMoved split point %s to %s'%(path, npath)) - return npath def do_split(self, tree, split_point, before): @@ -304,7 +306,11 @@ class FlowSplitter(object): root = tree.getroot() root2 = tree2.getroot() body, body2 = map(self.get_body, (root, root2)) - path = self.adjust_split_point(root, path) + if before: + # We cannot adjust for after since moving an after split point to a + # parent will cause breakage if the parent contains any content + # after the original split point + path = self.adjust_split_point(root, path) split_point = root.xpath(path)[0] split_point2 = root2.xpath(path)[0] diff --git a/src/calibre/linux.py b/src/calibre/linux.py index 401cab6418..7606c11f16 100644 --- a/src/calibre/linux.py +++ b/src/calibre/linux.py @@ -171,7 +171,7 @@ class ZshCompleter(object): # {{{ arg = '' if opt.takes_value(): arg = ':"%s":'%h - if opt.dest in {'debug_pipeline', 'to_dir', 'outbox', 'with_library', 'library_path'}: + if opt.dest in {'extract_to', 'debug_pipeline', 'to_dir', 'outbox', 'with_library', 'library_path'}: arg += "'_path_files -/'" elif opt.choices: arg += "(%s)"%'|'.join(opt.choices)