GRiker 2013-03-03 08:46:49 -08:00
commit 61084c2392
13 changed files with 332 additions and 159 deletions

View File

@@ -37,7 +37,7 @@ nbproject/
 calibre_plugins/
 recipes/.git
 recipes/.gitignore
-recipes/README
+recipes/README.md
 recipes/katalog_egazeciarz.recipe
 recipes/tv_axnscifi.recipe
 recipes/tv_comedycentral.recipe

View File

@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 __license__ = 'GPL v3'
-__copyright__ = u'2010-2012, Tomasz Dlugosz <tomek3d@gmail.com>'
+__copyright__ = u'2010-2013, Tomasz Dlugosz <tomek3d@gmail.com>'
 '''
 fronda.pl
 '''

@@ -68,7 +68,8 @@ class Fronda(BasicNewsRecipe):
                 article_url = 'http://www.fronda.pl' + article_a['href']
                 article_title = self.tag_to_string(article_a)
                 articles[genName].append( { 'title' : article_title, 'url' : article_url, 'date' : article_date })
-            feeds.append((genName, articles[genName]))
+            if articles[genName]:
+                feeds.append((genName, articles[genName]))
         return feeds

     keep_only_tags = [

@@ -82,8 +83,10 @@ class Fronda(BasicNewsRecipe):
         dict(name='h3', attrs={'class':'block-header article comments'}),
         dict(name='ul', attrs={'class':'comment-list'}),
         dict(name='ul', attrs={'class':'category'}),
+        dict(name='ul', attrs={'class':'tag-list'}),
         dict(name='p', attrs={'id':'comments-disclaimer'}),
         dict(name='div', attrs={'style':'text-align: left; margin-bottom: 15px;'}),
-        dict(name='div', attrs={'style':'text-align: left; margin-top: 15px;'}),
+        dict(name='div', attrs={'style':'text-align: left; margin-top: 15px; margin-bottom: 30px;'}),
+        dict(name='div', attrs={'class':'related-articles content'}),
         dict(name='div', attrs={'id':'comment-form'})
     ]

View File

@@ -2,7 +2,8 @@
 #!/usr/bin/env python
 __license__ = 'GPL v3'
-__copyright__ = '2011, Piotr Kontek, piotr.kontek@gmail.com'
+__copyright__ = '2011, Piotr Kontek, piotr.kontek@gmail.com \
+                2013, Tomasz Długosz, tomek3d@gmail.com'

 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ptempfile import PersistentTemporaryFile

@@ -12,9 +13,9 @@ import re

 class GN(BasicNewsRecipe):
     EDITION = 0

-    __author__ = 'Piotr Kontek'
-    title = u'Gość niedzielny'
-    description = 'Weekly magazine'
+    __author__ = 'Piotr Kontek, Tomasz Długosz'
+    title = u'Gość Niedzielny'
+    description = 'Ogólnopolski tygodnik katolicki'
     encoding = 'utf-8'
     no_stylesheets = True
     language = 'pl'

@@ -38,17 +39,25 @@ class GN(BasicNewsRecipe):
         first = True
         for p in main_section.findAll('p', attrs={'class':None}, recursive=False):
             if first and p.find('img') != None:
-                article = article + '<p>'
-                article = article + str(p.find('img')).replace('src="/files/','src="http://www.gosc.pl/files/')
-                article = article + '<font size="-2">'
+                article += '<p>'
+                article += str(p.find('img')).replace('src="/files/','src="http://www.gosc.pl/files/')
+                article += '<font size="-2">'
                 for s in p.findAll('span'):
-                    article = article + self.tag_to_string(s)
-                article = article + '</font></p>'
+                    article += self.tag_to_string(s)
+                article += '</font></p>'
             else:
-                article = article + str(p).replace('src="/files/','src="http://www.gosc.pl/files/')
+                article += str(p).replace('src="/files/','src="http://www.gosc.pl/files/')
             first = False
+        limiter = main_section.find('p', attrs={'class' : 'limiter'})
+        if limiter:
+            article += str(limiter)

-        html = unicode(title) + unicode(authors) + unicode(article)
+        html = unicode(title)
+        #sometimes authors are not filled in:
+        if authors:
+            html += unicode(authors) + unicode(article)
+        else:
+            html += unicode(article)

         self.temp_files.append(PersistentTemporaryFile('_temparse.html'))
         self.temp_files[-1].write(html)

@@ -65,7 +74,8 @@ class GN(BasicNewsRecipe):
             if img != None:
                 a = img.parent
                 self.EDITION = a['href']
-                self.title = img['alt']
+                #this was preventing kindles from moving old issues to 'Back Issues' category:
+                #self.title = img['alt']
                 self.cover_url = 'http://www.gosc.pl' + img['src']
                 if year != date.today().year or not first:
                     break

View File

@@ -15,26 +15,31 @@ class tvn24(BasicNewsRecipe):
     remove_javascript = True
     no_stylesheets = True
     keep_only_tags=[
 #        dict(name='h1', attrs={'class':'size38 mt20 pb20'}),
         dict(name='div', attrs={'class':'mainContainer'}),
 #        dict(name='p'),
 #        dict(attrs={'class':['size18 mt10 mb15', 'bold topicSize1', 'fromUsers content', 'textArticleDefault']})
         ]
     remove_tags=[
         dict(attrs={'class':['commentsInfo', 'textSize', 'related newsNews align-right', 'box', 'watchMaterial text', 'related galleryGallery align-center', 'advert block-alignment-right', 'userActions', 'socialBookmarks', 'im yourArticle fl', 'dynamicButton addComment fl', 'innerArticleModule onRight cols externalContent', 'thumbsGallery', 'relatedObject customBlockquote align-right', 'lead', 'mainRightColumn', 'articleDateContainer borderGreyBottom', 'socialMediaContainer onRight loaded', 'quizContent', 'twitter', 'facebook', 'googlePlus', 'share', 'voteResult', 'reportTitleBar bgBlue_v4 mb15', 'innerVideoModule center']}),
         dict(name='article', attrs={'class':['singleArtPhotoCenter', 'singleArtPhotoRight', 'singleArtPhotoLeft']}),
         dict(name='section', attrs={'id':['forum', 'innerArticle', 'quiz toCenter', 'mb20']}),
         dict(name='div', attrs={'class':'socialMediaContainer big p20 mb20 borderGrey loaded'})
         ]
     remove_tags_after=[dict(name='li', attrs={'class':'share'})]

     feeds = [(u'Najnowsze', u'http://www.tvn24.pl/najnowsze.xml'), ]
     #(u'Polska', u'www.tvn24.pl/polska.xml'), (u'\u015awiat', u'http://www.tvn24.pl/swiat.xml'), (u'Sport', u'http://www.tvn24.pl/sport.xml'), (u'Biznes', u'http://www.tvn24.pl/biznes.xml'), (u'Meteo', u'http://www.tvn24.pl/meteo.xml'), (u'Micha\u0142ki', u'http://www.tvn24.pl/michalki.xml'), (u'Kultura', u'http://www.tvn24.pl/kultura.xml')]

+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
+
     def preprocess_html(self, soup):
         for alink in soup.findAll('a'):
             if alink.string is not None:
                 tstr = alink.string
                 alink.replaceWith(tstr)
         return soup

     def postprocess_html(self, soup, first):
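Note: the new file now contains two preprocess_html definitions; since a later def statement rebinds the method name, only the second (link-flattening) version will ever run, and the inline-style stripping above it is dead code. A possible merged version, purely as a sketch:

    def preprocess_html(self, soup):
        # strip inline styles, then replace each link with its bare text
        for item in soup.findAll(style=True):
            del item['style']
        for alink in soup.findAll('a'):
            if alink.string is not None:
                alink.replaceWith(alink.string)
        return soup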

View File

@@ -217,6 +217,8 @@ class Cache(object):
                 field.series_field = self.fields[name[:-len('_index')]]
             elif name == 'series_index':
                 field.series_field = self.fields['series']
+            elif name == 'authors':
+                field.author_sort_field = self.fields['author_sort']

     @read_api
     def field_for(self, name, book_id, default_value=None):

View File

@@ -402,6 +402,13 @@ class AuthorsField(ManyToManyField):
     def category_sort_value(self, item_id, book_ids, lang_map):
         return self.table.asort_map[item_id]

+    def db_author_sort_for_book(self, book_id):
+        return self.author_sort_field.for_book(book_id)
+
+    def author_sort_for_book(self, book_id):
+        return ' & '.join(self.table.asort_map[k] for k in
+                          self.table.book_col_map[book_id])
+
 class FormatsField(ManyToManyField):

     def for_book(self, book_id, default_value=None):
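The two new helpers answer different questions: db_author_sort_for_book reads the author_sort value currently stored for the book, while author_sort_for_book recomputes what it should be from the per-author sort map. Roughly, with hypothetical data:

    # hypothetical maps, for illustration only
    asort_map = {7: 'Goyal, Kovid', 8: 'Layog, Divok'}
    book_col_map = {3: (7, 8)}
    ' & '.join(asort_map[k] for k in book_col_map[3])
    # -> 'Goyal, Kovid & Layog, Divok'

The many_many() writer added below compares the two with strcmp to detect case-only author renames.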

View File

@@ -168,7 +168,7 @@ class AuthorsTable(ManyToManyTable):
         self.asort_map = {}
         for row in db.conn.execute(
                 'SELECT id, name, sort, link FROM authors'):
-            self.id_map[row[0]] = row[1]
+            self.id_map[row[0]] = self.unserialize(row[1])
             self.asort_map[row[0]] = (row[2] if row[2] else
                                       author_to_author_sort(row[1]))
             self.alink_map[row[0]] = row[3]
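unserialize is presumably the inverse of the storage convention used when inserting authors (see the val.replace(',', '|') calls added in write.py below): commas are not allowed in stored author names, so a name like 'An, Author' goes into sqlite as 'An| Author' and has to be mapped back on read. An assumed sketch of that behaviour:

    # assumption: unserialize undoes the comma -> pipe encoding
    def unserialize(self, val):
        return val.replace('|', ',')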

View File

@@ -203,10 +203,63 @@ class WritingTest(BaseTest):
     # }}}

+    def test_many_many_basic(self): # {{{
+        'Test the different code paths for writing to a many-many field'
+        cl = self.cloned_library
+        cache = self.init_cache(cl)
+        ae, af, sf = self.assertEqual, self.assertFalse, cache.set_field
+
+        # Tags
+        ae(sf('#tags', {1:cache.field_for('tags', 1), 2:cache.field_for('tags', 2)}),
+           {1, 2})
+        for name in ('tags', '#tags'):
+            f = cache.fields[name]
+            af(sf(name, {1:('tag one', 'News')}, allow_case_change=False))
+            ae(sf(name, {1:'tag one, News'}), {1, 2})
+            ae(sf(name, {3:('tag two', 'sep,sep2')}), {2, 3})
+            ae(len(f.table.id_map), 4)
+            ae(sf(name, {1:None}), set([1]))
+            cache2 = self.init_cache(cl)
+            for c in (cache, cache2):
+                ae(c.field_for(name, 3), ('tag two', 'sep;sep2'))
+                ae(len(c.fields[name].table.id_map), 3)
+                ae(len(c.fields[name].table.id_map), 3)
+                ae(c.field_for(name, 1), ())
+                ae(c.field_for(name, 2), ('tag one', 'tag two'))
+            del cache2
+
+        # Authors
+        ae(sf('#authors', {k:cache.field_for('authors', k) for k in (1,2,3)}),
+           {1,2,3})
+        for name in ('authors', '#authors'):
+            f = cache.fields[name]
+            ae(len(f.table.id_map), 3)
+            af(cache.set_field(name, {3:None if name == 'authors' else 'Unknown'}))
+            ae(cache.set_field(name, {3:'Kovid Goyal & Divok Layog'}), set([3]))
+            ae(cache.set_field(name, {1:'', 2:'An, Author'}), {1,2})
+            cache2 = self.init_cache(cl)
+            for c in (cache, cache2):
+                ae(len(c.fields[name].table.id_map), 4 if name =='authors' else 3)
+                ae(c.field_for(name, 3), ('Kovid Goyal', 'Divok Layog'))
+                ae(c.field_for(name, 2), ('An, Author',))
+                ae(c.field_for(name, 1), ('Unknown',) if name=='authors' else ())
+                ae(c.field_for('author_sort', 1), 'Unknown')
+                ae(c.field_for('author_sort', 2), 'An, Author')
+                ae(c.field_for('author_sort', 3), 'Goyal, Kovid & Layog, Divok')
+            del cache2
+        ae(cache.set_field('authors', {1:'KoviD GoyaL'}), {1, 3})
+        ae(cache.field_for('author_sort', 1), 'GoyaL, KoviD')
+        ae(cache.field_for('author_sort', 3), 'GoyaL, KoviD & Layog, Divok')
+        # TODO: identifiers, languages
+    # }}}
+
 def tests():
-    return unittest.TestLoader().loadTestsFromTestCase(WritingTest)
+    tl = unittest.TestLoader()
+    # return tl.loadTestsFromName('writing.WritingTest.test_many_many_basic')
+    return tl.loadTestsFromTestCase(WritingTest)

 def run():
     unittest.TextTestRunner(verbosity=2).run(tests())
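To iterate on just the new test, the commented line can be swapped in for the loadTestsFromTestCase call (module path as given in that comment):

    def tests():
        tl = unittest.TestLoader()
        return tl.loadTestsFromName('writing.WritingTest.test_many_many_basic')

run() will then execute only test_many_many_basic with the same verbose runner.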

View File

@@ -12,8 +12,11 @@ from functools import partial
 from datetime import datetime

 from calibre.constants import preferred_encoding, ispy3
+from calibre.ebooks.metadata import author_to_author_sort
 from calibre.utils.date import (parse_only_date, parse_date, UNDEFINED_DATE,
                                 isoformat)
+from calibre.utils.icu import strcmp

 if ispy3:
     unicode = str
@@ -45,15 +48,20 @@ def get_series_values(val):
         pass
     return (val, None)

-def multiple_text(sep, x):
-    if x is None:
+def multiple_text(sep, ui_sep, x):
+    if not x:
         return ()
     if isinstance(x, bytes):
         x = x.decode(preferred_encoding, 'replace')
     if isinstance(x, unicode):
         x = x.split(sep)
-    x = (y.strip() for y in x if y.strip())
-    return (' '.join(y.split()) for y in x if y)
+    else:
+        x = (y.decode(preferred_encoding, 'replace') if isinstance(y, bytes)
+             else y for y in x)
+    ui_sep = ui_sep.strip()
+    repsep = ',' if ui_sep == ';' else ';'
+    x = (y.strip().replace(ui_sep, repsep) for y in x if y.strip())
+    return tuple(' '.join(y.split()) for y in x if y)

 def adapt_datetime(x):
     if isinstance(x, (unicode, bytes)):
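multiple_text now accepts either a separator-delimited string or an iterable of values, and rewrites any embedded UI separator inside an item so it cannot be confused with the delimiter on display. Illustrative calls, using the tags configuration where ui_to_list is ',' and list_to_ui is ', ' (as exercised by the new test in writing.py):

    multiple_text(',', ', ', 'tag one, News')          # -> ('tag one', 'News')
    multiple_text(',', ', ', ('tag two', 'sep,sep2'))  # -> ('tag two', 'sep;sep2')
    multiple_text(',', ', ', None)                     # -> ()

Returning a tuple instead of a generator also means the result can be safely reused.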
@@ -92,7 +100,8 @@ def get_adapter(name, metadata):
     dt = metadata['datatype']
     if dt == 'text':
         if metadata['is_multiple']:
-            ans = partial(multiple_text, metadata['is_multiple']['ui_to_list'])
+            m = metadata['is_multiple']
+            ans = partial(multiple_text, m['ui_to_list'], m['list_to_ui'])
         else:
             ans = single_text
     elif dt == 'series':
@@ -132,7 +141,7 @@ def get_adapter(name, metadata):

 def one_one_in_books(book_id_val_map, db, field, *args):
     'Set a one-one field in the books table'
     if book_id_val_map:
-        sequence = tuple((sqlite_datetime(v), k) for k, v in book_id_val_map.iteritems())
+        sequence = ((sqlite_datetime(v), k) for k, v in book_id_val_map.iteritems())
         db.conn.executemany(
             'UPDATE books SET %s=? WHERE id=?'%field.metadata['column'], sequence)
         field.table.book_col_map.update(book_id_val_map)

@@ -150,7 +159,7 @@ def one_one_in_other(book_id_val_map, db, field, *args):
     if updated:
         db.conn.executemany('INSERT OR REPLACE INTO %s(book,%s) VALUES (?,?)'%(
             field.metadata['table'], field.metadata['column']),
-            tuple((k, sqlite_datetime(v)) for k, v in updated.iteritems()))
+            ((k, sqlite_datetime(v)) for k, v in updated.iteritems()))
         field.table.book_col_map.update(updated)
     return set(book_id_val_map)
@@ -178,6 +187,44 @@ def safe_lower(x):
     except (TypeError, ValueError, KeyError, AttributeError):
         return x

+def get_db_id(val, db, m, table, kmap, rid_map, allow_case_change,
+              case_changes, val_map, is_authors=False):
+    ''' Get the db id for the value val. If val does not exist in the db it is
+    inserted into the db. '''
+    kval = kmap(val)
+    item_id = rid_map.get(kval, None)
+    if item_id is None:
+        if is_authors:
+            aus = author_to_author_sort(val)
+            db.conn.execute('INSERT INTO authors(name,sort) VALUES (?,?)',
+                            (val.replace(',', '|'), aus))
+        else:
+            db.conn.execute('INSERT INTO %s(%s) VALUES (?)'%(
+                m['table'], m['column']), (val,))
+        item_id = rid_map[kval] = db.conn.last_insert_rowid()
+        table.id_map[item_id] = val
+        table.col_book_map[item_id] = set()
+        if is_authors:
+            table.asort_map[item_id] = aus
+            table.alink_map[item_id] = ''
+    elif allow_case_change and val != table.id_map[item_id]:
+        case_changes[item_id] = val
+    val_map[val] = item_id
+
+def change_case(case_changes, dirtied, db, table, m, is_authors=False):
+    if is_authors:
+        vals = ((val.replace(',', '|'), item_id) for item_id, val in
+                case_changes.iteritems())
+    else:
+        vals = ((val, item_id) for item_id, val in case_changes.iteritems())
+    db.conn.executemany(
+        'UPDATE %s SET %s=? WHERE id=?'%(m['table'], m['column']), vals)
+    for item_id, val in case_changes.iteritems():
+        table.id_map[item_id] = val
+        dirtied.update(table.col_book_map[item_id])
+        if is_authors:
+            table.asort_map[item_id] = author_to_author_sort(val)
+
 def many_one(book_id_val_map, db, field, allow_case_change, *args):
     dirtied = set()
     m = field.metadata
@@ -185,108 +232,61 @@ def many_one(book_id_val_map, db, field, allow_case_change, *args):
     dt = m['datatype']
     is_custom_series = dt == 'series' and table.name.startswith('#')

-    # Map values to their canonical form for later comparison
+    # Map values to db ids, including any new values
     kmap = safe_lower if dt in {'text', 'series'} else lambda x:x
+    rid_map = {kmap(item):item_id for item_id, item in table.id_map.iteritems()}
+    val_map = {None:None}
+    case_changes = {}
+    for val in book_id_val_map.itervalues():
+        if val is not None:
+            get_db_id(val, db, m, table, kmap, rid_map, allow_case_change,
+                      case_changes, val_map)
+    if case_changes:
+        change_case(case_changes, dirtied, db, table, m)
+    book_id_item_id_map = {k:val_map[v] for k, v in book_id_val_map.iteritems()}

     # Ignore those items whose value is the same as the current value
-    no_changes = {k:nval for k, nval in book_id_val_map.iteritems() if
-                  kmap(nval) == kmap(field.for_book(k, default_value=None))}
-    for book_id in no_changes:
-        del book_id_val_map[book_id]
+    book_id_item_id_map = {k:v for k, v in book_id_item_id_map.iteritems()
+                           if v != table.book_col_map.get(k, None)}
+    dirtied |= set(book_id_item_id_map)

-    # If we are allowed case changes check that none of the ignored items are
-    # case changes. If they are, update the item's case in the db.
-    if allow_case_change:
-        for book_id, nval in no_changes.iteritems():
-            if nval is not None and nval != field.for_book(
-                    book_id, default_value=None):
-                # Change of case
-                item_id = table.book_col_map[book_id]
-                db.conn.execute('UPDATE %s SET %s=? WHERE id=?'%(
-                    m['table'], m['column']), (nval, item_id))
-                table.id_map[item_id] = nval
-                dirtied |= table.col_book_map[item_id]
+    # Update the book->col and col->book maps
+    deleted = set()
+    updated = {}
+    for book_id, item_id in book_id_item_id_map.iteritems():
+        old_item_id = table.book_col_map.get(book_id, None)
+        if old_item_id is not None:
+            table.col_book_map[old_item_id].discard(book_id)
+        if item_id is None:
+            table.book_col_map.pop(book_id, None)
+            deleted.add(book_id)
+        else:
+            table.book_col_map[book_id] = item_id
+            table.col_book_map[item_id].add(book_id)
+            updated[book_id] = item_id

-    deleted = {k:v for k, v in book_id_val_map.iteritems() if v is None}
-    updated = {k:v for k, v in book_id_val_map.iteritems() if v is not None}
-
-    link_table = table.link_table
-
+    # Update the db link table
     if deleted:
-        db.conn.executemany('DELETE FROM %s WHERE book=?'%link_table,
-            tuple((book_id,) for book_id in deleted))
-        for book_id in deleted:
-            item_id = table.book_col_map.pop(book_id, None)
-            if item_id is not None:
-                table.col_book_map[item_id].discard(book_id)
-        dirtied |= set(deleted)
-
+        db.conn.executemany('DELETE FROM %s WHERE book=?'%table.link_table,
+                            ((k,) for k in deleted))
     if updated:
-        rid_map = {kmap(v):k for k, v in table.id_map.iteritems()}
-        book_id_item_id_map = {k:rid_map.get(kmap(v), None) for k, v in
-                               book_id_val_map.iteritems()}
-
-        # items that dont yet exist
-        new_items = {k:v for k, v in updated.iteritems() if
-                     book_id_item_id_map[k] is None}
-        # items that already exist
-        changed_items = {k:book_id_item_id_map[k] for k in updated if
-                         book_id_item_id_map[k] is not None}
-
-        def sql_update(imap):
-            sql = (
-                'DELETE FROM {0} WHERE book=?; INSERT INTO {0}(book,{1},extra) VALUES(?, ?, 1.0)'
-                if is_custom_series else
-                'DELETE FROM {0} WHERE book=?; INSERT INTO {0}(book,{1}) VALUES(?, ?)'
-            )
-            db.conn.executemany(sql.format(link_table, m['link_column']),
-                tuple((book_id, book_id, item_id) for book_id, item_id in
-                      imap.iteritems()))
-
-        if new_items:
-            item_ids = {}
-            val_map = {}
-            for val in set(new_items.itervalues()):
-                lval = kmap(val)
-                if lval in val_map:
-                    item_id = val_map[lval]
-                else:
-                    db.conn.execute('INSERT INTO %s(%s) VALUES (?)'%(
-                        m['table'], m['column']), (val,))
-                    item_id = val_map[lval] = db.conn.last_insert_rowid()
-                item_ids[val] = item_id
-                table.id_map[item_id] = val
-            imap = {}
-            for book_id, val in new_items.iteritems():
-                item_id = item_ids[val]
-                old_item_id = table.book_col_map.get(book_id, None)
-                if old_item_id is not None:
-                    table.col_book_map[old_item_id].discard(book_id)
-                if item_id not in table.col_book_map:
-                    table.col_book_map[item_id] = set()
-                table.col_book_map[item_id].add(book_id)
-                table.book_col_map[book_id] = imap[book_id] = item_id
-            sql_update(imap)
-            dirtied |= set(imap)
-
-        if changed_items:
-            imap = {}
-            sql_update(changed_items)
-            for book_id, item_id in changed_items.iteritems():
-                old_item_id = table.book_col_map.get(book_id, None)
-                if old_item_id != item_id:
-                    table.book_col_map[book_id] = item_id
-                    table.col_book_map[item_id].add(book_id)
-                    if old_item_id is not None:
-                        table.col_book_map[old_item_id].discard(book_id)
-                    imap[book_id] = item_id
-            sql_update(imap)
-            dirtied |= set(imap)
+        sql = (
+            'DELETE FROM {0} WHERE book=?; INSERT INTO {0}(book,{1},extra) VALUES(?, ?, 1.0)'
+            if is_custom_series else
+            'DELETE FROM {0} WHERE book=?; INSERT INTO {0}(book,{1}) VALUES(?, ?)'
+        )
+        db.conn.executemany(sql.format(table.link_table, m['link_column']),
+            ((book_id, book_id, item_id) for book_id, item_id in
+             updated.iteritems()))

     # Remove no longer used items
     remove = {item_id for item_id in table.id_map if not
               table.col_book_map.get(item_id, False)}
     if remove:
         db.conn.executemany('DELETE FROM %s WHERE id=?'%m['table'],
-            tuple((item_id,) for item_id in remove))
+            ((item_id,) for item_id in remove))
         for item_id in remove:
             del table.id_map[item_id]
             table.col_book_map.pop(item_id, None)
@@ -294,6 +294,96 @@ def many_one(book_id_val_map, db, field, allow_case_change, *args):
     return dirtied
 # }}}

+# Many-Many fields {{{
+
+def many_many(book_id_val_map, db, field, allow_case_change, *args):
+    dirtied = set()
+    m = field.metadata
+    table = field.table
+    dt = m['datatype']
+    is_authors = field.name == 'authors'
+
+    # Map values to db ids, including any new values
+    kmap = safe_lower if dt == 'text' else lambda x:x
+    rid_map = {kmap(item):item_id for item_id, item in table.id_map.iteritems()}
+    val_map = {}
+    case_changes = {}
+    for vals in book_id_val_map.itervalues():
+        for val in vals:
+            get_db_id(val, db, m, table, kmap, rid_map, allow_case_change,
+                      case_changes, val_map, is_authors=is_authors)
+    if case_changes:
+        change_case(case_changes, dirtied, db, table, m, is_authors=is_authors)
+        if is_authors:
+            for item_id, val in case_changes.iteritems():
+                for book_id in table.col_book_map[item_id]:
+                    current_sort = field.db_author_sort_for_book(book_id)
+                    new_sort = field.author_sort_for_book(book_id)
+                    if strcmp(current_sort, new_sort) == 0:
+                        # The sort strings differ only by case, update the db
+                        # sort
+                        field.author_sort_field.writer.set_books({book_id:new_sort}, db)
+    book_id_item_id_map = {k:tuple(val_map[v] for v in vals)
+                           for k, vals in book_id_val_map.iteritems()}

+    # Ignore those items whose value is the same as the current value
+    book_id_item_id_map = {k:v for k, v in book_id_item_id_map.iteritems()
+                           if v != table.book_col_map.get(k, None)}
+    dirtied |= set(book_id_item_id_map)
+
+    # Update the book->col and col->book maps
+    deleted = set()
+    updated = {}
+    for book_id, item_ids in book_id_item_id_map.iteritems():
+        old_item_ids = table.book_col_map.get(book_id, None)
+        if old_item_ids:
+            for old_item_id in old_item_ids:
+                table.col_book_map[old_item_id].discard(book_id)
+        if item_ids:
+            table.book_col_map[book_id] = item_ids
+            for item_id in item_ids:
+                table.col_book_map[item_id].add(book_id)
+            updated[book_id] = item_ids
+        else:
+            table.book_col_map.pop(book_id, None)
+            deleted.add(book_id)
+
+    # Update the db link table
+    if deleted:
+        db.conn.executemany('DELETE FROM %s WHERE book=?'%table.link_table,
+                            ((k,) for k in deleted))
+    if updated:
+        vals = (
+            (book_id, val) for book_id, vals in updated.iteritems()
+            for val in vals
+        )
+        db.conn.executemany('DELETE FROM %s WHERE book=?'%table.link_table,
+                            ((k,) for k in updated))
+        db.conn.executemany('INSERT INTO {0}(book,{1}) VALUES(?, ?)'.format(
+            table.link_table, m['link_column']), vals)
+        if is_authors:
+            aus_map = {book_id:field.author_sort_for_book(book_id) for book_id
+                       in updated}
+            field.author_sort_field.writer.set_books(aus_map, db)
+
+    # Remove no longer used items
+    remove = {item_id for item_id in table.id_map if not
+              table.col_book_map.get(item_id, False)}
+    if remove:
+        db.conn.executemany('DELETE FROM %s WHERE id=?'%m['table'],
+            ((item_id,) for item_id in remove))
+        for item_id in remove:
+            del table.id_map[item_id]
+            table.col_book_map.pop(item_id, None)
+            if is_authors:
+                table.asort_map.pop(item_id, None)
+                table.alink_map.pop(item_id, None)
+
+    return dirtied
+# }}}
+
 def dummy(book_id_val_map, *args):
     return set()
@@ -311,9 +401,7 @@ class Writer(object):
         elif self.name[0] == '#' and self.name.endswith('_index'):
             self.set_books_func = custom_series_index
         elif field.is_many_many:
-            # TODO: Implement this
-            pass
-            # TODO: Remember to change commas to | when writing authors to sqlite
+            self.set_books_func = many_many
         elif field.is_many:
             self.set_books_func = (self.set_books_for_enum if dt ==
                                    'enumeration' else many_one)
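With set_books_func wired up, multi-valued fields flow through Cache.set_field like the one-one and many-one cases. The round trip, using values from test_many_many_basic above:

    # set_field returns the set of book ids whose metadata changed
    cache.set_field('authors', {3: 'Kovid Goyal & Divok Layog'})  # -> {3}
    cache.field_for('author_sort', 3)   # -> 'Goyal, Kovid & Layog, Divok'
    # a case-only rename dirties every book sharing that author:
    cache.set_field('authors', {1: 'KoviD GoyaL'})                # -> {1, 3}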

View File

@@ -7,9 +7,10 @@ __docformat__ = 'restructuredtext en'

 import cStringIO, ctypes, datetime, os, platform, re, shutil, sys, tempfile, time

-from calibre.constants import __appname__, __version__, cache_dir, DEBUG as CALIBRE_DEBUG
 from calibre import fit_image, confirm_config_name, strftime as _strftime
-from calibre.constants import isosx, iswindows, cache_dir as _cache_dir
+from calibre.constants import (
+    __appname__, __version__, DEBUG as CALIBRE_DEBUG, isosx, iswindows,
+    cache_dir as _cache_dir)
 from calibre.devices.errors import OpenFeedback, UserFeedback
 from calibre.devices.usbms.deviceconfig import DeviceConfig
 from calibre.devices.interface import DevicePlugin

@@ -290,8 +291,6 @@ class ITUNES(DriverBase):
     # Properties
     cached_books = {}
-    cache_dir = os.path.join(_cache_dir(), 'itunes')
-    archive_path = os.path.join(cache_dir, "thumbs.zip")
     calibre_library_path = prefs['library_path']
     description_prefix = "added by calibre"
     ejected = False

@@ -312,7 +311,7 @@ class ITUNES(DriverBase):

     @property
     def cache_dir(self):
-        return os.path.join(cache_dir(), 'itunes')
+        return os.path.join(_cache_dir(), 'itunes')

     @property
     def archive_path(self):
View File

@@ -88,7 +88,7 @@ class Container(object):
                 self.mime_map[name] = guess_type('a.opf')

         if not hasattr(self, 'opf_name'):
-            raise InvalidBook('Book has no OPF file')
+            raise InvalidBook('Could not locate opf file: %r'%opfpath)

         # Update mime map with data from the OPF
         for item in self.opf_xpath('//opf:manifest/opf:item[@href and @media-type]'):

View File

@@ -10,6 +10,7 @@ assumes a prior call to the flatcss transform.
 '''

 import os, math, functools, collections, re, copy
+from collections import OrderedDict

 from lxml.etree import XPath as _XPath
 from lxml import etree

@@ -106,8 +107,7 @@ class Split(object):
                     continue
                 for elem in selector(body[0]):
                     if elem not in body:
-                        if before:
-                            elem.set('pb_before', '1')
+                        elem.set('pb_before', '1' if before else '0')
                         page_breaks.add(elem)

         for i, elem in enumerate(item.data.iter()):

@@ -134,14 +134,12 @@ class Split(object):
             id = 'calibre_pb_%d'%i
             x.set('id', id)
             xp = XPath('//*[@id=%r]'%id)
-            page_breaks_.append((xp,
-                x.get('pb_before', False)))
+            page_breaks_.append((xp, x.get('pb_before', '0') == '1'))
             page_break_ids.append(id)

         for elem in item.data.iter():
             elem.attrib.pop('pb_order', False)
-            if elem.get('pb_before', False):
-                elem.attrib.pop('pb_before')
+            elem.attrib.pop('pb_before', False)

         return page_breaks_, page_break_ids
@@ -223,22 +221,27 @@ class FlowSplitter(object):
             self.commit()

     def split_on_page_breaks(self, orig_tree):
-        ordered_ids = []
-        for elem in orig_tree.xpath('//*[@id]'):
-            id = elem.get('id')
-            if id in self.page_break_ids:
-                ordered_ids.append(self.page_breaks[self.page_break_ids.index(id)])
+        ordered_ids = OrderedDict()
+        all_page_break_ids = frozenset(self.page_break_ids)
+        for elem_id in orig_tree.xpath('//*/@id'):
+            if elem_id in all_page_break_ids:
+                ordered_ids[elem_id] = self.page_breaks[
+                    self.page_break_ids.index(elem_id)]

-        self.trees = []
-        tree = orig_tree
-        for pattern, before in ordered_ids:
-            elem = pattern(tree)
-            if elem:
-                self.log.debug('\t\tSplitting on page-break at %s'%
-                        elem[0].get('id'))
-                before, after = self.do_split(tree, elem[0], before)
-                self.trees.append(before)
-                tree = after
-        self.trees.append(tree)
+        self.trees = [orig_tree]
+        while ordered_ids:
+            pb_id, (pattern, before) = ordered_ids.iteritems().next()
+            del ordered_ids[pb_id]
+            for i in xrange(len(self.trees)-1, -1, -1):
+                tree = self.trees[i]
+                elem = pattern(tree)
+                if elem:
+                    self.log.debug('\t\tSplitting on page-break at id=%s'%
+                                   elem[0].get('id'))
+                    before_tree, after_tree = self.do_split(tree, elem[0], before)
+                    self.trees[i:i+1] = [before_tree, after_tree]
+                    break

         trees, ids = [], set([])
         for tree in self.trees:
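The rewrite leans on OrderedDict preserving insertion (that is, document) order: ordered_ids.iteritems().next() always hands back the earliest remaining page break, and walking self.trees from the back finds whichever split piece currently contains it, so breaks that land in already-split fragments are no longer lost. The FIFO-pop idiom in isolation (Python 2):

    from collections import OrderedDict
    d = OrderedDict([('pb1', 'first'), ('pb2', 'second')])
    while d:
        key, val = d.iteritems().next()  # earliest remaining entry
        del d[key]
        print key, val
    # prints: pb1 first, then pb2 second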
@@ -289,7 +292,6 @@ class FlowSplitter(object):
             if self.opts.verbose > 3 and npath != path:
                 self.log.debug('\t\t\tMoved split point %s to %s'%(path, npath))
-
         return npath

     def do_split(self, tree, split_point, before):

@@ -304,7 +306,11 @@ class FlowSplitter(object):
         root = tree.getroot()
         root2 = tree2.getroot()
         body, body2 = map(self.get_body, (root, root2))
-        path = self.adjust_split_point(root, path)
+        if before:
+            # We cannot adjust for after since moving an after split point to a
+            # parent will cause breakage if the parent contains any content
+            # after the original split point
+            path = self.adjust_split_point(root, path)
         split_point = root.xpath(path)[0]
         split_point2 = root2.xpath(path)[0]

View File

@@ -171,7 +171,7 @@ class ZshCompleter(object): # {{{
             arg = ''
             if opt.takes_value():
                 arg = ':"%s":'%h
-                if opt.dest in {'debug_pipeline', 'to_dir', 'outbox', 'with_library', 'library_path'}:
+                if opt.dest in {'extract_to', 'debug_pipeline', 'to_dir', 'outbox', 'with_library', 'library_path'}:
                     arg += "'_path_files -/'"
                 elif opt.choices:
                     arg += "(%s)"%'|'.join(opt.choices)