diff --git a/recipes/endgadget.recipe b/recipes/endgadget.recipe index 8a2181fdc3..83d994a6da 100644 --- a/recipes/endgadget.recipe +++ b/recipes/endgadget.recipe @@ -1,7 +1,7 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008 - 2009, Darko Miletic ' +__copyright__ = 'Copyright 2011 Starson17' ''' engadget.com ''' @@ -9,14 +9,29 @@ engadget.com from calibre.web.feeds.news import BasicNewsRecipe class Engadget(BasicNewsRecipe): - title = u'Engadget' - __author__ = 'Darko Miletic' + title = u'Engadget_Full' + __author__ = 'Starson17' + __version__ = 'v1.00' + __date__ = '02, July 2011' description = 'Tech news' language = 'en' oldest_article = 7 max_articles_per_feed = 100 no_stylesheets = True - use_embedded_content = True + use_embedded_content = False + remove_javascript = True + remove_empty_feeds = True - feeds = [ (u'Posts', u'http://www.engadget.com/rss.xml')] + keep_only_tags = [dict(name='div', attrs={'class':['post_content permalink ','post_content permalink alt-post-full']})] + remove_tags = [dict(name='div', attrs={'class':['filed_under','post_footer']})] + remove_tags_after = [dict(name='div', attrs={'class':['post_footer']})] + + feeds = [(u'Posts', u'http://www.engadget.com/rss.xml')] + + extra_css = ''' + h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} + h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;} + p{font-family:Arial,Helvetica,sans-serif;font-size:small;} + body{font-family:Helvetica,Arial,sans-serif;font-size:small;} + ''' diff --git a/recipes/independent.recipe b/recipes/independent.recipe index 2ce6b24c4f..0a94384b37 100644 --- a/recipes/independent.recipe +++ b/recipes/independent.recipe @@ -6,7 +6,7 @@ class TheIndependent(BasicNewsRecipe): language = 'en_GB' __author__ = 'Krittika Goyal' oldest_article = 1 #days - max_articles_per_feed = 25 + max_articles_per_feed = 30 encoding = 'latin1' no_stylesheets = True @@ -25,24 +25,39 @@ class 
TheIndependent(BasicNewsRecipe): 'http://www.independent.co.uk/news/uk/rss'), ('World', 'http://www.independent.co.uk/news/world/rss'), - ('Sport', - 'http://www.independent.co.uk/sport/rss'), - ('Arts and Entertainment', - 'http://www.independent.co.uk/arts-entertainment/rss'), ('Business', 'http://www.independent.co.uk/news/business/rss'), - ('Life and Style', - 'http://www.independent.co.uk/life-style/gadgets-and-tech/news/rss'), - ('Science', - 'http://www.independent.co.uk/news/science/rss'), ('People', 'http://www.independent.co.uk/news/people/rss'), + ('Science', + 'http://www.independent.co.uk/news/science/rss'), ('Media', 'http://www.independent.co.uk/news/media/rss'), - ('Health and Families', - 'http://www.independent.co.uk/life-style/health-and-families/rss'), + ('Education', + 'http://www.independent.co.uk/news/education/rss'), ('Obituaries', 'http://www.independent.co.uk/news/obituaries/rss'), + + ('Opinion', + 'http://www.independent.co.uk/opinion/rss'), + + ('Environment', + 'http://www.independent.co.uk/environment/rss'), + + ('Sport', + 'http://www.independent.co.uk/sport/rss'), + + ('Life and Style', + 'http://www.independent.co.uk/life-style/rss'), + + ('Arts and Entertainment', + 'http://www.independent.co.uk/arts-entertainment/rss'), + + ('Travel', + 'http://www.independent.co.uk/travel/rss'), + + ('Money', + 'http://www.independent.co.uk/money/rss'), ] def preprocess_html(self, soup): diff --git a/recipes/scmp.recipe b/recipes/scmp.recipe new file mode 100644 index 0000000000..1da7b9e1bc --- /dev/null +++ b/recipes/scmp.recipe @@ -0,0 +1,80 @@ +__license__ = 'GPL v3' +__copyright__ = '2010, Darko Miletic ' +''' +scmp.com +''' + +import re +from calibre.web.feeds.news import BasicNewsRecipe + +class SCMP(BasicNewsRecipe): + title = 'South China Morning Post' + __author__ = 'llam' + description = "SCMP.com, Hong Kong's premier online English daily provides exclusive up-to-date news, audio video news, podcasts, RSS Feeds, Blogs, breaking news, 
top stories, award winning news and analysis on Hong Kong and China." + publisher = 'South China Morning Post Publishers Ltd.' + category = 'SCMP, Online news, Hong Kong News, China news, Business news, English newspaper, daily newspaper, Lifestyle news, Sport news, Audio Video news, Asia news, World news, economy news, investor relations news, RSS Feeds' + oldest_article = 2 + delay = 1 + max_articles_per_feed = 200 + no_stylesheets = True + encoding = 'utf-8' + use_embedded_content = False + language = 'en_CN' + remove_empty_feeds = True + needs_subscription = True + publication_type = 'newspaper' + masthead_url = 'http://www.scmp.com/images/logo_scmp_home.gif' + extra_css = ' body{font-family: Arial,Helvetica,sans-serif } ' + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + } + + def get_browser(self): + br = BasicNewsRecipe.get_browser() + #br.set_debug_http(True) + #br.set_debug_responses(True) + #br.set_debug_redirects(True) + if self.username is not None and self.password is not None: + br.open('http://www.scmp.com/portal/site/SCMP/') + br.select_form(name='loginForm') + br['Login' ] = self.username + br['Password'] = self.password + br.submit() + return br + + remove_attributes=['width','height','border'] + + keep_only_tags = [ + dict(attrs={'id':['ART','photoBox']}) + ,dict(attrs={'class':['article_label','article_byline','article_body']}) + ] + + preprocess_regexps = [ + (re.compile(r'

).)*', re.DOTALL|re.IGNORECASE), + lambda match: ''), + ] + + feeds = [ + (u'Business' , u'http://www.scmp.com/rss/business.xml' ) + ,(u'Hong Kong' , u'http://www.scmp.com/rss/hong_kong.xml' ) + ,(u'China' , u'http://www.scmp.com/rss/china.xml' ) + ,(u'Asia & World' , u'http://www.scmp.com/rss/news_asia_world.xml') + ,(u'Opinion' , u'http://www.scmp.com/rss/opinion.xml' ) + ,(u'LifeSTYLE' , u'http://www.scmp.com/rss/lifestyle.xml' ) + ,(u'Sport' , u'http://www.scmp.com/rss/sport.xml' ) + ] + + def print_version(self, url): + rpart, sep, rest = url.rpartition('&') + return rpart #+ sep + urllib.quote_plus(rest) + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + items = soup.findAll(src="/images/label_icon.gif") + [item.extract() for item in items] + return self.adeify_images(soup) diff --git a/recipes/sizinti_derigisi.recipe b/recipes/sizinti_derigisi.recipe new file mode 100644 index 0000000000..d05648170e --- /dev/null +++ b/recipes/sizinti_derigisi.recipe @@ -0,0 +1,40 @@ +# -*- coding: utf-8 -*- + +from calibre.web.feeds.news import BasicNewsRecipe + +class TodaysZaman_en(BasicNewsRecipe): + title = u'Sızıntı Dergisi' + __author__ = u'thomass' + description = 'a Turkey based daily for national and international news in the fields of business, diplomacy, politics, culture, arts, sports and economics, in addition to commentaries, specials and features' + oldest_article = 30 + max_articles_per_feed =80 + no_stylesheets = True + #delay = 1 + #use_embedded_content = False + encoding = 'utf-8' + #publisher = ' ' + category = 'dergi, ilim, kültür, bilim,Türkçe' + language = 'tr' + publication_type = 'magazine' + #extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} ' + #keep_only_tags = [dict(name='h1', 
attrs={'class':['georgia_30']})] + + #remove_attributes = ['aria-describedby'] + #remove_tags = [dict(name='div', attrs={'id':['renk10']}) ] + cover_img_url = 'http://www.sizinti.com.tr/images/sizintiprint.jpg' + masthead_url = 'http://www.sizinti.com.tr/images/sizintiprint.jpg' + remove_tags_before = dict(id='content-right') + + + #remove_empty_feeds= True + #remove_attributes = ['width','height'] + + feeds = [ + ( u'Sızıntı', u'http://www.sizinti.com.tr/rss'), + ] + + #def preprocess_html(self, soup): + # return self.adeify_images(soup) + #def print_version(self, url): #there is a probem caused by table format + #return url.replace('http://www.todayszaman.com/newsDetail_getNewsById.action?load=detay&', 'http://www.todayszaman.com/newsDetail_openPrintPage.action?') + diff --git a/recipes/telegraph_uk.recipe b/recipes/telegraph_uk.recipe index 5fe5b168b8..157cfa99e9 100644 --- a/recipes/telegraph_uk.recipe +++ b/recipes/telegraph_uk.recipe @@ -56,6 +56,7 @@ class TelegraphUK(BasicNewsRecipe): ,(u'Sport' , u'http://www.telegraph.co.uk/sport/rss' ) ,(u'Earth News' , u'http://www.telegraph.co.uk/earth/earthnews/rss' ) ,(u'Comment' , u'http://www.telegraph.co.uk/comment/rss' ) + ,(u'Travel' , u'http://www.telegraph.co.uk/travel/rss' ) ,(u'How about that?', u'http://www.telegraph.co.uk/news/newstopics/howaboutthat/rss' ) ] diff --git a/resources/quick_start.epub b/resources/quick_start.epub index 3b289537a6..882ad76765 100644 Binary files a/resources/quick_start.epub and b/resources/quick_start.epub differ diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index 4858b585ae..82d1d2ff01 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -1387,15 +1387,6 @@ class StoreOpenBooksStore(StoreBase): drm_free_only = True headquarters = 'US' -class StoreOpenLibraryStore(StoreBase): - name = 'Open Library' - description = u'One web page for every book ever published. The goal is to be a true online library. 
Over 20 million records from a variety of large catalogs as well as single contributions, with more on the way.' - actual_plugin = 'calibre.gui2.store.stores.open_library_plugin:OpenLibraryStore' - - drm_free_only = True - headquarters = 'US' - formats = ['DAISY', 'DJVU', 'EPUB', 'MOBI', 'PDF', 'TXT'] - class StoreOReillyStore(StoreBase): name = 'OReilly' description = u'Programming and tech ebooks from OReilly.' @@ -1514,7 +1505,6 @@ plugins += [ StoreMobileReadStore, StoreNextoStore, StoreOpenBooksStore, - StoreOpenLibraryStore, StoreOReillyStore, StorePragmaticBookshelfStore, StoreSmashwordsStore, diff --git a/src/calibre/db/__init__.py b/src/calibre/db/__init__.py index 4384cab2da..3c7c86b932 100644 --- a/src/calibre/db/__init__.py +++ b/src/calibre/db/__init__.py @@ -63,5 +63,4 @@ Various things that require other things before they can be migrated: columns/categories/searches info into self.field_metadata. Finally, implement metadata dirtied functionality. - ''' diff --git a/src/calibre/db/backend.py b/src/calibre/db/backend.py index ba683dde50..0716cf691c 100644 --- a/src/calibre/db/backend.py +++ b/src/calibre/db/backend.py @@ -17,12 +17,13 @@ from calibre import isbytestring, force_unicode, prints from calibre.constants import (iswindows, filesystem_encoding, preferred_encoding) from calibre.ptempfile import PersistentTemporaryFile -from calibre.library.schema_upgrades import SchemaUpgrade +from calibre.db.schema_upgrades import SchemaUpgrade from calibre.library.field_metadata import FieldMetadata from calibre.ebooks.metadata import title_sort, author_to_author_sort from calibre.utils.icu import strcmp from calibre.utils.config import to_json, from_json, prefs, tweaks -from calibre.utils.date import utcfromtimestamp +from calibre.utils.date import utcfromtimestamp, parse_date +from calibre.utils.filenames import is_case_sensitive from calibre.db.tables import (OneToOneTable, ManyToOneTable, ManyToManyTable, SizeTable, FormatsTable, AuthorsTable, 
IdentifiersTable) # }}} @@ -30,7 +31,9 @@ from calibre.db.tables import (OneToOneTable, ManyToOneTable, ManyToManyTable, ''' Differences in semantics from pysqlite: - 1. execute/executemany/executescript operate in autocommit mode + 1. execute/executemany operate in autocommit mode + 2. There is no fetchone() method on cursor objects, instead use next() + 3. There is no executescript ''' @@ -119,6 +122,66 @@ def icu_collator(s1, s2): return strcmp(force_unicode(s1, 'utf-8'), force_unicode(s2, 'utf-8')) # }}} +# Unused aggregators {{{ +def Concatenate(sep=','): + '''String concatenation aggregator for sqlite''' + + def step(ctxt, value): + if value is not None: + ctxt.append(value) + + def finalize(ctxt): + if not ctxt: + return None + return sep.join(ctxt) + + return ([], step, finalize) + +def SortedConcatenate(sep=','): + '''String concatenation aggregator for sqlite, sorted by supplied index''' + + def step(ctxt, ndx, value): + if value is not None: + ctxt[ndx] = value + + def finalize(ctxt): + if len(ctxt) == 0: + return None + return sep.join(map(ctxt.get, sorted(ctxt.iterkeys()))) + + return ({}, step, finalize) + +def IdentifiersConcat(): + '''String concatenation aggregator for the identifiers map''' + + def step(ctxt, key, val): + ctxt.append(u'%s:%s'%(key, val)) + + def finalize(ctxt): + return ','.join(ctxt) + + return ([], step, finalize) + +def AumSortedConcatenate(): + '''String concatenation aggregator for the author sort map''' + + def step(ctxt, ndx, author, sort, link): + if author is not None: + ctxt[ndx] = ':::'.join((author, sort, link)) + + def finalize(ctxt): + keys = list(ctxt.iterkeys()) + l = len(keys) + if l == 0: + return None + if l == 1: + return ctxt[keys[0]] + return ':#:'.join([ctxt[v] for v in sorted(keys)]) + + return ({}, step, finalize) + +# }}} + class Connection(apsw.Connection): # {{{ BUSY_TIMEOUT = 2000 # milliseconds @@ -128,32 +191,46 @@ class Connection(apsw.Connection): # {{{ self.setbusytimeout(self.BUSY_TIMEOUT) 
self.execute('pragma cache_size=5000') - self.conn.execute('pragma temp_store=2') + self.execute('pragma temp_store=2') - encoding = self.execute('pragma encoding').fetchone()[0] - self.conn.create_collation('PYNOCASE', partial(pynocase, + encoding = self.execute('pragma encoding').next()[0] + self.createcollation('PYNOCASE', partial(pynocase, encoding=encoding)) - self.conn.create_function('title_sort', 1, title_sort) - self.conn.create_function('author_to_author_sort', 1, - _author_to_author_sort) - - self.conn.create_function('uuid4', 0, lambda : str(uuid.uuid4())) + self.createscalarfunction('title_sort', title_sort, 1) + self.createscalarfunction('author_to_author_sort', + _author_to_author_sort, 1) + self.createscalarfunction('uuid4', lambda : str(uuid.uuid4()), + 0) # Dummy functions for dynamically created filters - self.conn.create_function('books_list_filter', 1, lambda x: 1) - self.conn.create_collation('icucollate', icu_collator) + self.createscalarfunction('books_list_filter', lambda x: 1, 1) + self.createcollation('icucollate', icu_collator) + + # Legacy aggregators (never used) but present for backwards compat + self.createaggregatefunction('sortconcat', SortedConcatenate, 2) + self.createaggregatefunction('sortconcat_bar', + partial(SortedConcatenate, sep='|'), 2) + self.createaggregatefunction('sortconcat_amper', + partial(SortedConcatenate, sep='&'), 2) + self.createaggregatefunction('identifiers_concat', + IdentifiersConcat, 2) + self.createaggregatefunction('concat', Concatenate, 1) + self.createaggregatefunction('aum_sortconcat', + AumSortedConcatenate, 4) def create_dynamic_filter(self, name): f = DynamicFilter(name) - self.conn.create_function(name, 1, f) + self.createscalarfunction(name, f, 1) def get(self, *args, **kw): ans = self.cursor().execute(*args) if kw.get('all', True): return ans.fetchall() - for row in ans: - return ans[0] + try: + return ans.next()[0] + except (StopIteration, IndexError): + return None def execute(self, sql, 
bindings=None): cursor = self.cursor() @@ -162,14 +239,9 @@ class Connection(apsw.Connection): # {{{ def executemany(self, sql, sequence_of_bindings): return self.cursor().executemany(sql, sequence_of_bindings) - def executescript(self, sql): - with self: - # Use an explicit savepoint so that even if this is called - # while a transaction is active, it is atomic - return self.cursor().execute(sql) # }}} -class DB(object, SchemaUpgrade): +class DB(object): PATH_LIMIT = 40 if iswindows else 100 WINDOWS_LIBRARY_PATH_LIMIT = 75 @@ -213,25 +285,24 @@ class DB(object, SchemaUpgrade): shutil.copyfile(self.dbpath, pt.name) self.dbpath = pt.name - self.is_case_sensitive = (not iswindows and - not os.path.exists(self.dbpath.replace('metadata.db', - 'MeTAdAtA.dB'))) + if not os.path.exists(os.path.dirname(self.dbpath)): + os.makedirs(os.path.dirname(self.dbpath)) self._conn = None - if self.user_version == 0: self.initialize_database() - with self.conn: - SchemaUpgrade.__init__(self) + if not os.path.exists(self.library_path): + os.makedirs(self.library_path) + self.is_case_sensitive = is_case_sensitive(self.library_path) + + SchemaUpgrade(self.conn, self.library_path, self.field_metadata) # Guarantee that the library_id is set self.library_id - self.initialize_prefs(default_prefs) - # Fix legacy triggers and columns - self.conn.executescript(''' + self.conn.execute(''' DROP TRIGGER IF EXISTS author_insert_trg; CREATE TEMP TRIGGER author_insert_trg AFTER INSERT ON authors @@ -248,7 +319,11 @@ class DB(object, SchemaUpgrade): UPDATE authors SET sort=author_to_author_sort(name) WHERE sort IS NULL; ''') - def initialize_prefs(self, default_prefs): + self.initialize_prefs(default_prefs) + self.initialize_custom_columns() + self.initialize_tables() + + def initialize_prefs(self, default_prefs): # {{{ self.prefs = DBPrefs(self) if default_prefs is not None and not self._exists: @@ -339,15 +414,236 @@ class DB(object, SchemaUpgrade): cats_changed = True if cats_changed: 
self.prefs.set('user_categories', user_cats) + # }}} + + def initialize_custom_columns(self): # {{{ + with self.conn: + # Delete previously marked custom columns + for record in self.conn.get( + 'SELECT id FROM custom_columns WHERE mark_for_delete=1'): + num = record[0] + table, lt = self.custom_table_names(num) + self.conn.execute('''\ + DROP INDEX IF EXISTS {table}_idx; + DROP INDEX IF EXISTS {lt}_aidx; + DROP INDEX IF EXISTS {lt}_bidx; + DROP TRIGGER IF EXISTS fkc_update_{lt}_a; + DROP TRIGGER IF EXISTS fkc_update_{lt}_b; + DROP TRIGGER IF EXISTS fkc_insert_{lt}; + DROP TRIGGER IF EXISTS fkc_delete_{lt}; + DROP TRIGGER IF EXISTS fkc_insert_{table}; + DROP TRIGGER IF EXISTS fkc_delete_{table}; + DROP VIEW IF EXISTS tag_browser_{table}; + DROP VIEW IF EXISTS tag_browser_filtered_{table}; + DROP TABLE IF EXISTS {table}; + DROP TABLE IF EXISTS {lt}; + '''.format(table=table, lt=lt) + ) + self.conn.execute('DELETE FROM custom_columns WHERE mark_for_delete=1') + + # Load metadata for custom columns + self.custom_column_label_map, self.custom_column_num_map = {}, {} + triggers = [] + remove = [] + custom_tables = self.custom_tables + for record in self.conn.get( + 'SELECT label,name,datatype,editable,display,normalized,id,is_multiple FROM custom_columns'): + data = { + 'label':record[0], + 'name':record[1], + 'datatype':record[2], + 'editable':bool(record[3]), + 'display':json.loads(record[4]), + 'normalized':bool(record[5]), + 'num':record[6], + 'is_multiple':bool(record[7]), + } + if data['display'] is None: + data['display'] = {} + # set up the is_multiple separator dict + if data['is_multiple']: + if data['display'].get('is_names', False): + seps = {'cache_to_list': '|', 'ui_to_list': '&', 'list_to_ui': ' & '} + elif data['datatype'] == 'composite': + seps = {'cache_to_list': ',', 'ui_to_list': ',', 'list_to_ui': ', '} + else: + seps = {'cache_to_list': '|', 'ui_to_list': ',', 'list_to_ui': ', '} + else: + seps = {} + data['multiple_seps'] = seps + + table, lt = 
self.custom_table_names(data['num']) + if table not in custom_tables or (data['normalized'] and lt not in + custom_tables): + remove.append(data) + continue + + self.custom_column_label_map[data['label']] = data['num'] + self.custom_column_num_map[data['num']] = \ + self.custom_column_label_map[data['label']] = data + + # Create Foreign Key triggers + if data['normalized']: + trigger = 'DELETE FROM %s WHERE book=OLD.id;'%lt + else: + trigger = 'DELETE FROM %s WHERE book=OLD.id;'%table + triggers.append(trigger) + + if remove: + with self.conn: + for data in remove: + prints('WARNING: Custom column %r not found, removing.' % + data['label']) + self.conn.execute('DELETE FROM custom_columns WHERE id=?', + (data['num'],)) + + if triggers: + with self.conn: + self.conn.execute('''\ + CREATE TEMP TRIGGER custom_books_delete_trg + AFTER DELETE ON books + BEGIN + %s + END; + '''%(' \n'.join(triggers))) + + # Setup data adapters + def adapt_text(x, d): + if d['is_multiple']: + if x is None: + return [] + if isinstance(x, (str, unicode, bytes)): + x = x.split(d['multiple_seps']['ui_to_list']) + x = [y.strip() for y in x if y.strip()] + x = [y.decode(preferred_encoding, 'replace') if not isinstance(y, + unicode) else y for y in x] + return [u' '.join(y.split()) for y in x] + else: + return x if x is None or isinstance(x, unicode) else \ + x.decode(preferred_encoding, 'replace') + + def adapt_datetime(x, d): + if isinstance(x, (str, unicode, bytes)): + x = parse_date(x, assume_utc=False, as_utc=False) + return x + + def adapt_bool(x, d): + if isinstance(x, (str, unicode, bytes)): + x = x.lower() + if x == 'true': + x = True + elif x == 'false': + x = False + elif x == 'none': + x = None + else: + x = bool(int(x)) + return x + + def adapt_enum(x, d): + v = adapt_text(x, d) + if not v: + v = None + return v + + def adapt_number(x, d): + if x is None: + return None + if isinstance(x, (str, unicode, bytes)): + if x.lower() == 'none': + return None + if d['datatype'] == 'int': + 
return int(x) + return float(x) + + self.custom_data_adapters = { + 'float': adapt_number, + 'int': adapt_number, + 'rating':lambda x,d : x if x is None else min(10., max(0., float(x))), + 'bool': adapt_bool, + 'comments': lambda x,d: adapt_text(x, {'is_multiple':False}), + 'datetime' : adapt_datetime, + 'text':adapt_text, + 'series':adapt_text, + 'enumeration': adapt_enum + } + + # Create Tag Browser categories for custom columns + for k in sorted(self.custom_column_label_map.iterkeys()): + v = self.custom_column_label_map[k] + if v['normalized']: + is_category = True + else: + is_category = False + is_m = v['multiple_seps'] + tn = 'custom_column_{0}'.format(v['num']) + self.field_metadata.add_custom_field(label=v['label'], + table=tn, column='value', datatype=v['datatype'], + colnum=v['num'], name=v['name'], display=v['display'], + is_multiple=is_m, is_category=is_category, + is_editable=v['editable'], is_csp=False) + + # }}} + + def initialize_tables(self): # {{{ + tables = self.tables = {} + for col in ('title', 'sort', 'author_sort', 'series_index', 'comments', + 'timestamp', 'pubdate', 'uuid', 'path', 'cover', + 'last_modified'): + metadata = self.field_metadata[col].copy() + if col == 'comments': + metadata['table'], metadata['column'] = 'comments', 'text' + if not metadata['table']: + metadata['table'], metadata['column'] = 'books', ('has_cover' + if col == 'cover' else col) + if not metadata['column']: + metadata['column'] = col + tables[col] = OneToOneTable(col, metadata) + + for col in ('series', 'publisher', 'rating'): + tables[col] = ManyToOneTable(col, self.field_metadata[col].copy()) + + for col in ('authors', 'tags', 'formats', 'identifiers'): + cls = { + 'authors':AuthorsTable, + 'formats':FormatsTable, + 'identifiers':IdentifiersTable, + }.get(col, ManyToManyTable) + tables[col] = cls(col, self.field_metadata[col].copy()) + + tables['size'] = SizeTable('size', self.field_metadata['size'].copy()) + + for label, data in 
self.custom_column_label_map.iteritems(): + label = '#' + label + metadata = self.field_metadata[label].copy() + link_table = self.custom_table_names(data['num'])[1] + + if data['normalized']: + if metadata['is_multiple']: + tables[label] = ManyToManyTable(label, metadata, + link_table=link_table) + else: + tables[label] = ManyToOneTable(label, metadata, + link_table=link_table) + if metadata['datatype'] == 'series': + # Create series index table + label += '_index' + metadata = self.field_metadata[label].copy() + metadata['column'] = 'extra' + metadata['table'] = link_table + tables[label] = OneToOneTable(label, metadata) + else: + tables[label] = OneToOneTable(label, metadata) + # }}} @property def conn(self): if self._conn is None: - self._conn = apsw.Connection(self.dbpath) + self._conn = Connection(self.dbpath) if self._exists and self.user_version == 0: self._conn.close() os.remove(self.dbpath) - self._conn = apsw.Connection(self.dbpath) + self._conn = Connection(self.dbpath) return self._conn @dynamic_property @@ -365,13 +661,29 @@ class DB(object, SchemaUpgrade): def initialize_database(self): metadata_sqlite = P('metadata_sqlite.sql', data=True, allow_user_override=False).decode('utf-8') - self.conn.executescript(metadata_sqlite) + cur = self.conn.cursor() + cur.execute('BEGIN EXCLUSIVE TRANSACTION') + try: + cur.execute(metadata_sqlite) + except: + cur.execute('ROLLBACK') + else: + cur.execute('COMMIT') if self.user_version == 0: self.user_version = 1 # }}} # Database layer API {{{ + def custom_table_names(self, num): + return 'custom_column_%d'%num, 'books_custom_column_%d_link'%num + + @property + def custom_tables(self): + return set([x[0] for x in self.conn.get( + 'SELECT name FROM sqlite_master WHERE type="table" AND ' + '(name GLOB "custom_column_*" OR name GLOB "books_custom_column_*")')]) + @classmethod def exists_at(cls, path): return path and os.path.exists(os.path.join(path, 'metadata.db')) @@ -396,7 +708,7 @@ class DB(object, SchemaUpgrade): 
self.conn.execute(''' DELETE FROM library_id; INSERT INTO library_id (uuid) VALUES (?); - ''', self._library_id_) + ''', (self._library_id_,)) return property(doc=doc, fget=fget, fset=fset) @@ -405,39 +717,20 @@ class DB(object, SchemaUpgrade): return utcfromtimestamp(os.stat(self.dbpath).st_mtime) def read_tables(self): - tables = {} - for col in ('title', 'sort', 'author_sort', 'series_index', 'comments', - 'timestamp', 'published', 'uuid', 'path', 'cover', - 'last_modified'): - metadata = self.field_metadata[col].copy() - if metadata['table'] is None: - metadata['table'], metadata['column'] == 'books', ('has_cover' - if col == 'cover' else col) - tables[col] = OneToOneTable(col, metadata) - - for col in ('series', 'publisher', 'rating'): - tables[col] = ManyToOneTable(col, self.field_metadata[col].copy()) - - for col in ('authors', 'tags', 'formats', 'identifiers'): - cls = { - 'authors':AuthorsTable, - 'formats':FormatsTable, - 'identifiers':IdentifiersTable, - }.get(col, ManyToManyTable) - tables[col] = cls(col, self.field_metadata[col].copy()) - - tables['size'] = SizeTable('size', self.field_metadata['size'].copy()) + ''' + Read all data from the db into the python in-memory tables + ''' with self.conn: # Use a single transaction, to ensure nothing modifies # the db while we are reading - for table in tables.itervalues(): + for table in self.tables.itervalues(): try: - table.read() + table.read(self) except: prints('Failed to read table:', table.name) + import pprint + pprint.pprint(table.metadata) raise - return tables - # }}} diff --git a/src/calibre/db/schema_upgrades.py b/src/calibre/db/schema_upgrades.py new file mode 100644 index 0000000000..f3ca6f9852 --- /dev/null +++ b/src/calibre/db/schema_upgrades.py @@ -0,0 +1,618 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2011, Kovid Goyal ' 
+__docformat__ = 'restructuredtext en' + +import os + +from calibre import prints +from calibre.utils.date import isoformat, DEFAULT_DATE + +class SchemaUpgrade(object): + + def __init__(self, conn, library_path, field_metadata): + conn.execute('BEGIN EXCLUSIVE TRANSACTION') + self.conn = conn + self.library_path = library_path + self.field_metadata = field_metadata + # Upgrade database + try: + while True: + uv = self.conn.execute('pragma user_version').next()[0] + meth = getattr(self, 'upgrade_version_%d'%uv, None) + if meth is None: + break + else: + prints('Upgrading database to version %d...'%(uv+1)) + meth() + self.conn.execute('pragma user_version=%d'%(uv+1)) + except: + self.conn.execute('ROLLBACK') + raise + else: + self.conn.execute('COMMIT') + finally: + self.conn = self.field_metadata = None + + def upgrade_version_1(self): + ''' + Normalize indices. + ''' + self.conn.execute('''\ + DROP INDEX IF EXISTS authors_idx; + CREATE INDEX authors_idx ON books (author_sort COLLATE NOCASE, sort COLLATE NOCASE); + DROP INDEX IF EXISTS series_idx; + CREATE INDEX series_idx ON series (name COLLATE NOCASE); + DROP INDEX IF EXISTS series_sort_idx; + CREATE INDEX series_sort_idx ON books (series_index, id); + ''') + + def upgrade_version_2(self): + ''' Fix Foreign key constraints for deleting from link tables. 
''' + script = '''\ + DROP TRIGGER IF EXISTS fkc_delete_books_%(ltable)s_link; + CREATE TRIGGER fkc_delete_on_%(table)s + BEFORE DELETE ON %(table)s + BEGIN + SELECT CASE + WHEN (SELECT COUNT(id) FROM books_%(ltable)s_link WHERE %(ltable_col)s=OLD.id) > 0 + THEN RAISE(ABORT, 'Foreign key violation: %(table)s is still referenced') + END; + END; + DELETE FROM %(table)s WHERE (SELECT COUNT(id) FROM books_%(ltable)s_link WHERE %(ltable_col)s=%(table)s.id) < 1; + ''' + self.conn.execute(script%dict(ltable='authors', table='authors', ltable_col='author')) + self.conn.execute(script%dict(ltable='publishers', table='publishers', ltable_col='publisher')) + self.conn.execute(script%dict(ltable='tags', table='tags', ltable_col='tag')) + self.conn.execute(script%dict(ltable='series', table='series', ltable_col='series')) + + def upgrade_version_3(self): + ' Add path to result cache ' + self.conn.execute(''' + DROP VIEW IF EXISTS meta; + CREATE VIEW meta AS + SELECT id, title, + (SELECT concat(name) FROM authors WHERE authors.id IN (SELECT author from books_authors_link WHERE book=books.id)) authors, + (SELECT name FROM publishers WHERE publishers.id IN (SELECT publisher from books_publishers_link WHERE book=books.id)) publisher, + (SELECT rating FROM ratings WHERE ratings.id IN (SELECT rating from books_ratings_link WHERE book=books.id)) rating, + timestamp, + (SELECT MAX(uncompressed_size) FROM data WHERE book=books.id) size, + (SELECT concat(name) FROM tags WHERE tags.id IN (SELECT tag from books_tags_link WHERE book=books.id)) tags, + (SELECT text FROM comments WHERE book=books.id) comments, + (SELECT name FROM series WHERE series.id IN (SELECT series FROM books_series_link WHERE book=books.id)) series, + series_index, + sort, + author_sort, + (SELECT concat(format) FROM data WHERE data.book=books.id) formats, + isbn, + path + FROM books; + ''') + + def upgrade_version_4(self): + 'Rationalize books table' + self.conn.execute(''' + CREATE TEMPORARY TABLE + 
books_backup(id,title,sort,timestamp,series_index,author_sort,isbn,path); + INSERT INTO books_backup SELECT id,title,sort,timestamp,series_index,author_sort,isbn,path FROM books; + DROP TABLE books; + CREATE TABLE books ( id INTEGER PRIMARY KEY AUTOINCREMENT, + title TEXT NOT NULL DEFAULT 'Unknown' COLLATE NOCASE, + sort TEXT COLLATE NOCASE, + timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + pubdate TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + series_index REAL NOT NULL DEFAULT 1.0, + author_sort TEXT COLLATE NOCASE, + isbn TEXT DEFAULT "" COLLATE NOCASE, + lccn TEXT DEFAULT "" COLLATE NOCASE, + path TEXT NOT NULL DEFAULT "", + flags INTEGER NOT NULL DEFAULT 1 + ); + INSERT INTO + books (id,title,sort,timestamp,pubdate,series_index,author_sort,isbn,path) + SELECT id,title,sort,timestamp,timestamp,series_index,author_sort,isbn,path FROM books_backup; + DROP TABLE books_backup; + + DROP VIEW IF EXISTS meta; + CREATE VIEW meta AS + SELECT id, title, + (SELECT concat(name) FROM authors WHERE authors.id IN (SELECT author from books_authors_link WHERE book=books.id)) authors, + (SELECT name FROM publishers WHERE publishers.id IN (SELECT publisher from books_publishers_link WHERE book=books.id)) publisher, + (SELECT rating FROM ratings WHERE ratings.id IN (SELECT rating from books_ratings_link WHERE book=books.id)) rating, + timestamp, + (SELECT MAX(uncompressed_size) FROM data WHERE book=books.id) size, + (SELECT concat(name) FROM tags WHERE tags.id IN (SELECT tag from books_tags_link WHERE book=books.id)) tags, + (SELECT text FROM comments WHERE book=books.id) comments, + (SELECT name FROM series WHERE series.id IN (SELECT series FROM books_series_link WHERE book=books.id)) series, + series_index, + sort, + author_sort, + (SELECT concat(format) FROM data WHERE data.book=books.id) formats, + isbn, + path, + lccn, + pubdate, + flags + FROM books; + ''') + + def upgrade_version_5(self): + 'Update indexes/triggers for new books table' + self.conn.execute(''' + CREATE INDEX 
authors_idx ON books (author_sort COLLATE NOCASE); + CREATE INDEX books_idx ON books (sort COLLATE NOCASE); + CREATE TRIGGER books_delete_trg + AFTER DELETE ON books + BEGIN + DELETE FROM books_authors_link WHERE book=OLD.id; + DELETE FROM books_publishers_link WHERE book=OLD.id; + DELETE FROM books_ratings_link WHERE book=OLD.id; + DELETE FROM books_series_link WHERE book=OLD.id; + DELETE FROM books_tags_link WHERE book=OLD.id; + DELETE FROM data WHERE book=OLD.id; + DELETE FROM comments WHERE book=OLD.id; + DELETE FROM conversion_options WHERE book=OLD.id; + END; + CREATE TRIGGER books_insert_trg + AFTER INSERT ON books + BEGIN + UPDATE books SET sort=title_sort(NEW.title) WHERE id=NEW.id; + END; + CREATE TRIGGER books_update_trg + AFTER UPDATE ON books + BEGIN + UPDATE books SET sort=title_sort(NEW.title) WHERE id=NEW.id; + END; + + UPDATE books SET sort=title_sort(title) WHERE sort IS NULL; + ''' + ) + + + def upgrade_version_6(self): + 'Show authors in order' + self.conn.execute(''' + DROP VIEW IF EXISTS meta; + CREATE VIEW meta AS + SELECT id, title, + (SELECT sortconcat(bal.id, name) FROM books_authors_link AS bal JOIN authors ON(author = authors.id) WHERE book = books.id) authors, + (SELECT name FROM publishers WHERE publishers.id IN (SELECT publisher from books_publishers_link WHERE book=books.id)) publisher, + (SELECT rating FROM ratings WHERE ratings.id IN (SELECT rating from books_ratings_link WHERE book=books.id)) rating, + timestamp, + (SELECT MAX(uncompressed_size) FROM data WHERE book=books.id) size, + (SELECT concat(name) FROM tags WHERE tags.id IN (SELECT tag from books_tags_link WHERE book=books.id)) tags, + (SELECT text FROM comments WHERE book=books.id) comments, + (SELECT name FROM series WHERE series.id IN (SELECT series FROM books_series_link WHERE book=books.id)) series, + series_index, + sort, + author_sort, + (SELECT concat(format) FROM data WHERE data.book=books.id) formats, + isbn, + path, + lccn, + pubdate, + flags + FROM books; + ''') 
+ + def upgrade_version_7(self): + 'Add uuid column' + self.conn.execute(''' + ALTER TABLE books ADD COLUMN uuid TEXT; + DROP TRIGGER IF EXISTS books_insert_trg; + DROP TRIGGER IF EXISTS books_update_trg; + UPDATE books SET uuid=uuid4(); + + CREATE TRIGGER books_insert_trg AFTER INSERT ON books + BEGIN + UPDATE books SET sort=title_sort(NEW.title),uuid=uuid4() WHERE id=NEW.id; + END; + + CREATE TRIGGER books_update_trg AFTER UPDATE ON books + BEGIN + UPDATE books SET sort=title_sort(NEW.title) WHERE id=NEW.id; + END; + + DROP VIEW IF EXISTS meta; + CREATE VIEW meta AS + SELECT id, title, + (SELECT sortconcat(bal.id, name) FROM books_authors_link AS bal JOIN authors ON(author = authors.id) WHERE book = books.id) authors, + (SELECT name FROM publishers WHERE publishers.id IN (SELECT publisher from books_publishers_link WHERE book=books.id)) publisher, + (SELECT rating FROM ratings WHERE ratings.id IN (SELECT rating from books_ratings_link WHERE book=books.id)) rating, + timestamp, + (SELECT MAX(uncompressed_size) FROM data WHERE book=books.id) size, + (SELECT concat(name) FROM tags WHERE tags.id IN (SELECT tag from books_tags_link WHERE book=books.id)) tags, + (SELECT text FROM comments WHERE book=books.id) comments, + (SELECT name FROM series WHERE series.id IN (SELECT series FROM books_series_link WHERE book=books.id)) series, + series_index, + sort, + author_sort, + (SELECT concat(format) FROM data WHERE data.book=books.id) formats, + isbn, + path, + lccn, + pubdate, + flags, + uuid + FROM books; + ''') + + def upgrade_version_8(self): + 'Add Tag Browser views' + def create_tag_browser_view(table_name, column_name): + self.conn.execute(''' + DROP VIEW IF EXISTS tag_browser_{tn}; + CREATE VIEW tag_browser_{tn} AS SELECT + id, + name, + (SELECT COUNT(id) FROM books_{tn}_link WHERE {cn}={tn}.id) count + FROM {tn}; + '''.format(tn=table_name, cn=column_name)) + + for tn in ('authors', 'tags', 'publishers', 'series'): + cn = tn[:-1] + if tn == 'series': + cn = tn + 
create_tag_browser_view(tn, cn) + + def upgrade_version_9(self): + 'Add custom columns' + self.conn.execute(''' + CREATE TABLE custom_columns ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + label TEXT NOT NULL, + name TEXT NOT NULL, + datatype TEXT NOT NULL, + mark_for_delete BOOL DEFAULT 0 NOT NULL, + editable BOOL DEFAULT 1 NOT NULL, + display TEXT DEFAULT "{}" NOT NULL, + is_multiple BOOL DEFAULT 0 NOT NULL, + normalized BOOL NOT NULL, + UNIQUE(label) + ); + CREATE INDEX IF NOT EXISTS custom_columns_idx ON custom_columns (label); + CREATE INDEX IF NOT EXISTS formats_idx ON data (format); + ''') + + def upgrade_version_10(self): + 'Add restricted Tag Browser views' + def create_tag_browser_view(table_name, column_name, view_column_name): + script = (''' + DROP VIEW IF EXISTS tag_browser_{tn}; + CREATE VIEW tag_browser_{tn} AS SELECT + id, + {vcn}, + (SELECT COUNT(id) FROM books_{tn}_link WHERE {cn}={tn}.id) count + FROM {tn}; + DROP VIEW IF EXISTS tag_browser_filtered_{tn}; + CREATE VIEW tag_browser_filtered_{tn} AS SELECT + id, + {vcn}, + (SELECT COUNT(books_{tn}_link.id) FROM books_{tn}_link WHERE + {cn}={tn}.id AND books_list_filter(book)) count + FROM {tn}; + '''.format(tn=table_name, cn=column_name, vcn=view_column_name)) + self.conn.execute(script) + + for field in self.field_metadata.itervalues(): + if field['is_category'] and not field['is_custom'] and 'link_column' in field: + table = self.conn.get( + 'SELECT name FROM sqlite_master WHERE type="table" AND name=?', + ('books_%s_link'%field['table'],), all=False) + if table is not None: + create_tag_browser_view(field['table'], field['link_column'], field['column']) + + def upgrade_version_11(self): + 'Add average rating to tag browser views' + def create_std_tag_browser_view(table_name, column_name, + view_column_name, sort_column_name): + script = (''' + DROP VIEW IF EXISTS tag_browser_{tn}; + CREATE VIEW tag_browser_{tn} AS SELECT + id, + {vcn}, + (SELECT COUNT(id) FROM books_{tn}_link WHERE {cn}={tn}.id) 
count, + (SELECT AVG(ratings.rating) + FROM books_{tn}_link AS tl, books_ratings_link AS bl, ratings + WHERE tl.{cn}={tn}.id AND bl.book=tl.book AND + ratings.id = bl.rating AND ratings.rating <> 0) avg_rating, + {scn} AS sort + FROM {tn}; + DROP VIEW IF EXISTS tag_browser_filtered_{tn}; + CREATE VIEW tag_browser_filtered_{tn} AS SELECT + id, + {vcn}, + (SELECT COUNT(books_{tn}_link.id) FROM books_{tn}_link WHERE + {cn}={tn}.id AND books_list_filter(book)) count, + (SELECT AVG(ratings.rating) + FROM books_{tn}_link AS tl, books_ratings_link AS bl, ratings + WHERE tl.{cn}={tn}.id AND bl.book=tl.book AND + ratings.id = bl.rating AND ratings.rating <> 0 AND + books_list_filter(bl.book)) avg_rating, + {scn} AS sort + FROM {tn}; + + '''.format(tn=table_name, cn=column_name, + vcn=view_column_name, scn= sort_column_name)) + self.conn.execute(script) + + def create_cust_tag_browser_view(table_name, link_table_name): + script = ''' + DROP VIEW IF EXISTS tag_browser_{table}; + CREATE VIEW tag_browser_{table} AS SELECT + id, + value, + (SELECT COUNT(id) FROM {lt} WHERE value={table}.id) count, + (SELECT AVG(r.rating) + FROM {lt}, + books_ratings_link AS bl, + ratings AS r + WHERE {lt}.value={table}.id AND bl.book={lt}.book AND + r.id = bl.rating AND r.rating <> 0) avg_rating, + value AS sort + FROM {table}; + + DROP VIEW IF EXISTS tag_browser_filtered_{table}; + CREATE VIEW tag_browser_filtered_{table} AS SELECT + id, + value, + (SELECT COUNT({lt}.id) FROM {lt} WHERE value={table}.id AND + books_list_filter(book)) count, + (SELECT AVG(r.rating) + FROM {lt}, + books_ratings_link AS bl, + ratings AS r + WHERE {lt}.value={table}.id AND bl.book={lt}.book AND + r.id = bl.rating AND r.rating <> 0 AND + books_list_filter(bl.book)) avg_rating, + value AS sort + FROM {table}; + '''.format(lt=link_table_name, table=table_name) + self.conn.execute(script) + + for field in self.field_metadata.itervalues(): + if field['is_category'] and not field['is_custom'] and 'link_column' in field: 
+ table = self.conn.get( + 'SELECT name FROM sqlite_master WHERE type="table" AND name=?', + ('books_%s_link'%field['table'],), all=False) + if table is not None: + create_std_tag_browser_view(field['table'], field['link_column'], + field['column'], field['category_sort']) + + db_tables = self.conn.get('''SELECT name FROM sqlite_master + WHERE type='table' + ORDER BY name''') + tables = [] + for (table,) in db_tables: + tables.append(table) + for table in tables: + link_table = 'books_%s_link'%table + if table.startswith('custom_column_') and link_table in tables: + create_cust_tag_browser_view(table, link_table) + + self.conn.execute('UPDATE authors SET sort=author_to_author_sort(name)') + + def upgrade_version_12(self): + 'DB based preference store' + script = ''' + DROP TABLE IF EXISTS preferences; + CREATE TABLE preferences(id INTEGER PRIMARY KEY, + key TEXT NON NULL, + val TEXT NON NULL, + UNIQUE(key)); + ''' + self.conn.execute(script) + + def upgrade_version_13(self): + 'Dirtied table for OPF metadata backups' + script = ''' + DROP TABLE IF EXISTS metadata_dirtied; + CREATE TABLE metadata_dirtied(id INTEGER PRIMARY KEY, + book INTEGER NOT NULL, + UNIQUE(book)); + INSERT INTO metadata_dirtied (book) SELECT id FROM books; + ''' + self.conn.execute(script) + + def upgrade_version_14(self): + 'Cache has_cover' + self.conn.execute('ALTER TABLE books ADD COLUMN has_cover BOOL DEFAULT 0') + data = self.conn.get('SELECT id,path FROM books', all=True) + def has_cover(path): + if path: + path = os.path.join(self.library_path, path.replace('/', os.sep), + 'cover.jpg') + return os.path.exists(path) + return False + + ids = [(x[0],) for x in data if has_cover(x[1])] + self.conn.executemany('UPDATE books SET has_cover=1 WHERE id=?', ids) + + def upgrade_version_15(self): + 'Remove commas from tags' + self.conn.execute("UPDATE OR IGNORE tags SET name=REPLACE(name, ',', ';')") + self.conn.execute("UPDATE OR IGNORE tags SET name=REPLACE(name, ',', ';;')") + 
self.conn.execute("UPDATE OR IGNORE tags SET name=REPLACE(name, ',', '')") + + def upgrade_version_16(self): + self.conn.execute(''' + DROP TRIGGER IF EXISTS books_update_trg; + CREATE TRIGGER books_update_trg + AFTER UPDATE ON books + BEGIN + UPDATE books SET sort=title_sort(NEW.title) + WHERE id=NEW.id AND OLD.title <> NEW.title; + END; + ''') + + def upgrade_version_17(self): + 'custom book data table (for plugins)' + script = ''' + DROP TABLE IF EXISTS books_plugin_data; + CREATE TABLE books_plugin_data(id INTEGER PRIMARY KEY, + book INTEGER NON NULL, + name TEXT NON NULL, + val TEXT NON NULL, + UNIQUE(book,name)); + DROP TRIGGER IF EXISTS books_delete_trg; + CREATE TRIGGER books_delete_trg + AFTER DELETE ON books + BEGIN + DELETE FROM books_authors_link WHERE book=OLD.id; + DELETE FROM books_publishers_link WHERE book=OLD.id; + DELETE FROM books_ratings_link WHERE book=OLD.id; + DELETE FROM books_series_link WHERE book=OLD.id; + DELETE FROM books_tags_link WHERE book=OLD.id; + DELETE FROM data WHERE book=OLD.id; + DELETE FROM comments WHERE book=OLD.id; + DELETE FROM conversion_options WHERE book=OLD.id; + DELETE FROM books_plugin_data WHERE book=OLD.id; + END; + ''' + self.conn.execute(script) + + def upgrade_version_18(self): + ''' + Add a library UUID. + Add an identifiers table. + Add a languages table. + Add a last_modified column. + NOTE: You cannot downgrade after this update, if you do + any changes you make to book isbns will be lost. 
+ ''' + script = ''' + DROP TABLE IF EXISTS library_id; + CREATE TABLE library_id ( id INTEGER PRIMARY KEY, + uuid TEXT NOT NULL, + UNIQUE(uuid) + ); + + DROP TABLE IF EXISTS identifiers; + CREATE TABLE identifiers ( id INTEGER PRIMARY KEY, + book INTEGER NON NULL, + type TEXT NON NULL DEFAULT "isbn" COLLATE NOCASE, + val TEXT NON NULL COLLATE NOCASE, + UNIQUE(book, type) + ); + + DROP TABLE IF EXISTS languages; + CREATE TABLE languages ( id INTEGER PRIMARY KEY, + lang_code TEXT NON NULL COLLATE NOCASE, + UNIQUE(lang_code) + ); + + DROP TABLE IF EXISTS books_languages_link; + CREATE TABLE books_languages_link ( id INTEGER PRIMARY KEY, + book INTEGER NOT NULL, + lang_code INTEGER NOT NULL, + item_order INTEGER NOT NULL DEFAULT 0, + UNIQUE(book, lang_code) + ); + + DROP TRIGGER IF EXISTS fkc_delete_on_languages; + CREATE TRIGGER fkc_delete_on_languages + BEFORE DELETE ON languages + BEGIN + SELECT CASE + WHEN (SELECT COUNT(id) FROM books_languages_link WHERE lang_code=OLD.id) > 0 + THEN RAISE(ABORT, 'Foreign key violation: language is still referenced') + END; + END; + + DROP TRIGGER IF EXISTS fkc_delete_on_languages_link; + CREATE TRIGGER fkc_delete_on_languages_link + BEFORE INSERT ON books_languages_link + BEGIN + SELECT CASE + WHEN (SELECT id from books WHERE id=NEW.book) IS NULL + THEN RAISE(ABORT, 'Foreign key violation: book not in books') + WHEN (SELECT id from languages WHERE id=NEW.lang_code) IS NULL + THEN RAISE(ABORT, 'Foreign key violation: lang_code not in languages') + END; + END; + + DROP TRIGGER IF EXISTS fkc_update_books_languages_link_a; + CREATE TRIGGER fkc_update_books_languages_link_a + BEFORE UPDATE OF book ON books_languages_link + BEGIN + SELECT CASE + WHEN (SELECT id from books WHERE id=NEW.book) IS NULL + THEN RAISE(ABORT, 'Foreign key violation: book not in books') + END; + END; + DROP TRIGGER IF EXISTS fkc_update_books_languages_link_b; + CREATE TRIGGER fkc_update_books_languages_link_b + BEFORE UPDATE OF lang_code ON books_languages_link 
+ BEGIN + SELECT CASE + WHEN (SELECT id from languages WHERE id=NEW.lang_code) IS NULL + THEN RAISE(ABORT, 'Foreign key violation: lang_code not in languages') + END; + END; + + DROP INDEX IF EXISTS books_languages_link_aidx; + CREATE INDEX books_languages_link_aidx ON books_languages_link (lang_code); + DROP INDEX IF EXISTS books_languages_link_bidx; + CREATE INDEX books_languages_link_bidx ON books_languages_link (book); + DROP INDEX IF EXISTS languages_idx; + CREATE INDEX languages_idx ON languages (lang_code COLLATE NOCASE); + + DROP TRIGGER IF EXISTS books_delete_trg; + CREATE TRIGGER books_delete_trg + AFTER DELETE ON books + BEGIN + DELETE FROM books_authors_link WHERE book=OLD.id; + DELETE FROM books_publishers_link WHERE book=OLD.id; + DELETE FROM books_ratings_link WHERE book=OLD.id; + DELETE FROM books_series_link WHERE book=OLD.id; + DELETE FROM books_tags_link WHERE book=OLD.id; + DELETE FROM books_languages_link WHERE book=OLD.id; + DELETE FROM data WHERE book=OLD.id; + DELETE FROM comments WHERE book=OLD.id; + DELETE FROM conversion_options WHERE book=OLD.id; + DELETE FROM books_plugin_data WHERE book=OLD.id; + DELETE FROM identifiers WHERE book=OLD.id; + END; + + INSERT INTO identifiers (book, val) SELECT id,isbn FROM books WHERE isbn; + + ALTER TABLE books ADD COLUMN last_modified TIMESTAMP NOT NULL DEFAULT "%s"; + + '''%isoformat(DEFAULT_DATE, sep=' ') + # Sqlite does not support non constant default values in alter + # statements + self.conn.execute(script) + + def upgrade_version_19(self): + recipes = self.conn.get('SELECT id,title,script FROM feeds') + if recipes: + from calibre.web.feeds.recipes import (custom_recipes, + custom_recipe_filename) + bdir = os.path.dirname(custom_recipes.file_path) + for id_, title, script in recipes: + existing = frozenset(map(int, custom_recipes.iterkeys())) + if id_ in existing: + id_ = max(existing) + 1000 + id_ = str(id_) + fname = custom_recipe_filename(id_, title) + custom_recipes[id_] = (title, fname) + if 
isinstance(script, unicode): + script = script.encode('utf-8') + with open(os.path.join(bdir, fname), 'wb') as f: + f.write(script) + + def upgrade_version_20(self): + ''' + Add a link column to the authors table. + ''' + + script = ''' + ALTER TABLE authors ADD COLUMN link TEXT NOT NULL DEFAULT ""; + ''' + self.conn.execute(script) + + diff --git a/src/calibre/db/tables.py b/src/calibre/db/tables.py index edca43528a..cbb3ce0006 100644 --- a/src/calibre/db/tables.py +++ b/src/calibre/db/tables.py @@ -32,11 +32,11 @@ def _c_convert_timestamp(val): class Table(object): - def __init__(self, name, metadata): + def __init__(self, name, metadata, link_table=None): self.name, self.metadata = name, metadata - # self.adapt() maps values from the db to python objects - self.adapt = \ + # self.unserialize() maps values from the db to python objects + self.unserialize = \ { 'datetime': _c_convert_timestamp, 'bool': bool @@ -44,7 +44,10 @@ class Table(object): metadata['datatype'], lambda x: x) if name == 'authors': # Legacy - self.adapt = lambda x: x.replace('|', ',') if x else None + self.unserialize = lambda x: x.replace('|', ',') if x else None + + self.link_table = (link_table if link_table else + 'books_%s_link'%self.metadata['table']) class OneToOneTable(Table): @@ -59,7 +62,7 @@ class OneToOneTable(Table): idcol = 'id' if self.metadata['table'] == 'books' else 'book' for row in db.conn.execute('SELECT {0}, {1} FROM {2}'.format(idcol, self.metadata['column'], self.metadata['table'])): - self.book_col_map[row[0]] = self.adapt(row[1]) + self.book_col_map[row[0]] = self.unserialize(row[1]) class SizeTable(OneToOneTable): @@ -68,7 +71,7 @@ class SizeTable(OneToOneTable): for row in db.conn.execute( 'SELECT books.id, (SELECT MAX(uncompressed_size) FROM data ' 'WHERE data.book=books.id) FROM books'): - self.book_col_map[row[0]] = self.adapt(row[1]) + self.book_col_map[row[0]] = self.unserialize(row[1]) class ManyToOneTable(Table): @@ -89,17 +92,17 @@ class 
ManyToOneTable(Table): def read_id_maps(self, db): for row in db.conn.execute('SELECT id, {0} FROM {1}'.format( - self.metadata['name'], self.metadata['table'])): + self.metadata['column'], self.metadata['table'])): if row[1]: - self.id_map[row[0]] = self.adapt(row[1]) + self.id_map[row[0]] = self.unserialize(row[1]) def read_maps(self, db): for row in db.conn.execute( - 'SELECT book, {0} FROM books_{1}_link'.format( - self.metadata['link_column'], self.metadata['table'])): + 'SELECT book, {0} FROM {1}'.format( + self.metadata['link_column'], self.link_table)): if row[1] not in self.col_book_map: self.col_book_map[row[1]] = [] - self.col_book_map.append(row[0]) + self.col_book_map[row[1]].append(row[0]) self.book_col_map[row[0]] = row[1] class ManyToManyTable(ManyToOneTable): @@ -112,11 +115,11 @@ class ManyToManyTable(ManyToOneTable): def read_maps(self, db): for row in db.conn.execute( - 'SELECT book, {0} FROM books_{1}_link'.format( - self.metadata['link_column'], self.metadata['table'])): + 'SELECT book, {0} FROM {1}'.format( + self.metadata['link_column'], self.link_table)): if row[1] not in self.col_book_map: self.col_book_map[row[1]] = [] - self.col_book_map.append(row[0]) + self.col_book_map[row[1]].append(row[0]) if row[0] not in self.book_col_map: self.book_col_map[row[0]] = [] self.book_col_map[row[0]].append(row[1]) @@ -142,7 +145,7 @@ class FormatsTable(ManyToManyTable): if row[1] is not None: if row[1] not in self.col_book_map: self.col_book_map[row[1]] = [] - self.col_book_map.append(row[0]) + self.col_book_map[row[1]].append(row[0]) if row[0] not in self.book_col_map: self.book_col_map[row[0]] = [] self.book_col_map[row[0]].append((row[1], row[2])) @@ -157,7 +160,7 @@ class IdentifiersTable(ManyToManyTable): if row[1] is not None and row[2] is not None: if row[1] not in self.col_book_map: self.col_book_map[row[1]] = [] - self.col_book_map.append(row[0]) + self.col_book_map[row[1]].append(row[0]) if row[0] not in self.book_col_map: 
self.book_col_map[row[0]] = [] self.book_col_map[row[0]].append((row[1], row[2])) diff --git a/src/calibre/devices/kobo/driver.py b/src/calibre/devices/kobo/driver.py index e3019b8ced..62e15452f1 100644 --- a/src/calibre/devices/kobo/driver.py +++ b/src/calibre/devices/kobo/driver.py @@ -137,7 +137,7 @@ class KOBO(USBMS): bl_cache[lpath] = None if ImageID is not None: imagename = self.normalize_path(self._main_prefix + '.kobo/images/' + ImageID + ' - NickelBookCover.parsed') - if not os.path.exists(imagename): + if not os.path.exists(imagename): # Try the Touch version if the image does not exist imagename = self.normalize_path(self._main_prefix + '.kobo/images/' + ImageID + ' - N3_LIBRARY_FULL.parsed') @@ -206,11 +206,21 @@ class KOBO(USBMS): if self.dbversion >= 14: query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \ 'ImageID, ReadStatus, ___ExpirationStatus, FavouritesIndex from content where BookID is Null' - else: + elif self.dbversion < 14 and self.dbversion >= 8: query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \ 'ImageID, ReadStatus, ___ExpirationStatus, "-1" as FavouritesIndex from content where BookID is Null' + else: + query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \ + 'ImageID, ReadStatus, "-1" as ___ExpirationStatus, "-1" as FavouritesIndex from content where BookID is Null' - cursor.execute (query) + try: + cursor.execute (query) + except Exception as e: + if '___ExpirationStatus' not in str(e): + raise + query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \ + 'ImageID, ReadStatus, "-1" as ___ExpirationStatus, "-1" as FavouritesIndex from content where BookID is Null' + cursor.execute(query) changed = False for i, row in enumerate(cursor): @@ -577,7 +587,7 @@ class KOBO(USBMS): for book in books: # debug_print('Title:', book.title, 'lpath:', book.path) if 'Im_Reading' not in book.device_collections: - 
book.device_collections.append('Im_Reading') + book.device_collections.append('Im_Reading') extension = os.path.splitext(book.path)[1] ContentType = self.get_content_type_from_extension(extension) if extension != '' else self.get_content_type_from_path(book.path) @@ -621,7 +631,7 @@ class KOBO(USBMS): for book in books: # debug_print('Title:', book.title, 'lpath:', book.path) if 'Read' not in book.device_collections: - book.device_collections.append('Read') + book.device_collections.append('Read') extension = os.path.splitext(book.path)[1] ContentType = self.get_content_type_from_extension(extension) if extension != '' else self.get_content_type_from_path(book.path) @@ -658,7 +668,7 @@ class KOBO(USBMS): for book in books: # debug_print('Title:', book.title, 'lpath:', book.path) if 'Closed' not in book.device_collections: - book.device_collections.append('Closed') + book.device_collections.append('Closed') extension = os.path.splitext(book.path)[1] ContentType = self.get_content_type_from_extension(extension) if extension != '' else self.get_content_type_from_path(book.path) @@ -695,8 +705,8 @@ class KOBO(USBMS): for book in books: # debug_print('Title:', book.title, 'lpath:', book.path) if 'Shortlist' not in book.device_collections: - book.device_collections.append('Shortlist') - # debug_print ("Shortlist found for: ", book.title) + book.device_collections.append('Shortlist') + # debug_print ("Shortlist found for: ", book.title) extension = os.path.splitext(book.path)[1] ContentType = self.get_content_type_from_extension(extension) if extension != '' else self.get_content_type_from_path(book.path) diff --git a/src/calibre/gui2/store/opensearch_store.py b/src/calibre/gui2/store/opensearch_store.py index 54fedbd002..bcc92b25f1 100644 --- a/src/calibre/gui2/store/opensearch_store.py +++ b/src/calibre/gui2/store/opensearch_store.py @@ -7,7 +7,6 @@ __copyright__ = '2011, John Schember ' __docformat__ = 'restructuredtext en' import mimetypes -import urllib from 
contextlib import closing from lxml import etree @@ -22,7 +21,7 @@ from calibre.gui2.store.web_store_dialog import WebStoreDialog from calibre.utils.opensearch.description import Description from calibre.utils.opensearch.query import Query -class OpenSearchStore(StorePlugin): +class OpenSearchOPDSStore(StorePlugin): open_search_url = '' web_url = '' @@ -50,7 +49,7 @@ class OpenSearchStore(StorePlugin): oquery = Query(url_template) # set up initial values - oquery.searchTerms = urllib.quote_plus(query) + oquery.searchTerms = query oquery.count = max_results url = oquery.url() diff --git a/src/calibre/gui2/store/search/search.py b/src/calibre/gui2/store/search/search.py index fd20669f09..f6fa423e23 100644 --- a/src/calibre/gui2/store/search/search.py +++ b/src/calibre/gui2/store/search/search.py @@ -349,7 +349,8 @@ class SearchDialog(QDialog, Ui_Dialog): d = ChooseFormatDialog(self, _('Choose format to download to your library.'), result.downloads.keys()) if d.exec_() == d.Accepted: ext = d.format() - self.gui.download_ebook(result.downloads[ext]) + fname = result.title + '.' 
+ ext.lower() + self.gui.download_ebook(result.downloads[ext], filename=fname) def open_store(self, result): self.gui.istores[result.store_name].open(self, result.detail_item, self.open_external.isChecked()) diff --git a/src/calibre/gui2/store/stores/archive_org_plugin.py b/src/calibre/gui2/store/stores/archive_org_plugin.py index 6972c604ce..7439056baa 100644 --- a/src/calibre/gui2/store/stores/archive_org_plugin.py +++ b/src/calibre/gui2/store/stores/archive_org_plugin.py @@ -6,12 +6,11 @@ __license__ = 'GPL 3' __copyright__ = '2011, John Schember ' __docformat__ = 'restructuredtext en' - from calibre.gui2.store.basic_config import BasicStoreConfig -from calibre.gui2.store.opensearch_store import OpenSearchStore +from calibre.gui2.store.opensearch_store import OpenSearchOPDSStore from calibre.gui2.store.search_result import SearchResult -class ArchiveOrgStore(BasicStoreConfig, OpenSearchStore): +class ArchiveOrgStore(BasicStoreConfig, OpenSearchOPDSStore): open_search_url = 'http://bookserver.archive.org/catalog/opensearch.xml' web_url = 'http://www.archive.org/details/texts' @@ -19,7 +18,7 @@ class ArchiveOrgStore(BasicStoreConfig, OpenSearchStore): # http://bookserver.archive.org/catalog/ def search(self, query, max_results=10, timeout=60): - for s in OpenSearchStore.search(self, query, max_results, timeout): + for s in OpenSearchOPDSStore.search(self, query, max_results, timeout): s.detail_item = 'http://www.archive.org/details/' + s.detail_item.split(':')[-1] s.price = '$0.00' s.drm = SearchResult.DRM_UNLOCKED @@ -33,6 +32,7 @@ class ArchiveOrgStore(BasicStoreConfig, OpenSearchStore): from calibre import browser from contextlib import closing from lxml import html + br = browser() with closing(br.open(search_result.detail_item, timeout=timeout)) as nf: idata = html.fromstring(nf.read()) diff --git a/src/calibre/gui2/store/stores/epubbud_plugin.py b/src/calibre/gui2/store/stores/epubbud_plugin.py index b4d642f62b..029b2b3fc9 100644 --- 
a/src/calibre/gui2/store/stores/epubbud_plugin.py +++ b/src/calibre/gui2/store/stores/epubbud_plugin.py @@ -7,10 +7,10 @@ __copyright__ = '2011, John Schember ' __docformat__ = 'restructuredtext en' from calibre.gui2.store.basic_config import BasicStoreConfig -from calibre.gui2.store.opensearch_store import OpenSearchStore +from calibre.gui2.store.opensearch_store import OpenSearchOPDSStore from calibre.gui2.store.search_result import SearchResult -class EpubBudStore(BasicStoreConfig, OpenSearchStore): +class EpubBudStore(BasicStoreConfig, OpenSearchOPDSStore): open_search_url = 'http://www.epubbud.com/feeds/opensearch.xml' web_url = 'http://www.epubbud.com/' @@ -18,7 +18,7 @@ class EpubBudStore(BasicStoreConfig, OpenSearchStore): # http://www.epubbud.com/feeds/catalog.atom def search(self, query, max_results=10, timeout=60): - for s in OpenSearchStore.search(self, query, max_results, timeout): + for s in OpenSearchOPDSStore.search(self, query, max_results, timeout): s.price = '$0.00' s.drm = SearchResult.DRM_UNLOCKED s.formats = 'EPUB' diff --git a/src/calibre/gui2/store/stores/feedbooks_plugin.py b/src/calibre/gui2/store/stores/feedbooks_plugin.py index 96d0a10dc7..cac44fd8df 100644 --- a/src/calibre/gui2/store/stores/feedbooks_plugin.py +++ b/src/calibre/gui2/store/stores/feedbooks_plugin.py @@ -7,10 +7,10 @@ __copyright__ = '2011, John Schember ' __docformat__ = 'restructuredtext en' from calibre.gui2.store.basic_config import BasicStoreConfig -from calibre.gui2.store.opensearch_store import OpenSearchStore +from calibre.gui2.store.opensearch_store import OpenSearchOPDSStore from calibre.gui2.store.search_result import SearchResult -class FeedbooksStore(BasicStoreConfig, OpenSearchStore): +class FeedbooksStore(BasicStoreConfig, OpenSearchOPDSStore): open_search_url = 'http://assets0.feedbooks.net/opensearch.xml?t=1253087147' web_url = 'http://feedbooks.com/' @@ -18,7 +18,7 @@ class FeedbooksStore(BasicStoreConfig, OpenSearchStore): # 
http://www.feedbooks.com/catalog def search(self, query, max_results=10, timeout=60): - for s in OpenSearchStore.search(self, query, max_results, timeout): + for s in OpenSearchOPDSStore.search(self, query, max_results, timeout): if s.downloads: s.drm = SearchResult.DRM_UNLOCKED s.price = '$0.00' diff --git a/src/calibre/gui2/store/stores/gutenberg_plugin.py b/src/calibre/gui2/store/stores/gutenberg_plugin.py index 85d1f3966a..ad30f2067d 100644 --- a/src/calibre/gui2/store/stores/gutenberg_plugin.py +++ b/src/calibre/gui2/store/stores/gutenberg_plugin.py @@ -6,6 +6,7 @@ __license__ = 'GPL 3' __copyright__ = '2011, John Schember ' __docformat__ = 'restructuredtext en' +import mimetypes import urllib from contextlib import closing @@ -23,70 +24,67 @@ from calibre.gui2.store.web_store_dialog import WebStoreDialog class GutenbergStore(BasicStoreConfig, StorePlugin): def open(self, parent=None, detail_item=None, external=False): - url = 'http://m.gutenberg.org/' - ext_url = 'http://gutenberg.org/' + url = 'http://gutenberg.org/' + + if detail_item: + detail_item = url_slash_cleaner(url + detail_item) if external or self.config.get('open_external', False): - if detail_item: - ext_url = ext_url + detail_item - open_url(QUrl(url_slash_cleaner(ext_url))) + open_url(QUrl(detail_item if detail_item else url)) else: - detail_url = None - if detail_item: - detail_url = url + detail_item - d = WebStoreDialog(self.gui, url, parent, detail_url) + d = WebStoreDialog(self.gui, url, parent, detail_item) d.setWindowTitle(self.name) d.set_tags(self.config.get('tags', '')) d.exec_() def search(self, query, max_results=10, timeout=60): - # Gutenberg's website does not allow searching both author and title. - # Using a google search so we can search on both fields at once. 
- url = 'http://www.google.com/xhtml?q=site:gutenberg.org+' + urllib.quote_plus(query) + url = 'http://m.gutenberg.org/ebooks/search.mobile/?default_prefix=all&sort_order=title&query=' + urllib.quote_plus(query) br = browser() counter = max_results with closing(br.open(url, timeout=timeout)) as f: doc = html.fromstring(f.read()) - for data in doc.xpath('//div[@class="edewpi"]//div[@class="r ld"]'): + for data in doc.xpath('//ol[@class="results"]//li[contains(@class, "icon_title")]'): if counter <= 0: break + + id = ''.join(data.xpath('./a/@href')) + id = id.split('.mobile')[0] - url = '' - url_a = data.xpath('div[@class="jd"]/a') - if url_a: - url_a = url_a[0] - url = url_a.get('href', None) - if url: - url = url.split('u=')[-1].split('&')[0] - if '/ebooks/' not in url: - continue - id = url.split('/')[-1] - - url_a = html.fromstring(html.tostring(url_a)) - heading = ''.join(url_a.xpath('//text()')) - title, _, author = heading.rpartition('by ') - author = author.split('-')[0] - price = '$0.00' + title = ''.join(data.xpath('.//span[@class="title"]/text()')) + author = ''.join(data.xpath('.//span[@class="subtitle"]/text()')) counter -= 1 s = SearchResult() s.cover_url = '' + + s.detail_item = id.strip() s.title = title.strip() s.author = author.strip() - s.price = price.strip() - s.detail_item = '/ebooks/' + id.strip() + s.price = '$0.00' s.drm = SearchResult.DRM_UNLOCKED yield s def get_details(self, search_result, timeout): - url = 'http://m.gutenberg.org/' + url = url_slash_cleaner('http://m.gutenberg.org/' + search_result.detail_item + '.mobile') br = browser() - with closing(br.open(url + search_result.detail_item, timeout=timeout)) as nf: - idata = html.fromstring(nf.read()) - search_result.formats = ', '.join(idata.xpath('//a[@type!="application/atom+xml"]//span[@class="title"]/text()')) - return True \ No newline at end of file + with closing(br.open(url, timeout=timeout)) as nf: + doc = html.fromstring(nf.read()) + + for save_item in 
doc.xpath('//li[contains(@class, "icon_save")]/a'): + type = save_item.get('type') + href = save_item.get('href') + + if type: + ext = mimetypes.guess_extension(type) + if ext: + ext = ext[1:].upper().strip() + search_result.downloads[ext] = href + + search_result.formats = ', '.join(search_result.downloads.keys()) + + return True diff --git a/src/calibre/gui2/store/stores/manybooks_plugin.py b/src/calibre/gui2/store/stores/manybooks_plugin.py index 829a97012f..c7dbf0a608 100644 --- a/src/calibre/gui2/store/stores/manybooks_plugin.py +++ b/src/calibre/gui2/store/stores/manybooks_plugin.py @@ -6,89 +6,101 @@ __license__ = 'GPL 3' __copyright__ = '2011, John Schember ' __docformat__ = 'restructuredtext en' -import re -import urllib +import mimetypes from contextlib import closing -from lxml import html +from lxml import etree -from PyQt4.Qt import QUrl - -from calibre import browser, url_slash_cleaner -from calibre.gui2 import open_url -from calibre.gui2.store import StorePlugin +from calibre import browser from calibre.gui2.store.basic_config import BasicStoreConfig +from calibre.gui2.store.opensearch_store import OpenSearchOPDSStore from calibre.gui2.store.search_result import SearchResult -from calibre.gui2.store.web_store_dialog import WebStoreDialog +from calibre.utils.opensearch.description import Description +from calibre.utils.opensearch.query import Query -class ManyBooksStore(BasicStoreConfig, StorePlugin): +class ManyBooksStore(BasicStoreConfig, OpenSearchOPDSStore): - def open(self, parent=None, detail_item=None, external=False): - url = 'http://manybooks.net/' - - detail_url = None - if detail_item: - detail_url = url + detail_item - - if external or self.config.get('open_external', False): - open_url(QUrl(url_slash_cleaner(detail_url if detail_url else url))) - else: - d = WebStoreDialog(self.gui, url, parent, detail_url) - d.setWindowTitle(self.name) - d.set_tags(self.config.get('tags', '')) - d.exec_() + open_search_url = 
'http://www.manybooks.net/opds/' + web_url = 'http://manybooks.net' def search(self, query, max_results=10, timeout=60): - # ManyBooks website separates results for title and author. - # It also doesn't do a clear job of references authors and - # secondary titles. Google is also faster. - # Using a google search so we can search on both fields at once. - url = 'http://www.google.com/xhtml?q=site:manybooks.net+' + urllib.quote_plus(query) + ''' + Manybooks uses a very strange opds feed. The opds + main feed is structured like a stanza feed. The + search result entries give very little information + and requires you to go to a detail link. The detail + link has the wrong type specified (text/html instead + of application/atom+xml). + ''' + if not hasattr(self, 'open_search_url'): + return - br = browser() + description = Description(self.open_search_url) + url_template = description.get_best_template() + if not url_template: + return + oquery = Query(url_template) + # set up initial values + oquery.searchTerms = query + oquery.count = max_results + url = oquery.url() + counter = max_results + br = browser() with closing(br.open(url, timeout=timeout)) as f: - doc = html.fromstring(f.read()) - for data in doc.xpath('//div[@class="edewpi"]//div[@class="r ld"]'): + doc = etree.fromstring(f.read()) + for data in doc.xpath('//*[local-name() = "entry"]'): if counter <= 0: break - - url = '' - url_a = data.xpath('div[@class="jd"]/a') - if url_a: - url_a = url_a[0] - url = url_a.get('href', None) - if url: - url = url.split('u=')[-1][:-2] - if '/titles/' not in url: - continue - id = url.split('/')[-1] - id = id.strip() - - url_a = html.fromstring(html.tostring(url_a)) - heading = ''.join(url_a.xpath('//text()')) - title, _, author = heading.rpartition('by ') - author = author.split('-')[0] - price = '$0.00' - - cover_url = '' - mo = re.match('^\D+', id) - if mo: - cover_name = mo.group() - cover_name = cover_name.replace('etext', '') - cover_id = id.split('.')[0] - 
cover_url = 'http://www.manybooks.net/images/' + id[0] + '/' + cover_name + '/' + cover_id + '-thumb.jpg' - + counter -= 1 - + s = SearchResult() - s.cover_url = cover_url - s.title = title.strip() - s.author = author.strip() - s.price = price.strip() - s.detail_item = '/titles/' + id + + detail_links = data.xpath('./*[local-name() = "link" and @type = "text/html"]') + if not detail_links: + continue + detail_link = detail_links[0] + detail_href = detail_link.get('href') + if not detail_href: + continue + + s.detail_item = 'http://manybooks.net/titles/' + detail_href.split('tid=')[-1] + '.html' + # These can have HTML inside of them. We are going to get them again later + # just in case. + s.title = ''.join(data.xpath('./*[local-name() = "title"]//text()')).strip() + s.author = ', '.join(data.xpath('./*[local-name() = "author"]//text()')).strip() + + # Follow the detail link to get the rest of the info. + with closing(br.open(detail_href, timeout=timeout/4)) as df: + ddoc = etree.fromstring(df.read()) + ddata = ddoc.xpath('//*[local-name() = "entry"][1]') + if ddata: + ddata = ddata[0] + + # This is the real title and author info we want. We got + # it previously just in case it's not specified here for some reason. 
+ s.title = ''.join(ddata.xpath('./*[local-name() = "title"]//text()')).strip() + s.author = ', '.join(ddata.xpath('./*[local-name() = "author"]//text()')).strip() + if s.author.startswith(','): + s.author = s.author[1:] + if s.author.endswith(','): + s.author = s.author[:-1] + + s.cover_url = ''.join(ddata.xpath('./*[local-name() = "link" and @rel = "http://opds-spec.org/thumbnail"][1]/@href')).strip() + + for link in ddata.xpath('./*[local-name() = "link" and @rel = "http://opds-spec.org/acquisition"]'): + type = link.get('type') + href = link.get('href') + if type: + ext = mimetypes.guess_extension(type) + if ext: + ext = ext[1:].upper().strip() + s.downloads[ext] = href + + s.price = '$0.00' s.drm = SearchResult.DRM_UNLOCKED - s.formts = 'EPUB, PDB (eReader, PalmDoc, zTXT, Plucker, iSilo), FB2, ZIP, AZW, MOBI, PRC, LIT, PKG, PDF, TXT, RB, RTF, LRF, TCR, JAR' + s.formats = 'EPUB, PDB (eReader, PalmDoc, zTXT, Plucker, iSilo), FB2, ZIP, AZW, MOBI, PRC, LIT, PKG, PDF, TXT, RB, RTF, LRF, TCR, JAR' yield s diff --git a/src/calibre/gui2/store/stores/open_library_plugin.py b/src/calibre/gui2/store/stores/open_library_plugin.py deleted file mode 100644 index b95f1bf930..0000000000 --- a/src/calibre/gui2/store/stores/open_library_plugin.py +++ /dev/null @@ -1,84 +0,0 @@ -# -*- coding: utf-8 -*- - -from __future__ import (unicode_literals, division, absolute_import, print_function) - -__license__ = 'GPL 3' -__copyright__ = '2011, John Schember ' -__docformat__ = 'restructuredtext en' - -import urllib2 -from contextlib import closing - -from lxml import html - -from PyQt4.Qt import QUrl - -from calibre import browser, url_slash_cleaner -from calibre.gui2 import open_url -from calibre.gui2.store import StorePlugin -from calibre.gui2.store.basic_config import BasicStoreConfig -from calibre.gui2.store.search_result import SearchResult -from calibre.gui2.store.web_store_dialog import WebStoreDialog - -class OpenLibraryStore(BasicStoreConfig, StorePlugin): - - def open(self, 
parent=None, detail_item=None, external=False): - url = 'http://openlibrary.org/' - - if external or self.config.get('open_external', False): - if detail_item: - url = url + detail_item - open_url(QUrl(url_slash_cleaner(url))) - else: - detail_url = None - if detail_item: - detail_url = url + detail_item - d = WebStoreDialog(self.gui, url, parent, detail_url) - d.setWindowTitle(self.name) - d.set_tags(self.config.get('tags', '')) - d.exec_() - - def search(self, query, max_results=10, timeout=60): - url = 'http://openlibrary.org/search?q=' + urllib2.quote(query) + '&has_fulltext=true' - - br = browser() - - counter = max_results - with closing(br.open(url, timeout=timeout)) as f: - doc = html.fromstring(f.read()) - for data in doc.xpath('//div[@id="searchResults"]/ul[@id="siteSearch"]/li'): - if counter <= 0: - break - - # Don't include books that don't have downloadable files. - if not data.xpath('boolean(./span[@class="actions"]//span[@class="label" and contains(text(), "Read")])'): - continue - id = ''.join(data.xpath('./span[@class="bookcover"]/a/@href')) - if not id: - continue - cover_url = ''.join(data.xpath('./span[@class="bookcover"]/a/img/@src')) - - title = ''.join(data.xpath('.//h3[@class="booktitle"]/a[@class="results"]/text()')) - author = ''.join(data.xpath('.//span[@class="bookauthor"]/a/text()')) - price = '$0.00' - - counter -= 1 - - s = SearchResult() - s.cover_url = cover_url - s.title = title.strip() - s.author = author.strip() - s.price = price - s.detail_item = id.strip() - s.drm = SearchResult.DRM_UNLOCKED - - yield s - - def get_details(self, search_result, timeout): - url = 'http://openlibrary.org/' - - br = browser() - with closing(br.open(url_slash_cleaner(url + search_result.detail_item), timeout=timeout)) as nf: - idata = html.fromstring(nf.read()) - search_result.formats = ', '.join(list(set(idata.xpath('//a[contains(@title, "Download")]/text()')))) - return True diff --git a/src/calibre/gui2/store/stores/pragmatic_bookshelf_plugin.py 
b/src/calibre/gui2/store/stores/pragmatic_bookshelf_plugin.py index 671186ba87..99b94778bf 100644 --- a/src/calibre/gui2/store/stores/pragmatic_bookshelf_plugin.py +++ b/src/calibre/gui2/store/stores/pragmatic_bookshelf_plugin.py @@ -7,10 +7,10 @@ __copyright__ = '2011, John Schember ' __docformat__ = 'restructuredtext en' from calibre.gui2.store.basic_config import BasicStoreConfig -from calibre.gui2.store.opensearch_store import OpenSearchStore +from calibre.gui2.store.opensearch_store import OpenSearchOPDSStore from calibre.gui2.store.search_result import SearchResult -class PragmaticBookshelfStore(BasicStoreConfig, OpenSearchStore): +class PragmaticBookshelfStore(BasicStoreConfig, OpenSearchOPDSStore): open_search_url = 'http://pragprog.com/catalog/search-description' web_url = 'http://pragprog.com/' @@ -18,7 +18,7 @@ class PragmaticBookshelfStore(BasicStoreConfig, OpenSearchStore): # http://pragprog.com/catalog.opds def search(self, query, max_results=10, timeout=60): - for s in OpenSearchStore.search(self, query, max_results, timeout): + for s in OpenSearchOPDSStore.search(self, query, max_results, timeout): s.drm = SearchResult.DRM_UNLOCKED s.formats = 'EPUB, PDF, MOBI' yield s diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py index b9dd2f3ed7..2fa43dc94c 100644 --- a/src/calibre/library/caches.py +++ b/src/calibre/library/caches.py @@ -1024,7 +1024,15 @@ class SortKeyGenerator(object): dt = 'datetime' elif sb == 'number': try: - val = float(val) + val = val.replace(',', '').strip() + p = 1 + for i, candidate in enumerate( + (' B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB')): + if val.endswith(candidate): + p = 1024**(i) + val = val[:-len(candidate)].strip() + break + val = float(val) * p except: val = 0.0 dt = 'float' diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index f203f36bd9..9d8a27d1fb 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -8,6 +8,7 @@ The database 
used to store ebook metadata ''' import os, sys, shutil, cStringIO, glob, time, functools, traceback, re, \ json, uuid, tempfile, hashlib +from collections import defaultdict import threading, random from itertools import repeat from math import ceil @@ -487,6 +488,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): self.refresh_ondevice = functools.partial(self.data.refresh_ondevice, self) self.refresh() self.last_update_check = self.last_modified() + self.format_metadata_cache = defaultdict(dict) def break_cycles(self): self.data.break_cycles() @@ -914,11 +916,15 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): mi.book_size = row[fm['size']] mi.ondevice_col= row[fm['ondevice']] mi.last_modified = row[fm['last_modified']] + id = idx if index_is_id else self.id(idx) formats = row[fm['formats']] + mi.format_metadata = {} if not formats: formats = None else: formats = formats.split(',') + for f in formats: + mi.format_metadata[f] = self.format_metadata(id, f) mi.formats = formats tags = row[fm['tags']] if tags: @@ -927,7 +933,6 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): if mi.series: mi.series_index = row[fm['series_index']] mi.rating = row[fm['rating']] - id = idx if index_is_id else self.id(idx) mi.set_identifiers(self.get_identifiers(id, index_is_id=True)) mi.application_id = id mi.id = id @@ -1127,13 +1132,21 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): if m: return m['mtime'] - def format_metadata(self, id_, fmt): + def format_metadata(self, id_, fmt, allow_cache=True): + if not fmt: + return {} + fmt = fmt.upper() + if allow_cache: + x = self.format_metadata_cache[id_].get(fmt, None) + if x is not None: + return x path = self.format_abspath(id_, fmt, index_is_id=True) ans = {} if path is not None: stat = os.stat(path) ans['size'] = stat.st_size ans['mtime'] = utcfromtimestamp(stat.st_mtime) + self.format_metadata_cache[id_][fmt] = ans return ans def 
format_hash(self, id_, fmt): @@ -1269,6 +1282,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): def add_format(self, index, format, stream, index_is_id=False, path=None, notify=True, replace=True): id = index if index_is_id else self.id(index) + if format: + self.format_metadata_cache[id].pop(format.upper(), None) if path is None: path = os.path.join(self.library_path, self.path(id, index_is_id=True)) name = self.conn.get('SELECT name FROM data WHERE book=? AND format=?', (id, format), all=False) @@ -1321,6 +1336,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): def remove_format(self, index, format, index_is_id=False, notify=True, commit=True, db_only=False): id = index if index_is_id else self.id(index) + if format: + self.format_metadata_cache[id].pop(format.upper(), None) name = self.conn.get('SELECT name FROM data WHERE book=? AND format=?', (id, format), all=False) if name: if not db_only: diff --git a/src/calibre/library/schema_upgrades.py b/src/calibre/library/schema_upgrades.py index 2907e43098..a9cd36a588 100644 --- a/src/calibre/library/schema_upgrades.py +++ b/src/calibre/library/schema_upgrades.py @@ -606,6 +606,7 @@ class SchemaUpgrade(object): ''' script = ''' + BEGIN TRANSACTION; ALTER TABLE authors ADD COLUMN link TEXT NOT NULL DEFAULT ""; ''' self.conn.executescript(script) diff --git a/src/calibre/manual/faq.rst b/src/calibre/manual/faq.rst index c67d44b7d5..ee72d0d442 100644 --- a/src/calibre/manual/faq.rst +++ b/src/calibre/manual/faq.rst @@ -340,6 +340,10 @@ When you first run |app|, it will ask you for a folder in which to store your bo Metadata about the books is stored in the file ``metadata.db`` at the top level of the library folder This file is is a sqlite database. When backing up your library make sure you copy the entire folder and all its sub-folders. +The library folder and all its contents make up what is called a *|app| library*. You can have multiple such libraries.
To manage the libraries, click the |app| icon on the toolbar. You can create new libraries, remove/rename existing ones and switch between libraries easily. + +You can copy or move books between different libraries (once you have more than one library set up) by right-clicking on a book and selecting the :guilabel:`Copy to library` action. + How does |app| manage author names and sorting? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/src/calibre/manual/gui.rst b/src/calibre/manual/gui.rst index e5e789d9dd..1cd48a8dce 100644 --- a/src/calibre/manual/gui.rst +++ b/src/calibre/manual/gui.rst @@ -164,7 +164,7 @@ Library .. |lii| image:: images/library.png :class: float-right-img -|lii| The :guilabel:`Library` action allows you to create, switch between, rename or delete a Library. |app| allows you to create as many libraries as you wish. You could for instance create a fiction library, a non fiction library, a foreign language library, a project library, basically any structure that suits your needs. Libraries are the highest organizational structure within |app|, each library has its own set of books, tags, categories and base storage location. +|lii| The :guilabel:`Library` action allows you to create, switch between, rename or remove a Library. |app| allows you to create as many libraries as you wish. You could for instance create a fiction library, a non fiction library, a foreign language library, a project library, basically any structure that suits your needs. Libraries are the highest organizational structure within |app|, each library has its own set of books, tags, categories and base storage location. 1. **Switch/Create library**: This action allows you to; a) connect to a pre-existing |app| library at another location from your currently open library, b) Create and empty library at a new location or, c) Move the current Library to a newly specified location. 2.
**Quick Switch**: This action allows you to switch between libraries that have been registered or created within |app|. @@ -175,6 +175,8 @@ Library .. note:: Metadata about your ebooks like title/author/tags/etc. is stored in a single file in your |app| library folder called metadata.db. If this file gets corrupted (a very rare event), you can lose the metadata. Fortunately, |app| automatically backs up the metadata for every individual book in the book's folder as an .opf file. By using the Restore Library action under Library Maintenance described above, you can have |app| rebuild the metadata.db file from the individual .opf files for you. +You can copy or move books between different libraries (once you have more than one library set up) by right-clicking on the book and selecting the action :guilabel:`Copy to library`. + .. _device: Device @@ -268,6 +270,7 @@ Preferences .. |cbi| image:: images/preferences.png The Preferences Action allows you to change the way various aspects of |app| work. To access it, click the |cbi|. +You can also re-run the Welcome Wizard by clicking the arrow next to the preferences button. .. _catalogs: diff --git a/src/calibre/manual/template_lang.rst b/src/calibre/manual/template_lang.rst index 9f33bcbb9e..b8808a6abf 100644 --- a/src/calibre/manual/template_lang.rst +++ b/src/calibre/manual/template_lang.rst @@ -124,6 +124,8 @@ The functions available are listed below. Note that the definitive documentation * ``capitalize()`` -- return the value with the first letter upper case and the rest lower case. * ``contains(pattern, text if match, text if not match)`` -- checks if field contains matches for the regular expression `pattern`. Returns `text if match` if matches are found, otherwise it returns `text if no match`. * ``count(separator)`` -- interprets the value as a list of items separated by `separator`, returning the number of items in the list. Most lists use a comma as the separator, but authors uses an ampersand.
Examples: `{tags:count(,)}`, `{authors:count(&)}` + * ``format_number(template)`` -- interprets the value as a number and formats that number using a Python formatting template such as "{0:5.2f}" or "{0:,d}" or "${0:5,.2f}". The field_name part of the template must be a 0 (zero) (the "{0:" in the above examples). See the template language and Python documentation for more examples. Returns the empty string if formatting fails. + * ``human_readable()`` -- expects the value to be a number and returns a string representing that number in KB, MB, GB, etc. * ``ifempty(text)`` -- if the field is not empty, return the value of the field. Otherwise return `text`. * ``in_list(separator, pattern, found_val, not_found_val)`` -- interpret the field as a list of items separated by `separator`, comparing the `pattern` against each value in the list. If the pattern matches a value, return `found_val`, otherwise return `not_found_val`. * ``list_item(index, separator)`` -- interpret the field as a list of items separated by `separator`, returning the `index`th item. The first item is number zero. The last item can be returned using `list_item(-1,separator)`. If the item is not in the list, then the empty value is returned. The separator has the same meaning as in the `count` function. @@ -257,6 +259,8 @@ The following functions are available in addition to those described in single-f iso : the date with time and timezone. Must be the only format present. * ``eval(string)`` -- evaluates the string as a program, passing the local variables (those ``assign`` ed to). This permits using the template processor to construct complex results from local variables. + * ``formats_modtimes(date_format)`` -- return a comma-separated list of colon-separated items representing modification times for the formats of a book. The date_format parameter specifies how the date is to be formatted. See the format_date function for details.
You can use the select function to get the mod time for a specific format. Note that format names are always uppercase, as in EPUB. + * ``formats_sizes()`` -- return a comma-separated list of colon-separated items representing sizes in bytes of the formats of a book. You can use the select function to get the size for a specific format. Note that format names are always uppercase, as in EPUB. * ``has_cover()`` -- return ``Yes`` if the book has a cover, otherwise return the empty string * ``not(value)`` -- returns the string "1" if the value is empty, otherwise returns the empty string. This function works well with test or first_non_empty. You can have as many values as you want. * ``merge_lists(list1, list2, separator)`` -- return a list made by merging the items in list1 and list2, removing duplicate items using a case-insensitive compare. If items differ in case, the one in list1 is used. The items in list1 and list2 are separated by separator, as are the items in the returned list. diff --git a/src/calibre/utils/date.py b/src/calibre/utils/date.py index c93e69874c..2c973da224 100644 --- a/src/calibre/utils/date.py +++ b/src/calibre/utils/date.py @@ -121,7 +121,8 @@ def isoformat(date_time, assume_utc=False, as_utc=True, sep='T'): date_time = date_time.replace(tzinfo=_utc_tz if assume_utc else _local_tz) date_time = date_time.astimezone(_utc_tz if as_utc else _local_tz) - return unicode(date_time.isoformat(sep)) + # str(sep) because isoformat barfs with unicode sep on python 2.x + return unicode(date_time.isoformat(str(sep))) def as_local_time(date_time, assume_utc=True): if not hasattr(date_time, 'tzinfo'): diff --git a/src/calibre/utils/filenames.py b/src/calibre/utils/filenames.py index cca75915b5..8c6daa5adf 100644 --- a/src/calibre/utils/filenames.py +++ b/src/calibre/utils/filenames.py @@ -93,3 +93,24 @@ def find_executable_in_path(name, path=None): q = os.path.abspath(os.path.join(x, name)) if os.access(q, os.X_OK): return q + +def
is_case_sensitive(path): + ''' + Return True if the filesystem is case sensitive. + + path must be the path to an existing directory. You must have permission + to create and delete files in this directory. The results of this test + apply to the filesystem containing the directory in path. + ''' + is_case_sensitive = False + if not iswindows: + name1, name2 = ('calibre_test_case_sensitivity.txt', + 'calibre_TesT_CaSe_sensitiVitY.Txt') + f1, f2 = os.path.join(path, name1), os.path.join(path, name2) + if os.path.exists(f1): + os.remove(f1) + open(f1, 'w').close() + is_case_sensitive = not os.path.exists(f2) + os.remove(f1) + return is_case_sensitive + diff --git a/src/calibre/utils/formatter_functions.py b/src/calibre/utils/formatter_functions.py index fcfd69d16d..6916b0903a 100644 --- a/src/calibre/utils/formatter_functions.py +++ b/src/calibre/utils/formatter_functions.py @@ -10,6 +10,7 @@ __docformat__ = 'restructuredtext en' import inspect, re, traceback +from calibre import human_readable from calibre.utils.titlecase import titlecase from calibre.utils.icu import capitalize, strcmp, sort_key from calibre.utils.date import parse_date, format_date, now, UNDEFINED_DATE @@ -519,6 +520,80 @@ class BuiltinSelect(BuiltinFormatterFunction): return v[len(key)+1:] return '' +class BuiltinFormatsModtimes(BuiltinFormatterFunction): + name = 'formats_modtimes' + arg_count = 1 + category = 'Get values from metadata' + __doc__ = doc = _('formats_modtimes(date_format) -- return a comma-separated ' + 'list of colon_separated items representing modification times ' + 'for the formats of a book. The date_format parameter ' + 'specifies how the date is to be formatted. See the ' + 'date_format function for details. You can use the select ' + 'function to get the mod time for a specific ' + 'format. Note that format names are always uppercase, ' + 'as in EPUB.' 
+ ) + + def evaluate(self, formatter, kwargs, mi, locals, fmt): + fmt_data = mi.get('format_metadata', {}) + return ','.join(k.upper()+':'+format_date(v['mtime'], fmt) + for k,v in fmt_data.iteritems()) + +class BuiltinFormatsSizes(BuiltinFormatterFunction): + name = 'formats_sizes' + arg_count = 0 + category = 'Get values from metadata' + __doc__ = doc = _('formats_sizes() -- return a comma-separated list of ' + 'colon_separated items representing sizes in bytes' + 'of the formats of a book. You can use the select ' + 'function to get the size for a specific ' + 'format. Note that format names are always uppercase, ' + 'as in EPUB.' + ) + + def evaluate(self, formatter, kwargs, mi, locals): + fmt_data = mi.get('format_metadata', {}) + return ','.join(k.upper()+':'+str(v['size']) for k,v in fmt_data.iteritems()) + +class BuiltinHumanReadable(BuiltinFormatterFunction): + name = 'human_readable' + arg_count = 1 + category = 'Formatting values' + __doc__ = doc = _('human_readable(v) -- return a string ' + 'representing the number v in KB, MB, GB, etc.' + ) + + def evaluate(self, formatter, kwargs, mi, locals, val): + try: + return human_readable(long(val)) + except: + return '' + +class BuiltinFormatNumber(BuiltinFormatterFunction): + name = 'format_number' + arg_count = 2 + category = 'Formatting values' + __doc__ = doc = _('format_number(v, template) -- format the number v using ' + 'a python formatting template such as "{0:5.2f}" or ' + '"{0:,d}" or "${0:5,.2f}". The field_name part of the ' + 'template must be a 0 (zero) (the "{0:" in the above examples). ' + 'See the template language and python documentation for more ' + 'examples. Returns the empty string if formatting fails.' 
+ ) + + def evaluate(self, formatter, kwargs, mi, locals, val, template): + if val == '' or val == 'None': + return '' + try: + return template.format(float(val)) + except: + pass + try: + return template.format(int(val)) + except: + pass + return '' + class BuiltinSublist(BuiltinFormatterFunction): name = 'sublist' arg_count = 4 @@ -591,7 +666,7 @@ class BuiltinSubitems(BuiltinFormatterFunction): class BuiltinFormatDate(BuiltinFormatterFunction): name = 'format_date' arg_count = 2 - category = 'Date functions' + category = 'Formatting values' __doc__ = doc = _('format_date(val, format_string) -- format the value, ' 'which must be a date, using the format_string, returning a string. ' 'The formatting codes are: ' @@ -811,52 +886,22 @@ class BuiltinDaysBetween(BuiltinFormatterFunction): i = d1 - d2 return str('%d.%d'%(i.days, i.seconds/8640)) - -builtin_add = BuiltinAdd() -builtin_and = BuiltinAnd() -builtin_assign = BuiltinAssign() -builtin_booksize = BuiltinBooksize() -builtin_capitalize = BuiltinCapitalize() -builtin_cmp = BuiltinCmp() -builtin_contains = BuiltinContains() -builtin_count = BuiltinCount() -builtin_days_between= BuiltinDaysBetween() -builtin_divide = BuiltinDivide() -builtin_eval = BuiltinEval() -builtin_first_non_empty = BuiltinFirstNonEmpty() -builtin_field = BuiltinField() -builtin_format_date = BuiltinFormatDate() -builtin_has_cover = BuiltinHasCover() -builtin_identifier_in_list = BuiltinIdentifierInList() -builtin_ifempty = BuiltinIfempty() -builtin_in_list = BuiltinInList() -builtin_list_item = BuiltinListitem() -builtin_lookup = BuiltinLookup() -builtin_lowercase = BuiltinLowercase() -builtin_merge_lists = BuiltinMergeLists() -builtin_multiply = BuiltinMultiply() -builtin_not = BuiltinNot() -builtin_ondevice = BuiltinOndevice() -builtin_or = BuiltinOr() -builtin_print = BuiltinPrint() -builtin_raw_field = BuiltinRawField() -builtin_re = BuiltinRe() -builtin_select = BuiltinSelect() -builtin_shorten = BuiltinShorten() -builtin_strcat = 
BuiltinStrcat() -builtin_strcmp = BuiltinStrcmp() -builtin_str_in_list = BuiltinStrInList() -builtin_subitems = BuiltinSubitems() -builtin_sublist = BuiltinSublist() -builtin_substr = BuiltinSubstr() -builtin_subtract = BuiltinSubtract() -builtin_swaparound = BuiltinSwapAroundComma() -builtin_switch = BuiltinSwitch() -builtin_template = BuiltinTemplate() -builtin_test = BuiltinTest() -builtin_titlecase = BuiltinTitlecase() -builtin_today = BuiltinToday() -builtin_uppercase = BuiltinUppercase() +formatter_builtins = [ + BuiltinAdd(), BuiltinAnd(), BuiltinAssign(), BuiltinBooksize(), + BuiltinCapitalize(), BuiltinCmp(), BuiltinContains(), BuiltinCount(), + BuiltinDaysBetween(), BuiltinDivide(), BuiltinEval(), + BuiltinFirstNonEmpty(), BuiltinField(), BuiltinFormatDate(), + BuiltinFormatNumber(), BuiltinFormatsModtimes(), BuiltinFormatsSizes(), + BuiltinHasCover(), BuiltinHumanReadable(), BuiltinIdentifierInList(), + BuiltinIfempty(), BuiltinInList(), BuiltinListitem(), BuiltinLookup(), + BuiltinLowercase(), BuiltinMergeLists(), BuiltinMultiply(), BuiltinNot(), + BuiltinOndevice(), BuiltinOr(), BuiltinPrint(), BuiltinRawField(), + BuiltinRe(), BuiltinSelect(), BuiltinShorten(), BuiltinStrcat(), + BuiltinStrcmp(), BuiltinStrInList(), BuiltinSubitems(), BuiltinSublist(), + BuiltinSubstr(), BuiltinSubtract(), BuiltinSwapAroundComma(), + BuiltinSwitch(), BuiltinTemplate(), BuiltinTest(), BuiltinTitlecase(), + BuiltinToday(), BuiltinUppercase(), +] class FormatterUserFunction(FormatterFunction): def __init__(self, name, doc, arg_count, program_text): diff --git a/src/calibre/utils/localization.py b/src/calibre/utils/localization.py index aab0f29995..eaf8ac675a 100644 --- a/src/calibre/utils/localization.py +++ b/src/calibre/utils/localization.py @@ -150,7 +150,9 @@ def get_language(lang): global _iso639 lang = _lcase_map.get(lang, lang) if lang in _extra_lang_codes: - return _extra_lang_codes[lang] + # The translator was not active when _extra_lang_codes was defined, so + 
# re-translate + return _(_extra_lang_codes[lang]) ip = P('localization/iso639.pickle') if not os.path.exists(ip): return lang diff --git a/src/calibre/utils/opensearch/__init__.py b/src/calibre/utils/opensearch/__init__.py index e69de29bb2..62bd0e0236 100644 --- a/src/calibre/utils/opensearch/__init__.py +++ b/src/calibre/utils/opensearch/__init__.py @@ -0,0 +1,37 @@ +''' +Based on the OpenSearch Python module by Ed Summers from +https://github.com/edsu/opensearch . + +This module is heavily modified and does not implement all the features from +the original. The ability for the module to perform a search and retrieve +search results has been removed. The original module used a modified version +of the Universal feed parser from http://feedparser.org/ . The use of +FeedParser made getting search results very slow. There is also a bug in the +modified FeedParser that causes the system to run out of file descriptors. + +Instead of fixing the modified feed parser it was decided to remove it and +manually parse the feeds in a set of type-specific classes. This is much +faster and, since we know the feed format in advance, simpler than using +FeedParser. Also, replacing the modified FeedParser with the newest version +of FeedParser caused some feeds to be parsed incorrectly and result in a loss +of data. + +The module was also rewritten to use lxml instead of MiniDom. + + +Usage: + +description = Description(open_search_url) +url_template = description.get_best_template() +if not url_template: + return +query = Query(url_template) + +# set up initial values. +query.searchTerms = search_terms +# Note the count is ignored by some feeds.
+query.count = max_results + +search_url = query.url() + +''' diff --git a/src/calibre/utils/opensearch/description.py b/src/calibre/utils/opensearch/description.py index 0b5afd8a7e..d5922d0c2b 100644 --- a/src/calibre/utils/opensearch/description.py +++ b/src/calibre/utils/opensearch/description.py @@ -40,7 +40,7 @@ class Description(object): with closing(br.open(url, timeout=15)) as f: doc = etree.fromstring(f.read()) - # version 1.1 has repeating Url elements + # version 1.1 has repeating Url elements. self.urls = [] for element in doc.xpath('//*[local-name() = "Url"]'): template = element.get('template') @@ -50,9 +50,22 @@ class Description(object): url.template = template url.type = type self.urls.append(url) + # Stanza catalogs. + for element in doc.xpath('//*[local-name() = "link"]'): + if element.get('rel') != 'search': + continue + href = element.get('href') + type = element.get('type') + if href and type: + url = URL() + url.template = href + url.type = type + self.urls.append(url) - # this is version 1.0 specific - self.url = ''.join(doc.xpath('//*[local-name() = "Url"][1]//text()')) + # this is version 1.0 specific. + self.url = '' + if not self.urls: + self.url = ''.join(doc.xpath('//*[local-name() = "Url"][1]//text()')) self.format = ''.join(doc.xpath('//*[local-name() = "Format"][1]//text()')) self.shortname = ''.join(doc.xpath('//*[local-name() = "ShortName"][1]//text()'))