diff --git a/src/calibre/db/__init__.py b/src/calibre/db/__init__.py index 7a5c8d51d4..bb0f679fd8 100644 --- a/src/calibre/db/__init__.py +++ b/src/calibre/db/__init__.py @@ -54,7 +54,7 @@ def _get_series_values(val): pass return (val, None) -def get_data_as_dict(self, prefix=None, authors_as_string=False, ids=None): +def get_data_as_dict(self, prefix=None, authors_as_string=False, ids=None, convert_to_local_tz=True): ''' Return all metadata stored in the database as a dict. Includes paths to the cover and each format. @@ -66,6 +66,7 @@ def get_data_as_dict(self, prefix=None, authors_as_string=False, ids=None): ''' import os from calibre.ebooks.metadata import authors_to_string + from calibre.utils.date import as_local_time backend = getattr(self, 'backend', self) # Works with both old and legacy interfaces if prefix is None: prefix = backend.library_path @@ -88,6 +89,10 @@ def get_data_as_dict(self, prefix=None, authors_as_string=False, ids=None): x = {} for field in FIELDS: x[field] = record[self.FIELD_MAP[field]] + if convert_to_local_tz and hasattr(self, 'new_api'): + for tf in ('timestamp', 'pubdate', 'last_modified'): + x[tf] = as_local_time(x[tf]) + data.append(x) x['id'] = db_id x['formats'] = [] diff --git a/src/calibre/db/search.py b/src/calibre/db/search.py index f00b7102e7..98886b8fc9 100644 --- a/src/calibre/db/search.py +++ b/src/calibre/db/search.py @@ -13,7 +13,7 @@ from datetime import timedelta from calibre.constants import preferred_encoding from calibre.utils.config_base import prefs -from calibre.utils.date import parse_date, UNDEFINED_DATE, now +from calibre.utils.date import parse_date, UNDEFINED_DATE, now, dt_as_local from calibre.utils.icu import primary_find, sort_key from calibre.utils.localization import lang_map, canonicalize_lang from calibre.utils.search_query_parser import SearchQueryParser, ParseException @@ -211,7 +211,7 @@ class DateSearch(object): # {{{ for v, book_ids in field_iter(): if isinstance(v, (str, unicode)): v = 
parse_date(v) - if v is not None and relop(v, qd, field_count): + if v is not None and relop(dt_as_local(v), qd, field_count): matches |= book_ids return matches diff --git a/src/calibre/db/tables.py b/src/calibre/db/tables.py index 412c937544..c8d974fcf7 100644 --- a/src/calibre/db/tables.py +++ b/src/calibre/db/tables.py @@ -7,16 +7,37 @@ __license__ = 'GPL v3' __copyright__ = '2011, Kovid Goyal ' __docformat__ = 'restructuredtext en' -from datetime import datetime +from datetime import datetime, timedelta from collections import defaultdict -from dateutil.tz import tzoffset - from calibre.constants import plugins -from calibre.utils.date import parse_date, local_tz, UNDEFINED_DATE +from calibre.utils.date import parse_date, UNDEFINED_DATE, utc_tz from calibre.ebooks.metadata import author_to_author_sort -_c_speedup = plugins['speedup'][0] +_c_speedup = plugins['speedup'][0].parse_date + +def c_parse(val): + try: + year, month, day, hour, minutes, seconds, tzsecs = _c_speedup(val) + except (AttributeError, TypeError): + # If a value like 2001 is stored in the column, apsw will return it as + # an int + if isinstance(val, (int, float)): + return datetime(int(val), 1, 3, tzinfo=utc_tz) + except: + pass + else: + try: + ans = datetime(year, month, day, hour, minutes, seconds, tzinfo=utc_tz) + if tzsecs != 0: + ans -= timedelta(seconds=tzsecs) + except OverflowError: + ans = UNDEFINED_DATE + return ans + try: + return parse_date(val, as_utc=True, assume_utc=True) + except ValueError: + return UNDEFINED_DATE ONE_ONE, MANY_ONE, MANY_MANY = xrange(3) @@ -24,28 +45,6 @@ class Null: pass null = Null() -def _c_convert_timestamp(val): - if not val: - return None - try: - ret = _c_speedup.parse_date(val.strip()) - except AttributeError: - # If a value like 2001 is stored in the column, apsw will return it as - # an int - if isinstance(val, (int, float)): - return datetime(int(val), 1, 1, tzinfo=tzoffset(None, 0)).astimezone(local_tz) - ret = None - except: - ret = None - 
if ret is None: - return parse_date(val, as_utc=False) - year, month, day, hour, minutes, seconds, tzsecs = ret - try: - return datetime(year, month, day, hour, minutes, seconds, - tzinfo=tzoffset(None, tzsecs)).astimezone(local_tz) - except OverflowError: - return UNDEFINED_DATE.astimezone(local_tz) - class Table(object): def __init__(self, name, metadata, link_table=None): @@ -54,7 +53,7 @@ class Table(object): # self.unserialize() maps values from the db to python objects self.unserialize = { - 'datetime': _c_convert_timestamp, + 'datetime': c_parse, 'bool': bool }.get(metadata['datatype'], None) if name == 'authors': @@ -89,7 +88,6 @@ class OneToOneTable(Table): table_type = ONE_ONE def read(self, db): - self.book_col_map = {} idcol = 'id' if self.metadata['table'] == 'books' else 'book' query = db.conn.execute('SELECT {0}, {1} FROM {2}'.format(idcol, self.metadata['column'], self.metadata['table'])) @@ -175,7 +173,7 @@ class ManyToOneTable(Table): def read(self, db): self.id_map = {} - self.col_book_map = {} + self.col_book_map = defaultdict(set) self.book_col_map = {} self.read_id_maps(db) self.read_maps(db) @@ -190,13 +188,13 @@ class ManyToOneTable(Table): self.id_map = {book_id:us(val) for book_id, val in query} def read_maps(self, db): - for row in db.conn.execute( + cbm = self.col_book_map + bcm = self.book_col_map + for book, item_id in db.conn.execute( 'SELECT book, {0} FROM {1}'.format( self.metadata['link_column'], self.link_table)): - if row[1] not in self.col_book_map: - self.col_book_map[row[1]] = set() - self.col_book_map[row[1]].add(row[0]) - self.book_col_map[row[0]] = row[1] + cbm[item_id].add(book) + bcm[book] = item_id def remove_books(self, book_ids, db): clean = set() @@ -272,17 +270,14 @@ class ManyToManyTable(ManyToOneTable): do_clean_on_remove = True def read_maps(self, db): - for row in db.conn.execute( - self.selectq.format(self.metadata['link_column'], self.link_table)): - if row[1] not in self.col_book_map: - 
self.col_book_map[row[1]] = set() - self.col_book_map[row[1]].add(row[0]) - if row[0] not in self.book_col_map: - self.book_col_map[row[0]] = [] - self.book_col_map[row[0]].append(row[1]) + bcm = defaultdict(list) + cbm = self.col_book_map + for book, item_id in db.conn.execute( + self.selectq.format(self.metadata['link_column'], self.link_table)): + cbm[item_id].add(book) + bcm[book].append(item_id) - for key in tuple(self.book_col_map.iterkeys()): - self.book_col_map[key] = tuple(self.book_col_map[key]) + self.book_col_map = {k:tuple(v) for k, v in bcm.iteritems()} def remove_books(self, book_ids, db): clean = set() @@ -351,17 +346,16 @@ class ManyToManyTable(ManyToOneTable): class AuthorsTable(ManyToManyTable): def read_id_maps(self, db): - self.alink_map = {} - self.asort_map = {} - self.id_map = {} + self.alink_map = lm = {} + self.asort_map = sm = {} + self.id_map = im = {} us = self.unserialize - for row in db.conn.execute( + for aid, name, sort, link in db.conn.execute( 'SELECT id, name, sort, link FROM authors'): - val = us(row[1]) - self.id_map[row[0]] = self.unserialize(val) - self.asort_map[row[0]] = (row[2] if row[2] else - author_to_author_sort(val)) - self.alink_map[row[0]] = row[3] + name = us(name) + im[aid] = name + sm[aid] = (sort or author_to_author_sort(name)) + lm[aid] = link def set_sort_names(self, aus_map, db): aus_map = {aid:(a or '').strip() for aid, a in aus_map.iteritems()} @@ -404,22 +398,20 @@ class FormatsTable(ManyToManyTable): pass def read_maps(self, db): - self.fname_map = defaultdict(dict) - self.size_map = defaultdict(dict) - for row in db.conn.execute('SELECT book, format, name, uncompressed_size FROM data'): - if row[1] is not None: - fmt = row[1].upper() - if fmt not in self.col_book_map: - self.col_book_map[fmt] = set() - self.col_book_map[fmt].add(row[0]) - if row[0] not in self.book_col_map: - self.book_col_map[row[0]] = [] - self.book_col_map[row[0]].append(fmt) - self.fname_map[row[0]][fmt] = row[2] - 
self.size_map[row[0]][fmt] = row[3] + self.fname_map = fnm = defaultdict(dict) + self.size_map = sm = defaultdict(dict) + self.col_book_map = cbm = defaultdict(set) + bcm = defaultdict(list) - for key in tuple(self.book_col_map.iterkeys()): - self.book_col_map[key] = tuple(sorted(self.book_col_map[key])) + for book, fmt, name, sz in db.conn.execute('SELECT book, format, name, uncompressed_size FROM data'): + if fmt is not None: + fmt = fmt.upper() + cbm[fmt].add(book) + bcm[book].append(fmt) + fnm[book][fmt] = name + sm[book][fmt] = sz + + self.book_col_map = {k:tuple(sorted(v)) for k, v in bcm.iteritems()} def remove_books(self, book_ids, db): clean = ManyToManyTable.remove_books(self, book_ids, db) @@ -485,14 +477,12 @@ class IdentifiersTable(ManyToManyTable): pass def read_maps(self, db): - for row in db.conn.execute('SELECT book, type, val FROM identifiers'): - if row[1] is not None and row[2] is not None: - if row[1] not in self.col_book_map: - self.col_book_map[row[1]] = set() - self.col_book_map[row[1]].add(row[0]) - if row[0] not in self.book_col_map: - self.book_col_map[row[0]] = {} - self.book_col_map[row[0]][row[1]] = row[2] + self.book_col_map = defaultdict(dict) + self.col_book_map = defaultdict(set) + for book, typ, val in db.conn.execute('SELECT book, type, val FROM identifiers'): + if typ is not None and val is not None: + self.col_book_map[typ].add(book) + self.book_col_map[book][typ] = val def remove_books(self, book_ids, db): clean = set() diff --git a/src/calibre/db/tests/profiling.py b/src/calibre/db/tests/profiling.py new file mode 100644 index 0000000000..6cff57f990 --- /dev/null +++ b/src/calibre/db/tests/profiling.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2013, Kovid Goyal ' + +import os, cProfile +from tempfile import gettempdir + +from calibre.db.legacy import LibraryDatabase + +db 
= None +def initdb(path): + global db + db = LibraryDatabase(os.path.expanduser(path)) + +def show_stats(path): + from pstats import Stats + s = Stats(path) + s.sort_stats('cumulative') + s.print_stats(30) + +def main(): + stats = os.path.join(gettempdir(), 'read_db.stats') + pr = cProfile.Profile() + pr.enable() + initdb('~/documents/largelib') + pr.disable() + pr.dump_stats(stats) + show_stats(stats) + print ('Stats saved to', stats) + +if __name__ == '__main__': + main() diff --git a/src/calibre/db/tests/reading.py b/src/calibre/db/tests/reading.py index fcf309ea66..eea8cd2eaa 100644 --- a/src/calibre/db/tests/reading.py +++ b/src/calibre/db/tests/reading.py @@ -385,3 +385,21 @@ class ReadingTest(BaseTest): self.assertFalse(x.has_book(Metadata(title[:1]))) db.close() # }}} + + def test_datetime(self): + ' Test the reading of datetimes stored in the db ' + from calibre.utils.date import parse_date + from calibre.db.tables import c_parse, UNDEFINED_DATE, _c_speedup + + # First test parsing of string to UTC time + for raw in ('2013-07-22 15:18:29+05:30', ' 2013-07-22 15:18:29+00:00', '2013-07-22 15:18:29', '2003-09-21 23:30:00-06:00'): + self.assertTrue(_c_speedup(raw)) + ctime = c_parse(raw) + pytime = parse_date(raw, assume_utc=True) + self.assertEqual(ctime, pytime) + + self.assertEqual(c_parse(2003).year, 2003) + for x in (None, '', 'abc'): + self.assertEqual(UNDEFINED_DATE, c_parse(x)) + + diff --git a/src/calibre/library/catalogs/epub_mobi_builder.py b/src/calibre/library/catalogs/epub_mobi_builder.py index e31b9808b0..50c29cdbab 100644 --- a/src/calibre/library/catalogs/epub_mobi_builder.py +++ b/src/calibre/library/catalogs/epub_mobi_builder.py @@ -18,7 +18,7 @@ from calibre.ebooks.metadata import author_to_author_sort from calibre.library.catalogs import AuthorSortMismatchException, EmptyCatalogException, \ InvalidGenresSourceFieldException from calibre.ptempfile import PersistentTemporaryDirectory -from calibre.utils.date import format_date, 
is_date_undefined, now as nowf +from calibre.utils.date import format_date, is_date_undefined, now as nowf, as_local_time from calibre.utils.filenames import ascii_text, shorten_components_to from calibre.utils.icu import capitalize, collation_order, sort_key from calibre.utils.magick.draw import thumbnail @@ -940,7 +940,7 @@ class CatalogBuilder(object): if is_date_undefined(record['pubdate']): this_title['date'] = None else: - this_title['date'] = strftime(u'%B %Y', record['pubdate'].timetuple()) + this_title['date'] = strftime(u'%B %Y', as_local_time(record['pubdate']).timetuple()) this_title['timestamp'] = record['timestamp'] diff --git a/src/calibre/library/cli.py b/src/calibre/library/cli.py index 5a888f672e..012d88dace 100644 --- a/src/calibre/library/cli.py +++ b/src/calibre/library/cli.py @@ -74,7 +74,7 @@ def do_list(db, fields, afields, sort_by, ascending, search_text, line_width, se db.sort(sort_by, ascending) if search_text: db.search(search_text) - data = db.get_data_as_dict(prefix, authors_as_string=True) + data = db.get_data_as_dict(prefix, authors_as_string=True, convert_to_local_tz=False) if limit > -1: data = data[:limit] fields = ['id'] + fields diff --git a/src/calibre/library/server/content.py b/src/calibre/library/server/content.py index 45a46d6c56..cdc569f8b7 100644 --- a/src/calibre/library/server/content.py +++ b/src/calibre/library/server/content.py @@ -10,7 +10,7 @@ import re, os, posixpath import cherrypy from calibre import fit_image, guess_type -from calibre.utils.date import fromtimestamp +from calibre.utils.date import fromtimestamp, as_utc from calibre.library.caches import SortKeyGenerator from calibre.library.save_to_disk import find_plugboard from calibre.ebooks.metadata import authors_to_string @@ -54,6 +54,7 @@ class ContentServer(object): Generates a locale independent, english timestamp from a datetime object ''' + updated = as_utc(updated) lm = updated.strftime('day, %d month %Y %H:%M:%S GMT') day ={0:'Sun', 1:'Mon', 
2:'Tue', 3:'Wed', 4:'Thu', 5:'Fri', 6:'Sat'} lm = lm.replace('day', day[int(updated.strftime('%w'))]) diff --git a/src/calibre/library/server/mobile.py b/src/calibre/library/server/mobile.py index 51bfca204a..767a48a9d9 100644 --- a/src/calibre/library/server/mobile.py +++ b/src/calibre/library/server/mobile.py @@ -19,7 +19,7 @@ from calibre.library.server.utils import strftime, format_tag_string from calibre.ebooks.metadata import fmt_sidx from calibre.constants import __appname__ from calibre import human_readable, isbytestring -from calibre.utils.date import utcfromtimestamp +from calibre.utils.date import utcfromtimestamp, as_local_time from calibre.utils.filenames import ascii_filename from calibre.utils.icu import sort_key @@ -254,7 +254,7 @@ class MobileServer(object): no_tag_count=True) book['title'] = record[FM['title']] for x in ('timestamp', 'pubdate'): - book[x] = strftime('%d %b, %Y', record[FM[x]]) + book[x] = strftime('%d %b, %Y', as_local_time(record[FM[x]])) book['id'] = record[FM['id']] books.append(book) for key in CKEYS: diff --git a/src/calibre/library/server/opds.py b/src/calibre/library/server/opds.py index 36a65661d1..b4e5c7b265 100644 --- a/src/calibre/library/server/opds.py +++ b/src/calibre/library/server/opds.py @@ -22,6 +22,7 @@ from calibre.library.server import custom_fields_to_display from calibre.library.server.utils import format_tag_string, Offsets from calibre import guess_type, prepare_string_for_xml as xml from calibre.utils.icu import sort_key +from calibre.utils.date import as_utc BASE_HREFS = { 0 : '/stanza', @@ -58,7 +59,7 @@ ID = E.id ICON = E.icon def UPDATED(dt, *args, **kwargs): - return E.updated(dt.strftime('%Y-%m-%dT%H:%M:%S+00:00'), *args, **kwargs) + return E.updated(as_utc(dt).strftime('%Y-%m-%dT%H:%M:%S+00:00'), *args, **kwargs) LINK = partial(E.link, type='application/atom+xml') NAVLINK = partial(E.link, diff --git a/src/calibre/utils/date.py b/src/calibre/utils/date.py index 1a2289681c..0a8d779af8 100644 --- 
a/src/calibre/utils/date.py +++ b/src/calibre/utils/date.py @@ -6,23 +6,47 @@ __license__ = 'GPL v3' __copyright__ = '2010, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import re -from datetime import datetime, time, timedelta +import re, time +from datetime import datetime, time as dtime, timedelta from functools import partial -from dateutil.tz import tzlocal, tzutc +from dateutil.tz import tzlocal, tzutc, EPOCHORDINAL from calibre import strftime class SafeLocalTimeZone(tzlocal): - ''' - Assume DST was not in effect for historical dates, if DST - data for the local timezone is not present in the operating system. - ''' def _isdst(self, dt): + # We can't use mktime here. It is unstable when deciding if + # the hour near to a change is DST or not. + # + # timestamp = time.mktime((dt.year, dt.month, dt.day, dt.hour, + # dt.minute, dt.second, dt.weekday(), 0, -1)) + # return time.localtime(timestamp).tm_isdst + # + # The code above yields the following result: + # + #>>> import tz, datetime + #>>> t = tz.tzlocal() + #>>> datetime.datetime(2003,2,15,23,tzinfo=t).tzname() + #'BRDT' + #>>> datetime.datetime(2003,2,16,0,tzinfo=t).tzname() + #'BRST' + #>>> datetime.datetime(2003,2,15,23,tzinfo=t).tzname() + #'BRST' + #>>> datetime.datetime(2003,2,15,22,tzinfo=t).tzname() + #'BRDT' + #>>> datetime.datetime(2003,2,15,23,tzinfo=t).tzname() + #'BRDT' + # + # Here is a more stable implementation: + # try: - return tzlocal._isdst(self, dt) + timestamp = ((dt.toordinal() - EPOCHORDINAL) * 86400 + + dt.hour * 3600 + + dt.minute * 60 + + dt.second) + return time.localtime(timestamp+time.timezone).tm_isdst except ValueError: pass return False @@ -150,6 +174,11 @@ def as_local_time(date_time, assume_utc=True): _local_tz) return date_time.astimezone(_local_tz) +def dt_as_local(dt): + if dt.tzinfo is local_tz: + return dt + return dt.astimezone(local_tz) + def as_utc(date_time, assume_utc=True): if not hasattr(date_time, 'tzinfo'): return date_time @@ -174,24 +203,27 @@ def 
utcfromtimestamp(stamp): traceback.print_exc() return utcnow() -#### Format date functions +# Format date functions def fd_format_hour(dt, strf, ampm, hr): l = len(hr) h = dt.hour if ampm: h = h%12 - if l == 1: return '%d'%h + if l == 1: + return '%d'%h return '%02d'%h def fd_format_minute(dt, strf, ampm, min): l = len(min) - if l == 1: return '%d'%dt.minute + if l == 1: + return '%d'%dt.minute return '%02d'%dt.minute def fd_format_second(dt, strf, ampm, sec): l = len(sec) - if l == 1: return '%d'%dt.second + if l == 1: + return '%d'%dt.second return '%02d'%dt.second def fd_format_ampm(dt, strf, ampm, ap): @@ -202,20 +234,27 @@ def fd_format_ampm(dt, strf, ampm, ap): def fd_format_day(dt, strf, ampm, dy): l = len(dy) - if l == 1: return '%d'%dt.day - if l == 2: return '%02d'%dt.day - if l == 3: return strf('%a') + if l == 1: + return '%d'%dt.day + if l == 2: + return '%02d'%dt.day + if l == 3: + return strf('%a') return strf('%A') def fd_format_month(dt, strf, ampm, mo): l = len(mo) - if l == 1: return '%d'%dt.month - if l == 2: return '%02d'%dt.month - if l == 3: return strf('%b') + if l == 1: + return '%d'%dt.month + if l == 2: + return '%02d'%dt.month + if l == 3: + return strf('%b') return strf('%B') def fd_format_year(dt, strf, ampm, yr): - if len(yr) == 2: return '%02d'%(dt.year % 100) + if len(yr) == 2: + return '%02d'%(dt.year % 100) return '%04d'%dt.year fd_function_index = { @@ -240,7 +279,7 @@ def format_date(dt, format, assume_utc=False, as_utc=False): format = 'dd MMM yyyy' if not isinstance(dt, datetime): - dt = datetime.combine(dt, time()) + dt = datetime.combine(dt, dtime()) if hasattr(dt, 'tzinfo'): if dt.tzinfo is None: @@ -260,7 +299,7 @@ def format_date(dt, format, assume_utc=False, as_utc=False): '(s{1,2})|(m{1,2})|(h{1,2})|(ap)|(AP)|(d{1,4}|M{1,4}|(?:yyyy|yy))', repl_func, format) -#### Clean date functions +# Clean date functions def cd_has_hour(tt, dt): tt['hour'] = dt.hour @@ -307,7 +346,7 @@ def clean_date_for_sort(dt, format): format = 
'yyMd' if not isinstance(dt, datetime): - dt = datetime.combine(dt, time()) + dt = datetime.combine(dt, dtime()) if hasattr(dt, 'tzinfo'): if dt.tzinfo is None: @@ -340,7 +379,7 @@ def replace_months(datestr, clang): u'[sS]eptembre': u'sep', u'[Oo]ctobre': u'oct', u'[nN]ovembre': u'nov', - u'[dD].cembre': u'dec' } + u'[dD].cembre': u'dec'} detoen = { u'[jJ]anuar': u'jan', u'[fF]ebruar': u'feb', @@ -353,7 +392,7 @@ def replace_months(datestr, clang): u'[sS]eptember': u'sep', u'[Oo]ktober': u'oct', u'[nN]ovember': u'nov', - u'[dD]ezember': u'dec' } + u'[dD]ezember': u'dec'} if clang == 'fr': dictoen = frtoen @@ -364,6 +403,8 @@ def replace_months(datestr, clang): for k in dictoen.iterkeys(): tmp = re.sub(k, dictoen[k], datestr) - if tmp != datestr: break + if tmp != datestr: + break return tmp + diff --git a/src/calibre/utils/speedup.c b/src/calibre/utils/speedup.c index 171179a88a..b05d8ee3c1 100644 --- a/src/calibre/utils/speedup.c +++ b/src/calibre/utils/speedup.c @@ -13,12 +13,12 @@ speedup_parse_date(PyObject *self, PyObject *args) { long year, month, day, hour, minute, second, tzh = 0, tzm = 0, sign = 0; size_t len; if(!PyArg_ParseTuple(args, "s", &raw)) return NULL; + while ((*raw == ' ' || *raw == '\t' || *raw == '\n' || *raw == '\r' || *raw == '\f' || *raw == '\v') && *raw != 0) raw++; len = strlen(raw); if (len < 19) Py_RETURN_NONE; orig = raw; - year = strtol(raw, &end, 10); if ((end - raw) != 4) Py_RETURN_NONE; raw += 5; @@ -27,7 +27,6 @@ speedup_parse_date(PyObject *self, PyObject *args) { month = strtol(raw, &end, 10); if ((end - raw) != 2) Py_RETURN_NONE; raw += 3; - day = strtol(raw, &end, 10); if ((end - raw) != 2) Py_RETURN_NONE;