Speed up reading the db

Principally by storing dates as UTC instead of local time. Also includes some
micro-optimizations in the code paths that build the maps.
This commit is contained in:
Kovid Goyal 2013-07-23 08:16:55 +05:30
parent 627375e091
commit a1581e1433
12 changed files with 205 additions and 114 deletions

View File

@ -54,7 +54,7 @@ def _get_series_values(val):
pass pass
return (val, None) return (val, None)
def get_data_as_dict(self, prefix=None, authors_as_string=False, ids=None): def get_data_as_dict(self, prefix=None, authors_as_string=False, ids=None, convert_to_local_tz=True):
''' '''
Return all metadata stored in the database as a dict. Includes paths to Return all metadata stored in the database as a dict. Includes paths to
the cover and each format. the cover and each format.
@ -66,6 +66,7 @@ def get_data_as_dict(self, prefix=None, authors_as_string=False, ids=None):
''' '''
import os import os
from calibre.ebooks.metadata import authors_to_string from calibre.ebooks.metadata import authors_to_string
from calibre.utils.date import as_local_time
backend = getattr(self, 'backend', self) # Works with both old and legacy interfaces backend = getattr(self, 'backend', self) # Works with both old and legacy interfaces
if prefix is None: if prefix is None:
prefix = backend.library_path prefix = backend.library_path
@ -88,6 +89,10 @@ def get_data_as_dict(self, prefix=None, authors_as_string=False, ids=None):
x = {} x = {}
for field in FIELDS: for field in FIELDS:
x[field] = record[self.FIELD_MAP[field]] x[field] = record[self.FIELD_MAP[field]]
if convert_to_local_tz and hasattr(self, 'new_api'):
for tf in ('timestamp', 'pubdate', 'last_modified'):
x[tf] = as_local_time(x[tf])
data.append(x) data.append(x)
x['id'] = db_id x['id'] = db_id
x['formats'] = [] x['formats'] = []

View File

@ -13,7 +13,7 @@ from datetime import timedelta
from calibre.constants import preferred_encoding from calibre.constants import preferred_encoding
from calibre.utils.config_base import prefs from calibre.utils.config_base import prefs
from calibre.utils.date import parse_date, UNDEFINED_DATE, now from calibre.utils.date import parse_date, UNDEFINED_DATE, now, dt_as_local
from calibre.utils.icu import primary_find, sort_key from calibre.utils.icu import primary_find, sort_key
from calibre.utils.localization import lang_map, canonicalize_lang from calibre.utils.localization import lang_map, canonicalize_lang
from calibre.utils.search_query_parser import SearchQueryParser, ParseException from calibre.utils.search_query_parser import SearchQueryParser, ParseException
@ -211,7 +211,7 @@ class DateSearch(object): # {{{
for v, book_ids in field_iter(): for v, book_ids in field_iter():
if isinstance(v, (str, unicode)): if isinstance(v, (str, unicode)):
v = parse_date(v) v = parse_date(v)
if v is not None and relop(v, qd, field_count): if v is not None and relop(dt_as_local(v), qd, field_count):
matches |= book_ids matches |= book_ids
return matches return matches

View File

@ -7,16 +7,37 @@ __license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
from datetime import datetime from datetime import datetime, timedelta
from collections import defaultdict from collections import defaultdict
from dateutil.tz import tzoffset
from calibre.constants import plugins from calibre.constants import plugins
from calibre.utils.date import parse_date, local_tz, UNDEFINED_DATE from calibre.utils.date import parse_date, UNDEFINED_DATE, utc_tz
from calibre.ebooks.metadata import author_to_author_sort from calibre.ebooks.metadata import author_to_author_sort
_c_speedup = plugins['speedup'][0] _c_speedup = plugins['speedup'][0].parse_date
def c_parse(val):
    '''
    Convert a date value read from the database into a datetime in UTC.

    Strings are first handed to the C speedup parser. Anything it does not
    handle falls back to parse_date(). A bare number such as 2001 is treated
    as a year.

    :param val: The raw column value (string, number or None)
    :return: A datetime with tzinfo=utc_tz, or UNDEFINED_DATE if no valid
             datetime can be built from val
    '''
    try:
        # The C parser returns None for input it does not recognise, so the
        # unpacking below raises TypeError in that case as well
        year, month, day, hour, minutes, seconds, tzsecs = _c_speedup(val)
    except (AttributeError, TypeError):
        # If a value like 2001 is stored in the column, apsw will return it as
        # an int
        if isinstance(val, (int, float)):
            # NOTE(review): day 3 rather than day 1 presumably keeps the year
            # unchanged when the result is later shifted to local time -- confirm
            try:
                return datetime(int(val), 1, 3, tzinfo=utc_tz)
            except (ValueError, OverflowError):
                # Year outside the range datetime supports, or NaN/infinity
                return UNDEFINED_DATE
    except Exception:
        # Any other failure of the fast path: try the slow parser below
        pass
    else:
        try:
            ans = datetime(year, month, day, hour, minutes, seconds, tzinfo=utc_tz)
            if tzsecs != 0:
                # The parsed fields are wall-clock time at the stored offset,
                # remove the offset to get UTC
                ans -= timedelta(seconds=tzsecs)
        except (OverflowError, ValueError):
            ans = UNDEFINED_DATE
        return ans
    try:
        return parse_date(val, as_utc=True, assume_utc=True)
    except ValueError:
        return UNDEFINED_DATE
ONE_ONE, MANY_ONE, MANY_MANY = xrange(3) ONE_ONE, MANY_ONE, MANY_MANY = xrange(3)
@ -24,28 +45,6 @@ class Null:
pass pass
null = Null() null = Null()
def _c_convert_timestamp(val):
    '''
    Convert a date value read from the database into a datetime in the local
    timezone. Returns None for empty values.
    '''
    if not val:
        return None
    try:
        ret = _c_speedup.parse_date(val.strip())
    except AttributeError:
        # If a value like 2001 is stored in the column, apsw will return it as
        # an int
        if isinstance(val, (int, float)):
            # Treat the number as a year: January 1st UTC, shifted to local time
            return datetime(int(val), 1, 1, tzinfo=tzoffset(None, 0)).astimezone(local_tz)
        ret = None
    except:
        # Any other failure of the fast parser: fall back to parse_date() below
        ret = None
    if ret is None:
        return parse_date(val, as_utc=False)
    year, month, day, hour, minutes, seconds, tzsecs = ret
    try:
        # Build the datetime at its stored UTC offset, then convert to local time
        return datetime(year, month, day, hour, minutes, seconds,
                tzinfo=tzoffset(None, tzsecs)).astimezone(local_tz)
    except OverflowError:
        return UNDEFINED_DATE.astimezone(local_tz)
class Table(object): class Table(object):
def __init__(self, name, metadata, link_table=None): def __init__(self, name, metadata, link_table=None):
@ -54,7 +53,7 @@ class Table(object):
# self.unserialize() maps values from the db to python objects # self.unserialize() maps values from the db to python objects
self.unserialize = { self.unserialize = {
'datetime': _c_convert_timestamp, 'datetime': c_parse,
'bool': bool 'bool': bool
}.get(metadata['datatype'], None) }.get(metadata['datatype'], None)
if name == 'authors': if name == 'authors':
@ -89,7 +88,6 @@ class OneToOneTable(Table):
table_type = ONE_ONE table_type = ONE_ONE
def read(self, db): def read(self, db):
self.book_col_map = {}
idcol = 'id' if self.metadata['table'] == 'books' else 'book' idcol = 'id' if self.metadata['table'] == 'books' else 'book'
query = db.conn.execute('SELECT {0}, {1} FROM {2}'.format(idcol, query = db.conn.execute('SELECT {0}, {1} FROM {2}'.format(idcol,
self.metadata['column'], self.metadata['table'])) self.metadata['column'], self.metadata['table']))
@ -175,7 +173,7 @@ class ManyToOneTable(Table):
def read(self, db): def read(self, db):
self.id_map = {} self.id_map = {}
self.col_book_map = {} self.col_book_map = defaultdict(set)
self.book_col_map = {} self.book_col_map = {}
self.read_id_maps(db) self.read_id_maps(db)
self.read_maps(db) self.read_maps(db)
@ -190,13 +188,13 @@ class ManyToOneTable(Table):
self.id_map = {book_id:us(val) for book_id, val in query} self.id_map = {book_id:us(val) for book_id, val in query}
def read_maps(self, db): def read_maps(self, db):
for row in db.conn.execute( cbm = self.col_book_map
bcm = self.book_col_map
for book, item_id in db.conn.execute(
'SELECT book, {0} FROM {1}'.format( 'SELECT book, {0} FROM {1}'.format(
self.metadata['link_column'], self.link_table)): self.metadata['link_column'], self.link_table)):
if row[1] not in self.col_book_map: cbm[item_id].add(book)
self.col_book_map[row[1]] = set() bcm[book] = item_id
self.col_book_map[row[1]].add(row[0])
self.book_col_map[row[0]] = row[1]
def remove_books(self, book_ids, db): def remove_books(self, book_ids, db):
clean = set() clean = set()
@ -272,17 +270,14 @@ class ManyToManyTable(ManyToOneTable):
do_clean_on_remove = True do_clean_on_remove = True
def read_maps(self, db): def read_maps(self, db):
for row in db.conn.execute( bcm = defaultdict(list)
self.selectq.format(self.metadata['link_column'], self.link_table)): cbm = self.col_book_map
if row[1] not in self.col_book_map: for book, item_id in db.conn.execute(
self.col_book_map[row[1]] = set() self.selectq.format(self.metadata['link_column'], self.link_table)):
self.col_book_map[row[1]].add(row[0]) cbm[item_id].add(book)
if row[0] not in self.book_col_map: bcm[book].append(item_id)
self.book_col_map[row[0]] = []
self.book_col_map[row[0]].append(row[1])
for key in tuple(self.book_col_map.iterkeys()): self.book_col_map = {k:tuple(v) for k, v in bcm.iteritems()}
self.book_col_map[key] = tuple(self.book_col_map[key])
def remove_books(self, book_ids, db): def remove_books(self, book_ids, db):
clean = set() clean = set()
@ -351,17 +346,16 @@ class ManyToManyTable(ManyToOneTable):
class AuthorsTable(ManyToManyTable): class AuthorsTable(ManyToManyTable):
def read_id_maps(self, db): def read_id_maps(self, db):
self.alink_map = {} self.alink_map = lm = {}
self.asort_map = {} self.asort_map = sm = {}
self.id_map = {} self.id_map = im = {}
us = self.unserialize us = self.unserialize
for row in db.conn.execute( for aid, name, sort, link in db.conn.execute(
'SELECT id, name, sort, link FROM authors'): 'SELECT id, name, sort, link FROM authors'):
val = us(row[1]) name = us(name)
self.id_map[row[0]] = self.unserialize(val) im[aid] = name
self.asort_map[row[0]] = (row[2] if row[2] else sm[aid] = (sort or author_to_author_sort(name))
author_to_author_sort(val)) lm[aid] = link
self.alink_map[row[0]] = row[3]
def set_sort_names(self, aus_map, db): def set_sort_names(self, aus_map, db):
aus_map = {aid:(a or '').strip() for aid, a in aus_map.iteritems()} aus_map = {aid:(a or '').strip() for aid, a in aus_map.iteritems()}
@ -404,22 +398,20 @@ class FormatsTable(ManyToManyTable):
pass pass
def read_maps(self, db): def read_maps(self, db):
self.fname_map = defaultdict(dict) self.fname_map = fnm = defaultdict(dict)
self.size_map = defaultdict(dict) self.size_map = sm = defaultdict(dict)
for row in db.conn.execute('SELECT book, format, name, uncompressed_size FROM data'): self.col_book_map = cbm = defaultdict(set)
if row[1] is not None: bcm = defaultdict(list)
fmt = row[1].upper()
if fmt not in self.col_book_map:
self.col_book_map[fmt] = set()
self.col_book_map[fmt].add(row[0])
if row[0] not in self.book_col_map:
self.book_col_map[row[0]] = []
self.book_col_map[row[0]].append(fmt)
self.fname_map[row[0]][fmt] = row[2]
self.size_map[row[0]][fmt] = row[3]
for key in tuple(self.book_col_map.iterkeys()): for book, fmt, name, sz in db.conn.execute('SELECT book, format, name, uncompressed_size FROM data'):
self.book_col_map[key] = tuple(sorted(self.book_col_map[key])) if fmt is not None:
fmt = fmt.upper()
cbm[fmt].add(book)
bcm[book].append(fmt)
fnm[book][fmt] = name
sm[book][fmt] = sz
self.book_col_map = {k:tuple(sorted(v)) for k, v in bcm.iteritems()}
def remove_books(self, book_ids, db): def remove_books(self, book_ids, db):
clean = ManyToManyTable.remove_books(self, book_ids, db) clean = ManyToManyTable.remove_books(self, book_ids, db)
@ -485,14 +477,12 @@ class IdentifiersTable(ManyToManyTable):
pass pass
def read_maps(self, db): def read_maps(self, db):
for row in db.conn.execute('SELECT book, type, val FROM identifiers'): self.book_col_map = defaultdict(dict)
if row[1] is not None and row[2] is not None: self.col_book_map = defaultdict(set)
if row[1] not in self.col_book_map: for book, typ, val in db.conn.execute('SELECT book, type, val FROM identifiers'):
self.col_book_map[row[1]] = set() if typ is not None and val is not None:
self.col_book_map[row[1]].add(row[0]) self.col_book_map[typ].add(book)
if row[0] not in self.book_col_map: self.book_col_map[book][typ] = val
self.book_col_map[row[0]] = {}
self.book_col_map[row[0]][row[1]] = row[2]
def remove_books(self, book_ids, db): def remove_books(self, book_ids, db):
clean = set() clean = set()

View File

@ -0,0 +1,36 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
import os, cProfile
from tempfile import gettempdir
from calibre.db.legacy import LibraryDatabase
db = None
def initdb(path):
    '''
    Open the library at path (a leading ~ is expanded) and store it in the
    module-level db variable.
    '''
    global db
    library_path = os.path.expanduser(path)
    db = LibraryDatabase(library_path)
def show_stats(path):
    '''
    Print the first 30 entries of the profile dump at path, sorted by
    cumulative time.
    '''
    from pstats import Stats
    # sort_stats() returns the Stats object itself, so the calls can be chained
    Stats(path).sort_stats('cumulative').print_stats(30)
def main():
    '''
    Profile opening the library, save the raw profile data in the temporary
    directory and print a summary of it.
    '''
    stats_path = os.path.join(gettempdir(), 'read_db.stats')
    profiler = cProfile.Profile()
    # runcall() enables the profiler, makes the call and disables it again
    profiler.runcall(initdb, '~/documents/largelib')
    profiler.dump_stats(stats_path)
    show_stats(stats_path)
    print ('Stats saved to', stats_path)


if __name__ == '__main__':
    main()

View File

@ -385,3 +385,21 @@ class ReadingTest(BaseTest):
self.assertFalse(x.has_book(Metadata(title[:1]))) self.assertFalse(x.has_book(Metadata(title[:1])))
db.close() db.close()
# }}} # }}}
def test_datetime(self):
    ' Test that datetimes stored in the db are read correctly '
    from calibre.utils.date import parse_date
    from calibre.db.tables import c_parse, UNDEFINED_DATE, _c_speedup

    # Strings: the C parser must accept every sample and c_parse() must
    # agree with the pure python parser
    samples = (
        '2013-07-22 15:18:29+05:30',
        ' 2013-07-22 15:18:29+00:00',
        '2013-07-22 15:18:29',
        '2003-09-21 23:30:00-06:00',
    )
    for text in samples:
        self.assertTrue(_c_speedup(text))
        from_c = c_parse(text)
        from_python = parse_date(text, assume_utc=True)
        self.assertEqual(from_c, from_python)

    # A bare number is interpreted as a year
    self.assertEqual(c_parse(2003).year, 2003)

    # Values that cannot be parsed map to UNDEFINED_DATE
    for bad in (None, '', 'abc'):
        self.assertEqual(UNDEFINED_DATE, c_parse(bad))

View File

@ -18,7 +18,7 @@ from calibre.ebooks.metadata import author_to_author_sort
from calibre.library.catalogs import AuthorSortMismatchException, EmptyCatalogException, \ from calibre.library.catalogs import AuthorSortMismatchException, EmptyCatalogException, \
InvalidGenresSourceFieldException InvalidGenresSourceFieldException
from calibre.ptempfile import PersistentTemporaryDirectory from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.utils.date import format_date, is_date_undefined, now as nowf from calibre.utils.date import format_date, is_date_undefined, now as nowf, as_local_time
from calibre.utils.filenames import ascii_text, shorten_components_to from calibre.utils.filenames import ascii_text, shorten_components_to
from calibre.utils.icu import capitalize, collation_order, sort_key from calibre.utils.icu import capitalize, collation_order, sort_key
from calibre.utils.magick.draw import thumbnail from calibre.utils.magick.draw import thumbnail
@ -940,7 +940,7 @@ class CatalogBuilder(object):
if is_date_undefined(record['pubdate']): if is_date_undefined(record['pubdate']):
this_title['date'] = None this_title['date'] = None
else: else:
this_title['date'] = strftime(u'%B %Y', record['pubdate'].timetuple()) this_title['date'] = strftime(u'%B %Y', as_local_time(record['pubdate']).timetuple())
this_title['timestamp'] = record['timestamp'] this_title['timestamp'] = record['timestamp']

View File

@ -74,7 +74,7 @@ def do_list(db, fields, afields, sort_by, ascending, search_text, line_width, se
db.sort(sort_by, ascending) db.sort(sort_by, ascending)
if search_text: if search_text:
db.search(search_text) db.search(search_text)
data = db.get_data_as_dict(prefix, authors_as_string=True) data = db.get_data_as_dict(prefix, authors_as_string=True, convert_to_local_tz=False)
if limit > -1: if limit > -1:
data = data[:limit] data = data[:limit]
fields = ['id'] + fields fields = ['id'] + fields

View File

@ -10,7 +10,7 @@ import re, os, posixpath
import cherrypy import cherrypy
from calibre import fit_image, guess_type from calibre import fit_image, guess_type
from calibre.utils.date import fromtimestamp from calibre.utils.date import fromtimestamp, as_utc
from calibre.library.caches import SortKeyGenerator from calibre.library.caches import SortKeyGenerator
from calibre.library.save_to_disk import find_plugboard from calibre.library.save_to_disk import find_plugboard
from calibre.ebooks.metadata import authors_to_string from calibre.ebooks.metadata import authors_to_string
@ -54,6 +54,7 @@ class ContentServer(object):
Generates a locale independent, english timestamp from a datetime Generates a locale independent, english timestamp from a datetime
object object
''' '''
updated = as_utc(updated)
lm = updated.strftime('day, %d month %Y %H:%M:%S GMT') lm = updated.strftime('day, %d month %Y %H:%M:%S GMT')
day ={0:'Sun', 1:'Mon', 2:'Tue', 3:'Wed', 4:'Thu', 5:'Fri', 6:'Sat'} day ={0:'Sun', 1:'Mon', 2:'Tue', 3:'Wed', 4:'Thu', 5:'Fri', 6:'Sat'}
lm = lm.replace('day', day[int(updated.strftime('%w'))]) lm = lm.replace('day', day[int(updated.strftime('%w'))])

View File

@ -19,7 +19,7 @@ from calibre.library.server.utils import strftime, format_tag_string
from calibre.ebooks.metadata import fmt_sidx from calibre.ebooks.metadata import fmt_sidx
from calibre.constants import __appname__ from calibre.constants import __appname__
from calibre import human_readable, isbytestring from calibre import human_readable, isbytestring
from calibre.utils.date import utcfromtimestamp from calibre.utils.date import utcfromtimestamp, as_local_time
from calibre.utils.filenames import ascii_filename from calibre.utils.filenames import ascii_filename
from calibre.utils.icu import sort_key from calibre.utils.icu import sort_key
@ -254,7 +254,7 @@ class MobileServer(object):
no_tag_count=True) no_tag_count=True)
book['title'] = record[FM['title']] book['title'] = record[FM['title']]
for x in ('timestamp', 'pubdate'): for x in ('timestamp', 'pubdate'):
book[x] = strftime('%d %b, %Y', record[FM[x]]) book[x] = strftime('%d %b, %Y', as_local_time(record[FM[x]]))
book['id'] = record[FM['id']] book['id'] = record[FM['id']]
books.append(book) books.append(book)
for key in CKEYS: for key in CKEYS:

View File

@ -22,6 +22,7 @@ from calibre.library.server import custom_fields_to_display
from calibre.library.server.utils import format_tag_string, Offsets from calibre.library.server.utils import format_tag_string, Offsets
from calibre import guess_type, prepare_string_for_xml as xml from calibre import guess_type, prepare_string_for_xml as xml
from calibre.utils.icu import sort_key from calibre.utils.icu import sort_key
from calibre.utils.date import as_utc
BASE_HREFS = { BASE_HREFS = {
0 : '/stanza', 0 : '/stanza',
@ -58,7 +59,7 @@ ID = E.id
ICON = E.icon ICON = E.icon
def UPDATED(dt, *args, **kwargs): def UPDATED(dt, *args, **kwargs):
return E.updated(dt.strftime('%Y-%m-%dT%H:%M:%S+00:00'), *args, **kwargs) return E.updated(as_utc(dt).strftime('%Y-%m-%dT%H:%M:%S+00:00'), *args, **kwargs)
LINK = partial(E.link, type='application/atom+xml') LINK = partial(E.link, type='application/atom+xml')
NAVLINK = partial(E.link, NAVLINK = partial(E.link,

View File

@ -6,23 +6,47 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import re import re, time
from datetime import datetime, time, timedelta from datetime import datetime, time as dtime, timedelta
from functools import partial from functools import partial
from dateutil.tz import tzlocal, tzutc from dateutil.tz import tzlocal, tzutc, EPOCHORDINAL
from calibre import strftime from calibre import strftime
class SafeLocalTimeZone(tzlocal): class SafeLocalTimeZone(tzlocal):
'''
Assume DST was not in effect for historical dates, if DST
data for the local timezone is not present in the operating system.
'''
def _isdst(self, dt): def _isdst(self, dt):
# We can't use mktime here. It is unstable when deciding if
# the hour near to a change is DST or not.
#
# timestamp = time.mktime((dt.year, dt.month, dt.day, dt.hour,
# dt.minute, dt.second, dt.weekday(), 0, -1))
# return time.localtime(timestamp).tm_isdst
#
# The code above yields the following result:
#
#>>> import tz, datetime
#>>> t = tz.tzlocal()
#>>> datetime.datetime(2003,2,15,23,tzinfo=t).tzname()
#'BRDT'
#>>> datetime.datetime(2003,2,16,0,tzinfo=t).tzname()
#'BRST'
#>>> datetime.datetime(2003,2,15,23,tzinfo=t).tzname()
#'BRST'
#>>> datetime.datetime(2003,2,15,22,tzinfo=t).tzname()
#'BRDT'
#>>> datetime.datetime(2003,2,15,23,tzinfo=t).tzname()
#'BRDT'
#
# Here is a more stable implementation:
#
try: try:
return tzlocal._isdst(self, dt) timestamp = ((dt.toordinal() - EPOCHORDINAL) * 86400
+ dt.hour * 3600
+ dt.minute * 60
+ dt.second)
return time.localtime(timestamp+time.timezone).tm_isdst
except ValueError: except ValueError:
pass pass
return False return False
@ -150,6 +174,11 @@ def as_local_time(date_time, assume_utc=True):
_local_tz) _local_tz)
return date_time.astimezone(_local_tz) return date_time.astimezone(_local_tz)
def dt_as_local(dt):
    '''
    Return dt expressed in the local timezone. A datetime whose tzinfo is
    already the local_tz object is returned unchanged.
    '''
    already_local = dt.tzinfo is local_tz
    return dt if already_local else dt.astimezone(local_tz)
def as_utc(date_time, assume_utc=True): def as_utc(date_time, assume_utc=True):
if not hasattr(date_time, 'tzinfo'): if not hasattr(date_time, 'tzinfo'):
return date_time return date_time
@ -174,24 +203,27 @@ def utcfromtimestamp(stamp):
traceback.print_exc() traceback.print_exc()
return utcnow() return utcnow()
#### Format date functions # Format date functions
def fd_format_hour(dt, strf, ampm, hr): def fd_format_hour(dt, strf, ampm, hr):
l = len(hr) l = len(hr)
h = dt.hour h = dt.hour
if ampm: if ampm:
h = h%12 h = h%12
if l == 1: return '%d'%h if l == 1:
return '%d'%h
return '%02d'%h return '%02d'%h
def fd_format_minute(dt, strf, ampm, min): def fd_format_minute(dt, strf, ampm, min):
l = len(min) l = len(min)
if l == 1: return '%d'%dt.minute if l == 1:
return '%d'%dt.minute
return '%02d'%dt.minute return '%02d'%dt.minute
def fd_format_second(dt, strf, ampm, sec): def fd_format_second(dt, strf, ampm, sec):
l = len(sec) l = len(sec)
if l == 1: return '%d'%dt.second if l == 1:
return '%d'%dt.second
return '%02d'%dt.second return '%02d'%dt.second
def fd_format_ampm(dt, strf, ampm, ap): def fd_format_ampm(dt, strf, ampm, ap):
@ -202,20 +234,27 @@ def fd_format_ampm(dt, strf, ampm, ap):
def fd_format_day(dt, strf, ampm, dy): def fd_format_day(dt, strf, ampm, dy):
l = len(dy) l = len(dy)
if l == 1: return '%d'%dt.day if l == 1:
if l == 2: return '%02d'%dt.day return '%d'%dt.day
if l == 3: return strf('%a') if l == 2:
return '%02d'%dt.day
if l == 3:
return strf('%a')
return strf('%A') return strf('%A')
def fd_format_month(dt, strf, ampm, mo): def fd_format_month(dt, strf, ampm, mo):
l = len(mo) l = len(mo)
if l == 1: return '%d'%dt.month if l == 1:
if l == 2: return '%02d'%dt.month return '%d'%dt.month
if l == 3: return strf('%b') if l == 2:
return '%02d'%dt.month
if l == 3:
return strf('%b')
return strf('%B') return strf('%B')
def fd_format_year(dt, strf, ampm, yr): def fd_format_year(dt, strf, ampm, yr):
if len(yr) == 2: return '%02d'%(dt.year % 100) if len(yr) == 2:
return '%02d'%(dt.year % 100)
return '%04d'%dt.year return '%04d'%dt.year
fd_function_index = { fd_function_index = {
@ -240,7 +279,7 @@ def format_date(dt, format, assume_utc=False, as_utc=False):
format = 'dd MMM yyyy' format = 'dd MMM yyyy'
if not isinstance(dt, datetime): if not isinstance(dt, datetime):
dt = datetime.combine(dt, time()) dt = datetime.combine(dt, dtime())
if hasattr(dt, 'tzinfo'): if hasattr(dt, 'tzinfo'):
if dt.tzinfo is None: if dt.tzinfo is None:
@ -260,7 +299,7 @@ def format_date(dt, format, assume_utc=False, as_utc=False):
'(s{1,2})|(m{1,2})|(h{1,2})|(ap)|(AP)|(d{1,4}|M{1,4}|(?:yyyy|yy))', '(s{1,2})|(m{1,2})|(h{1,2})|(ap)|(AP)|(d{1,4}|M{1,4}|(?:yyyy|yy))',
repl_func, format) repl_func, format)
#### Clean date functions # Clean date functions
def cd_has_hour(tt, dt): def cd_has_hour(tt, dt):
tt['hour'] = dt.hour tt['hour'] = dt.hour
@ -307,7 +346,7 @@ def clean_date_for_sort(dt, format):
format = 'yyMd' format = 'yyMd'
if not isinstance(dt, datetime): if not isinstance(dt, datetime):
dt = datetime.combine(dt, time()) dt = datetime.combine(dt, dtime())
if hasattr(dt, 'tzinfo'): if hasattr(dt, 'tzinfo'):
if dt.tzinfo is None: if dt.tzinfo is None:
@ -340,7 +379,7 @@ def replace_months(datestr, clang):
u'[sS]eptembre': u'sep', u'[sS]eptembre': u'sep',
u'[Oo]ctobre': u'oct', u'[Oo]ctobre': u'oct',
u'[nN]ovembre': u'nov', u'[nN]ovembre': u'nov',
u'[dD].cembre': u'dec' } u'[dD].cembre': u'dec'}
detoen = { detoen = {
u'[jJ]anuar': u'jan', u'[jJ]anuar': u'jan',
u'[fF]ebruar': u'feb', u'[fF]ebruar': u'feb',
@ -353,7 +392,7 @@ def replace_months(datestr, clang):
u'[sS]eptember': u'sep', u'[sS]eptember': u'sep',
u'[Oo]ktober': u'oct', u'[Oo]ktober': u'oct',
u'[nN]ovember': u'nov', u'[nN]ovember': u'nov',
u'[dD]ezember': u'dec' } u'[dD]ezember': u'dec'}
if clang == 'fr': if clang == 'fr':
dictoen = frtoen dictoen = frtoen
@ -364,6 +403,8 @@ def replace_months(datestr, clang):
for k in dictoen.iterkeys(): for k in dictoen.iterkeys():
tmp = re.sub(k, dictoen[k], datestr) tmp = re.sub(k, dictoen[k], datestr)
if tmp != datestr: break if tmp != datestr:
break
return tmp return tmp

View File

@ -13,12 +13,12 @@ speedup_parse_date(PyObject *self, PyObject *args) {
long year, month, day, hour, minute, second, tzh = 0, tzm = 0, sign = 0; long year, month, day, hour, minute, second, tzh = 0, tzm = 0, sign = 0;
size_t len; size_t len;
if(!PyArg_ParseTuple(args, "s", &raw)) return NULL; if(!PyArg_ParseTuple(args, "s", &raw)) return NULL;
while ((*raw == ' ' || *raw == '\t' || *raw == '\n' || *raw == '\r' || *raw == '\f' || *raw == '\v') && *raw != 0) raw++;
len = strlen(raw); len = strlen(raw);
if (len < 19) Py_RETURN_NONE; if (len < 19) Py_RETURN_NONE;
orig = raw; orig = raw;
year = strtol(raw, &end, 10); year = strtol(raw, &end, 10);
if ((end - raw) != 4) Py_RETURN_NONE; if ((end - raw) != 4) Py_RETURN_NONE;
raw += 5; raw += 5;
@ -27,7 +27,6 @@ speedup_parse_date(PyObject *self, PyObject *args) {
month = strtol(raw, &end, 10); month = strtol(raw, &end, 10);
if ((end - raw) != 2) Py_RETURN_NONE; if ((end - raw) != 2) Py_RETURN_NONE;
raw += 3; raw += 3;
day = strtol(raw, &end, 10); day = strtol(raw, &end, 10);
if ((end - raw) != 2) Py_RETURN_NONE; if ((end - raw) != 2) Py_RETURN_NONE;