From 6f5cd87990d9d650d32a5abcd3172041d272f919 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 3 Jun 2016 21:51:23 +0530 Subject: [PATCH] A module to parse ISO 8601 format dates Based on pyiso8601, with modifications --- src/calibre/utils/date.py | 18 +---- src/calibre/utils/iso8601.py | 135 +++++++++++++++++++++++++++++++++++ 2 files changed, 138 insertions(+), 15 deletions(-) create mode 100644 src/calibre/utils/iso8601.py diff --git a/src/calibre/utils/date.py b/src/calibre/utils/date.py index 6d7b77df69..4b9ed62bbc 100644 --- a/src/calibre/utils/date.py +++ b/src/calibre/utils/date.py @@ -10,25 +10,13 @@ import re from datetime import datetime, time as dtime, timedelta, MINYEAR, MAXYEAR from functools import partial -from dateutil.tz import tzlocal, tzutc - from calibre import strftime from calibre.constants import iswindows, isosx, plugins +from calibre.utils.iso8601 import utc_tz, local_tz from calibre.utils.localization import lcdata -class SafeLocalTimeZone(tzlocal): - - def _isdst(self, dt): - # This method in tzlocal raises ValueError if dt is out of range. - # In such cases, just assume that dt is not DST. - try: - tzlocal._isdst(self, dt) - except Exception: - pass - return False - -utc_tz = _utc_tz = tzutc() -local_tz = _local_tz = SafeLocalTimeZone() +_utc_tz = utc_tz +_local_tz = local_tz # When parsing ambiguous dates that could be either dd-MM Or MM-dd use the # user's locale preferences diff --git a/src/calibre/utils/iso8601.py b/src/calibre/utils/iso8601.py new file mode 100644 index 0000000000..1b36919dcf --- /dev/null +++ b/src/calibre/utils/iso8601.py @@ -0,0 +1,135 @@ +#!/usr/bin/env python2 +# vim:fileencoding=utf-8 +# License: GPLv3 Copyright: 2016, Kovid Goyal + +from __future__ import (unicode_literals, division, absolute_import, + print_function) +from datetime import datetime +from decimal import Decimal +import re + +from dateutil.tz import tzlocal, tzutc, tzoffset + +class SafeLocalTimeZone(tzlocal): + + def _isdst(self, dt): + # This method in tzlocal raises ValueError if dt is out of range. + # In such cases, just assume that dt is not DST. + try: + tzlocal._isdst(self, dt) + except Exception: + pass + return False + +utc_tz = tzutc() +local_tz = SafeLocalTimeZone() + +_iso_pat = None + +def iso_pat(): + global _iso_pat + if _iso_pat is None: + _iso_pat = re.compile( + # Adapted from http://delete.me.uk/2005/03/iso8601.html + r""" + (?P[0-9]{4}) + ( + ( + (-(?P[0-9]{1,2})) + | + (?P[0-9]{2}) + (?!$) # Don't allow YYYYMM + ) + ( + ( + (-(?P[0-9]{1,2})) + | + (?P[0-9]{2}) + ) + ( + ( + (?P[ T]) + (?P[0-9]{2}) + (:{0,1}(?P[0-9]{2})){0,1} + ( + :{0,1}(?P[0-9]{1,2}) + ([.,](?P[0-9]+)){0,1} + ){0,1} + (?P + Z + | + ( + (?P[-+]) + (?P[0-9]{2}) + :{0,1} + (?P[0-9]{2}){0,1} + ) + ){0,1} + ){0,1} + ) + ){0,1} # YYYY-MM + ){0,1} # YYYY only + $ + """, re.VERBOSE) + return _iso_pat + +def to_int(d, key, default_to_zero=False, default=None, required=True): + """Pull a value from the dict and convert to int + + :param default_to_zero: If the value is None or empty, treat it as zero + :param default: If the value is missing in the dict use this default + + """ + value = d.get(key) or default + if (value is None or value == '') and default_to_zero: + return 0 + if value is None: + if required: + raise ValueError("Unable to read %s from %s" % (key, d)) + else: + return int(value) + +def parse_timezone(matches, default_timezone=utc_tz): + """Parses ISO 8601 time zone specs into tzinfo offsets + + """ + + if matches["timezone"] == "Z": + return utc_tz + # This isn't strictly correct, but it's common to encounter dates without + # timezones so I'll assume the default (which defaults to UTC). + # Addresses issue 4. + if matches["timezone"] is None: + return default_timezone + sign = matches["tz_sign"] + hours = to_int(matches, "tz_hour") + minutes = to_int(matches, "tz_minute", default_to_zero=True) + description = "%s%02d:%02d" % (sign, hours, minutes) + if sign == "-": + hours = -hours + minutes = -minutes + return tzoffset(description, 3600*hours + 60*minutes) + +def parse_iso8601(date_string, assume_utc=True, as_utc=True): + if isinstance(date_string, bytes): + date_string = date_string.decode('ascii') + m = iso_pat().match(date_string) + if m is None: + raise ValueError('%r is not a valid ISO8601 date' % date_string) + groups = m.groupdict() + tz = parse_timezone(groups, default_timezone=utc_tz if assume_utc else local_tz) + groups["second_fraction"] = int(Decimal("0.%s" % (groups["second_fraction"] or 0)) * Decimal("1000000.0")) + return datetime( + year=to_int(groups, "year"), + month=to_int(groups, "month", default=to_int(groups, "monthdash", required=False, default=1)), + day=to_int(groups, "day", default=to_int(groups, "daydash", required=False, default=1)), + hour=to_int(groups, "hour", default_to_zero=True), + minute=to_int(groups, "minute", default_to_zero=True), + second=to_int(groups, "second", default_to_zero=True), + microsecond=groups["second_fraction"], + tzinfo=tz, + ).astimezone(utc_tz if as_utc else local_tz) + +if __name__ == '__main__': + import sys + print(parse_iso8601(sys.argv[-1]))