|
|
@ -40,7 +40,7 @@ from calendar import monthrange |
|
|
|
from io import StringIO |
|
|
|
|
|
|
|
import six |
|
|
|
from six import binary_type, integer_types, text_type |
|
|
|
from six import integer_types, text_type |
|
|
|
|
|
|
|
from decimal import Decimal |
|
|
|
|
|
|
@ -49,7 +49,7 @@ from warnings import warn |
|
|
|
from .. import relativedelta |
|
|
|
from .. import tz |
|
|
|
|
|
|
|
__all__ = ["parse", "parserinfo"] |
|
|
|
__all__ = ["parse", "parserinfo", "ParserError"] |
|
|
|
|
|
|
|
|
|
|
|
# TODO: pandas.core.tools.datetimes imports this explicitly. Might be worth |
|
|
@ -63,7 +63,7 @@ class _timelex(object): |
|
|
|
if six.PY2: |
|
|
|
# In Python 2, we can't duck type properly because unicode has |
|
|
|
# a 'decode' function, and we'd be double-decoding |
|
|
|
if isinstance(instream, (binary_type, bytearray)): |
|
|
|
if isinstance(instream, (bytes, bytearray)): |
|
|
|
instream = instream.decode() |
|
|
|
else: |
|
|
|
if getattr(instream, 'decode', None) is not None: |
|
|
@ -291,7 +291,7 @@ class parserinfo(object): |
|
|
|
("s", "second", "seconds")] |
|
|
|
AMPM = [("am", "a"), |
|
|
|
("pm", "p")] |
|
|
|
UTCZONE = ["UTC", "GMT", "Z"] |
|
|
|
UTCZONE = ["UTC", "GMT", "Z", "z"] |
|
|
|
PERTAIN = ["of"] |
|
|
|
TZOFFSET = {} |
|
|
|
# TODO: ERA = ["AD", "BC", "CE", "BCE", "Stardate", |
|
|
@ -388,7 +388,8 @@ class parserinfo(object): |
|
|
|
if res.year is not None: |
|
|
|
res.year = self.convertyear(res.year, res.century_specified) |
|
|
|
|
|
|
|
if res.tzoffset == 0 and not res.tzname or res.tzname == 'Z': |
|
|
|
if ((res.tzoffset == 0 and not res.tzname) or |
|
|
|
(res.tzname == 'Z' or res.tzname == 'z')): |
|
|
|
res.tzname = "UTC" |
|
|
|
res.tzoffset = 0 |
|
|
|
elif res.tzoffset != 0 and res.tzname and self.utczone(res.tzname): |
|
|
@ -625,7 +626,7 @@ class parser(object): |
|
|
|
first element being a :class:`datetime.datetime` object, the second |
|
|
|
a tuple containing the fuzzy tokens. |
|
|
|
|
|
|
|
:raises ValueError: |
|
|
|
:raises ParserError: |
|
|
|
Raised for invalid or unknown string format, if the provided |
|
|
|
:class:`tzinfo` is not in a valid format, or if an invalid date |
|
|
|
would be created. |
|
|
@ -645,12 +646,15 @@ class parser(object): |
|
|
|
res, skipped_tokens = self._parse(timestr, **kwargs) |
|
|
|
|
|
|
|
if res is None: |
|
|
|
raise ValueError("Unknown string format:", timestr) |
|
|
|
raise ParserError("Unknown string format: %s", timestr) |
|
|
|
|
|
|
|
if len(res) == 0: |
|
|
|
raise ValueError("String does not contain a date:", timestr) |
|
|
|
raise ParserError("String does not contain a date: %s", timestr) |
|
|
|
|
|
|
|
ret = self._build_naive(res, default) |
|
|
|
try: |
|
|
|
ret = self._build_naive(res, default) |
|
|
|
except ValueError as e: |
|
|
|
six.raise_from(ParserError(e.args[0] + ": %s", timestr), e) |
|
|
|
|
|
|
|
if not ignoretz: |
|
|
|
ret = self._build_tzaware(ret, res, tzinfos) |
|
|
@ -1060,7 +1064,8 @@ class parser(object): |
|
|
|
tzname is None and |
|
|
|
tzoffset is None and |
|
|
|
len(token) <= 5 and |
|
|
|
all(x in string.ascii_uppercase for x in token)) |
|
|
|
(all(x in string.ascii_uppercase for x in token) |
|
|
|
or token in self.info.UTCZONE)) |
|
|
|
|
|
|
|
def _ampm_valid(self, hour, ampm, fuzzy): |
|
|
|
""" |
|
|
@ -1109,14 +1114,6 @@ class parser(object): |
|
|
|
second = int(60 * sec_remainder) |
|
|
|
return (minute, second) |
|
|
|
|
|
|
|
def _parsems(self, value): |
|
|
|
"""Parse a I[.F] seconds value into (seconds, microseconds).""" |
|
|
|
if "." not in value: |
|
|
|
return int(value), 0 |
|
|
|
else: |
|
|
|
i, f = value.split(".") |
|
|
|
return int(i), int(f.ljust(6, "0")[:6]) |
|
|
|
|
|
|
|
def _parse_hms(self, idx, tokens, info, hms_idx): |
|
|
|
# TODO: Is this going to admit a lot of false-positives for when we |
|
|
|
# just happen to have digits and "h", "m" or "s" characters in non-date |
|
|
@ -1135,21 +1132,35 @@ class parser(object): |
|
|
|
|
|
|
|
return (new_idx, hms) |
|
|
|
|
|
|
|
def _recombine_skipped(self, tokens, skipped_idxs): |
|
|
|
""" |
|
|
|
>>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"] |
|
|
|
>>> skipped_idxs = [0, 1, 2, 5] |
|
|
|
>>> _recombine_skipped(tokens, skipped_idxs) |
|
|
|
["foo bar", "baz"] |
|
|
|
""" |
|
|
|
skipped_tokens = [] |
|
|
|
for i, idx in enumerate(sorted(skipped_idxs)): |
|
|
|
if i > 0 and idx - 1 == skipped_idxs[i - 1]: |
|
|
|
skipped_tokens[-1] = skipped_tokens[-1] + tokens[idx] |
|
|
|
else: |
|
|
|
skipped_tokens.append(tokens[idx]) |
|
|
|
# ------------------------------------------------------------------ |
|
|
|
# Handling for individual tokens. These are kept as methods instead |
|
|
|
# of functions for the sake of customizability via subclassing. |
|
|
|
|
|
|
|
return skipped_tokens |
|
|
|
def _parsems(self, value): |
|
|
|
"""Parse a I[.F] seconds value into (seconds, microseconds).""" |
|
|
|
if "." not in value: |
|
|
|
return int(value), 0 |
|
|
|
else: |
|
|
|
i, f = value.split(".") |
|
|
|
return int(i), int(f.ljust(6, "0")[:6]) |
|
|
|
|
|
|
|
def _to_decimal(self, val): |
|
|
|
try: |
|
|
|
decimal_value = Decimal(val) |
|
|
|
# See GH 662, edge case, infinite value should not be converted |
|
|
|
# via `_to_decimal` |
|
|
|
if not decimal_value.is_finite(): |
|
|
|
raise ValueError("Converted decimal value is infinite or NaN") |
|
|
|
except Exception as e: |
|
|
|
msg = "Could not convert %s to decimal" % val |
|
|
|
six.raise_from(ValueError(msg), e) |
|
|
|
else: |
|
|
|
return decimal_value |
|
|
|
|
|
|
|
# ------------------------------------------------------------------ |
|
|
|
# Post-Parsing construction of datetime output. These are kept as |
|
|
|
# methods instead of functions for the sake of customizability via |
|
|
|
# subclassing. |
|
|
|
|
|
|
|
def _build_tzinfo(self, tzinfos, tzname, tzoffset): |
|
|
|
if callable(tzinfos): |
|
|
@ -1164,6 +1175,9 @@ class parser(object): |
|
|
|
tzinfo = tz.tzstr(tzdata) |
|
|
|
elif isinstance(tzdata, integer_types): |
|
|
|
tzinfo = tz.tzoffset(tzname, tzdata) |
|
|
|
else: |
|
|
|
raise TypeError("Offset must be tzinfo subclass, tz string, " |
|
|
|
"or int offset.") |
|
|
|
return tzinfo |
|
|
|
|
|
|
|
def _build_tzaware(self, naive, res, tzinfos): |
|
|
@ -1181,10 +1195,10 @@ class parser(object): |
|
|
|
# This is mostly relevant for winter GMT zones parsed in the UK |
|
|
|
if (aware.tzname() != res.tzname and |
|
|
|
res.tzname in self.info.UTCZONE): |
|
|
|
aware = aware.replace(tzinfo=tz.tzutc()) |
|
|
|
aware = aware.replace(tzinfo=tz.UTC) |
|
|
|
|
|
|
|
elif res.tzoffset == 0: |
|
|
|
aware = naive.replace(tzinfo=tz.tzutc()) |
|
|
|
aware = naive.replace(tzinfo=tz.UTC) |
|
|
|
|
|
|
|
elif res.tzoffset: |
|
|
|
aware = naive.replace(tzinfo=tz.tzoffset(res.tzname, res.tzoffset)) |
|
|
@ -1239,17 +1253,21 @@ class parser(object): |
|
|
|
|
|
|
|
return dt |
|
|
|
|
|
|
|
def _to_decimal(self, val): |
|
|
|
try: |
|
|
|
decimal_value = Decimal(val) |
|
|
|
# See GH 662, edge case, infinite value should not be converted via `_to_decimal` |
|
|
|
if not decimal_value.is_finite(): |
|
|
|
raise ValueError("Converted decimal value is infinite or NaN") |
|
|
|
except Exception as e: |
|
|
|
msg = "Could not convert %s to decimal" % val |
|
|
|
six.raise_from(ValueError(msg), e) |
|
|
|
else: |
|
|
|
return decimal_value |
|
|
|
def _recombine_skipped(self, tokens, skipped_idxs): |
|
|
|
""" |
|
|
|
>>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"] |
|
|
|
>>> skipped_idxs = [0, 1, 2, 5] |
|
|
|
>>> _recombine_skipped(tokens, skipped_idxs) |
|
|
|
["foo bar", "baz"] |
|
|
|
""" |
|
|
|
skipped_tokens = [] |
|
|
|
for i, idx in enumerate(sorted(skipped_idxs)): |
|
|
|
if i > 0 and idx - 1 == skipped_idxs[i - 1]: |
|
|
|
skipped_tokens[-1] = skipped_tokens[-1] + tokens[idx] |
|
|
|
else: |
|
|
|
skipped_tokens.append(tokens[idx]) |
|
|
|
|
|
|
|
return skipped_tokens |
|
|
|
|
|
|
|
|
|
|
|
DEFAULTPARSER = parser() |
|
|
@ -1573,6 +1591,19 @@ DEFAULTTZPARSER = _tzparser() |
|
|
|
def _parsetz(tzstr):
    """Parse ``tzstr`` with the module-level ``DEFAULTTZPARSER``.

    :param tzstr:
        The value passed through unchanged to ``DEFAULTTZPARSER.parse``.

    :return:
        Whatever ``DEFAULTTZPARSER.parse`` returns.
    """
    parsed = DEFAULTTZPARSER.parse(tzstr)
    return parsed
|
|
|
|
|
|
|
|
|
|
|
class ParserError(ValueError):
    """Error class for representing failure to parse a datetime string.

    Instances are built as ``ParserError(fmt, *values)``: the first
    argument is a ``%``-style format string and the remaining arguments
    are substituted into it lazily, when the exception is converted to a
    string. Subclasses :class:`ValueError`, so ``except ValueError``
    handlers also catch it.
    """

    def __str__(self):
        """Return the formatted message.

        Falls back to the plain ``ValueError`` string form when there are
        no arguments or when ``args[0] % args[1:]`` cannot be evaluated.
        """
        try:
            return self.args[0] % self.args[1:]
        except (TypeError, IndexError, ValueError):
            # IndexError: no arguments at all.
            # TypeError: non-string message or argument-count mismatch.
            # ValueError: a literal '%' in the message that is not a valid
            # conversion (e.g. "100% wrong"); str() must not raise here.
            return super(ParserError, self).__str__()

    def __repr__(self):
        """Return a constructor-like representation with quoted args."""
        # repr() each raw argument so the output is unambiguous and shows
        # the original format string and values, not the formatted text.
        args = ", ".join(repr(arg) for arg in self.args)
        return "%s(%s)" % (self.__class__.__name__, args)
|
|
|
|
|
|
|
|
|
|
|
class UnknownTimezoneWarning(RuntimeWarning):
    """Warning category for a timezone the parser found but could not
    parse into a tzinfo.
    """
|
|
|
# vim:ts=4:sw=4:et |
|
|
|