diff --git a/pandas/core/datetools.py b/pandas/core/datetools.py
index d7fd499236d86..a606861e4f8ed 100644
--- a/pandas/core/datetools.py
+++ b/pandas/core/datetools.py
@@ -70,6 +70,66 @@ def parser(x):
     data = p_ufunc(arr)
     return np.array(data, dtype='M8[us]')
 
+#-------
+# Interval sketching
+
+# Rank of each datetime component name; a smaller rank is coarser. The ranks
+# share one scale with _ti_ordinal so a resolution can be compared to a freq.
+_dt_component_ordinal = {
+    "year": 0,
+    "month": 2,
+    "day": 3,
+    "hour": 4,
+    "minute": 5,
+    "second": 6,
+    "microsecond": 8
+}
+
+# Rank of each interval frequency code on the same coarse-to-fine scale.
+_ti_ordinal = {
+    'Y': 0,
+    'Q': 1,
+    'M': 2,
+    'D': 3,
+    'H': 4,
+    'm': 5,
+    's': 6,
+    'ms': 7,
+    'us': 8
+}
+
+class TimeInterval(object):
+    """
+    Sketch of a time interval parsed from ival at frequency freq.
+
+    Construction parses ival and looks up the ranks of its resolution and of
+    freq; nothing is stored on the instance yet.
+
+    Parameters
+    ----------
+    ival : value understood by parse_time_string
+    freq : string, default 'D'
+        One of the keys of _ti_ordinal
+    """
+    def __init__(self, ival, freq='D'):
+        ret, parsed, reso = parse_time_string(ival)
+
+        # TODO: reso ranks finer than freq -- handling not written yet
+        if _dt_component_ordinal[reso] > _ti_ordinal[freq]:
+            pass
+
+def to_interval(val, freq):
+    """
+    Convert val to an interval-like object at frequency freq.
+
+    datetime instances are returned unchanged; ints and strings are wrapped
+    in a TimeInterval; any other value (including None) yields None.
+    """
+    if isinstance(val, datetime):
+        return val
+    if isinstance(val, (int, str)):
+        return TimeInterval(val, freq)
+
 #-------------------------------------------------------------------------------
 # Miscellaneous date functions
 
diff --git a/pandas/core/index.py b/pandas/core/index.py
index 431b573564bdf..1c63e925bfa9f 100644
--- a/pandas/core/index.py
+++ b/pandas/core/index.py
@@ -16,7 +16,7 @@
 import pandas.core.datetools as datetools
 
 from pandas.core.datetools import (_dt_box, _dt_unbox, _dt_box_array,
-                                   _dt_unbox_array, to_timestamp)
+                                   _dt_unbox_array, to_timestamp, TimeInterval)
 
 __all__ = ['Index']
 
@@ -1076,7 +1076,6 @@ def _maybe_box_dtindex(idx):
         return Index(_dt_box_array(idx.asi8), dtype='object')
     return idx
 
-
 class DatetimeIndex(Int64Index):
     """
     Immutable ndarray of datetime64 data, represented internally as int64, and
@@ -1154,7 +1153,7 @@ def __new__(cls, data=None,
         offset = freq
 
         if data is None and offset is None:
-            raise ValueError("Must provide offset argument if no data is "
+            raise ValueError("Must provide freq argument if no data is "
                              "supplied")
 
         if data is None:
@@ -1805,6 +1804,79 @@ def tz_validate(self):
         return True
 
 
+# --- Interval index sketch
+
+class IntervalIndex(Int64Index):
+    """
+    Immutable ndarray where values represent interval offsets from the
+    Gregorian proleptic date 1/1/1.
+
+    Intervals are boxed to TimeInterval objects which carry metadata such as
+    frequency information and start and end datetimes.
+
+    Parameters
+    ----------
+    data : array-like (1-dimensional), optional
+        Optional interval-like data to construct index with
+    freq : string or interval object, optional
+        One of pandas interval strings or corresponding objects
+    start : starting value, interval-like, optional
+        If data is None, used as the start point in generating regular
+        interval data.
+    end : end value, interval-like, optional
+        If data is None, used as the inclusive end point in generating
+        regular interval data.
+    periods : int, optional, > 0
+        Number of intervals to generate. Accepted but not yet used:
+        generation currently needs both start and end
+    copy : bool
+        Accepted but not yet used
+    name : object, optional
+        Name to be stored on the index
+    """
+
+    def __new__(cls, data=None,
+                freq=None, start=None, end=None, periods=None,
+                copy=False, name=None):
+
+        if data is None and freq is None:
+            raise ValueError("Must provide freq argument if no data is "
+                             "supplied")
+
+        if data is None:
+            start = datetools.to_interval(start, freq)
+            end = datetools.to_interval(end, freq)
+
+            if (start is not None and not isinstance(start, TimeInterval)):
+                raise ValueError('Failed to convert %s to interval' % start)
+
+            if (end is not None and not isinstance(end, TimeInterval)):
+                raise ValueError('Failed to convert %s to interval' % end)
+
+            data = np.arange(start.value, end.value+1, dtype=np.int64)
+
+            index = data.view(cls)
+            index.name = name
+            index.freq = freq
+
+            return index
+
+        if not isinstance(data, np.ndarray):
+            if np.isscalar(data):
+                raise ValueError('IntervalIndex() must be called with a '
+                                 'collection of some kind, %s was passed'
+                                 % repr(data))
+
+            # other iterable of some kind
+            if not isinstance(data, (list, tuple)):
+                data = list(data)
+
+        # lists and tuples have no .view(); go through an int64 ndarray first
+        subarr = np.asarray(data, dtype=np.int64).view(cls)
+        subarr.name = name
+        subarr.freq = freq
+
+        return subarr
 # --------------------------- end of datetime-specific code ---------------
 
 class Factor(np.ndarray):
diff --git a/pandas/tests/test_datetime64.py b/pandas/tests/test_datetime64.py
index 83231743ea176..30ad9921d4b12 100644
--- a/pandas/tests/test_datetime64.py
+++ b/pandas/tests/test_datetime64.py
@@ -21,6 +21,9 @@
 from pandas.core.groupby import Tinterval
 from pandas.core.datetools import Minute, BDay
 
+from pandas.core.index import IntervalIndex
+from pandas.core.datetools import TimeInterval
+
 try:
     import pytz
 except ImportError:
@@ -558,6 +561,12 @@ def test_datetimeindex_constructor(self):
         for other in [idx2, idx3, idx4, idx5, idx6]:
             self.assert_( (idx1.values == other.values).all() )
 
+    def test_intervalindex_constructor(self):
+        ii = IntervalIndex(freq='M', start='1/1/2005', end='12/1/2005')
+        self.assertEquals(len(ii), 12)
+        self.assertEquals(ii[0], TimeInterval('1/2005', freq='M'))
+        self.assertEquals(ii[-1], TimeInterval('12/2005', freq='M'))
+
 if __name__ == '__main__':
     import nose
     nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'],