Helper functions used to download and extract common time series datasets.

decompress_from_url[source]

decompress_from_url(url, target_dir=None, verbose=False)

download_data[source]

download_data(url, fname=None, c_key='archive', force_download=False, timeout=4)

Download `url` to `fname`.

get_UCR_univariate_list[source]

get_UCR_univariate_list()

get_UCR_multivariate_list[source]

get_UCR_multivariate_list()

158

get_UCR_data[source]

get_UCR_data(dsid, path='.', parent_dir='data/UCR', on_disk=True, mode='c', Xdtype='float32', ydtype=None, return_split=True, split_data=True, force_download=False, verbose=False)

X_train, y_train, X_valid, y_valid = get_UCR_data('natops')
dsid = 'natops' 
X_train, y_train, X_valid, y_valid = get_UCR_data(dsid, verbose=True)
X, y, splits = get_UCR_data(dsid, split_data=False)
test_eq(X[splits[0]], X_train)
test_eq(y[splits[1]], y_valid)
test_eq(X[splits[0]], X_train)
test_eq(y[splits[1]], y_valid)
test_type(X, X_train)
test_type(y, y_train)
Dataset: NATOPS
X_train: (180, 24, 51)
y_train: (180,)
X_valid: (180, 24, 51)
y_valid: (180,) 

check_data[source]

check_data(X, y=None, splits=None, show_plot=True)

dsid = 'ECGFiveDays'
X, y, splits = get_UCR_data(dsid, split_data=False, on_disk=False, force_download=True)
check_data(X, y, splits)
check_data(X[:, 0], y, splits)
y = y.astype(np.float32)
check_data(X, y, splits)
y[:10] = np.nan
check_data(X[:, 0], y, splits)
X, y, splits = get_UCR_data(dsid, split_data=False, on_disk=False, force_download=True)
splits = get_splits(y, 3)
check_data(X, y, splits)
check_data(X[:, 0], y, splits)
y[:5]= np.nan
check_data(X[:, 0], y, splits)
X, y, splits = get_UCR_data(dsid, split_data=False, on_disk=False, force_download=True)
X      - shape: [884 samples x 1 features x 136 timesteps]  type: ndarray  dtype:float32  isnan: 0
y      - shape: (884,)  type: ndarray  dtype:<U1  n_classes: 2 (442 samples per class) ['1', '2']  isnan: False
splits - n_splits: 2 shape: [23, 861]  overlap: [False]
X      - shape: (884, 136)  type: ndarray  dtype:float32  isnan: 0
y      - shape: (884,)  type: ndarray  dtype:<U1  n_classes: 2 (442 samples per class) ['1', '2']  isnan: False
splits - n_splits: 2 shape: [23, 861]  overlap: [False]
X      - shape: [884 samples x 1 features x 136 timesteps]  type: ndarray  dtype:float32  isnan: 0
y      - shape: (884,)  type: ndarray  dtype:float32  isnan: 0
splits - n_splits: 2 shape: [23, 861]  overlap: [False]
X      - shape: (884, 136)  type: ndarray  dtype:float32  isnan: 0
y      - shape: (884,)  type: ndarray  dtype:float32  isnan: 10
splits - n_splits: 2 shape: [23, 861]  overlap: [False]
/Users/nacho/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:25: UserWarning: y must not contain nan values
X      - shape: [884 samples x 1 features x 136 timesteps]  type: ndarray  dtype:float32  isnan: 0
y      - shape: (884,)  type: ndarray  dtype:<U1  n_classes: 2 (442 samples per class) ['1', '2']  isnan: False
splits - n_splits: 3 shape: [[589, 295], [589, 295], [590, 294]]  overlap: [False, False, False]
X      - shape: (884, 136)  type: ndarray  dtype:float32  isnan: 0
y      - shape: (884,)  type: ndarray  dtype:<U1  n_classes: 2 (442 samples per class) ['1', '2']  isnan: False
splits - n_splits: 3 shape: [[589, 295], [589, 295], [590, 294]]  overlap: [False, False, False]
X      - shape: (884, 136)  type: ndarray  dtype:float32  isnan: 0
y      - shape: (884,)  type: ndarray  dtype:<U1  n_classes: 3 (294 samples per class) ['1', '2', 'n']  isnan: False
splits - n_splits: 3 shape: [[589, 295], [589, 295], [590, 294]]  overlap: [False, False, False]

get_Monash_regression_list[source]

get_Monash_regression_list()

15

get_Monash_data[source]

get_Monash_data(dsid, path='./data/Monash', on_disk=True, mode='c', Xdtype='float32', ydtype=None, split_data=True, force_download=False, verbose=False)

dsid = "Covid3Month"
X_train, y_train, X_valid, y_valid = get_Monash_data(dsid, on_disk=True, split_data=True)
X, y, splits = get_Monash_data(dsid, on_disk=True, split_data=False, force_download=True)
153it [00:00, 1004.72it/s]
74it [00:00, 1382.19it/s]

get_forecasting_list[source]

get_forecasting_list()

get_forecasting_data[source]

get_forecasting_data(dsid, path='./data/forecasting/', force_download=False, verbose=True, **kwargs)

ts = get_forecasting_data("sunspots", force_download=True)
if ts is not None: 
    test_eq(len(ts), 3235)
Dataset: Sunspots
downloading data...
...data downloaded. Path = data/forecasting/Sunspots.csv