Main NumPy and time series functions used throughout the library.
dsid = 'OliveOil'
X_train, y_train, X_valid, y_valid = get_UCR_data(dsid, on_disk=True, force_download=True)
X_on_disk, y_on_disk, splits = get_UCR_data(dsid, on_disk=True, return_split=False, force_download=True)
X_in_memory, y_in_memory, splits = get_UCR_data(dsid, on_disk=False, return_split=False, force_download=True)
y_tensor = cat2int(y_on_disk)
y_array = y_tensor.numpy()
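cat2int encodes the string class labels as an integer tensor. A minimal NumPy-only sketch of the same idea, using hypothetical labels (np.unique returns the sorted vocab and the integer code of each item):
# hypothetical labels, not part of the dataset above
labels = np.array(['a', 'b', 'a', 'c'])
vocab, codes = np.unique(labels, return_inverse=True)
print(vocab, codes)  # ['a' 'b' 'c'] [0 1 0 2]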
t = TSTensor(torch.randn(2, 3, 4))
p = torch.tensor(3., requires_grad=True)
test = torch.add(t, p)
test_eq(test.requires_grad, True)
test_eq(type(t.data), torch.Tensor)
test_eq(type(t), TSTensor)
l = L([0,1,2,3], [4,5,6,7], [8, 9, 10, 11])
TSTensor(l), TSTensor(l).data
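A TSTensor is expected to hold data laid out as (samples, variables, timesteps); a quick shape check makes the convention explicit:
t = TSTensor(torch.randn(2, 3, 4))
test_eq(t.shape, (2, 3, 4))  # 2 samples, 3 variables, 4 timesteps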
t = TSTensor(X_train)
for i in range(4):
    print(t)
    if i < 3: t = t[0]
TSTensor(X_on_disk)
ToTSTensor()(X_on_disk)
TSTensor(X_train).show();
TSTensor(X_train).show(title='1');
TSTensor(X_train).show(title=['1', '2']);
t = TSLabelTensor(torch.randint(0,10,(1, 2, 3)))
t, t[0], t[0][0], t[0][0][0]
t = TSMaskTensor(torch.randint(0,10,(1, 2, 3)))
t, t[0], t[0][0], t[0][0][0]
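Each successive index strips the leading dimension. A minimal check, reusing the mask tensor above and assuming standard tensor indexing semantics:
test_eq(t[0].shape, (2, 3))
test_eq(t[0][0].shape, (3,))
test_eq(t[0][0][0].ndim, 0)  # a 0-dim scalar tensor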
a = np.random.randint(0, 2, 10)
b = np.array(['1', '2', '3'])
c = np.array(['1.0', '2.0', '3.0'])
t = torch.randint(0, 2, (10, ))
test_eq(ToFloat()(a).dtype, 'float32')
test_eq(ToFloat()(b).dtype, 'float32')
test_eq(ToFloat()(c).dtype, 'float32')
test_eq(ToFloat()(t).dtype, torch.float32)
a = np.random.rand(10)*10
b = np.array(['1.0', '2.0', '3.0'])
t = torch.rand(10)*10
test_eq(ToInt()(a).dtype, 'int64')
test_eq(ToInt()(b).dtype, 'int64')
test_eq(ToInt()(t).dtype, torch.long)
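ToFloat and ToInt accept arrays and tensors, including arrays of numeric strings. A rough NumPy-only sketch of the string-to-float cast they rely on:
# plain NumPy equivalent of the conversion tested above
arr = np.array(['1.0', '2.0', '3.0'])
print(arr.astype('float32'))  # [1. 2. 3.]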
test_eq(NumpyTensorBlock().item_tfms[0].__name__, 'ToNumpyTensor')
test_eq(TSTensorBlock().item_tfms[0].__name__, 'ToTSTensor')
a = np.random.rand(5,6,7)
b = np.random.rand(5)
ds = NumpyDataset(a,b)
xb, yb = ds[[0,4]]
test_eq(xb.shape, (2,6,7))
test_eq(yb.shape, (2,))
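Indexing with a list returns a mini-batch; a single integer index should return one item, assuming the usual (X[idx], y[idx]) behavior:
x0, y0 = ds[0]  # assumption: scalar indexing mirrors list indexing
test_eq(x0.shape, (6, 7))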
dsets = TSDatasets(X_on_disk, y_array, tfms=None, splits=splits, inplace=False)
dsets2 = TSDatasets(X_on_disk[:, 0], y_array, tfms=None, splits=splits, inplace=True)
test_eq_type(dsets[0][0].data, dsets2[0][0].data)
mini_dsets = get_subset_dset(dsets.train, np.arange(10))
test_eq(len(mini_dsets), 10)
test_eq(type(mini_dsets), type(dsets))
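get_subset_dset preserves the dataset type while restricting it to the given indices, so the same should hold for the valid split:
mini_valid = get_subset_dset(dsets.valid, np.arange(5))
test_eq(len(mini_valid), 5)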
dsets = TSDatasets(X_on_disk, y_array, tfms=None, splits=splits, inplace=False)
torch.save(dsets, 'export/dsets.pth')
del dsets
dsets = torch.load('export/dsets.pth')
dsets
dsets = TSDatasets(X_on_disk, y_array, tfms=None, splits=splits, inplace=False)
torch.save(dsets.train, 'export/dsets.pth')
del dsets
dsets = torch.load('export/dsets.pth')
dsets
dsets = TSDatasets(X_on_disk, y_array, tfms=None, splits=splits, inplace=False)
test_eq(len(dsets.train), len(X_train))
dsets = TSDatasets(X_on_disk, y_array, tfms=None, splits=splits, inplace=True)
test_eq(len(dsets.train), len(X_train))
dsets = TSDatasets(X_on_disk, y_array, tfms=[add(1), Categorize()], splits=splits, inplace=True)
test_eq(len(dsets.train), len(X_train))
# test_eq(dsets.train[0][0].data, tensor(X_train[0] + 1))
test_eq(dsets.train[0][1].data, y_tensor[0])
dsets = TSDatasets(X_on_disk, y_on_disk, tfms=[None, Categorize()], splits=splits, inplace=True)
test_eq(len(dsets.add_test(X_train, y_train)), len(X_train))
test_eq(len(dsets.add_unlabeled(X_train)), len(X_train))
show_tuple((TSTensor(np.arange(10).reshape(2,5)), 1))
show_tuple((TSTensor(np.arange(10).reshape(2,5)), '1'))
show_tuple((TSTensor(np.arange(10).reshape(2,5)), [1,2]))
show_tuple((TSTensor(np.arange(10).reshape(2,5)), ['1', '2']))
X, y, splits = get_UCR_data(dsid, on_disk=False, split_data=False)
dls = get_ts_dls(X, y, tfms=[None, Categorize()], splits=splits, bs=8)
b=first(dls.train)
dls.decode(b)
test_eq(X.shape[1], dls.vars)
test_eq(X.shape[-1], dls.len)
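dls.vars is the number of variables (channels) and dls.len the number of timesteps; for OliveOil, a univariate dataset of length 570, these should be 1 and 570:
print(dls.vars, dls.len)  # expected: 1 570 for OliveOil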
X, y, splits = get_UCR_data(dsid, on_disk=False, split_data=False)
dsets = TSDatasets(X, y, tfms=[None, Categorize()], splits=splits)
ts_dls = TSDataLoaders.from_dsets(dsets.train, dsets.valid, device=default_device(), bs=4)
torch.save(ts_dls, 'export/ts_dls.pth')
del ts_dls
ts_dls = torch.load('export/ts_dls.pth')
for xb,yb in ts_dls.train:
    test_eq(tensor(X[ts_dls.train.idxs]), xb)
X, y, splits = get_UCR_data(dsid, on_disk=False, split_data=False)
dls = get_ts_dls(X, y, tfms=[None, Categorize()], splits=splits, bs=4)
for xb,yb in dls.train:
    test_eq(xb.cpu().numpy(), X[dls.train.input_idxs])
for xb,yb in dls.valid:
    test_eq(xb.cpu().numpy(), X[dls.valid.input_idxs])
test_eq((ts_dls.train.shuffle, ts_dls.valid.shuffle, ts_dls.train.drop_last, ts_dls.valid.drop_last), (True, False, True, False))
dsid = 'OliveOil'
X, y, splits = get_UCR_data(dsid, split_data=False)
dls = get_ts_dls(X, y, tfms=[None, Categorize()], splits=splits, bs=8, num_workers=0)
xb, yb = first(dls.train)
test_eq(tensor(X[dls.train.idxs]), xb)
test_eq((dls.train.shuffle, dls.valid.shuffle, dls.train.drop_last, dls.valid.drop_last), (True, False, True, False))
dsid = 'OliveOil'
X, y, splits = get_UCR_data(dsid, on_disk=True, split_data=False)
dls = get_ts_dls(X, y, tfms=[None, Categorize()], splits=splits)
dls.show_dist()
dls.train.show_dist()
xb,yb = first(dls.train)
test_eq((dls.cat, dls.c), (True, 4))
test_ne(dls.cws.numpy(), None)
dls.decoder((xb, ))
dls.decoder((xb[0], ))
dls.decoder((xb, yb))
dls.decoder((xb[0], yb[0]))
dls.decoder(yb)
dls.decoder(yb[0])
dls.cws
new_dl = dls.new_dl(X)
first(new_dl)
new_dl = dls.new_dl(X, y=y)
first(new_dl)
dsid = 'OliveOil'
X, y, splits = get_UCR_data(dsid, on_disk=True, split_data=False)
dls = get_ts_dls(X, np.random.rand(60, ), tfms=[None, ToNumpyTensor], splits=splits)
dls.show_dist()
dls.train.show_dist()
xb,yb = first(dls.train)
dls.decoder((xb, ))
dls.decoder((xb[0], ))
dls.decoder((xb, yb))
dls.decoder((xb[0], yb[0]))
dls.decoder(yb)
dls.decoder(yb[0])
test_eq((dls.cat, dls.c), (False, 1))
test_eq(dls.cws, None)
dsid = 'OliveOil'
X, y, splits = get_UCR_data(dsid, on_disk=True, split_data=False)
dls = get_ts_dls(X, np.random.rand(60, 3) * 5, tfms=[None, ToNumpyTensor], splits=splits)
dls.show_dist()
dls.train.show_dist()
xb,yb = first(dls.train)
dls.decoder((xb, ))
dls.decoder((xb[0], ))
dls.decoder((xb, yb))
dls.decoder((xb[0], yb[0]))
dls.decoder(yb)
dls.decoder(yb[0])
test_eq((dls.cat, dls.c, dls.d),(False, 3, 3))
test_eq(dls.cws, None)
dsid = 'OliveOil'
X, y, splits = get_UCR_data(dsid, on_disk=True, split_data=False)
dls = get_ts_dls(X, np.repeat(y_array.reshape(-1,1), 3, 1), tfms=[None, MultiCategorize()], splits=splits)
dls.show_dist()
dls.train.show_dist()
xb,yb = first(dls.train)
dls.decoder((xb, ))
dls.decoder((xb[0], ))
dls.decoder((xb, yb))
dls.decoder((xb[0], yb[0]))
dls.decoder(yb)
dls.decoder(yb[0])
test_eq((dls.cat, dls.c), (True, 4))
test_ne(dls.cws.numpy(), None)
bs = 25
dsets = TSDatasets(X, y, tfms=[None, Categorize()], splits=splits, inplace=True)
dls = TSDataLoaders.from_dsets(dsets.train, dsets.valid, bs=[bs, bs*2], batch_tfms=add(1), num_workers=0)
xb,yb = dls.train.one_batch()
test_eq(xb.data, tensor(X_on_disk[splits[0]][dls.train.idxs]) + 1)
dsets = TSDatasets(X, y, tfms=[None, Categorize()], splits=splits, inplace=True)
dls = TSDataLoaders.from_dsets(dsets.train, dsets.valid, bs=[bs, bs*2])
xb,yb = dls.train.one_batch()
test_eq(xb.shape, (min(bs, len(splits[0])), X.shape[1], X.shape[-1]))
it = iter(dls.valid)
for xb,yb in it:
    test_close(xb, TSTensor(X[splits[1]][dls.valid.idxs]))
bs = 64
dsets = TSDatasets(X, y, tfms=[add(1), Categorize()], splits=RandomSplitter(valid_pct=.3)(y_array), inplace=True)
dls = TSDataLoaders.from_dsets(dsets.train, dsets.valid, bs=[bs, bs*2])
xb,yb = dls.train.one_batch()
test_eq(xb.shape, (min(bs, len(dsets.train)), X_on_disk.shape[1], X_on_disk.shape[-1]))
xb,yb = dls.valid.one_batch()
test_eq(xb.shape, (min(bs*2, len(dsets.valid)), X_on_disk.shape[1], X_on_disk.shape[-1]))
dsets = TSDatasets(X_on_disk, y_array, tfms=[None, Categorize()], splits=splits, inplace=True)
dls = TSDataLoaders.from_dsets(dsets.train, dsets.valid, bs=[32, 64])
for i in range(100):
    dl = dls.train if random.random() < .5 else dls.valid
    xb,yb = dl.one_batch()
    # batches drawn from either loader must match the corresponding raw slice
    assert torch.equal(xb.cpu(), TSTensor(X_on_disk[dl.input_idxs]))
dsets = TSDatasets(X_on_disk, y_array, tfms=[None, Categorize()], inplace=True)
dls = TSDataLoaders.from_dsets(dsets, bs=32)
for i in range(100):
    xb,yb = dls.one_batch()
    assert torch.equal(xb.cpu(), TSTensor(X_on_disk[dls.train.input_idxs]))
dsets = TSDatasets(X_on_disk, tfms=None, inplace=True)
dls = TSDataLoaders.from_dsets(dsets, bs=32)
for i in range(100):
    xb = dls.one_batch()
    assert torch.equal(xb[0].cpu(), TSTensor(X_on_disk[dls.train.input_idxs]))
dsets = TSDatasets(X_on_disk, y_array, tfms=[None, Categorize()], inplace=True)
dls = TSDataLoaders.from_dsets(dsets, bs=32)
test_eq_type(dls.split_idxs, L(np.arange(len(X_on_disk)).tolist()))
X, y, splits = get_UCR_data('NATOPS', return_split=False)
tfms = [None, [Categorize()]]
dls = get_ts_dls(X, y, tfms=tfms, splits=splits, bs=[64, 128])
dls.show_batch()
dls.show_dist()
dsid = 'NATOPS'
bs = 64
X, y, splits = get_UCR_data(dsid, return_split=False)
y = [str(z) for z in y]
vocab = sorted(set(y))
tfms = [None, [Categorize(vocab=vocab)]]
dsets = TSDatasets(X, y, tfms=tfms, splits=splits)
dls = TSDataLoaders.from_dsets(dsets.train, dsets.valid, bs=[bs, bs*2])
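Passing an explicit vocab pins the label-to-index mapping regardless of which labels appear in each split; assuming the usual fastai behavior, the datasets expose it:
# assumption: fastai Datasets surface the Categorize vocab
test_eq(list(dsets.vocab), vocab)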
bs = 64
n_epochs = 100
tfms = [None, [MultiCategorize()]]
dsets = TSDatasets(X, y, tfms=tfms, splits=splits, inplace=True)
dls = TSDataLoaders.from_dsets(dsets.train, dsets.valid, bs=bs)
first(dls.train)