Helper functions used throughout the library that are not related to time series data.
fns = ['data', 'export', 'models']
for fn in fns: 
    path = Path('.')/fn
    if not os.path.exists(path): os.makedirs(path)

totensor[source]

totensor(o)

toarray[source]

toarray(o)

toL[source]

toL(o)

to3dtensor[source]

to3dtensor(o)

to2dtensor[source]

to2dtensor(o)

to1dtensor[source]

to1dtensor(o)

to3darray[source]

to3darray(o)

to2darray[source]

to2darray(o)

to1darray[source]

to1darray(o)

to3d[source]

to3d(o)

to2d[source]

to2d(o)

to1d[source]

to1d(o)

to2dPlus[source]

to2dPlus(o)

to3dPlus[source]

to3dPlus(o)

to2dPlusTensor[source]

to2dPlusTensor(o)

to2dPlusArray[source]

to2dPlusArray(o)

to3dPlusTensor[source]

to3dPlusTensor(o)

to3dPlusArray[source]

to3dPlusArray(o)

todtype[source]

todtype(dtype)

a = np.random.rand(100).astype(np.float32)
b = torch.from_numpy(a).float()
test_eq(totensor(a), b)
test_eq(a, toarray(b))
test_eq(to3dtensor(a).ndim, 3)
test_eq(to2dtensor(a).ndim, 2)
test_eq(to1dtensor(a).ndim, 1)
test_eq(to3darray(b).ndim, 3)
test_eq(to2darray(b).ndim, 2)
test_eq(to1darray(b).ndim, 1)

bytes2size[source]

bytes2size(size_bytes)

bytes2GB[source]

bytes2GB(byts)

get_size[source]

get_size(o, return_str=False)

a = np.random.rand(10, 5, 3)
test_eq(get_size(a), 1328)
test_eq(get_size(a, True), '1.3 KB')
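
bytes2size and bytes2GB are not exercised above. A minimal, hedged sketch, assuming bytes2size formats a byte count as a human-readable string (the same convention get_size uses when return_str=True) and bytes2GB converts a byte count to gigabytes:

print(bytes2size(1328))         # expected to match get_size(a, True) above, i.e. '1.3 KB'
print(bytes2GB(4 * 1024 ** 3))  # expected to be approximately 4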

delete_all_in_dir[source]

delete_all_in_dir(tgt_dir, exception=None)
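
delete_all_in_dir is not demonstrated here. The sketch below assumes it removes everything inside tgt_dir (leaving the directory itself in place), so it runs on a throwaway temporary directory only:

import tempfile
tmp_dir = tempfile.mkdtemp()
for i in range(3):
    open(Path(tmp_dir)/f'file_{i}.txt', 'w').close()
delete_all_in_dir(tmp_dir)
# the directory should now be empty (or removed, depending on the implementation)
assert not os.path.exists(tmp_dir) or len(os.listdir(tmp_dir)) == 0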

reverse_dict[source]

reverse_dict(dictionary)
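
reverse_dict has no example above; assuming it simply swaps keys and values, a minimal check would be:

d = {'a': 0, 'b': 1, 'c': 2}
test_eq(reverse_dict(d), {0: 'a', 1: 'b', 2: 'c'})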

is_tuple[source]

is_tuple(o)
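
A quick sanity check for is_tuple, assuming it returns True only for tuple instances:

test_eq(is_tuple((1, 2)), True)
test_eq(is_tuple([1, 2]), False)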

itemify[source]

itemify(*o, tup_id=None)

a = [1, 2, 3]
b = [4, 5, 6]
print(itemify(a, b))
test_eq(len(itemify(a, b)), len(a))
a = [1, 2, 3]
b = None
print(itemify(a, b))
test_eq(len(itemify(a, b)), len(a))
a = [1, 2, 3]
b = [4, 5, 6]
c = None
print(itemify(a, b, c))
test_eq(len(itemify(a, b, c)), len(a))
[(1, 4), (2, 5), (3, 6)]
[(1,), (2,), (3,)]
[(1, 4), (2, 5), (3, 6)]

isnone[source]

isnone(o)

exists[source]

exists(o)

ifelse[source]

ifelse(a, b, c)

b if a is True else c

a = np.array(3)
test_eq(isnone(a), False)
test_eq(exists(a), True)
b = None
test_eq(isnone(b), True)
test_eq(exists(b), False)
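
ifelse itself is not exercised above. Following its docstring (b if a is True else c), and reusing a and b from the previous cell, a minimal example would be:

test_eq(ifelse(a is not None, 'exists', 'missing'), 'exists')
test_eq(ifelse(b is not None, 'exists', 'missing'), 'missing')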

is_not_close[source]

is_not_close(a, b, eps=1e-05)

Is a not within eps of b

test_not_close[source]

test_not_close(a, b, eps=1e-05)

test that a is not within eps of b
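
Neither is_not_close nor test_not_close is exercised here. Assuming they negate the usual closeness check with tolerance eps:

test_eq(is_not_close(1., 1. + 1e-3), True)   # difference larger than eps
test_eq(is_not_close(1., 1. + 1e-7), False)  # difference within eps
test_not_close(1., 2.)                       # passes silently, since 1. and 2. are not close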

test_type[source]

test_type(a, b)

test_ok[source]

test_ok(f, *args, **kwargs)

test_not_ok[source]

test_not_ok(f, *args, **kwargs)

test_error[source]

test_error(error, f, *args, **kwargs)

assert_fn[source]

assert_fn(*args, **kwargs)

test_gt[source]

test_gt(a, b)

test that a>b

test_ge[source]

test_ge(a, b)

test that a>=b

test_lt[source]

test_lt(a, b)

test that a<b

test_le[source]

test_le(a, b)

test that a<=b

test_ok(test_gt, 5, 4)
test_not_ok(test_gt, 4, 4)
test_ok(test_ge, 4, 4)
test_not_ok(test_ge, 3, 4)

test_ok(test_lt, 3, 4)
test_not_ok(test_lt, 4, 4)
test_ok(test_le, 4, 4)
test_not_ok(test_le, 5, 4)

stack[source]

stack(o, axis=0, retain=True)

stack_pad[source]

stack_pad(l)

a = [[0,1,2], [4,5,6,7]]
test_eq(stack_pad(a).shape, (2, 4))
test_eq(type(stack_pad(a)), np.ndarray)
a = np.random.rand(2, 3, 4)
t = torch.from_numpy(a)
test_eq_type(stack(itemify(a, tup_id=0)), a)
test_eq_type(stack(itemify(t, tup_id=0)), t)

match_seq_len[source]

match_seq_len(*arrays)

a = np.random.rand(10, 5, 8)
b = np.random.rand(3, 5, 10)
c, d = match_seq_len(a, b)
test_eq(c.shape[-1], d.shape[-1])

random_shuffle[source]

random_shuffle(o, random_state=None)

a = np.arange(10)
test_eq_type(random_shuffle(a, 1), np.array([2, 9, 6, 4, 0, 3, 1, 7, 8, 5]))
t = torch.arange(10)
test_eq_type(random_shuffle(t, 1), tensor([2, 9, 6, 4, 0, 3, 1, 7, 8, 5]))
l = list(a)
test_eq(random_shuffle(l, 1), [2, 9, 6, 4, 0, 3, 1, 7, 8, 5])
l2 = L(l)
test_eq_type(random_shuffle(l2, 1), L([2, 9, 6, 4, 0, 3, 1, 7, 8, 5]))

cat2int[source]

cat2int(o)

a = np.array(['b', 'a', 'a', 'b', 'a', 'b', 'a'])
test_eq_type(cat2int(a), TensorCategory([1, 0, 0, 1, 0, 1, 0]))
TensorBase([1,2,3])
TensorBase([1, 2, 3])

cycle_dl[source]

cycle_dl(dl)

cycle_dl_to_device[source]

cycle_dl_to_device(dl)
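
cycle_dl and cycle_dl_to_device are not demonstrated. Assuming cycle_dl simply iterates once through every batch of a dataloader (useful, for instance, to warm up caches or time data loading), a minimal sketch with a plain PyTorch DataLoader:

from torch.utils.data import TensorDataset, DataLoader
ds = TensorDataset(torch.rand(32, 3, 10), torch.randint(0, 2, (32,)))
dl = DataLoader(ds, batch_size=8)
cycle_dl(dl)  # iterates through all batches and discards them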

cache_memmap[source]

cache_memmap(o, slice_len=1000, verbose=False)

get_func_defaults[source]

get_func_defaults(f)

get_idx_from_df_col_vals[source]

get_idx_from_df_col_vals(df, col, val_list)
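
get_idx_from_df_col_vals has no example. Assuming it returns the index of the rows whose col value is in val_list (the 'sample_id' column below is purely illustrative), and using a default RangeIndex so labels and positions coincide:

df = pd.DataFrame({'sample_id': [10, 20, 30, 40, 50]})
idxs = get_idx_from_df_col_vals(df, 'sample_id', [20, 40])
test_eq(df.loc[idxs, 'sample_id'].tolist(), [20, 40])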

get_sublist_idxs[source]

get_sublist_idxs(aList, bList)

Get the idxs that, when applied to aList, will return bList. aList must contain all values in bList.

x = np.array([3, 5, 7, 1, 9, 8, 6, 2])
y = np.array([6, 1, 5, 7])
idx = get_sublist_idxs(x, y)
test_eq(x[idx], y)
x = np.array([3, 5, 7, 1, 9, 8, 6, 6, 2])
y = np.array([6, 1, 5, 7, 5])
idx = get_sublist_idxs(x, y)
test_eq(x[idx], y)

flatten_list[source]

flatten_list(l)
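
flatten_list lacks an example; assuming it flattens a list of lists into a single flat collection:

test_eq(list(flatten_list([[1, 2], [3, 4], [5]])), [1, 2, 3, 4, 5])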

display_pd_df[source]

display_pd_df(df, max_rows:Union[bool, int]=False, max_columns:Union[bool, int]=False)

old_max_rows, old_max_columns = pd.get_option('display.max_rows'), pd.get_option('display.max_columns')
df = pd.DataFrame(np.random.rand(70, 25))
display_pd_df(df, max_rows=2, max_columns=3)
test_eq(old_max_rows, pd.get_option('display.max_rows'))
test_eq(old_max_columns, pd.get_option('display.max_columns'))
[truncated DataFrame display: 70 rows × 25 columns, rendered with max_rows=2 and max_columns=3]

ttest[source]

ttest(data1, data2, equal_var=False)

Calculates the t-statistic and p-value based on two sample distributions

tscore[source]

tscore(o)

a = np.random.normal(0.5, 1, 100)
b = np.random.normal(0.15, .5, 50)
plt.hist(a, 50)
plt.hist(b, 50)
plt.show()
ttest(a,b)
(2.330567862453566, 0.021126920613600466)
a = np.random.normal(0.5, 1, 100)
t = torch.normal(0.5, 1, (100, ))
tscore(a), tscore(t)
(3.6768570476232756, tensor(6.5903))

ttest_tensor[source]

ttest_tensor(a, b)

Differentiable PyTorch function equivalent to scipy.stats.ttest_ind with equal_var=False

a = torch.rand(100).requires_grad_(True) + .1
b = torch.rand(100).requires_grad_(True)
ttest_tensor(a, b)
tensor(2.8642, grad_fn=<DivBackward0>)

pcc[source]

pcc(a, b)

scc[source]

scc(a, b)

(-0.00795960702650289, 0.005004500450045004)
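
The tuple above appears to be the output of pcc and scc on two uncorrelated random samples. Assuming both functions take two equal-length 1D arrays and return the Pearson and Spearman correlation coefficients respectively, a reproducible sketch:

x = np.random.rand(100)
y = x + np.random.rand(100) * .1
print(pcc(x, y), scc(x, y))  # both should be close to 1 for strongly correlated data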

remove_fn[source]

remove_fn(fn, verbose=False)

Removes a file (fn) if it exists

npsave[source]

npsave(array_fn, array, verbose=True)

fn = 'data/remove_fn_test.npy'
a = np.zeros(1)
npsave(fn, a)
del a
np.load(fn, mmap_mode='r+')
remove_fn(fn, True)
remove_fn(fn, True)
data/remove_fn_test.npy does not exist
saving data/remove_fn_test.npy...
...data/remove_fn_test.npy saved
data/remove_fn_test.npy file removed
data/remove_fn_test.npy does not exist

permute_2D[source]

permute_2D(array, axis=None)

Permute rows or columns in an array. This can be used, for example, in feature permutation.

s = np.arange(100 * 50).reshape(100, 50) 
test_eq(permute_2D(s, axis=0).mean(0), s.mean(0))
test_ne(permute_2D(s, axis=0), s)
test_eq(permute_2D(s, axis=1).mean(1), s.mean(1))
test_ne(permute_2D(s, axis=1), s)
test_ne(permute_2D(s), s)

random_normal[source]

random_normal()

Returns a number between -1 and 1 with a normal distribution

random_half_normal[source]

random_half_normal()

Returns a number between 0 and 1 with a half-normal distribution

random_normal_tensor[source]

random_normal_tensor(shape=1, device=None)

Returns a tensor of a predefined shape between -1 and 1 with a normal distribution

random_half_normal_tensor[source]

random_half_normal_tensor(shape=1, device=None)

Returns a tensor of a predefined shape between 0 and 1 with a half-normal distribution

default_dpi[source]

default_dpi()

get_plot_fig[source]

get_plot_fig(size=None, dpi=72)

fig2buf[source]

fig2buf(fig)

default_dpi()
72

plot_scatter[source]

plot_scatter(x, y, deg=1)

a = np.random.rand(100)
b = np.random.rand(100)**2
plot_scatter(a, b)

get_idxs[source]

get_idxs(o, aList)

a = random_shuffle(np.arange(100, 200))
b = np.random.choice(a, 10, False)
idxs = get_idxs(a, b)
test_eq(a[idxs], b)

apply_cmap[source]

apply_cmap(o, cmap)

a = np.random.rand(16, 1, 40, 50)
s = L(a.shape)
s[1] = 3
test_eq(L(apply_cmap(a, 'viridis').shape), s)

s[0] = 1
a = np.random.rand(1, 40, 50)
test_eq(L(apply_cmap(a, 'viridis').shape), s)

torch_tile[source]

torch_tile(a, n_tile, dim=0)

test_eq(torch_tile(torch.arange(2), 3), tensor([0, 0, 0, 1, 1, 1]))

to_tsfresh_df[source]

to_tsfresh_df(ts)

Prepares a time series (Tensor or np.ndarray) to be used as a tsfresh dataset to allow feature extraction

ts = torch.rand(16, 3, 20)
a = to_tsfresh_df(ts)
ts = ts.numpy()
b = to_tsfresh_df(ts)

pcorr[source]

pcorr(a, b)

scorr[source]

scorr(a, b)
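
pcorr and scorr are not exercised here either. Assuming they compute a Pearson and a Spearman correlation respectively (possibly returning a coefficient and a p-value, as scipy does), a hedged example:

x = np.random.rand(200)
y = -x + np.random.rand(200) * .1
print(pcorr(x, y))  # expected: a strong negative Pearson correlation
print(scorr(x, y))  # expected: a strong negative Spearman correlation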

torch_diff[source]

torch_diff(t, lag=1, pad=True)

t = torch.arange(24).reshape(2,3,4)
test_eq(torch_diff(t, 1)[..., 1:].float().mean(), 1.)
test_eq(torch_diff(t, 2)[..., 2:].float().mean(), 2.)

get_outliers_IQR[source]

get_outliers_IQR(o, axis=None)

clip_outliers[source]

clip_outliers(o, axis=None)

get_percentile[source]

get_percentile(o, percentile, axis=None)

torch_clamp[source]

torch_clamp(o, min=None, max=None)

Clamp a torch.Tensor along one or multiple dimensions

t = torch.randn(2,3,100)
test_eq(type(get_outliers_IQR(t, -1)[0]), torch.Tensor)
a = np.random.randn(2,3,100)
test_eq(type(get_outliers_IQR(a, -1)[0]), np.ndarray)
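
The test above only covers get_outliers_IQR. Hedged sketches for get_percentile and torch_clamp, assuming get_percentile expects the percentile in the 0-100 range and torch_clamp behaves like torch.clamp for scalar bounds:

v = np.random.randn(1000)
p50 = get_percentile(v, 50)
assert v.min() <= p50 <= v.max()
test_eq((torch_clamp(torch.randn(2, 3, 100), min=0) < 0).sum().item(), 0)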

torch_slice_by_dim[source]

torch_slice_by_dim(t, index, dim=-1, **kwargs)

t = torch.rand(5, 3)
index = torch.randint(0, 3, (5, 1))
# index = [[0, 2], [0, 1], [1, 2], [0, 2], [0, 1]]
torch_slice_by_dim(t, index)
tensor([[0.0675],
        [0.4854],
        [0.9020],
        [0.6969],
        [0.4382]])

torch_nanmean[source]

torch_nanmean(o, dim=None, keepdim=False)

There's currently no torch.nanmean function

torch_nanstd[source]

torch_nanstd(o, dim=None, keepdim=False)

There's currently no torch.nanstd function

t = torch.rand(1000)
t[:100] = float('nan')
assert torch_nanmean(t).item() > 0

concat[source]

concat(colls)

Concatenate all collections in colls

reduce_memory_usage[source]

reduce_memory_usage(df)
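
reduce_memory_usage has no example. Assuming it downcasts a DataFrame's numeric columns to smaller dtypes and returns the resulting DataFrame, a quick sketch:

df = pd.DataFrame({'a': np.arange(10_000, dtype=np.int64), 'b': np.random.rand(10_000)})
mem_before = df.memory_usage(deep=True).sum()
df = reduce_memory_usage(df)
mem_after = df.memory_usage(deep=True).sum()
assert mem_after <= mem_before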

cls_name[source]

cls_name(o)

test_eq(cls_name(timer), 'Timer')

roll2d[source]

roll2d(o, roll1:Union[NoneType, list, int]=None, roll2:Union[NoneType, list, int]=None)

Rolls a 2D object along the indicated axes. This solution is based on https://stackoverflow.com/questions/20360675/roll-rows-of-a-matrix-independently

roll3d[source]

roll3d(o, roll1:Union[NoneType, list, int]=None, roll2:Union[NoneType, list, int]=None, roll3:Union[NoneType, list, int]=None)

Rolls a 3D object along the indicated axes. This solution is based on https://stackoverflow.com/questions/20360675/roll-rows-of-a-matrix-independently

random_roll2d[source]

random_roll2d(o, axis=())

Randomly rolls a 2D object along the indicated axes. This solution is based on https://stackoverflow.com/questions/20360675/roll-rows-of-a-matrix-independently

random_roll3d[source]

random_roll3d(o, axis=(), replace=False)

Randomly rolls a 3D object along the indicated axes. This solution is based on https://stackoverflow.com/questions/20360675/roll-rows-of-a-matrix-independently

a = np.tile(np.arange(10), 3).reshape(3, 10) * np.array([1, 10, 100]).reshape(-1, 1)
a
array([[  0,   1,   2,   3,   4,   5,   6,   7,   8,   9],
       [  0,  10,  20,  30,  40,  50,  60,  70,  80,  90],
       [  0, 100, 200, 300, 400, 500, 600, 700, 800, 900]])
roll2d(a, roll1=[2, 1, 0])
array([[  0, 100, 200, 300, 400, 500, 600, 700, 800, 900],
       [  0,  10,  20,  30,  40,  50,  60,  70,  80,  90],
       [  0,   1,   2,   3,   4,   5,   6,   7,   8,   9]])
roll2d(a, roll2=3)
array([[  7,   8,   9,   0,   1,   2,   3,   4,   5,   6],
       [ 70,  80,  90,   0,  10,  20,  30,  40,  50,  60],
       [700, 800, 900,   0, 100, 200, 300, 400, 500, 600]])

create_empty_array[source]

create_empty_array(shape, fname=None, path='./data', on_disk=True, dtype='float32', mode='r+', **kwargs)

mode: ‘r’: Open existing file for reading only. ‘r+’: Open existing file for reading and writing. ‘w+’: Create or overwrite existing file for reading and writing. ‘c’: Copy-on-write: assignments affect data in memory, but changes are not saved to disk. The file on disk is read-only.

fname = 'X_on_disk'
shape = (100, 10, 10)
X = create_empty_array(shape, fname, on_disk=True, mode='r+')

chunksize = 10
pbar = progress_bar(range(math.ceil(len(X) / chunksize)), leave=False)
start = 0
for i in pbar: 
    end = min(start + chunksize, len(X))
    partial_data = np.random.rand(end - start, X.shape[1] , X.shape[2])
    X[start:end] = partial_data
    start = end
    del partial_data
    gc.collect()
filename = X.filename
del X
X = np.load(filename, mmap_mode='r+')
test_eq((X == 0).sum(), 0)
test_eq(X.shape, shape)
os.remove(X.filename)

np_save_compressed[source]

np_save_compressed(arr, fname=None, path='./data', verbose=False, **kwargs)

np_load_compressed[source]

np_load_compressed(fname=None, path='./data', **kwargs)

X1 = np.random.rand(10)
np_save_compressed(X1, 'X_comp', path='./data')
X2 = np_load_compressed('X_comp')
test_eq(X1, X2)

np2memmap[source]

np2memmap(arr, fname=None, path='./data', dtype='float32', mode='c', **kwargs)

Function that turns an ndarray into a memmap ndarray. mode: ‘r’: Open existing file for reading only. ‘r+’: Open existing file for reading and writing. ‘w+’: Create or overwrite existing file for reading and writing. ‘c’: Copy-on-write: assignments affect data in memory, but changes are not saved to disk. The file on disk is read-only.

X1 = np.random.rand(10)
X2 = np2memmap(X1, 'X1_test')
test_eq(X1, X2)
test_ne(type(X1), type(X2))

torch_mean_groupby[source]

torch_mean_groupby(o, idxs)

Computes the torch mean along axis 0, grouped by the idxs. Note that idxs must be in the same order as o.

o = torch.arange(6*2*3).reshape(6, 2, 3).float()
idxs = np.array([[0,1,2,3], [2,3]], dtype=object)
output = torch_mean_groupby(o, idxs)
test_eq(o[:2], output[:2])
test_eq(o[2:4].mean(0), output[2])
test_eq(o[4:6].mean(0), output[3])

torch_flip[source]

torch_flip(t, dims=-1)

t = torch.randn(2, 3, 4)
test_eq(torch.flip(t, (2,)), torch_flip(t, dims=-1))

torch_nan_to_num[source]

torch_nan_to_num(o, num=0, inplace=False)

torch_masked_to_num[source]

torch_masked_to_num(o, mask, num=0, inplace=False)

x = torch.rand(2, 4, 6)
x[:, :3][x[:, :3] < .5] = np.nan
nan_values = torch.isnan(x).sum()
y = torch_nan_to_num(x[:, :3], inplace=False)
test_eq(torch.isnan(y).sum(), 0)
test_eq(torch.isnan(x).sum(), nan_values)
torch_nan_to_num(x[:, :3], inplace=True)
test_eq(torch.isnan(x).sum(), 0)
x = torch.rand(2, 4, 6)
mask = x[:, :3] > .5
x[:, :3] = torch_masked_to_num(x[:, :3], mask, num=0, inplace=False)
test_eq(x[:, :3][mask].sum(), 0)
x = torch.rand(2, 4, 6)
mask = x[:, :3] > .5
torch_masked_to_num(x[:, :3], mask, num=0, inplace=True)
test_eq(x[:, :3][mask].sum(), 0)