DataLoader that can take data from multiple dataloaders with different types of data

class MixedDataLoader[source]

MixedDataLoader(*loaders, path='.', shuffle=False, device=None, bs=None)

class MixedDataLoaders[source]

MixedDataLoaders(*loaders, path='.', device=None) :: DataLoaders

Basic wrapper around several DataLoaders.

get_mixed_dls[source]

get_mixed_dls(*dls, device=None, shuffle_train=True, **kwargs)

from tsai.data.tabular import *

# Example setup: build two separate tabular DataLoaders from the same DataFrame
# so they can later be combined into a single mixed DataLoaders object.

# Fetch the ADULT_SAMPLE dataset and read its 'adult.csv' file into a DataFrame.
path = untar_data(URLs.ADULT_SAMPLE)
df = pd.read_csv(path/'adult.csv')
# df['salary'] = np.random.rand(len(df)) # uncomment to simulate a cont dependent variable
target = 'salary'
# A single split of the row indices; the same `splits` object is passed to both
# get_tabular_dls calls below, so both dataloaders use the same split.
splits = RandomSplitter()(range_of(df))

# First dataloaders: three categorical and two continuous columns, batch size 512.
cat_names = ['workclass', 'education', 'marital-status']
cont_names = ['age', 'fnlwgt']
dls1 = get_tabular_dls(df, cat_names=cat_names, cont_names=cont_names, y_names=target, splits=splits, bs=512)
dls1.show_batch()

# Second dataloaders: no categorical columns and one continuous column, batch size 128.
# cat_names and cont_names are deliberately rebound here; dls1 was already built above.
cat_names = None #['occupation', 'relationship', 'race']
cont_names = ['education-num']
dls2 = get_tabular_dls(df, cat_names=cat_names, cont_names=cont_names, y_names=target, splits=splits, bs=128)
dls2.show_batch()
workclass education marital-status age fnlwgt salary
0 Private 7th-8th Married-civ-spouse 43.000000 316182.999841 <50k
1 Federal-gov Bachelors Never-married 35.000000 185052.999815 <50k
2 Self-emp-inc Some-college Married-civ-spouse 49.000000 143482.000349 >=50k
3 Private HS-grad Never-married 27.000000 215873.000615 <50k
4 Private Prof-school Never-married 45.000000 327886.000393 >=50k
5 Private Assoc-voc Married-civ-spouse 39.000000 119097.997807 <50k
6 Self-emp-inc Assoc-acdm Married-civ-spouse 62.999999 96930.003163 >=50k
7 Private Some-college Married-civ-spouse 45.000000 252078.998013 >=50k
8 Private Some-college Divorced 46.000000 169953.000109 <50k
9 ? HS-grad Never-married 39.000000 103985.999645 <50k
education-num_na education-num salary
0 False 9.0 <50k
1 False 9.0 <50k
2 False 13.0 >=50k
3 False 10.0 <50k
4 False 9.0 <50k
5 False 14.0 >=50k
6 False 13.0 <50k
7 False 9.0 <50k
8 False 10.0 >=50k
9 False 9.0 <50k
# Combine the two tabular dataloaders into one mixed object, requesting bs=8
# (the batches displayed afterwards contain 8 rows).
dls = get_mixed_dls(dls1, dls2, bs=8)
# Pull one batch from the train and valid loaders; the results are not stored.
first(dls.train)
first(dls.valid)
# Round-trip the mixed dataloaders through torch.save / torch.load.
# NOTE(review): this assumes the 'export' directory already exists — confirm.
torch.save(dls,'export/mixed_dls.pth')
del dls
# NOTE(review): recent PyTorch releases default torch.load to weights_only=True, which may
# refuse to unpickle a full dataloaders object — confirm against the installed version.
dls = torch.load('export/mixed_dls.pth')
dls.train.show_batch()
workclass education marital-status age fnlwgt salary
0 Private 9th Never-married 18.999999 175081.000655 <50k
1 ? 10th Separated 19.999999 114813.000655 <50k
2 Private 12th Never-married 37.000000 301567.998732 <50k
3 Private Assoc-voc Divorced 49.000000 156925.999686 >=50k
4 State-gov Some-college Separated 52.000000 303461.998782 <50k
5 Private 10th Never-married 24.000000 280134.002665 <50k
6 Private Bachelors Married-civ-spouse 37.000000 105021.002051 >=50k
7 Private 10th Married-civ-spouse 64.000000 180401.000212 >=50k
education-num_na education-num salary
0 False 5.0 <50k
1 False 6.0 <50k
2 False 8.0 <50k
3 False 11.0 >=50k
4 False 10.0 <50k
5 False 6.0 <50k
6 False 13.0 >=50k
7 False 6.0 >=50k
# Take one training batch and display its inputs. In the output shown below, xb is a
# tuple with two inner tuples, each of which holds two tensors.
xb, yb = first(dls.train)
xb
((tensor([[ 5,  7,  5],
          [ 1,  1,  6],
          [ 5,  3,  5],
          [ 5,  9,  1],
          [ 8, 16,  6],
          [ 5,  1,  5],
          [ 5, 10,  3],
          [ 5,  1,  3]]),
  tensor([[-1.4364, -0.1444],
          [-1.3631, -0.7122],
          [-0.1165,  1.0471],
          [ 0.7635, -0.3155],
          [ 0.9835,  1.0649],
          [-1.0698,  0.8452],
          [-0.1165, -0.8044],
          [ 1.8635, -0.0943]])),
 (tensor([[1],
          [1],
          [1],
          [1],
          [1],
          [1],
          [1],
          [1]]),
  tensor([[-1.9840],
          [-1.5930],
          [-0.8109],
          [ 0.3621],
          [-0.0289],
          [-1.5930],
          [ 1.1442],
          [-1.5930]])))
# Take one training batch and report the shape of each of the four input tensors
# (two tensors in each of the two inner tuples of xs).
xs, ys = first(dls.train)
xs[0][0].shape, xs[0][1].shape, xs[1][0].shape, xs[1][1].shape
(torch.Size([8, 3]),
 torch.Size([8, 2]),
 torch.Size([8, 1]),
 torch.Size([8, 1]))