Main tabular functions used throughout the library. These are helpful when, in addition to your time series, you have tabular data such as metadata or precomputed time series features.
get_tabular_dls[source]
```python
get_tabular_dls(df, procs=[Categorify, FillMissing, Normalize], cat_names=None,
                cont_names=None, y_names=None, y_block=None, splits=None,
                do_setup=True, inplace=False, reduce_memory=True, device=None,
                path='.')
```
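`get_tabular_dls` builds fastai tabular `DataLoaders` from a dataframe. The default `procs` encode the categorical columns as integer codes (`Categorify`), fill in missing values (`FillMissing`), and standardize the continuous columns (`Normalize`).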
```python
from tsai.all import *  # imports tsai and the fastai tabular API used below

path = untar_data(URLs.ADULT_SAMPLE)
df = pd.read_csv(path/'adult.csv')
# df['salary'] = np.random.rand(len(df)) # uncomment to simulate a cont dependent variable
cat_names = ['workclass', 'education', 'education-num', 'marital-status', 'occupation',
             'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'native-country']
cont_names = ['age', 'fnlwgt', 'hours-per-week']
target = ['salary']
splits = RandomSplitter()(range_of(df))
dls = get_tabular_dls(df, cat_names=cat_names, cont_names=cont_names, y_names='salary', splits=splits, bs=512)
dls.show_batch()
```
| | workclass | education | education-num | marital-status | occupation | relationship | race | sex | capital-gain | capital-loss | native-country | age | fnlwgt | hours-per-week | salary |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Private | 1st-4th | 2 | Separated | Machine-op-inspct | Own-child | White | Female | 0 | 0 | Guatemala | 46.000000 | 324601.005084 | 40.000000 | <50k |
| 1 | Federal-gov | Bachelors | 13 | Married-civ-spouse | Adm-clerical | Husband | Asian-Pac-Islander | Male | 7298 | 0 | Philippines | 37.000000 | 22200.998857 | 40.000000 | >=50k |
| 2 | Private | Assoc-voc | 11 | Divorced | Transport-moving | Not-in-family | White | Male | 0 | 0 | United-States | 31.000000 | 272069.002516 | 40.000000 | <50k |
| 3 | Private | Some-college | 10 | Never-married | Adm-clerical | Not-in-family | White | Female | 0 | 0 | United-States | 31.000000 | 315128.005662 | 52.000000 | <50k |
| 4 | State-gov | Masters | 14 | Divorced | Adm-clerical | Not-in-family | White | Female | 0 | 0 | United-States | 38.000000 | 34364.005551 | 40.000000 | <50k |
| 5 | Private | Some-college | 10 | Never-married | Other-service | Not-in-family | White | Female | 0 | 0 | United-States | 31.000000 | 264935.998182 | 40.000000 | <50k |
| 6 | Self-emp-inc | Some-college | 10 | Married-civ-spouse | Sales | Husband | White | Male | 0 | 0 | United-States | 56.999999 | 244604.997183 | 50.000000 | >=50k |
| 7 | Private | Assoc-acdm | 12 | Married-civ-spouse | Craft-repair | Husband | White | Male | 0 | 0 | United-States | 44.000000 | 174372.999302 | 40.000000 | >=50k |
| 8 | Private | Assoc-voc | 11 | Divorced | Sales | Own-child | White | Female | 594 | 0 | United-States | 26.000000 | 144482.999550 | 35.000000 | <50k |
| 9 | Private | 9th | 5 | Married-civ-spouse | Machine-op-inspct | Husband | Black | Male | 0 | 0 | ? | 47.000000 | 209212.000325 | 55.999999 | <50k |
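If the dependent variable were continuous (as in the commented-out simulation line above), you could pass an explicit `y_block`. A minimal sketch, using a hypothetical `df_reg` copy so the classification example is unaffected:

```python
# hypothetical regression variant, assuming a simulated continuous target
df_reg = df.copy()
df_reg['salary'] = np.random.rand(len(df_reg))  # simulate a continuous dependent variable
dls_reg = get_tabular_dls(df_reg, cat_names=cat_names, cont_names=cont_names,
                          y_names='salary', y_block=RegressionBlock(), splits=splits, bs=512)
```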
```python
# dls.c is 1 for a continuous target and the number of classes otherwise,
# so pick a regression or classification metric accordingly
metrics = mae if dls.c == 1 else accuracy
learn = tabular_learner(dls, layers=[200, 100], y_range=None, metrics=metrics)
learn.fit(1, 1e-2)
```
| epoch | train_loss | valid_loss | accuracy | time |
|---|---|---|---|---|
| 0 | 0.303775 | 0.308885 | 0.855498 | 00:06 |
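After training, predictions follow the standard fastai API; a minimal sketch:

```python
# predicted probabilities and targets for the validation set
probs, targs = learn.get_preds(dl=learn.dls.valid)
# single-row inference: returns the processed row, the decoded class, and the class probabilities
row, clas, row_probs = learn.predict(df.iloc[0])
```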
```python
learn.dls.one_batch()
```
```text
(tensor([[ 5, 12,  0,  ...,  1,  1, 40],
         [ 5,  2,  7,  ...,  1,  1, 40],
         [ 5,  1,  6,  ...,  1,  1, 40],
         ...,
         [ 5, 16, 10,  ...,  1,  1, 40],
         [ 5, 10, 13,  ...,  1, 26, 40],
         [ 5, 16, 10,  ...,  1,  1, 40]]),
 tensor([[ 0.6916, -1.6040, -0.2802],
         [-1.5817, -0.5386, -1.9857],
         [ 1.2783, -1.4951, -0.0365],
         ...,
         [ 2.0116,  0.4297,  0.7757],
         [-1.1417, -1.2915, -2.3106],
         [ 0.6183,  0.0344,  1.5878]]),
 tensor([[1],
         [0],
         [0],
         ...,
         [0],
         [0],
         [0]], dtype=torch.int8))
```
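A batch is a tuple of (categorical codes, normalized continuous variables, targets). With `bs=512`, 11 categorical and 3 continuous variables, the shapes work out as follows:

```python
xs_cat, xs_cont, ys = learn.dls.one_batch()
xs_cat.shape, xs_cont.shape, ys.shape
# (torch.Size([512, 11]), torch.Size([512, 3]), torch.Size([512, 1]))
```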
```python
learn.model
```
```text
TabularModel(
  (embeds): ModuleList(
    (0): Embedding(10, 6)
    (1): Embedding(17, 8)
    (2): Embedding(17, 8)
    (3): Embedding(8, 5)
    (4): Embedding(16, 8)
    (5): Embedding(7, 5)
    (6): Embedding(6, 4)
    (7): Embedding(3, 3)
    (8): Embedding(119, 23)
    (9): Embedding(90, 20)
    (10): Embedding(43, 13)
  )
  (emb_drop): Dropout(p=0.0, inplace=False)
  (bn_cont): BatchNorm1d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layers): Sequential(
    (0): LinBnDrop(
      (0): BatchNorm1d(106, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (1): Linear(in_features=106, out_features=200, bias=False)
      (2): ReLU(inplace=True)
    )
    (1): LinBnDrop(
      (0): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (1): Linear(in_features=200, out_features=100, bias=False)
      (2): ReLU(inplace=True)
    )
    (2): LinBnDrop(
      (0): Linear(in_features=100, out_features=2, bias=True)
    )
  )
)
```
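Note that the first `Linear` layer takes 106 input features: the 11 embedding widths sum to 103, plus the 3 batch-normalized continuous variables:

```python
emb_widths = [6, 8, 8, 5, 8, 5, 4, 3, 23, 20, 13]  # from the embeds printout above
sum(emb_widths) + 3  # 103 embedding outputs + 3 continuous variables = 106
```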