- Input: :math:`(N, C, W_{in})`
- Output: :math:`(N, C, W_{out})` where

  :math:`W_{out} = W_{in} + \text{padding\_left} + \text{padding\_right}`

>>> m = nn.ConstantPad1d(2, 3.5)
>>> input = torch.randn(1, 2, 4)
>>> input
tensor([[[-1.0491, -0.7152, -0.0749,  0.8530],
         [-1.3287,  1.8966,  0.1466, -0.2771]]])
>>> m(input)
tensor([[[ 3.5000,  3.5000, -1.0491, -0.7152, -0.0749,  0.8530,  3.5000,
           3.5000],
         [ 3.5000,  3.5000, -1.3287,  1.8966,  0.1466, -0.2771,  3.5000,
           3.5000]]])
>>> m = nn.ConstantPad1d(2, 3.5)
>>> input = torch.randn(1, 2, 3)
>>> input
tensor([[[ 1.6616,  1.4523, -1.1255],
         [-3.6372,  0.1182, -1.8652]]])
>>> m(input)
tensor([[[ 3.5000,  3.5000,  1.6616,  1.4523, -1.1255,  3.5000,  3.5000],
         [ 3.5000,  3.5000, -3.6372,  0.1182, -1.8652,  3.5000,  3.5000]]])
>>> # using different paddings for different sides
>>> m = nn.ConstantPad1d((3, 1), 3.5)
>>> m(input)
tensor([[[ 3.5000,  3.5000,  3.5000,  1.6616,  1.4523, -1.1255,  3.5000],
         [ 3.5000,  3.5000,  3.5000, -3.6372,  0.1182, -1.8652,  3.5000]]])

init_linear(Conv1dSame(2, 3, 3), None, init='auto', bias_std=.01)

# Conv1dSame shape checks: the expected output length is seq_len for stride 1
# and seq_len // 2 for stride 2, for both dilation 1 and dilation 2.
bs, c_in, c_out, seq_len = 2, 3, 5, 6
t = torch.rand(bs, c_in, seq_len)
for stride, dilation in ((1, 1), (1, 2), (2, 1), (2, 2)):
    conv = Conv1dSame(c_in, c_out, ks=3, stride=stride, dilation=dilation, bias=False)
    test_eq(conv(t).shape, (bs, c_out, seq_len // stride))

- Input: :math:`(N, C, H_{in}, W_{in})`
- Output: :math:`(N, C, H_{out}, W_{out})` where

  :math:`H_{out} = H_{in} + \text{padding\_top} + \text{padding\_bottom}`

  :math:`W_{out} = W_{in} + \text{padding\_left} + \text{padding\_right}`

>>> m = nn.ConstantPad2d(2, 3.5)
>>> input = torch.randn(1, 2, 2)
>>> input
tensor([[[ 1.6585,  0.4320],
         [-0.8701, -0.4649]]])
>>> m(input)
tensor([[[ 3.5000,  3.5000,  3.5000,  3.5000,  3.5000,  3.5000],
         [ 3.5000,  3.5000,  3.5000,  3.5000,  3.5000,  3.5000],
         [ 3.5000,  3.5000,  1.6585,  0.4320,  3.5000,  3.5000],
         [ 3.5000,  3.5000, -0.8701, -0.4649,  3.5000,  3.5000],
         [ 3.5000,  3.5000,  3.5000,  3.5000,  3.5000,  3.5000],
         [ 3.5000,  3.5000,  3.5000,  3.5000,  3.5000,  3.5000]]])
>>> # using different paddings for different sides
>>> m = nn.ConstantPad2d((3, 0, 2, 1), 3.5)
>>> m(input)
tensor([[[ 3.5000,  3.5000,  3.5000,  3.5000,  3.5000],
         [ 3.5000,  3.5000,  3.5000,  3.5000,  3.5000],
         [ 3.5000,  3.5000,  3.5000,  1.6585,  0.4320],
         [ 3.5000,  3.5000,  3.5000, -0.8701, -0.4649],
         [ 3.5000,  3.5000,  3.5000,  3.5000,  3.5000]]])

# Conv2dSame shape checks over kernel/stride/dilation combinations, followed by
# the Conv2d factory called with padding='same'.
bs, c_in, c_out = 2, 3, 5
h, w = 16, 20
t = torch.rand(bs, c_in, h, w)
same_shape = (bs, c_out, h, w)
half_shape = (bs, c_out, h//2, w//2)
cases = [
    (dict(ks=3, stride=1, dilation=1), same_shape),
    (dict(ks=(3, 1), stride=1, dilation=1), same_shape),
    (dict(ks=3, stride=(1, 1), dilation=(2, 2)), same_shape),
    (dict(ks=3, stride=(2, 2), dilation=(1, 1)), half_shape),
    (dict(ks=3, stride=(2, 2), dilation=(2, 2)), half_shape),
]
for conv_kwargs, expected_shape in cases:
    test_eq(Conv2dSame(c_in, c_out, bias=False, **conv_kwargs)(t).shape, expected_shape)
test_eq(Conv2d(c_in, c_out, ks=3, padding='same', stride=1, dilation=1, bias=False)(t).shape, same_shape)

import torch.nn as nn
import torch.nn.functional as F

class Model(nn.Module):
    """Two stacked 5x5 convolutions, each followed by a ReLU.

    ``conv1`` maps 1 input channel to 20 channels and ``conv2`` maps those
    20 channels to 20 channels.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 20, 5)
        self.conv2 = nn.Conv2d(20, 20, 5)

    def forward(self, x):
        """Return ``relu(conv2(relu(conv1(x))))``."""
        hidden = F.relu(self.conv1(x))
        out = F.relu(self.conv2(hidden))
        return out

init_linear(Conv1dCausal(2, 3, 3), None, init='auto', bias_std=.01)

# Conv1dCausal must produce the same output shape as Conv1dSame for each dilation.
bs, c_in, c_out, seq_len = 2, 3, 5, 512
t = torch.rand(bs, c_in, seq_len)
for dilation in (1, 2):
    causal_shape = Conv1dCausal(c_in, c_out, ks=3, dilation=dilation)(t).shape
    same_shape = Conv1dSame(c_in, c_out, ks=3, dilation=dilation)(t).shape
    test_eq(causal_shape, same_shape)

# Conv1d factory checks for each padding mode, plus its argument validation.
# The input and the expected shapes are built from this cell's own ni / nf;
# the earlier version read c_in / c_out, which are only defined by other cells
# and merely happened to hold the same values.
bs = 2
ni = 3
nf = 5
seq_len = 6
ks = 3
t = torch.rand(bs, ni, seq_len)
valid_len = seq_len - (2 * (ks//2))  # expected length when no padding is applied
test_eq(Conv1d(ni, nf, ks, padding=0)(t).shape, (bs, nf, valid_len))
test_eq(Conv1d(ni, nf, ks, padding='valid')(t).shape, (bs, nf, valid_len))
test_eq(Conv1d(ni, nf, ks, padding='same')(t).shape, (bs, nf, seq_len))
test_eq(Conv1d(ni, nf, ks, padding='causal')(t).shape, (bs, nf, seq_len))
test_error('use kernel_size or ks but not both simultaneously', Conv1d, ni, nf, kernel_size=3, ks=3)
test_error('you need to pass a ks', Conv1d, ni, nf)

# Build a Conv1d with padding='same', run init_linear on it, and leave `conv`
# as the final expression so its repr is shown.
conv = Conv1d(ni, nf, ks, padding='same')
init_linear(conv, None, init='auto', bias_std=.01)
conv

Conv1d(3, 5, kernel_size=(3,), stride=(1,), padding=(1,))

# Same as above with padding='causal'; `conv` is left as the final expression
# so its repr is shown.
conv = Conv1d(ni, nf, ks, padding='causal')
init_linear(conv, None, init='auto', bias_std=.01)
conv

Conv1dCausal(
  (conv_causal): Conv1d(3, 5, kernel_size=(3,), stride=(1,), padding=(2,))
)

# padding='valid' variant: initialise, apply weight_norm, then show the repr.
conv = Conv1d(ni, nf, ks, padding='valid')
init_linear(conv, None, init='auto', bias_std=.01)
weight_norm(conv)
conv

Conv1d(3, 5, kernel_size=(3,), stride=(1,))

# padding=0 variant: initialise, apply weight_norm, then show the repr.
conv = Conv1d(ni, nf, ks, padding=0)
init_linear(conv, None, init='auto', bias_std=.01)
weight_norm(conv)
conv

Conv1d(3, 5, kernel_size=(3,), stride=(1,))

# SeparableConv1d with kernel size 3 is expected to keep the sequence length.
bs, c_in, c_out, seq_len = 64, 6, 5, 512
t = torch.rand(bs, c_in, seq_len)
sep_conv = SeparableConv1d(c_in, c_out, 3)
test_eq(sep_conv(t).shape, (bs, c_out, seq_len))

# AddCoords1d is expected to add exactly one channel and, on a standardised
# input, to leave the overall mean near 0 and std near 1.
bs, c_in, c_out, seq_len = 2, 3, 5, 50

t = torch.rand(bs, c_in, seq_len)
t = (t - t.mean()) / t.std()  # standardise the whole tensor
expected_shape = (bs, c_in + 1, seq_len)
test_eq(AddCoords1d()(t).shape, expected_shape)
new_t = AddCoords1d()(t)
test_close(new_t.mean(), 0, 1e-2)
test_close(new_t.std(), 1, 1e-2)

# SEModule1d must return a tensor with the same shape as its input.
t = torch.rand(8, 32, 12)
se_module = SEModule1d(t.shape[1], 16, act=nn.ReLU, act_kwargs={})
test_eq(se_module(t).shape, t.shape)

# ConvBlock shape checks: default, causal padding, coordinate channel, stride 2.
bs = 2
ni = 3
nf = 5
sl = 4
ks = 5

t = torch.rand(bs, ni, sl)
test_eq(ConvBlock(ni, nf, ks)(t).shape, (bs, nf, sl))
test_eq(ConvBlock(ni, nf, ks, padding='causal')(t).shape, (bs, nf, sl))
test_eq(ConvBlock(ni, nf, ks, coord=True)(t).shape, (bs, nf, sl))
# A bare `ConvBlock(ni, nf, ks, stride=2)(t).shape` statement was dropped here:
# in the middle of a cell its value is neither displayed nor asserted, and the
# assertion below already covers the stride-2 case.
test_eq(ConvBlock(ni, nf, ks, stride=2)(t).shape, (bs, nf, sl//2))

# BN1d keeps the input shape; its weights average 1 by default and 0 with zero_norm=True.
bn_out_shape = BN1d(ni)(t).shape
test_eq(bn_out_shape, (bs, ni, sl))
default_weight_mean = BN1d(ni).weight.data.mean().item()
test_eq(default_weight_mean, 1.)
zero_weight_mean = BN1d(ni, zero_norm=True).weight.data.mean().item()
test_eq(zero_weight_mean, 0.)

# Layer [1] of a ConvBlock with norm='batch' is the norm layer: all of its
# weights are 0 with zero_norm=True and non-zero otherwise. With bias=False
# the first layer has no bias.
zero_norm_layer = ConvBlock(ni, nf, ks, norm='batch', zero_norm=True)[1]
test_eq(zero_norm_layer.weight.data.unique().item(), 0)
plain_norm_layer = ConvBlock(ni, nf, ks, norm='batch', zero_norm=False)[1]
test_ne(plain_norm_layer.weight.data.unique().item(), 0)
first_layer = ConvBlock(ni, nf, ks, bias=False)[0]
test_eq(first_layer.bias, None)
ConvBlock(ni, nf, ks, act=Swish, coord=True)

ConvBlock(
  (0): AddCoords1d()
  (1): Conv1d(4, 5, kernel_size=(5,), stride=(1,), padding=(2,), bias=False)
  (2): BatchNorm1d(5, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (3): Swish()
)

LinLnDrop(2, 3, p=.5)

LinLnDrop(
  (0): LayerNorm((2,), eps=1e-05, elementwise_affine=True)
  (1): Dropout(p=0.5, inplace=False)
  (2): Linear(in_features=2, out_features=3, bias=False)
)

# Shape checks for the tensor-rearranging layers, then a tuple of them so the
# cell displays their reprs.
bs, nf, sl = 2, 5, 4

t = torch.rand(bs, nf, sl)
swapped_shape = (bs, sl, nf)
split_shape = (bs, 1, 2, 10)
test_eq(Permute(0,2,1)(t).shape, swapped_shape)
test_eq(Max(1)(t).shape, (bs, sl))
test_eq(Transpose(1,2)(t).shape, swapped_shape)
test_eq(Transpose(1,2, contiguous=True)(t).shape, swapped_shape)
test_eq(View(-1, 2, 10)(t).shape, split_shape)
test_eq(Reshape(-1, 2, 10)(t).shape, split_shape)
Transpose(1,2), Permute(0,2,1), View(-1, 2, 10), Transpose(1,2, contiguous=True), Reshape(-1, 2, 10), Noop

(Transpose(1, 2),
 Permute(dims=0, 2, 1),
 View(bs, -1, 2, 10),
 Transpose(dims=1, 2).contiguous(),
 Reshape(bs, -1, 2, 10),
 Sequential())

# Plot sorted probabilities before (red) and after (blue) Sharpen, then check
# that the summed per-row maximum is larger after sharpening on the second
# half of the samples.
n_samples, n_classes = 1000, 3

t = (torch.rand(n_samples, n_classes) - .5) * 10
probas = F.softmax(t, -1)
sharpened_probas = Sharpen()(probas)
for values, colour in ((probas, 'r'), (sharpened_probas, 'b')):
    plt.plot(values.flatten().sort().values, color=colour)
plt.show()
second_half = slice(n_samples//2, None)
sharpened_total = sharpened_probas[second_half].max(-1).values.sum().item()
original_total = probas[second_half].max(-1).values.sum().item()
test_gt(sharpened_total, original_total)

# get_calibrator checks: a freshly built 'temp' / 'vector' / 'matrix'
# calibrator returns its input unchanged, the 'd*' variants return
# log_softmax of the input, and each name maps to the expected class.
bs, c_out = 2, 3

t = torch.rand(bs, c_out)
expected_classes = ['Temp_Scale', 'Vector_Scale', 'Matrix_Scale']
for calibrator, cal_name in zip(['temp', 'vector', 'matrix'], expected_classes):
    cal = get_calibrator(calibrator, n_classes=c_out)
    test_eq(cal(t), t)
    test_eq(cal.__class__.__name__, cal_name)
for calibrator, cal_name in zip(['dtemp', 'dvector', 'dmatrix'], expected_classes):
    cal = get_calibrator(calibrator, n_classes=c_out)
    test_eq(cal(t), F.log_softmax(t, dim=1))
    test_eq(cal.__class__.__name__, cal_name)

# Calibration layers keep the input shape, with and without dirichlet=True,
# and the non-dirichlet variants return the input unchanged when freshly built.
bs, c_out = 2, 3

t = torch.rand(bs, c_out)
expected_shape = t.shape

test_eq(Temp_Scale()(t).shape, expected_shape)
test_eq(Vector_Scale(c_out)(t).shape, expected_shape)
test_eq(Matrix_Scale(c_out)(t).shape, expected_shape)
test_eq(Temp_Scale(dirichlet=True)(t).shape, expected_shape)
test_eq(Vector_Scale(c_out, dirichlet=True)(t).shape, expected_shape)
test_eq(Matrix_Scale(c_out, dirichlet=True)(t).shape, expected_shape)

for fresh_layer in (Temp_Scale(), Vector_Scale(c_out), Matrix_Scale(c_out)):
    test_eq(fresh_layer(t), t)

# Vector_Scale: identity on a fresh instance, weight initialised to ones,
# and weight is a trainable nn.Parameter.
bs, c_out = 2, 5

t = torch.rand(bs, c_out)
test_eq(Vector_Scale(c_out)(t), t)
initial_weight = Vector_Scale(c_out).weight.data
test_eq(initial_weight, torch.ones(c_out))
test_eq(Vector_Scale(c_out).weight.requires_grad, True)
weight_type = type(Vector_Scale(c_out).weight)
test_eq(weight_type, torch.nn.parameter.Parameter)

# Matrix_Scale: output shape matches the input and weight is a trainable nn.Parameter.
# NOTE(review): `weight` and `bias` are assigned but never used in this cell.
bs, c_out = 2, 3
weight, bias = 2, 1

t = torch.rand(bs, c_out)
test_eq(Matrix_Scale(c_out)(t).shape, t.shape)
test_eq(Matrix_Scale(c_out).weight.requires_grad, True)
weight_type = type(Matrix_Scale(c_out).weight)
test_eq(weight_type, torch.nn.parameter.Parameter)

# LogitAdjLayer is expected to add the class priors to the logits.
bs, n_classes = 16, 3
class_priors = torch.rand(n_classes)
logits = torch.randn(bs, n_classes) * 2
adjusted = LogitAdjLayer(class_priors)(logits)
test_eq(adjusted, logits + class_priors)

# MaxPPVPool1d doubles the channel count, reduces the sequence to length 1,
# and matches the output shape of AdaptiveConcatPool1d(1).
bs, nf, sl = 2, 5, 4

t = torch.rand(bs, nf, sl)
test_eq(MaxPPVPool1d()(t).shape, (bs, nf*2, 1))
concat_pool_shape = AdaptiveConcatPool1d(1)(t).shape
test_eq(MaxPPVPool1d()(t).shape, concat_pool_shape)

# gwa_pool_head(64, 5, 50) maps a (16, 64, 50) input to (16, 5).
t = torch.randn(16, 64, 50)
head = gwa_pool_head(64, 5, 50)
head_out = head(t)
test_eq(head_out.shape, (16, 5))

# Example of using Sequential: the layers are collected in a list and
# unpacked into the container in order.
layers = [
    nn.Conv2d(1, 20, 5),
    nn.ReLU(),
    nn.Conv2d(20, 64, 5),
    nn.ReLU(),
]
model = nn.Sequential(*layers)

# Example of using Sequential with OrderedDict: each layer is registered
# under the key it is inserted with.
named_layers = OrderedDict()
named_layers['conv1'] = nn.Conv2d(1, 20, 5)
named_layers['relu1'] = nn.ReLU()
named_layers['conv2'] = nn.Conv2d(20, 64, 5)
named_layers['relu2'] = nn.ReLU()
model = nn.Sequential(named_layers)

# Output-shape checks for the global pooling layers, first with one input
# channel and then with four.
bs, c_in, seq_len = 16, 1, 50
c_out = 3
t = torch.rand(bs, c_in, seq_len)
test_eq(GAP1d()(t).shape, (bs, c_in))
test_eq(GACP1d()(t).shape, (bs, c_in*2))
bs, c_in, seq_len = 16, 4, 50
t = torch.rand(bs, c_in, seq_len)
test_eq(GAP1d()(t).shape, (bs, c_in))
test_eq(GACP1d()(t).shape, (bs, c_in*2))
# The n_layers=2 check used to appear twice with identical arguments; one copy is kept.
test_eq(GAWP1d(c_in, seq_len, n_layers=2, ln=False, dropout=0.5, act=nn.ReLU(), zero_init=False)(t).shape, (bs, c_in))
test_eq(GAWP1d(c_in, seq_len, n_layers=1, ln=False, dropout=0.5, zero_init=False)(t).shape, (bs, c_in))
test_eq(GAWP1d(c_in, seq_len, n_layers=1, ln=False, dropout=0.5, zero_init=True)(t).shape, (bs, c_in))
test_eq(AttentionalPool1d(c_in, c_out)(t).shape, (bs, c_out, 1))

# attentional_pool_head maps (bs, c_in, seq_len) to (bs, c_out).
bs, c_in, seq_len, c_out = 16, 128, 50, 14
t = torch.rand(bs, c_in, seq_len)
attp = attentional_pool_head(c_in, c_out)
pooled = attp(t)
test_eq(pooled.shape, (bs, c_out))

# create_pool_head returns (bs, c_out) with and without concat pooling; the
# final expression displays a concat-pool head built with bn=True.
bs, nf, c_out, seq_len = 16, 12, 2, 20
t = torch.rand(bs, nf, seq_len)
expected_shape = (bs, c_out)
test_eq(create_pool_head(nf, c_out, seq_len, fc_dropout=0.5)(t).shape, expected_shape)
test_eq(create_pool_head(nf, c_out, seq_len, concat_pool=True, fc_dropout=0.5)(t).shape, expected_shape)
create_pool_head(nf, c_out, seq_len, concat_pool=True, bn=True, fc_dropout=.5)

Sequential(
  (0): GACP1d(
    (gacp): AdaptiveConcatPool1d(
      (ap): AdaptiveAvgPool1d(output_size=1)
      (mp): AdaptiveMaxPool1d(output_size=1)
    )
    (flatten): Flatten(full=False)
  )
  (1): LinBnDrop(
    (0): BatchNorm1d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (1): Dropout(p=0.5, inplace=False)
    (2): Linear(in_features=24, out_features=2, bias=False)
  )
)

# max_pool_head maps (bs, nf, seq_len) to (bs, c_out).
bs, nf, c_out, seq_len = 16, 12, 2, 20
t = torch.rand(bs, nf, seq_len)
pool_head = max_pool_head(nf, c_out, seq_len, fc_dropout=0.5)
test_eq(pool_head(t).shape, (bs, c_out))

# create_pool_plus_head returns (bs, c_out), both when seq_len is passed and
# when it is omitted with concat_pool=True; the final expression displays a head.
bs, nf, c_out, seq_len = 16, 12, 2, 20
t = torch.rand(bs, nf, seq_len)
expected_shape = (bs, c_out)
test_eq(create_pool_plus_head(nf, c_out, seq_len, fc_dropout=0.5)(t).shape, expected_shape)
test_eq(create_pool_plus_head(nf, c_out, concat_pool=True, fc_dropout=0.5)(t).shape, expected_shape)
create_pool_plus_head(nf, c_out, seq_len, fc_dropout=0.5)

Sequential(
  (0): AdaptiveConcatPool1d(
    (ap): AdaptiveAvgPool1d(output_size=1)
    (mp): AdaptiveMaxPool1d(output_size=1)
  )
  (1): Flatten(full=False)
  (2): BatchNorm1d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (3): Dropout(p=0.25, inplace=False)
  (4): Linear(in_features=24, out_features=512, bias=False)
  (5): ReLU(inplace=True)
  (6): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (7): Dropout(p=0.5, inplace=False)
  (8): Linear(in_features=512, out_features=2, bias=False)
)

# create_conv_head returns (bs, c_out), both with seq_len and with
# adaptive_size=50; the final expression displays a head.
bs, nf, c_out, seq_len = 16, 12, 2, 20
t = torch.rand(bs, nf, seq_len)
expected_shape = (bs, c_out)
test_eq(create_conv_head(nf, c_out, seq_len)(t).shape, expected_shape)
test_eq(create_conv_head(nf, c_out, adaptive_size=50)(t).shape, expected_shape)
create_conv_head(nf, c_out, 50)

Sequential(
  (0): ConvBlock(
    (0): Conv1d(12, 6, kernel_size=(1,), stride=(1,), bias=False)
    (1): BatchNorm1d(6, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (1): ConvBlock(
    (0): Conv1d(6, 3, kernel_size=(1,), stride=(1,), bias=False)
    (1): BatchNorm1d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (2): ConvBlock(
    (0): Conv1d(3, 2, kernel_size=(1,), stride=(1,), bias=False)
    (1): BatchNorm1d(2, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (3): GAP1d(
    (gap): AdaptiveAvgPool1d(output_size=1)
    (flatten): Flatten(full=False)
  )
)

# create_mlp_head maps (bs, nf, seq_len) to (bs, c_out); the final expression
# displays a head built with bn=True.
bs = 16
nf = 12
c_out = 2
seq_len = 20
t = torch.rand(bs, nf, seq_len)
test_eq(create_mlp_head(nf, c_out, seq_len, fc_dropout=0.5)(t).shape, (bs, c_out))
# A second, identical `t = torch.rand(bs, nf, seq_len)` was removed here: the
# tensor is not used again in this cell.
create_mlp_head(nf, c_out, seq_len, bn=True, fc_dropout=.5)

Sequential(
  (0): Flatten(full=False)
  (1): LinBnDrop(
    (0): BatchNorm1d(240, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (1): Dropout(p=0.5, inplace=False)
    (2): Linear(in_features=240, out_features=2, bias=False)
  )
)

# create_fc_head maps (bs, nf, seq_len) to (bs, c_out).
bs, nf, c_out, seq_len = 16, 12, 2, 20
t = torch.rand(bs, nf, seq_len)
fc_head_out = create_fc_head(nf, c_out, seq_len, fc_dropout=0.5)(t)
test_eq(fc_head_out.shape, (bs, c_out))
# NOTE(review): the head displayed below is built with create_mlp_head, not
# create_fc_head -- possibly a copy-paste from the previous cell; confirm.
create_mlp_head(nf, c_out, seq_len, bn=True, fc_dropout=.5)

Sequential(
  (0): Flatten(full=False)
  (1): LinBnDrop(
    (0): BatchNorm1d(240, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (1): Dropout(p=0.5, inplace=False)
    (2): Linear(in_features=240, out_features=2, bias=False)
  )
)

# create_rnn_head maps (bs, nf, seq_len) to (bs, c_out); the final expression
# displays a head built with bn=True.
bs, nf, c_out, seq_len = 16, 12, 2, 20
t = torch.rand(bs, nf, seq_len)
rnn_head_out = create_rnn_head(nf, c_out, seq_len, fc_dropout=0.5)(t)
test_eq(rnn_head_out.shape, (bs, c_out))
create_rnn_head(nf, c_out, seq_len, bn=True, fc_dropout=.5)

Sequential(
  (0): LastStep()
  (1): LinBnDrop(
    (0): BatchNorm1d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (1): Dropout(p=0.5, inplace=False)
    (2): Linear(in_features=12, out_features=2, bias=False)
  )
)

# conv_lin_3d_head: the output shape is (batch, *dims) for each requested
# dims; the last head built is displayed.
t = torch.randn(16, 3, 50)
for out_dims in ((4,5), (2, 10)):
    head = conv_lin_3d_head(3, 20, 50, out_dims)
    test_eq(head(t).shape, (16, *out_dims))
head

create_conv_lin_3d_head(
  (0): BatchNorm1d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (1): Conv1d(3, 2, kernel_size=(1,), stride=(1,), bias=False)
  (2): Transpose(-1, -2)
  (3): BatchNorm1d(50, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (4): Transpose(-1, -2)
  (5): Linear(in_features=50, out_features=10, bias=False)
)

# lin_3d_head: the output shape is (batch, *dims) for each (c_out, dims)
# pair; the last head built is displayed.
t = torch.randn(16, 64, 50)
for n_out, out_dims in ((10, (5,2)), (5, (5, 1))):
    head = lin_3d_head(64, n_out, 50, out_dims)
    test_eq(head(t).shape, (16, *out_dims))
head

create_lin_3d_head(
  (0): Flatten(full=False)
  (1): BatchNorm1d(3200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): Linear(in_features=3200, out_features=5, bias=False)
  (3): Reshape(bs, 5, 1)
)

# conv_3d_head maps (bs, nf, seq_len) to (bs, *d).
bs, c_out, seq_len = 16, 4, 50
d = (2,50)
nf = 128
t = torch.rand(bs, nf, seq_len)
head_out = conv_3d_head(nf, c_out, seq_len, d)(t)
test_eq(head_out.shape, (bs, *d))

# universal_pool_head returns (bs, c_out), with and without the extra
# positional argument 2.
bs, c_in, seq_len, c_out = 16, 128, 50, 14
t = torch.rand(bs, c_in, seq_len)
for extra_args in ((), (2,)):
    uph = universal_pool_head(c_in, c_out, seq_len, *extra_args)
    test_eq(uph(t).shape, (bs, c_out))

# Run every head in `heads`, printing its name. create_conv_3d_head is called
# with dims (d[0], seq_len), other '3d' heads with dims d, and the rest are
# expected to return (bs, c_out).
bs, c_in, seq_len, c_out = 16, 128, 50, 14
d = (7, 2)
t = torch.rand(bs, c_in, seq_len)
for head in heads:
    head_name = head.__name__
    print(head_name)
    if head_name == 'create_conv_3d_head':
        conv_dims = (d[0], seq_len)
        test_eq(head(c_in, c_out, seq_len, conv_dims)(t).shape, (bs, *conv_dims))
    elif '3d' in head_name:
        test_eq(head(c_in, c_out, seq_len, d)(t).shape, (bs, *d))
    else:
        test_eq(head(c_in, c_out, seq_len)(t).shape, (bs, c_out))

create_mlp_head
create_fc_head
average_pool_head
max_pool_head
concat_pool_head
create_pool_plus_head
create_conv_head
create_rnn_head
create_conv_lin_3d_head
create_lin_3d_head
create_conv_3d_head
attentional_pool_head
universal_pool_head
gwa_pool_head

# SqueezeExciteBlock keeps the input shape.
bs, ni, sl = 2, 32, 4
t = torch.rand(bs, ni, sl)
se_out = SqueezeExciteBlock(ni)(t)
test_eq(se_out.shape, (bs, ni, sl))

# GaussianNoise must change the values but keep the shape, for 3-D, 2-D and
# 1-D inputs.
for ones_shape in ((2, 3, 4), (2, 3), (2,)):
    t = torch.ones(*ones_shape)
    test_ne(GaussianNoise()(t), t)
    test_eq(GaussianNoise()(t).shape, t.shape)

# gambler_loss example: build the criterion with argument 2 and evaluate it on
# a 3-column output with targets drawn from {0, 1}.
output_shape = (16, 3)
model_output = torch.rand(*output_shape)
targets = torch.randint(0, 2, (16,))
criterion = gambler_loss(2)
criterion(model_output, targets)

tensor(0.7102)

# CrossEntropyLossOneHot called with integer class targets.
output_shape = (16, 2)
output = torch.rand(*output_shape)
target = torch.randint(0, 2, (16,))
CrossEntropyLossOneHot(output, target)

tensor(0.6620)

# CrossEntropyLossOneHot called with targets passed through OneHot; `output`
# is an nn.Parameter here.
from tsai.data.transforms import OneHot
output = nn.Parameter(torch.rand(16, 2))
target = torch.randint(0, 2, (16,))
to_one_hot = OneHot()
one_hot_target = to_one_hot(target)
CrossEntropyLossOneHot(output, one_hot_target)

tensor(0.7780, grad_fn=<NllLossBackward>)

ttest_tensor(a, b)

tensor(-1.5827)

# Over 100 random draws, ttest_bin_loss must agree (within 1e-3) with the
# ttest_ind statistic (equal_var=False) computed on softmax(output[:, 1]),
# split by target class.
for _ in range(100):
    output = torch.rand(256, 2)
    target = torch.randint(0, 2, (256,))
    loss_value = ttest_bin_loss(output, target).item()
    class0_scores = nn.Softmax(dim=-1)(output[:, 1])[target == 0]
    class1_scores = nn.Softmax(dim=-1)(output[:, 1])[target == 1]
    reference_stat = ttest_ind(class0_scores, class1_scores, equal_var=False)[0]
    test_close(loss_value, reference_stat, eps=1e-3)

# Evaluate CenterLoss and CenterPlusLoss on softmax-normalised random scores,
# using each row's argmax as its label.
c_in = 10
raw_scores = torch.rand(64, c_in).to(device=default_device())
x = F.softmax(raw_scores, dim=1)
label = x.max(dim=1).indices
CenterLoss(c_in)(x, label), CenterPlusLoss(LabelSmoothingCrossEntropyFlat(), c_in)(x, label)

(tensor(8.4800, grad_fn=<DivBackward0>),
 TensorBase(2.3589, grad_fn=<AliasBackward>))

CenterPlusLoss(LabelSmoothingCrossEntropyFlat(), c_in)

CenterPlusLoss(loss=FlattenedLoss of LabelSmoothingCrossEntropy(), c_out=10, λ=0.01)

# Evaluate FocalLoss on softmax-normalised random scores, using each row's
# argmax as its label.
c_in = 10
raw_scores = torch.rand(64, c_in).to(device=default_device())
x = F.softmax(raw_scores, dim=1)
label = x.max(dim=1).indices
FocalLoss(c_in)(x, label)

TensorBase(0.7460)

# Evaluate TweedieLoss on random 1-D outputs and targets.
# NOTE(review): c_in is assigned but not used in this cell.
c_in = 10
device = default_device()
output = torch.rand(64).to(device=device)
target = torch.rand(64).to(device=device)
TweedieLoss()(output, target)

tensor(2.8527)

Layers

noop[source]

init_lin_zero[source]

class SwishBeta[source]

same_padding1d[source]

class Pad1d[source]

class Conv1dSame[source]

same_padding2d[source]

class Pad2d[source]

class Conv2dSame[source]

Conv2d[source]

class Chomp1d[source]

class Conv1dCausal[source]

Conv1d[source]

class SeparableConv1d[source]

class AddCoords1d[source]

class ConvBlock[source]

class ResBlock1dPlus[source]

SEModule1d[source]

Norm[source]

class LinLnDrop[source]

class LambdaPlus[source]

class Squeeze[source]

class Unsqueeze[source]

class Add[source]

class Concat[source]

class Permute[source]

class Transpose[source]

class View[source]

class Reshape[source]

class Max[source]

class LastStep[source]

class SoftMax[source]

class Clamp[source]

class Clip[source]

class Sharpen[source]

class Sequential[source]

class TimeDistributed[source]

class Temp_Scale[source]

class Vector_Scale[source]

class Matrix_Scale[source]

get_calibrator[source]

class LogitAdjustmentLayer[source]

class PPV[source]

class PPAuc[source]

class MaxPPVPool1d[source]

class AdaptiveWeightedAvgPool1d[source]

class GAP1d[source]

class GACP1d[source]

class GAWP1d[source]

class GlobalWeightedAveragePool1d[source]

gwa_pool_head[source]

class AttentionalPool1d[source]

class GAttP1d[source]

attentional_pool_head[source]

create_pool_head[source]

max_pool_head[source]

create_pool_plus_head[source]

create_conv_head[source]

create_mlp_head[source]

create_fc_head[source]

create_rnn_head[source]

class create_conv_lin_3d_head[source]

class create_lin_3d_head[source]

class create_conv_3d_head[source]

universal_pool_head[source]

class SqueezeExciteBlock[source]

class GaussianNoise[source]

gambler_loss[source]

CrossEntropyLossOneHot[source]

ttest_bin_loss[source]

ttest_reg_loss[source]

class CenterLoss[source]

class CenterPlusLoss[source]

class FocalLoss[source]

class TweedieLoss[source]

`noop`[source]

`init_lin_zero`[source]

`class` `SwishBeta`[source]

`same_padding1d`[source]

`class` `Pad1d`[source]

`class` `Conv1dSame`[source]

`same_padding2d`[source]

`class` `Pad2d`[source]

`class` `Conv2dSame`[source]

`Conv2d`[source]

`class` `Chomp1d`[source]

`class` `Conv1dCausal`[source]

`Conv1d`[source]

`class` `SeparableConv1d`[source]

`class` `AddCoords1d`[source]

`class` `ConvBlock`[source]

`class` `ResBlock1dPlus`[source]

`SEModule1d`[source]

`Norm`[source]

`class` `LinLnDrop`[source]

`class` `LambdaPlus`[source]

`class` `Squeeze`[source]

`class` `Unsqueeze`[source]

`class` `Add`[source]

`class` `Concat`[source]

`class` `Permute`[source]

`class` `Transpose`[source]

`class` `View`[source]

`class` `Reshape`[source]

`class` `Max`[source]

`class` `LastStep`[source]

`class` `SoftMax`[source]

`class` `Clamp`[source]

`class` `Clip`[source]

`class` `Sharpen`[source]

`class` `Sequential`[source]

`class` `TimeDistributed`[source]

`class` `Temp_Scale`[source]

`class` `Vector_Scale`[source]

`class` `Matrix_Scale`[source]

`get_calibrator`[source]

`class` `LogitAdjustmentLayer`[source]

`class` `PPV`[source]

`class` `PPAuc`[source]

`class` `MaxPPVPool1d`[source]

`class` `AdaptiveWeightedAvgPool1d`[source]

`class` `GAP1d`[source]

`class` `GACP1d`[source]

`class` `GAWP1d`[source]

`class` `GlobalWeightedAveragePool1d`[source]

`gwa_pool_head`[source]

`class` `AttentionalPool1d`[source]

`class` `GAttP1d`[source]

`attentional_pool_head`[source]

`create_pool_head`[source]

`max_pool_head`[source]

`create_pool_plus_head`[source]

`create_conv_head`[source]

`create_mlp_head`[source]

`create_fc_head`[source]

`create_rnn_head`[source]

`class` `create_conv_lin_3d_head`[source]

`class` `create_lin_3d_head`[source]

`class` `create_conv_3d_head`[source]

`universal_pool_head`[source]

`class` `SqueezeExciteBlock`[source]

`class` `GaussianNoise`[source]

`gambler_loss`[source]

`CrossEntropyLossOneHot`[source]

`ttest_bin_loss`[source]

`ttest_reg_loss`[source]

`class` `CenterLoss`[source]

`class` `CenterPlusLoss`[source]

`class` `FocalLoss`[source]

`class` `TweedieLoss`[source]