nb_blocks,
nb_lines=None,
caterpillar_height=None,
- dim_rec_v=-1,
causal=False,
dropout=0.0,
len_max=1e5,
return DumbRec(
dim_model=dim_model,
dim_qk=dim_keys,
- dim_v=dim_rec_v,
+ dim_v=dim_model // nb_heads,
nb_heads=nb_heads,
nb_lines=nb_lines,
attention_dropout=dropout,
return KVRec(
dim_model=dim_model,
dim_qk=dim_keys,
- dim_v=dim_rec_v,
+ dim_v=dim_model // nb_heads,
nb_heads=nb_heads,
nb_lines=nb_lines,
attention_dropout=dropout,
return Caterpillar(
dim_model=dim_model,
dim_qk=dim_keys,
- dim_v=dim_rec_v,
+ dim_v=dim_model // nb_heads,
nb_heads=nb_heads,
caterpillar_length=self.caterpillar_length,
caterpillar_height=self.caterpillar_height,