X-Git-Url: https://fleuret.org/cgi-bin/gitweb/gitweb.cgi?p=pytorch.git;a=blobdiff_plain;f=attentiontoy1d.py;h=2cecad8b18c9fb14e779a3bb517da0eaeceefecf;hp=cff8350839b3f169da6512dbb73e127db7047a89;hb=de3a0375a79cbbf4299aacc41db1426a39ca9664;hpb=c8ca3a8eb2917f92db6e6f8ed7cb00595af02e52

diff --git a/attentiontoy1d.py b/attentiontoy1d.py
index cff8350..2cecad8 100755
--- a/attentiontoy1d.py
+++ b/attentiontoy1d.py
@@ -1,18 +1,20 @@
 #!/usr/bin/env python
 
-# @XREMOTE_HOST: elk.fleuret.org
-# @XREMOTE_EXEC: /home/fleuret/conda/bin/python
-# @XREMOTE_PRE: killall -q -9 python || echo "Nothing killed"
-# @XREMOTE_GET: *.pdf *.log
+# Any copyright is dedicated to the Public Domain.
+# https://creativecommons.org/publicdomain/zero/1.0/
+
+# Written by Francois Fleuret <francois@fleuret.org>
 
 import torch, math, sys, argparse
 
 from torch import nn
 from torch.nn import functional as F
 
+import matplotlib.pyplot as plt
+
 ######################################################################
 
-parser = argparse.ArgumentParser(description='Toy RNN.')
+parser = argparse.ArgumentParser(description='Toy attention model.')
 
 parser.add_argument('--nb_epochs',
                     type = int, default = 250)
@@ -29,8 +31,15 @@ parser.add_argument('--positional_encoding',
                     help = 'Provide a positional encoding',
                     action='store_true', default=False)
 
+parser.add_argument('--seed',
+                    type = int, default = 0,
+                    help = 'Random seed (default 0, < 0 is no seeding)')
+
 args = parser.parse_args()
 
+if args.seed >= 0:
+    torch.manual_seed(args.seed)
+
 ######################################################################
 
 label=''
@@ -60,8 +69,6 @@ if torch.cuda.is_available():
 else:
     device = torch.device('cpu')
 
-torch.manual_seed(1)
-
 ######################################################################
 
 seq_height_min, seq_height_max = 1.0, 25.0
@@ -69,7 +76,7 @@ seq_width_min, seq_width_max = 5.0, 11.0
 seq_length = 100
 
 def positions_to_sequences(tr = None, bx = None, noise_level = 0.3):
-    st = torch.arange(seq_length).float()
+    st = torch.arange(seq_length, device = device).float()
     st = st[None, :, None]
     tr = tr[:, None, :, :]
     bx = bx[:, None, :, :]
@@ -79,7 +86,6 @@ def positions_to_sequences(tr = None, bx = None, noise_level = 0.3):
 
     x = torch.cat((xtr, xbx), 2)
 
-    # u = x.sign()
     u = F.max_pool1d(x.sign().permute(0, 2, 1), kernel_size = 2, stride = 1).permute(0, 2, 1)
 
     collisions = (u.sum(2) > 1).max(1).values
@@ -93,12 +99,12 @@ def generate_sequences(nb):
 
     # Position / height / width
 
-    tr = torch.empty(nb, 2, 3)
+    tr = torch.empty(nb, 2, 3, device = device)
     tr[:, :, 0].uniform_(seq_width_max/2, seq_length - seq_width_max/2)
     tr[:, :, 1].uniform_(seq_height_min, seq_height_max)
     tr[:, :, 2].uniform_(seq_width_min, seq_width_max)
 
-    bx = torch.empty(nb, 2, 3)
+    bx = torch.empty(nb, 2, 3, device = device)
     bx[:, :, 0].uniform_(seq_width_max/2, seq_length - seq_width_max/2)
     bx[:, :, 1].uniform_(seq_height_min, seq_height_max)
     bx[:, :, 2].uniform_(seq_width_min, seq_width_max)
@@ -146,9 +152,6 @@ def generate_sequences(nb):
 
 ######################################################################
 
-import matplotlib.pyplot as plt
-import matplotlib.collections as mc
-
 def save_sequence_images(filename, sequences, tr = None, bx = None):
     fig = plt.figure()
     ax = fig.add_subplot(1, 1, 1)
@@ -165,10 +168,10 @@ def save_sequence_images(filename, sequences, tr = None, bx = None):
     delta = -1.
 
     if tr is not None:
-        ax.scatter(test_tr[k, :, 0], torch.full((test_tr.size(1),), delta), color = 'black', marker = '^', clip_on=False)
+        ax.scatter(tr[:, 0].cpu(), torch.full((tr.size(0),), delta), color = 'black', marker = '^', clip_on=False)
 
     if bx is not None:
-        ax.scatter(test_bx[k, :, 0], torch.full((test_bx.size(1),), delta), color = 'black', marker = 's', clip_on=False)
+        ax.scatter(bx[:, 0].cpu(), torch.full((bx.size(0),), delta), color = 'black', marker = 's', clip_on=False)
 
     fig.savefig(filename, bbox_inches='tight')
 
@@ -310,13 +313,16 @@ test_input = torch.cat((test_input, positional_input.expand(test_input.size(0),
 test_outputs = model((test_input - mu) / std).detach()
 
 if args.with_attention:
-    x = model[0:4]((test_input - mu) / std)
-    test_A = model[4].attention(x)
+    k = next(k for k, l in enumerate(model) if isinstance(l, AttentionLayer))
+    x = model[0:k]((test_input - mu) / std)
+    test_A = model[k].attention(x)
     test_A = test_A.detach().to('cpu')
 
 test_input = test_input.detach().to('cpu')
 test_outputs = test_outputs.detach().to('cpu')
 test_targets = test_targets.detach().to('cpu')
+test_bx = test_bx.detach().to('cpu')
+test_tr = test_tr.detach().to('cpu')
 
 for k in range(15):
     save_sequence_images(
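
The notable change in the last hunk replaces the hard-coded model[4] with a
lookup of the attention layer by type, so the code keeps working if the number
of layers preceding it changes. Below is a minimal sketch of that pattern,
assuming a stand-in AttentionLayer and an illustrative layer stack; the real
definitions live elsewhere in attentiontoy1d.py and are not part of this diff.

    import torch
    from torch import nn

    # Stand-in for the script's AttentionLayer; its real definition is
    # outside this diff, and any nn.Module subclass works for the lookup.
    class AttentionLayer(nn.Module):
        def forward(self, x):
            return x

    # Illustrative stack; the real model is built elsewhere in the script.
    model = nn.Sequential(
        nn.Conv1d(1, 8, kernel_size = 5, padding = 2), nn.ReLU(),
        nn.Conv1d(8, 8, kernel_size = 5, padding = 2), nn.ReLU(),
        AttentionLayer(),
        nn.Conv1d(8, 1, kernel_size = 5, padding = 2),
    )

    # Locate the attention layer by type rather than by a fixed index, so
    # the lookup survives insertions or removals of preceding layers.
    k = next(k for k, l in enumerate(model) if isinstance(l, AttentionLayer))

    x = torch.randn(4, 1, 100)
    h = model[0:k](x)   # activations fed to the attention layer
    print(k, h.size())  # 4 torch.Size([4, 8, 100])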