graph.py

   1 #!/usr/bin/env python
   2
   3 import math
   4
   5 import torch, torchvision
   6
   7 from torch import nn
   8 from torch.nn import functional as F
   9
  10 import cairo
  11
  12
  13 ######################################################################
  14
  15
  16 def save_attention_image(
  17     filename,
  18     tokens_input,
  19     tokens_output,
  20     # An iterable set of BxHxTxT attention matrices
  21     attention_matrices,
  22     pixel_scale=8,
  23     token_gap=15,
  24     layer_gap=25,
  25     y_eps=0.5,
  26     padding=10,
  27     # do not draw links with a lesser attention
  28     min_link_attention=0,
  29     # draw only the strongest links necessary to reache
  30     # min_total_attention
  31     min_total_attention=None,
  32     # draw only the top k links
  33     k_top=None,
  34     curved=True,
  35 ):
  36     if k_top is not None:
  37         am = []
  38         for m in attention_matrices:
  39             am.append(m * (m.sort(dim=-1, descending=True).indices < k_top))
  40         attention_matrices = am
  41
  42     if min_total_attention is not None:
  43         am = []
  44         for m in attention_matrices:
  45             s = m.sort(dim=-1)
  46             m = 1 - (s.values.cumsum(-1) < 1 - min_total_attention).long()
  47             b = m.new(m.size()).scatter_(dim=-1, index=s.indices, src=m)
  48             am.append(m * b)
  49
  50     surface = cairo.RecordingSurface(cairo.CONTENT_COLOR_ALPHA, None)
  51
  52     ctx = cairo.Context(surface)
  53     ctx.scale(pixel_scale, pixel_scale)
  54
  55     ctx.set_source_rgb(0.0, 0.0, 0.0)
  56     ctx.set_font_size(4.0)
  57     # ctx.select_font_face("Arial", cairo.FONT_SLANT_NORMAL, cairo.FONT_WEIGHT_NORMAL)
  58
  59     x, y = 0, 0
  60
  61     ctx.set_line_width(0.25)
  62     for d in range(len(attention_matrices)):
  63         at = attention_matrices[d]
  64         ni = torch.arange(at.size(0))[:, None].expand_as(at)
  65         nj = torch.arange(at.size(1))[None, :].expand_as(at)
  66         at = at.flatten()
  67         o = at.sort().indices
  68         at = at[o]
  69         ni = ni.flatten()[o]
  70         nj = nj.flatten()[o]
  71         for i, j, a in zip(ni, nj, at):
  72             if a > 0 and a >= min_link_attention:
  73                 c = 1 - a.item()
  74                 ctx.set_source_rgb(c, c, c)
  75                 ax, ay = j * token_gap, y - y_eps
  76                 ctx.move_to(ax, ay)
  77                 dx, dy = i * token_gap, y - layer_gap + y_eps
  78                 if curved:
  79                     bx, by = ax, ay - layer_gap * 0.5
  80                     cx, cy = dx, dy + layer_gap * 0.5
  81                     ctx.curve_to(bx, by, cx, cy, dx, dy)
  82                 else:
  83                     ctx.line_to(dx, dy)
  84                 ctx.stroke()
  85         y -= layer_gap
  86
  87     for d in range(0, len(attention_matrices) + 1):
  88         n = (
  89             attention_matrices[0].size(-1)
  90             if d == 0
  91             else attention_matrices[d - 1].size(-2)
  92         )
  93         for n in range(n):
  94             xc, yc = n * token_gap, -d * layer_gap
  95             ctx.set_source_rgb(1.0, 1.0, 1.0)
  96             ctx.arc(xc, yc, token_gap / 10, 0, 2 * math.pi)
  97             ctx.fill()
  98             ctx.set_source_rgb(0.0, 0.0, 0.0)
  99             ctx.arc(xc, yc, token_gap / 20, 0, 2 * math.pi)
 100             ctx.fill()
 101
 102     ctx.set_source_rgb(0.0, 0.0, 0.0)
 103
 104     for k, t in enumerate(tokens_input):
 105         s = str(t)
 106         (
 107             x_bearing,
 108             y_bearing,
 109             width_t,
 110             height_t,
 111             x_advance,
 112             y_advance,
 113         ) = ctx.text_extents(s)
 114         ctx.move_to(k * token_gap - width_t / 2, token_gap / 5 - y_bearing)
 115         ctx.show_text(s)
 116
 117     for k, t in enumerate(tokens_output):
 118         s = str(t)
 119         (
 120             x_bearing,
 121             y_bearing,
 122             width_t,
 123             height_t,
 124             x_advance,
 125             y_advance,
 126         ) = ctx.text_extents(s)
 127         ctx.move_to(
 128             k * token_gap - width_t / 2,
 129             -token_gap / 5 - len(attention_matrices) * layer_gap,
 130         )
 131         ctx.show_text(s)
 132
 133     x, y, width, height = surface.ink_extents()
 134     x -= padding
 135     y -= padding
 136     width += 2 * padding
 137     height += 2 * padding
 138     pdf_surface = cairo.PDFSurface(filename, width, height)
 139     ctx_pdf = cairo.Context(pdf_surface)
 140     ctx_pdf.set_source_surface(surface, -x, -y)
 141     ctx_pdf.paint()
 142     pdf_surface.finish()
 143
 144
 145 ######################################################################
 146
 147 if __name__ == "__main__":
 148     import mygpt
 149
 150     tokens_output = ["<wat>", 2, 3, 4, "<end>"]
 151     tokens_input = [""] + tokens_output[:-1]
 152
 153     vocabulary_size = 3
 154     x = torch.randint(vocabulary_size, (1, len(tokens_input)))
 155
 156     model = mygpt.MyGPT(
 157         vocabulary_size=vocabulary_size,
 158         dim_model=4,
 159         dim_keys=2,
 160         dim_hidden=2,
 161         nb_heads=2,
 162         nb_blocks=5,
 163         dropout=0.1,
 164         causal=True,
 165     )
 166
 167     model.eval()
 168     model.record_attention()
 169
 170     y1 = model(mygpt.BracketedSequence(x)).x
 171
 172     attention_matrices = [m[0, 0] for m in model.retrieve_attention()]
 173
 174     # attention_matrices = [ torch.rand(3,5), torch.rand(8,3), torch.rand(5,8) ]
 175     # for a in attention_matrices: a=a/a.sum(-1,keepdim=True)
 176
 177     save_attention_image(
 178         "attention.pdf",
 179         tokens_input,
 180         tokens_output,
 181         attention_matrices,
 182         # k_top=2,
 183         min_total_attention=0.9,
 184     )