Update.
[tex.git] / bn.tex
1 % -*- mode: latex; mode: reftex; mode: auto-fill; mode: flyspell; -*-
2
3 % Any copyright is dedicated to the Public Domain.
4 % https://creativecommons.org/publicdomain/zero/1.0/
5
6 % Written by Francois Fleuret <francois@fleuret.org>
7
8 \documentclass[11pt,a4paper,twoside]{article}
9 \usepackage[a4paper,top=2.5cm,bottom=2cm,left=2.5cm,right=2.5cm]{geometry}
10 \usepackage{ifthen}
11 \usepackage{tikz}
12 \usetikzlibrary{calc}
13 \usetikzlibrary{positioning}
14
15 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
16
17 \newcommand{\mygrid}[5]{% \mygrid{x}{y}{w}{h}{fill}: frame with corner (x,y) and size w-by-h, cut into unit cells; defines \xmin,\xmax,\ymin,\ymax as a side effect
18   \pgfmathsetmacro{\xmin}{#1+1} % abscissa of the first interior vertical line
19   \pgfmathsetmacro{\xmax}{#1+#3-1} % abscissa of the last interior vertical line
20   \pgfmathsetmacro{\ymin}{#2+1} % ordinate of the first interior horizontal line
21   \pgfmathsetmacro{\ymax}{#2+#4-1} % ordinate of the last interior horizontal line
22   \ifthenelse{\equal{#5}{}} % an empty #5 means the frame is drawn without a fill colour
23              {\draw (#1,#2) rectangle ++(#3,#4);}
24              {\draw[fill=#5] (#1,#2) rectangle ++(#3,#4);}
25   \ifdim\xmax pt<\xmin pt\relax\else\foreach \x in {\xmin,...,\xmax}{% skipped for w=1, where \foreach would count down and draw two stray lines
26     \draw (\x,#2)-- ++(0,#4);
27   }\fi
28   \ifdim\ymax pt<\ymin pt\relax\else\foreach \y in {\ymin,...,\ymax}{% skipped for h=1, for the same reason
29     \draw (#1,\y)-- ++(#3,0);
30   }\fi
31 }
32
33 \newcommand{\amatrix}[7]{% \amatrix{w}{h}{px}{py}{pw}{ph}{colour}: stand-alone picture of a w-by-h grid with an optional coloured pw-by-ph patch at (px,py)
34   \begin{tikzpicture}[scale=0.2]
35     \ifthenelse{\not\equal{#7}{}} % an empty colour means no patch at all
36                {\path[fill=#7] (#3,#4) rectangle +(#5,#6);}% filled first, so the grid lines drawn next stay on top
37                {}
38     \mygrid{0}{0}{#1}{#2}{}
39   \end{tikzpicture}%
40 }
41
42 \newcommand{\gridcube}[3]{% \gridcube{nx}{ny}{nz}: grid lines on three faces of an nx-by-ny-by-nz box of unit cells (the figure calls it with 7,4,6)
43   % one polyline per y-level: across the plane z=0, then along z in the plane x=#1
44   \foreach \iy in {0,...,#2}{
45     \draw (0,\iy,0) -- ++(#1,0,0) -- ++(0,0,#3);
46   }
47   % one polyline per x-position: up the plane z=0, then along z in the plane y=#2
48   \foreach \ix in {0,...,#1}{
49     \draw (\ix,0,0) -- ++(0,#2,0) -- ++(0,0,#3);
50   }
51   % one polyline per z-depth: up the plane x=#1, then back along -x in the plane y=#2
52   \foreach \iz in {0,...,#3}{
53     \draw (0,0,\iz) ++(#1,0,0) -- ++(0,#2,0) -- ++(-#1,0,0);
54   }
55 }
56
57 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
58
59 \begin{document}
60 % Figure: batchnorm (left column) versus layernorm (right column), for 1d activations (top) and 2d activations (bottom); each panel stacks a red output above a blue input.
61 \begin{tikzpicture}[
62     scale=0.2,
63     every text node part/.style={align=center},
64     rounded corners=0.75pt,
65     font=\footnotesize,
66     operator/.style={draw=black,fill=black!10,inner sep=1pt,minimum width=18pt,minimum height=10pt}, % boxed style shared by the four formula nodes
67   ]
68   % batchnorm, 1d: the coloured patch is a whole column of the 10-by-4 grid (one position along D, every position along B)
69   \node[inner sep=1pt] (batchnorm output 1d) {\amatrix{10}{4}{3}{0}{1}{4}{red}};
70   \node[inner sep=1pt,below=1.3cm of batchnorm output 1d] (batchnorm input 1d) {\amatrix{10}{4}{3}{0}{1}{4}{blue}};
71   \draw[->] ($(batchnorm input 1d.north)+(-1.5,0)$) -- ($(batchnorm output 1d.south)+(-1.5,0)$)
72   node[operator,pos=0.45]
73   {$\gamma_d \frac{x-\hat{m}_d}{\sqrt{\hat{v}_d+\epsilon}}+\beta_d$}; % every quantity in the formula is indexed by d
74   % axis arrows of the 1d batchnorm panel, kept out of the bounding box
75   \begin{pgfinterruptboundingbox}
76     \draw[->,transform canvas={yshift=3pt}] (batchnorm output 1d.north west) -- (batchnorm output 1d.north east) node[midway,above] {$D$};
77     \draw[->,transform canvas={xshift=-3pt}] (batchnorm output 1d.north west) -- (batchnorm output 1d.south west) node[midway,left] {$B$};
78   \end{pgfinterruptboundingbox}
79   % layernorm, 1d: the coloured patch is a whole row of the grid (one position along B, every position along D)
80   \node[inner sep=1pt,right=1.1cm of batchnorm output 1d] (layernorm output 1d) {\amatrix{10}{4}{0}{2}{10}{1}{red}};
81   \node[inner sep=1pt] (layernorm input 1d) at (layernorm output 1d|-batchnorm input 1d) {\amatrix{10}{4}{0}{2}{10}{1}{blue}}; % x of its output, y of the batchnorm input
82   % axis arrows of the 1d layernorm panel, kept out of the bounding box
83   \begin{pgfinterruptboundingbox}
84     \draw[->,transform canvas={yshift=3pt}] (layernorm output 1d.north west) -- (layernorm output 1d.north east) node[midway,above] {$D$};
85     \draw[->,transform canvas={xshift=-3pt}] (layernorm output 1d.north west) -- (layernorm output 1d.south west) node[midway,left] {$B$};
86   \end{pgfinterruptboundingbox}
87   % curved arrow from the input's east side to the output's east anchor; every quantity in the formula is indexed by b
88   \draw[->] ($(layernorm input 1d.east)+(0,1)$) to[out=55,in=305]
89   node[operator,pos=0.5]
90   {$\gamma_b \frac{x-\hat{m}_b}{\sqrt{\hat{v}_b+\epsilon}}+\beta_b$}
91   % end point of the arrow (the zero offset leaves the anchor unchanged)
92   ($(layernorm output 1d.east)+(0,-0.0)$);
93
94   %---------------------------------------------------------
95   % batchnorm, 2d: 7-by-4-by-6 box with the slab 2<=x<=3 coloured (one position along D, all of B and H,W)
96   \node[below=1.65cm of batchnorm input 1d] (batchnorm output 2d) {%
97     \begin{tikzpicture}[line join=bevel,x={(0:0.2cm)},y={(90:0.2cm)},z={(50:0.1cm)}]
98       % outline of the slab's two faces lying in the planes z=0 and y=4
99       \draw[fill=red] (2,0,0)--++(0,4,0)--++(0,0,6)--++(1,0,0)--++(0,0,-6)--++(0,-4,0)--cycle;
100       \gridcube{7}{4}{6}
101       \begin{pgfinterruptboundingbox}
102         \draw[->,transform canvas={xshift=-4pt}] (0,4,0) -- (0,0,0) node[midway,left] {$B$};
103         \draw[->,transform canvas={yshift=4pt}] (0,4,6) -- ++(7,0,0) node[midway,above] {$D$};
104         \draw[->,transform canvas={xshift=-4pt,yshift=2pt}] (0,4,6) -- ++(0,0,-6)
105         node[midway,left,xshift=3pt,yshift=4pt] {$H,W$};
106       \end{pgfinterruptboundingbox}
107     \end{tikzpicture}
108   };
109   % same box with the slab coloured blue and no axis arrows, used as the input
110   \node[below=1.3cm of batchnorm output 2d] (batchnorm input 2d) {%
111     \begin{tikzpicture}[line join=bevel,x={(0:0.2cm)},y={(90:0.2cm)},z={(50:0.1cm)}]
112       \draw[fill=blue] (2,0,0)--++(0,4,0)--++(0,0,6)--++(1,0,0)--++(0,0,-6)--++(0,-4,0)--cycle;
113       \gridcube{7}{4}{6}
114     \end{tikzpicture}
115   };
116   % straight arrow from the input's north side to the output's south side, offset by -1.5 in x, carrying the formula
117   \draw[->] ($(batchnorm input 2d.north)+(-1.5,0)$) -- ($(batchnorm output 2d.south)+(-1.5,0)$)
118   node[operator,pos=0.45]
119   {$\gamma_d \frac{x-\hat{m}_d}{\sqrt{\hat{v}_d+\epsilon}}+\beta_d$};
120   % layernorm, 2d: the coloured slab is 2<=y<=3 (one position along B, all of D and H,W)
121   \node (layernorm output 2d) at (layernorm output 1d|-batchnorm output 2d) {%
122     \begin{tikzpicture}[line join=bevel,x={(0:0.2cm)},y={(90:0.2cm)},z={(50:0.1cm)}]
123       \draw[fill=red] (0,2,0)--++(7,0,0)--++(0,0,6)--++(0,1,0)--++(0,0,-6)--++(-7,0,0)--cycle;
124       \gridcube{7}{4}{6}
125       \begin{pgfinterruptboundingbox}
126         \draw[->,transform canvas={xshift=-4pt}] (0,4,0) -- (0,0,0) node[midway,left] {$B$};
127         \draw[->,transform canvas={yshift=4pt}] (0,4,6) -- ++(7,0,0) node[midway,above] {$D$};
128         \draw[->,transform canvas={xshift=-4pt,yshift=2pt}] (0,4,6) -- ++(0,0,-6)
129         node[midway,left,xshift=3pt,yshift=4pt] {$H,W$};
130       \end{pgfinterruptboundingbox}
131
132     \end{tikzpicture}
133   };
134   % blue counterpart: x taken from the 1d layernorm input, y from the 2d batchnorm input
135   \node (layernorm input 2d) at (layernorm input 1d|-batchnorm input 2d) {%
136     \begin{tikzpicture}[line join=bevel,x={(0:0.2cm)},y={(90:0.2cm)},z={(50:0.1cm)}]
137       \draw[fill=blue] (0,2,0)--++(7,0,0)--++(0,0,6)--++(0,1,0)--++(0,0,-6)--++(-7,0,0)--cycle;
138       \gridcube{7}{4}{6}
139     \end{tikzpicture}
140   };
141
142   % curved arrow between points offset from the east anchors of the 2d layernorm input and output
143   \draw[->] ($(layernorm input 2d.east)+(-1,2)$) to[out=55,in=305]
144   node[operator,pos=0.4]
145   {$\gamma_b \frac{x-\hat{m}_b}{\sqrt{\hat{v}_b+\epsilon}}+\beta_b$}
146   %
147   ($(layernorm output 2d.east)+(-1,1.0)$);
148   % captions, placed 5pt below each input
149   \node[below=5pt of batchnorm input 1d] {batchnorm};
150   \node[below=5pt of layernorm input 1d] {layernorm};
151   \node[below=5pt of batchnorm input 2d] {batchnorm 2d};
152   \node[below=5pt of layernorm input 2d] {layernorm 2d};
153
154 \end{tikzpicture}
155
156 \end{document}