1 % -*- mode: latex; mode: reftex; mode: auto-fill; mode: flyspell; -*-
3 % Any copyright is dedicated to the Public Domain.
4 % https://creativecommons.org/publicdomain/zero/1.0/
6 % Written by Francois Fleuret <francois@fleuret.org>
8 \documentclass[11pt,a4paper,twoside]{article}
9 \usepackage[a4paper,top=2.5cm,bottom=2cm,left=2.5cm,right=2.5cm]{geometry}
13 \usetikzlibrary{positioning}
15 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% \mygrid{#1}{#2}{#3}{#4}{#5}: draws a rectangle with one corner at (#1,#2)
% and the opposite corner offset by (#3,#4), then rules it with interior
% grid lines spaced one unit apart.
%   #1, #2  coordinates of the starting corner
%   #3, #4  extent of the rectangle along x and y
%   #5      fill colour of the rectangle; leave empty for no fill
17 \newcommand{\mygrid}[5]{%
% Positions of the first and last interior lines along x and along y
% (one unit inside each border).
18 \pgfmathsetmacro{\xmin}{#1+1}
19 \pgfmathsetmacro{\xmax}{#1+#3-1}
20 \pgfmathsetmacro{\ymin}{#2+1}
21 \pgfmathsetmacro{\ymax}{#2+#4-1}
% Outer rectangle: plain outline when #5 is empty, filled with #5 otherwise.
22 \ifthenelse{\equal{#5}{}}
23 {\draw (#1,#2) rectangle ++(#3,#4);}
24 {\draw[fill=#5] (#1,#2) rectangle ++(#3,#4);}
% Interior vertical lines, each spanning the full extent #4.
25 \foreach \x in {\xmin,...,\xmax}{
26 \draw (\x,#2)-- ++(0,#4);
% Interior horizontal lines, each spanning the full extent #3.
28 \foreach \y in {\ymin,...,\ymax}{
29 \draw (#1,\y)-- ++(#3,0);
% \amatrix{#1}{#2}{#3}{#4}{#5}{#6}{#7}: a small tikzpicture (scale 0.2)
% showing a #1-by-#2 grid with an optional highlighted rectangle.
%   #1, #2  extent of the grid along x and y
%   #3, #4  corner of the highlighted rectangle
%   #5, #6  extent of the highlighted rectangle along x and y
%   #7      highlight colour; tested against the empty string below
33 \newcommand{\amatrix}[7]{%
34 \begin{tikzpicture}[scale=0.2]
% When #7 is non-empty, paint a borderless rectangle of colour #7.
% (The branch taken for an empty #7 is not visible in this excerpt.)
35 \ifthenelse{\equal{#7}{}}
37 {\draw[draw=none,fill=#7] (#3,#4) rectangle ++(#5,#6);}
% The unfilled grid is drawn after the highlight, hence on top of it.
38 \mygrid{0}{0}{#1}{#2}{}
% \gridcube{#1}{#2}{#3}: draws grid lines on three faces of a box spanning
% 0..#1 along x, 0..#2 along y and 0..#3 along z: the z=0 face, the x=#1
% face and the y=#2 face.
42 \newcommand{\gridcube}[3]{% 7,4,6
% One line per y level: along x on the z=0 face, then along z on the x=#1 face.
44 \foreach \b in { 0,...,#2 }{
45 \draw (0,\b,0)--++(#1,0,0)--++(0,0,#3);
% One line per x position: along y on the z=0 face, then along z on the y=#2 face.
48 \foreach \d in { 0,...,#1 }{
49 \draw (\d,0,0)--++(0,#2,0)--++(0,0,#3);
% One line per z depth: starting at (#1,0,z), along y on the x=#1 face,
% then back along -x on the y=#2 face.
52 \foreach \hw in { 0,...,#3 }{
53 \draw (0,0,\hw)++(#1,0,0)--++(0,#2,0)--++(-#1,0,0);
57 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Style keys; presumably the option list of the enclosing tikzpicture, whose
% opening line is not visible in this excerpt -- TODO confirm.
% Node text is centred, corners are rounded by 0.75pt, and `operator' is the
% style applied to the formula labels placed on the arrows.
63 every text node part/.style={align=center},
64 rounded corners=0.75pt,
66 operator/.style={draw=black,fill=black!10,inner sep=1pt,minimum width=18pt,minimum height=10},
% --- batchnorm, 1d case ---
% Output (top) and input (1.3cm below) are 10x4 grids; the highlighted
% rectangle starts at (3,0) and is 1 wide and 4 high, i.e. one full column,
% red in the output and blue in the input.
69 \node[inner sep=1pt] (batchnorm output 1d) {\amatrix{10}{4}{3}{0}{1}{4}{red}};
70 \node[inner sep=1pt,below=1.3cm of batchnorm input 1d] (batchnorm input 1d) {\amatrix{10}{4}{3}{0}{1}{4}{blue}};
% Straight arrow from the top of the input to the bottom of the output,
% shifted by -1.5 along x, carrying the normalisation formula as an
% `operator' label at 45% of its length.
71 \draw[->] ($(batchnorm input 1d.north)+(-1.5,0)$) -- ($(batchnorm output 1d.south)+(-1.5,0)$)
72 node[operator,pos=0.45]
73 {$\gamma_d \frac{x-\hat{m}_d}{\sqrt{\hat{v}_d+\epsilon}}+\beta_d$};
% Axis arrows labelled D (along the top edge) and B (down the left edge),
% nudged 3pt outwards and drawn inside pgfinterruptboundingbox so that they
% do not contribute to the picture's bounding box.
75 \begin{pgfinterruptboundingbox}
76 \draw[->,transform canvas={yshift=3pt}] (batchnorm output 1d.north west) -- (batchnorm output 1d.north east) node[midway,above] {$D$};
77 \draw[->,transform canvas={xshift=-3pt}] (batchnorm output 1d.north west) -- (batchnorm output 1d.south west) node[midway,left] {$B$};
78 \end{pgfinterruptboundingbox}
% --- layernorm, 1d case ---
% Output sits 1.1cm to the right of the batchnorm output; the input is
% placed at the x of the layernorm output and the y of the batchnorm input.
% Both are 10x4 grids; the highlighted rectangle starts at (0,2) and is
% 10 wide and 1 high, i.e. one full row.
80 \node[inner sep=1pt,right=1.1cm of batchnorm output 1d] (layernorm output 1d) {\amatrix{10}{4}{0}{2}{10}{1}{red}};
81 \node[inner sep=1pt] (layernorm input 1d) at (layernorm output 1d|-batchnorm input 1d) {\amatrix{10}{4}{0}{2}{10}{1}{blue}};
% Axis arrows labelled D and B, kept out of the bounding box.
83 \begin{pgfinterruptboundingbox}
84 \draw[->,transform canvas={yshift=3pt}] (layernorm output 1d.north west) -- (layernorm output 1d.north east) node[midway,above] {$D$};
85 \draw[->,transform canvas={xshift=-3pt}] (layernorm output 1d.north west) -- (layernorm output 1d.south west) node[midway,left] {$B$};
86 \end{pgfinterruptboundingbox}
% Curved arrow (leaving at 55 degrees, arriving at 305 degrees) from the
% east side of the input, raised by 1, to the east side of the output,
% with the formula label at its midpoint.
% NOTE(review): the scale and shift are subscripted by b here, whereas the
% batchnorm label subscripts them by d -- confirm this is intended.
88 \draw[->] ($(layernorm input 1d.east)+(0,1)$) to[out=55,in=305]
89 node[operator,pos=0.5]
90 {$\gamma_b \frac{x-\hat{m}_b}{\sqrt{\hat{v}_b+\epsilon}}+\beta_b$}
92 ($(layernorm output 1d.east)+(0,-0.0)$);
94 %---------------------------------------------------------
% --- batchnorm, 2d case ---
% Output node, 1.65cm below the 1d batchnorm input, holding a nested
% tikzpicture with an oblique 3D basis (x to the right, y up, z at 50 degrees).
96 \node[below=1.65cm of batchnorm input 1d] (batchnorm output 2d) {%
97 \begin{tikzpicture}[line join=bevel,x={(0:0.2cm)},y={(90:0.2cm)},z={(50:0.1cm)}]
% Red outline covering the z=0 face and the y=4 face of the slab 2<=x<=3
% (y from 0 to 4, z from 0 to 6).
99 \draw[fill=red] (2,0,0)--++(0,4,0)--++(0,0,6)--++(1,0,0)--++(0,0,-6)--++(0,-4,0)--cycle;
% Axis arrows labelled B (along y), D (along x) and H,W (along z), kept out
% of the bounding box.
101 \begin{pgfinterruptboundingbox}
102 \draw[->,transform canvas={xshift=-4pt}] (0,4,0) -- (0,0,0) node[midway,left] {$B$};
103 \draw[->,transform canvas={yshift=4pt}] (0,4,6) -- ++(7,0,0) node[midway,above] {$D$};
104 \draw[->,transform canvas={xshift=-4pt,yshift=2pt}] (0,4,6) -- ++(0,0,-6)
105 node[midway,left,xshift=3pt,yshift=4pt] {$H,W$};
106 \end{pgfinterruptboundingbox}
% Input node, 1.3cm below the output, with the same slab outlined in blue.
110 \node[below=1.3cm of batchnorm output 2d] (batchnorm input 2d) {%
111 \begin{tikzpicture}[line join=bevel,x={(0:0.2cm)},y={(90:0.2cm)},z={(50:0.1cm)}]
112 \draw[fill=blue] (2,0,0)--++(0,4,0)--++(0,0,6)--++(1,0,0)--++(0,0,-6)--++(0,-4,0)--cycle;
% Straight arrow from the input up to the output, shifted by -1.5 along x,
% carrying the same formula label as the 1d batchnorm arrow.
117 \draw[->] ($(batchnorm input 2d.north)+(-1.5,0)$) -- ($(batchnorm output 2d.south)+(-1.5,0)$)
118 node[operator,pos=0.45]
119 {$\gamma_d \frac{x-\hat{m}_d}{\sqrt{\hat{v}_d+\epsilon}}+\beta_d$};
% --- layernorm, 2d case ---
% Output node placed at the x of the 1d layernorm output and the y of the
% 2d batchnorm output, using the same oblique 3D basis.
121 \node (layernorm output 2d) at (layernorm output 1d|-batchnorm output 2d) {%
122 \begin{tikzpicture}[line join=bevel,x={(0:0.2cm)},y={(90:0.2cm)},z={(50:0.1cm)}]
% Red outline covering the z=0 face and the x=7 face of the slab 2<=y<=3
% (x from 0 to 7, z from 0 to 6).
123 \draw[fill=red] (0,2,0)--++(7,0,0)--++(0,0,6)--++(0,1,0)--++(0,0,-6)--++(-7,0,0)--cycle;
% Axis arrows labelled B, D and H,W, kept out of the bounding box.
125 \begin{pgfinterruptboundingbox}
126 \draw[->,transform canvas={xshift=-4pt}] (0,4,0) -- (0,0,0) node[midway,left] {$B$};
127 \draw[->,transform canvas={yshift=4pt}] (0,4,6) -- ++(7,0,0) node[midway,above] {$D$};
128 \draw[->,transform canvas={xshift=-4pt,yshift=2pt}] (0,4,6) -- ++(0,0,-6)
129 node[midway,left,xshift=3pt,yshift=4pt] {$H,W$};
130 \end{pgfinterruptboundingbox}
% Input node at the x of the 1d layernorm input and the y of the 2d
% batchnorm input, with the same slab outlined in blue.
135 \node (layernorm input 2d) at (layernorm input 1d|-batchnorm input 2d) {%
136 \begin{tikzpicture}[line join=bevel,x={(0:0.2cm)},y={(90:0.2cm)},z={(50:0.1cm)}]
137 \draw[fill=blue] (0,2,0)--++(7,0,0)--++(0,0,6)--++(0,1,0)--++(0,0,-6)--++(-7,0,0)--cycle;
% Curved arrow between points offset from the east anchors of the input
% and output nodes, with the formula label at 40% of its length.
% NOTE(review): the scale and shift are subscripted by b here, whereas the
% batchnorm label subscripts them by d -- confirm this is intended.
143 \draw[->] ($(layernorm input 2d.east)+(-1,2)$) to[out=55,in=305]
144 node[operator,pos=0.4]
145 {$\gamma_b \frac{x-\hat{m}_b}{\sqrt{\hat{v}_b+\epsilon}}+\beta_b$}
147 ($(layernorm output 2d.east)+(-1,1.0)$);
% Text captions, each placed 5pt below the input node of its panel.
149 \node[below=5pt of batchnorm input 1d] {batchnorm};
150 \node[below=5pt of layernorm input 1d] {layernorm};
151 \node[below=5pt of batchnorm input 2d] {batchnorm 2d};
152 \node[below=5pt of layernorm input 2d] {layernorm 2d};