--- /dev/null
+% -*- mode: latex; mode: reftex; mode: auto-fill; mode: flyspell; -*-
+
+% Any copyright is dedicated to the Public Domain.
+% https://creativecommons.org/publicdomain/zero/1.0/
+
+% Written by Francois Fleuret <francois@fleuret.org>
+
+\documentclass[11pt,a4paper,twoside]{article}
+\usepackage[a4paper,top=2.5cm,bottom=2cm,left=2.5cm,right=2.5cm]{geometry}
+\usepackage{ifthen}
+\usepackage{tikz}
+\usetikzlibrary{calc}
+\usetikzlibrary{positioning}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+% \mygrid{x0}{y0}{width}{height}{fill}
+%
+% To be used inside a tikzpicture. Draws a rectangle whose lower-left
+% corner is (x0,y0) and whose size is width x height, then splits it
+% into unit cells with interior vertical and horizontal lines.
+%   #1, #2  coordinates of the lower-left corner
+%   #3, #4  width and height
+%   #5      fill colour of the rectangle; leave empty for no fill
+% The macros \xmin, \xmax, \ymin and \ymax are (re)defined as a side
+% effect in the current TeX group.
+\newcommand{\mygrid}[5]{%
+  % Positions of the first and last interior line along each axis.
+  \pgfmathsetmacro{\xmin}{#1+1}%
+  \pgfmathsetmacro{\xmax}{#1+#3-1}%
+  \pgfmathsetmacro{\ymin}{#2+1}%
+  \pgfmathsetmacro{\ymax}{#2+#4-1}%
+  % Outer frame, filled only when a colour was given.
+  \ifthenelse{\equal{#5}{}}%
+    {\draw (#1,#2) rectangle ++(#3,#4);}%
+    {\draw[fill=#5] (#1,#2) rectangle ++(#3,#4);}%
+  % \foreach counts downwards when the end of a "..." range is smaller
+  % than its start, so a grid less than two cells wide (or high) would
+  % get lines on, or outside of, its border. Skip the loop in that case.
+  % \ifdim is used because \pgfmathsetmacro yields decimal numbers.
+  \ifdim\xmin pt>\xmax pt\relax\else
+    \foreach \x in {\xmin,...,\xmax}{%
+      \draw (\x,#2) -- ++(0,#4);
+    }%
+  \fi
+  \ifdim\ymin pt>\ymax pt\relax\else
+    \foreach \y in {\ymin,...,\ymax}{%
+      \draw (#1,\y) -- ++(#3,0);
+    }%
+  \fi
+}
+
+% \amatrix{cols}{rows}{x}{y}{w}{h}{colour}
+%
+% Typesets a self-contained tikzpicture (scale 0.2) showing a grid of
+% cols x rows unit cells anchored at the origin, with an optional
+% highlighted rectangle.
+%   #1, #2  grid width and height, forwarded to \mygrid
+%   #3, #4  lower-left corner of the highlighted rectangle
+%   #5, #6  width and height of the highlighted rectangle
+%   #7      highlight colour; leave empty to draw the bare grid
+\newcommand{\amatrix}[7]{%
+  \begin{tikzpicture}[scale=0.2]
+    % The highlight is painted first, the grid lines are drawn over it.
+    \ifthenelse{\equal{#7}{}}{}{%
+      \path[fill=#7] (#3,#4) rectangle ++(#5,#6);%
+    }
+    \mygrid{0}{0}{#1}{#2}{}
+  \end{tikzpicture}%
+}
+
+% \gridcube{nx}{ny}{nz}, e.g. \gridcube{7}{4}{6}
+%
+% To be used inside a tikzpicture with three-dimensional (x,y,z)
+% coordinates. Draws the cell boundaries on three faces of the box
+% [0,nx] x [0,ny] x [0,nz]: the face z=0, the face x=nx and the face
+% y=ny.
+%   #1  number of cells along x
+%   #2  number of cells along y
+%   #3  number of cells along z
+\newcommand{\gridcube}[3]{%
+  % Constant y: across the z=0 face, then along z on the x=nx face.
+  \foreach \iy in {0,...,#2}{%
+    \draw (0,\iy,0) -- (#1,\iy,0) -- (#1,\iy,#3);
+  }%
+  % Constant x: up the z=0 face, then along z on the y=ny face.
+  \foreach \ix in {0,...,#1}{%
+    \draw (\ix,0,0) -- (\ix,#2,0) -- (\ix,#2,#3);
+  }%
+  % Constant z: up the x=nx face, then back across the y=ny face.
+  \foreach \iz in {0,...,#3}{%
+    \draw (#1,0,\iz) -- (#1,#2,\iz) -- (0,#2,\iz);
+  }%
+}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{document}
+
+% One figure with four panels: batchnorm (left column) and layernorm
+% (right column), for 1d activations (top rows) and 2d activations
+% (bottom rows). In every panel the blue input sits below the red
+% output and an arrow carrying the normalization formula links them.
+% The "operator" style is the small grey box used for those formulas.
+% NOTE(review): "minimum height=10" carries no unit, unlike
+% "minimum width=18pt" -- confirm this is intended.
+\begin{tikzpicture}[
+ scale=0.2,
+ every text node part/.style={align=center},
+ rounded corners=0.75pt,
+ font=\footnotesize,
+ operator/.style={draw=black,fill=black!10,inner sep=1pt,minimum width=18pt,minimum height=10},
+ ]
+
+ % Batchnorm, 1d: 10x4 grids in which one column (x=3, full height) is
+ % highlighted; the input is placed 1.3cm below the output.
+ \node[inner sep=1pt] (batchnorm output 1d) {\amatrix{10}{4}{3}{0}{1}{4}{red}};
+ \node[inner sep=1pt,below=1.3cm of batchnorm output 1d] (batchnorm input 1d) {\amatrix{10}{4}{3}{0}{1}{4}{blue}};
+ % Straight arrow from input to output, shifted by (-1.5,0) from the
+ % node centres, with the formula box at 45% of its length.
+ \draw[->] ($(batchnorm input 1d.north)+(-1.5,0)$) -- ($(batchnorm output 1d.south)+(-1.5,0)$)
+ node[operator,pos=0.45]
+ {$\gamma_d \frac{x-\hat{m}_d}{\sqrt{\hat{v}_d+\epsilon}}+\beta_d$};
+
+ % Axis arrows: D along the top edge, B down the left edge. They are
+ % drawn with the bounding box computation interrupted, so they do not
+ % enlarge the picture.
+ \begin{pgfinterruptboundingbox}
+ \draw[->,transform canvas={yshift=3pt}] (batchnorm output 1d.north west) -- (batchnorm output 1d.north east) node[midway,above] {$D$};
+ \draw[->,transform canvas={xshift=-3pt}] (batchnorm output 1d.north west) -- (batchnorm output 1d.south west) node[midway,left] {$B$};
+ \end{pgfinterruptboundingbox}
+
+ % Layernorm, 1d: same grids, but one row (y=2, full width) is
+ % highlighted. The output is 1.1cm to the right of the batchnorm
+ % output; the input is aligned horizontally with that output and
+ % vertically with the batchnorm input.
+ \node[inner sep=1pt,right=1.1cm of batchnorm output 1d] (layernorm output 1d) {\amatrix{10}{4}{0}{2}{10}{1}{red}};
+ \node[inner sep=1pt] (layernorm input 1d) at (layernorm output 1d|-batchnorm input 1d) {\amatrix{10}{4}{0}{2}{10}{1}{blue}};
+
+ % Same D / B axis arrows as for the batchnorm panel.
+ \begin{pgfinterruptboundingbox}
+ \draw[->,transform canvas={yshift=3pt}] (layernorm output 1d.north west) -- (layernorm output 1d.north east) node[midway,above] {$D$};
+ \draw[->,transform canvas={xshift=-3pt}] (layernorm output 1d.north west) -- (layernorm output 1d.south west) node[midway,left] {$B$};
+ \end{pgfinterruptboundingbox}
+
+ % Curved arrow going around the east side of the two grids, with the
+ % formula box at its middle. The (0,-0.0) offset on the target is a
+ % zero shift.
+ % NOTE(review): gamma and beta are indexed by b here, like the
+ % statistics -- confirm this is intended rather than gamma_d, beta_d.
+ \draw[->] ($(layernorm input 1d.east)+(0,1)$) to[out=55,in=305]
+ node[operator,pos=0.5]
+ {$\gamma_b \frac{x-\hat{m}_b}{\sqrt{\hat{v}_b+\epsilon}}+\beta_b$}
+ %
+ ($(layernorm output 1d.east)+(0,-0.0)$);
+
+ %---------------------------------------------------------
+
+ % Batchnorm, 2d output: a 7x4x6 grid cube in a nested tikzpicture with
+ % its own x, y, z unit vectors. The red polygon covers the z=0 face and
+ % the y=4 face of the slab 2 <= x <= 3.
+ \node[below=1.65cm of batchnorm input 1d] (batchnorm output 2d) {%
+ \begin{tikzpicture}[line join=bevel,x={(0:0.2cm)},y={(90:0.2cm)},z={(50:0.1cm)}]
+
+ \draw[fill=red] (2,0,0)--++(0,4,0)--++(0,0,6)--++(1,0,0)--++(0,0,-6)--++(0,-4,0)--cycle;
+ \gridcube{7}{4}{6}
+ % Axis arrows B (along y), D (along x) and H,W (along z), kept out of
+ % the bounding box.
+ \begin{pgfinterruptboundingbox}
+ \draw[->,transform canvas={xshift=-4pt}] (0,4,0) -- (0,0,0) node[midway,left] {$B$};
+ \draw[->,transform canvas={yshift=4pt}] (0,4,6) -- ++(7,0,0) node[midway,above] {$D$};
+ \draw[->,transform canvas={xshift=-4pt,yshift=2pt}] (0,4,6) -- ++(0,0,-6)
+ node[midway,left,xshift=3pt,yshift=4pt] {$H,W$};
+ \end{pgfinterruptboundingbox}
+ \end{tikzpicture}
+ };
+
+ % Batchnorm, 2d input: same cube and slab in blue, without axis arrows.
+ \node[below=1.3cm of batchnorm output 2d] (batchnorm input 2d) {%
+ \begin{tikzpicture}[line join=bevel,x={(0:0.2cm)},y={(90:0.2cm)},z={(50:0.1cm)}]
+ \draw[fill=blue] (2,0,0)--++(0,4,0)--++(0,0,6)--++(1,0,0)--++(0,0,-6)--++(0,-4,0)--cycle;
+ \gridcube{7}{4}{6}
+ \end{tikzpicture}
+ };
+
+ % Straight arrow with the batchnorm formula, as in the 1d panel.
+ \draw[->] ($(batchnorm input 2d.north)+(-1.5,0)$) -- ($(batchnorm output 2d.south)+(-1.5,0)$)
+ node[operator,pos=0.45]
+ {$\gamma_d \frac{x-\hat{m}_d}{\sqrt{\hat{v}_d+\epsilon}}+\beta_d$};
+
+ % Layernorm, 2d output: the red polygon covers the z=0 face and the
+ % x=7 face of the slab 2 <= y <= 3. The node is aligned with the 1d
+ % layernorm column and the 2d batchnorm output row.
+ \node (layernorm output 2d) at (layernorm output 1d|-batchnorm output 2d) {%
+ \begin{tikzpicture}[line join=bevel,x={(0:0.2cm)},y={(90:0.2cm)},z={(50:0.1cm)}]
+ \draw[fill=red] (0,2,0)--++(7,0,0)--++(0,0,6)--++(0,1,0)--++(0,0,-6)--++(-7,0,0)--cycle;
+ \gridcube{7}{4}{6}
+ % Same B / D / H,W axis arrows as for the batchnorm cube.
+ \begin{pgfinterruptboundingbox}
+ \draw[->,transform canvas={xshift=-4pt}] (0,4,0) -- (0,0,0) node[midway,left] {$B$};
+ \draw[->,transform canvas={yshift=4pt}] (0,4,6) -- ++(7,0,0) node[midway,above] {$D$};
+ \draw[->,transform canvas={xshift=-4pt,yshift=2pt}] (0,4,6) -- ++(0,0,-6)
+ node[midway,left,xshift=3pt,yshift=4pt] {$H,W$};
+ \end{pgfinterruptboundingbox}
+
+ \end{tikzpicture}
+ };
+
+ % Layernorm, 2d input: same cube and slab in blue, without axis arrows.
+ \node (layernorm input 2d) at (layernorm input 1d|-batchnorm input 2d) {%
+ \begin{tikzpicture}[line join=bevel,x={(0:0.2cm)},y={(90:0.2cm)},z={(50:0.1cm)}]
+ \draw[fill=blue] (0,2,0)--++(7,0,0)--++(0,0,6)--++(0,1,0)--++(0,0,-6)--++(-7,0,0)--cycle;
+ \gridcube{7}{4}{6}
+ \end{tikzpicture}
+ };
+
+
+ % Curved arrow on the east side of the two cubes, with the layernorm
+ % formula at 40% of its length.
+ % NOTE(review): same gamma_b / beta_b indexing question as in the 1d
+ % layernorm panel.
+ \draw[->] ($(layernorm input 2d.east)+(-1,2)$) to[out=55,in=305]
+ node[operator,pos=0.4]
+ {$\gamma_b \frac{x-\hat{m}_b}{\sqrt{\hat{v}_b+\epsilon}}+\beta_b$}
+ %
+ ($(layernorm output 2d.east)+(-1,1.0)$);
+
+ % Panel captions, 5pt below each input.
+ \node[below=5pt of batchnorm input 1d] {batchnorm};
+ \node[below=5pt of layernorm input 1d] {layernorm};
+ \node[below=5pt of batchnorm input 2d] {batchnorm 2d};
+ \node[below=5pt of layernorm input 2d] {layernorm 2d};
+
+\end{tikzpicture}
+
+\end{document}