Updates from Twitter @michaelcarilli

author Francois Fleuret <francois@fleuret.org>

Wed, 2 Sep 2020 20:13:28 +0000 (22:13 +0200)

committer Francois Fleuret <francois@fleuret.org>

Wed, 2 Sep 2020 20:13:28 +0000 (22:13 +0200)
author Francois Fleuret <francois@fleuret.org>
Wed, 2 Sep 2020 20:13:28 +0000 (22:13 +0200)
committer Francois Fleuret <francois@fleuret.org>
Wed, 2 Sep 2020 20:13:28 +0000 (22:13 +0200)
diff --git a/speed.py b/speed.py

index f682a16..10a008f 100755 (executable)
--- a/speed.py
+++ b/speed.py
@@ -4,20 +4,24 @@ import time, torch
  
  if torch.cuda.is_available():
      device = torch.device('cuda')
+    sync = lambda: torch.cuda.synchronize()
  else:
      device = torch.device('cpu')
+    sync = lambda: None
  
  nb_runs = 10000
-d1, d2, d3 = 50000, 256, 512
+d1, d2, d3 = 2048, 2048, 2048
  
  a, b = torch.rand(d1, d2).to(device), torch.rand(d2, d3).to(device)
  
+sync() # must be called: drain pending GPU work before starting the timer
  start_time = time.perf_counter()
  for k in range(nb_runs):
      c = a @ b
+sync() # CUDA kernels are queued asynchronously; wait for them before stopping the timer
  duration = time.perf_counter() - start_time
  
-nb_flop = float(nb_runs * d1 * d2 * d3)
+nb_flop = float(nb_runs * d1 * d2 * d3 * 2) # 1 multiply-and-add is 2 ops
  speed = nb_flop / duration
  
  for u in [ '', 'K', 'M', 'G', 'T', 'P' ]:
author	Francois Fleuret <francois@fleuret.org>
	Wed, 2 Sep 2020 20:13:28 +0000 (22:13 +0200)
committer	Francois Fleuret <francois@fleuret.org>
	Wed, 2 Sep 2020 20:13:28 +0000 (22:13 +0200)