2
2
from __future__ import absolute_import
3
3
from __future__ import division
4
4
5
+ import time
5
6
import timeit
6
7
import numpy as np
7
8
import torch .backends .cudnn as cudnn
@@ -103,7 +104,10 @@ def run_torch_tensorrt(
103
104
if precision == "int8" :
104
105
compile_settings .update ({"calib" : params .get ("calibration_cache" )})
105
106
107
+ start_compile = time .time_ns ()
106
108
model = torchtrt .compile (model , ** compile_settings )
109
+ end_compile = time .time_ns ()
110
+ compile_time_ms = (end_compile - start_compile ) / 1e6
107
111
108
112
iters = params .get ("iterations" , 20 )
109
113
# Warm up
@@ -123,7 +127,7 @@ def run_torch_tensorrt(
123
127
meas_time = end_time - start_time
124
128
timings .append (meas_time )
125
129
126
- recordStats ("Torch-TensorRT" , timings , precision , batch_size )
130
+ recordStats ("Torch-TensorRT" , timings , precision , batch_size , compile_time_ms )
127
131
128
132
129
133
# Runs inference using FX2TRT backend
@@ -136,13 +140,16 @@ def run_fx2trt(model, input_tensors, params, precision, batch_size):
136
140
model .half ()
137
141
input_tensors = [tensor .half () for tensor in input_tensors ]
138
142
# Run lowering eager mode benchmark
143
+ start_compile = time .time_ns ()
139
144
model = compile (
140
145
model ,
141
146
input_tensors ,
142
147
max_batch_size = batch_size ,
143
148
lower_precision = precision ,
144
149
verbose_log = False ,
145
150
)
151
+ end_compile = time .time_ns ()
152
+ compile_time_ms = (end_compile - start_compile ) / 1e6
146
153
147
154
iters = params .get ("iterations" , 20 )
148
155
# Warm up
@@ -162,7 +169,7 @@ def run_fx2trt(model, input_tensors, params, precision, batch_size):
162
169
meas_time = end_time - start_time
163
170
timings .append (meas_time )
164
171
165
- recordStats ("FX-TensorRT" , timings , precision , batch_size )
172
+ recordStats ("FX-TensorRT" , timings , precision , batch_size , compile_time_ms )
166
173
167
174
168
175
def torch_dtype_from_trt (dtype ):
@@ -331,7 +338,7 @@ def run(
331
338
332
339
333
340
# Generate report
334
- def recordStats (backend , timings , precision , batch_size = 1 ):
341
+ def recordStats (backend , timings , precision , batch_size = 1 , compile_time_ms = None ):
335
342
times = np .array (timings )
336
343
steps = len (times )
337
344
speeds = batch_size / times
@@ -350,6 +357,7 @@ def recordStats(backend, timings, precision, batch_size=1):
350
357
"Mean(FPS)" : speed_mean ,
351
358
"Median-Latency(ms)" : time_med * 1000 ,
352
359
"Mean-Latency(ms)" : time_mean * 1000 ,
360
+ "Compile Time(ms)" : compile_time_ms ,
353
361
}
354
362
results .append (stats )
355
363
0 commit comments