# quantize_face_detector.py

from __future__ import print_function
import sys
import argparse
import cv2 as cv
import tensorflow as tf
import numpy as np
import struct

if sys.version_info > (3,):
    long = int

from tensorflow.python.tools import optimize_for_inference_lib
from tensorflow.tools.graph_transforms import TransformGraph
from tensorflow.core.framework.node_def_pb2 import NodeDef
from google.protobuf import text_format

parser = argparse.ArgumentParser(description="Use this script to create TensorFlow graph "
                                             "with weights from OpenCV's face detection network. "
                                             "Only backbone part of SSD model is converted this way. "
                                             "Look for .pbtxt configuration file at "
                                             "https://github.com/opencv/opencv_extra/tree/4.x/testdata/dnn/opencv_face_detector.pbtxt")
parser.add_argument('--model', help='Path to .caffemodel weights', required=True)
parser.add_argument('--proto', help='Path to .prototxt Caffe model definition', required=True)
parser.add_argument('--pb', help='Path to output .pb TensorFlow model', required=True)
parser.add_argument('--pbtxt', help='Path to output .pbtxt TensorFlow graph', required=True)
parser.add_argument('--quantize', help='Quantize weights to uint8', action='store_true')
parser.add_argument('--fp16', help='Convert weights to half precision floats', action='store_true')
args = parser.parse_args()

assert(not args.quantize or not args.fp16)

dtype = tf.float16 if args.fp16 else tf.float32
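
# Example invocation (file names below are placeholders; use the prototxt and caffemodel
# shipped with OpenCV's face detector sample):
#   python quantize_face_detector.py --proto deploy.prototxt \
#       --model res10_300x300_ssd_iter_140000.caffemodel \
#       --pb opencv_face_detector_uint8.pb --pbtxt opencv_face_detector.pbtxt --quantize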

################################################################################
cvNet = cv.dnn.readNetFromCaffe(args.proto, args.model)

def dnnLayer(name):
    return cvNet.getLayer(long(cvNet.getLayerId(name)))

def scale(x, name):
    with tf.variable_scope(name):
        layer = dnnLayer(name)
        w = tf.Variable(layer.blobs[0].flatten(), dtype=dtype, name='mul')
        if len(layer.blobs) > 1:
            b = tf.Variable(layer.blobs[1].flatten(), dtype=dtype, name='add')
            return tf.nn.bias_add(tf.multiply(x, w), b)
        else:
            return tf.multiply(x, w, name)

def conv(x, name, stride=1, pad='SAME', dilation=1, activ=None):
    with tf.variable_scope(name):
        layer = dnnLayer(name)
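        # Caffe stores convolution weights as [out_channels, in_channels, height, width];
        # tf.nn.conv2d expects [height, width, in_channels, out_channels], hence the transpose.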
        w = tf.Variable(layer.blobs[0].transpose(2, 3, 1, 0), dtype=dtype, name='weights')
        if dilation == 1:
            conv = tf.nn.conv2d(x, filter=w, strides=(1, stride, stride, 1), padding=pad)
        else:
            assert(stride == 1)
            conv = tf.nn.atrous_conv2d(x, w, rate=dilation, padding=pad)

        if len(layer.blobs) > 1:
            b = tf.Variable(layer.blobs[1].flatten(), dtype=dtype, name='bias')
            conv = tf.nn.bias_add(conv, b)
        return activ(conv) if activ else conv

def batch_norm(x, name):
    with tf.variable_scope(name):
        # Unfortunately, TensorFlow's batch normalization layer doesn't work with fp16 input.
        # Here we do a cast to fp32 but remove it in the frozen graph.
        if x.dtype != tf.float32:
            x = tf.cast(x, tf.float32)

        layer = dnnLayer(name)
        assert(len(layer.blobs) >= 3)

        mean = layer.blobs[0].flatten()
        std = layer.blobs[1].flatten()
        scale = layer.blobs[2].flatten()

        eps = 1e-5
        hasBias = len(layer.blobs) > 3
        hasWeights = scale.shape != (1,)
        if not hasWeights and not hasBias:
            mean /= scale[0]
            std /= scale[0]

        mean = tf.Variable(mean, dtype=tf.float32, name='mean')
        std = tf.Variable(std, dtype=tf.float32, name='std')
        gamma = tf.Variable(scale if hasWeights else np.ones(mean.shape), dtype=tf.float32, name='gamma')
        beta = tf.Variable(layer.blobs[3].flatten() if hasBias else np.zeros(mean.shape), dtype=tf.float32, name='beta')
        bn = tf.nn.fused_batch_norm(x, gamma, beta, mean, std, eps,
                                    is_training=False)[0]
        if bn.dtype != dtype:
            bn = tf.cast(bn, dtype)
        return bn

def l2norm(x, name):
    with tf.variable_scope(name):
        layer = dnnLayer(name)
        w = tf.Variable(layer.blobs[0].flatten(), dtype=dtype, name='mul')
        return tf.nn.l2_normalize(x, 3, epsilon=1e-10) * w

### Graph definition ###########################################################
inp = tf.placeholder(dtype, [1, 300, 300, 3], 'data')
data_bn = batch_norm(inp, 'data_bn')
data_scale = scale(data_bn, 'data_scale')

# Instead of tf.pad we use tf.space_to_batch_nd layers, which override the convolution's
# padding strategy with explicit padding amounts.
# data_scale = tf.pad(data_scale, [[0, 0], [3, 3], [3, 3], [0, 0]])
data_scale = tf.space_to_batch_nd(data_scale, [1, 1], [[3, 3], [3, 3]], name='Pad')
conv1_h = conv(data_scale, stride=2, pad='VALID', name='conv1_h')
conv1_bn_h = batch_norm(conv1_h, 'conv1_bn_h')
conv1_scale_h = scale(conv1_bn_h, 'conv1_scale_h')
conv1_relu = tf.nn.relu(conv1_scale_h)
conv1_pool = tf.layers.max_pooling2d(conv1_relu, pool_size=(3, 3), strides=(2, 2),
                                     padding='SAME', name='conv1_pool')

layer_64_1_conv1_h = conv(conv1_pool, 'layer_64_1_conv1_h')
layer_64_1_bn2_h = batch_norm(layer_64_1_conv1_h, 'layer_64_1_bn2_h')
layer_64_1_scale2_h = scale(layer_64_1_bn2_h, 'layer_64_1_scale2_h')
layer_64_1_relu2 = tf.nn.relu(layer_64_1_scale2_h)
layer_64_1_conv2_h = conv(layer_64_1_relu2, 'layer_64_1_conv2_h')
layer_64_1_sum = layer_64_1_conv2_h + conv1_pool

layer_128_1_bn1_h = batch_norm(layer_64_1_sum, 'layer_128_1_bn1_h')
layer_128_1_scale1_h = scale(layer_128_1_bn1_h, 'layer_128_1_scale1_h')
layer_128_1_relu1 = tf.nn.relu(layer_128_1_scale1_h)
layer_128_1_conv1_h = conv(layer_128_1_relu1, stride=2, name='layer_128_1_conv1_h')
layer_128_1_bn2 = batch_norm(layer_128_1_conv1_h, 'layer_128_1_bn2')
layer_128_1_scale2 = scale(layer_128_1_bn2, 'layer_128_1_scale2')
layer_128_1_relu2 = tf.nn.relu(layer_128_1_scale2)
layer_128_1_conv2 = conv(layer_128_1_relu2, 'layer_128_1_conv2')
layer_128_1_conv_expand_h = conv(layer_128_1_relu1, stride=2, name='layer_128_1_conv_expand_h')
layer_128_1_sum = layer_128_1_conv2 + layer_128_1_conv_expand_h

layer_256_1_bn1 = batch_norm(layer_128_1_sum, 'layer_256_1_bn1')
layer_256_1_scale1 = scale(layer_256_1_bn1, 'layer_256_1_scale1')
layer_256_1_relu1 = tf.nn.relu(layer_256_1_scale1)
# layer_256_1_conv1 = tf.pad(layer_256_1_relu1, [[0, 0], [1, 1], [1, 1], [0, 0]])
layer_256_1_conv1 = tf.space_to_batch_nd(layer_256_1_relu1, [1, 1], [[1, 1], [1, 1]], name='Pad_1')
layer_256_1_conv1 = conv(layer_256_1_conv1, stride=2, pad='VALID', name='layer_256_1_conv1')
layer_256_1_bn2 = batch_norm(layer_256_1_conv1, 'layer_256_1_bn2')
layer_256_1_scale2 = scale(layer_256_1_bn2, 'layer_256_1_scale2')
layer_256_1_relu2 = tf.nn.relu(layer_256_1_scale2)
layer_256_1_conv2 = conv(layer_256_1_relu2, 'layer_256_1_conv2')
layer_256_1_conv_expand = conv(layer_256_1_relu1, stride=2, name='layer_256_1_conv_expand')
layer_256_1_sum = layer_256_1_conv2 + layer_256_1_conv_expand

layer_512_1_bn1 = batch_norm(layer_256_1_sum, 'layer_512_1_bn1')
layer_512_1_scale1 = scale(layer_512_1_bn1, 'layer_512_1_scale1')
layer_512_1_relu1 = tf.nn.relu(layer_512_1_scale1)
layer_512_1_conv1_h = conv(layer_512_1_relu1, 'layer_512_1_conv1_h')
layer_512_1_bn2_h = batch_norm(layer_512_1_conv1_h, 'layer_512_1_bn2_h')
layer_512_1_scale2_h = scale(layer_512_1_bn2_h, 'layer_512_1_scale2_h')
layer_512_1_relu2 = tf.nn.relu(layer_512_1_scale2_h)
layer_512_1_conv2_h = conv(layer_512_1_relu2, dilation=2, name='layer_512_1_conv2_h')
layer_512_1_conv_expand_h = conv(layer_512_1_relu1, 'layer_512_1_conv_expand_h')
layer_512_1_sum = layer_512_1_conv2_h + layer_512_1_conv_expand_h

last_bn_h = batch_norm(layer_512_1_sum, 'last_bn_h')
last_scale_h = scale(last_bn_h, 'last_scale_h')
fc7 = tf.nn.relu(last_scale_h, name='last_relu')

conv6_1_h = conv(fc7, 'conv6_1_h', activ=tf.nn.relu)
conv6_2_h = conv(conv6_1_h, stride=2, name='conv6_2_h', activ=tf.nn.relu)
conv7_1_h = conv(conv6_2_h, 'conv7_1_h', activ=tf.nn.relu)
# conv7_2_h = tf.pad(conv7_1_h, [[0, 0], [1, 1], [1, 1], [0, 0]])
conv7_2_h = tf.space_to_batch_nd(conv7_1_h, [1, 1], [[1, 1], [1, 1]], name='Pad_2')
conv7_2_h = conv(conv7_2_h, stride=2, pad='VALID', name='conv7_2_h', activ=tf.nn.relu)
conv8_1_h = conv(conv7_2_h, pad='SAME', name='conv8_1_h', activ=tf.nn.relu)
conv8_2_h = conv(conv8_1_h, pad='VALID', name='conv8_2_h', activ=tf.nn.relu)
conv9_1_h = conv(conv8_2_h, 'conv9_1_h', activ=tf.nn.relu)
conv9_2_h = conv(conv9_1_h, pad='VALID', name='conv9_2_h', activ=tf.nn.relu)

conv4_3_norm = l2norm(layer_256_1_relu1, 'conv4_3_norm')

### Locations and confidences ##################################################
locations = []
confidences = []
flattenLayersNames = []  # Collect names of all reshape layers that should be replaced with flattens.
for top, suffix in zip([locations, confidences], ['_mbox_loc', '_mbox_conf']):
    for bottom, name in zip([conv4_3_norm, fc7, conv6_2_h, conv7_2_h, conv8_2_h, conv9_2_h],
                            ['conv4_3_norm', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2', 'conv9_2']):
        name += suffix
        flat = tf.layers.flatten(conv(bottom, name))
        flattenLayersNames.append(flat.name[:flat.name.find(':')])
        top.append(flat)
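
# Each source layer contributes one convolution predicting box coordinates (*_mbox_loc)
# and one predicting class scores (*_mbox_conf); the flattened outputs are concatenated
# over all six layers below.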
mbox_loc = tf.concat(locations, axis=-1, name='mbox_loc')
mbox_conf = tf.concat(confidences, axis=-1, name='mbox_conf')

total = int(np.prod(mbox_conf.shape[1:]))
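# Softmax must run over the two classes (background/face) of each prior box, so the
# concatenated scores are reshaped to [num_priors, 2], normalized, and flattened back.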
mbox_conf_reshape = tf.reshape(mbox_conf, [-1, 2], name='mbox_conf_reshape')
mbox_conf_softmax = tf.nn.softmax(mbox_conf_reshape, name='mbox_conf_softmax')
mbox_conf_flatten = tf.reshape(mbox_conf_softmax, [-1, total], name='mbox_conf_flatten')
flattenLayersNames.append('mbox_conf_flatten')

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    ### Check correctness ######################################################
    out_nodes = ['mbox_loc', 'mbox_conf_flatten']
    inp_nodes = [inp.name[:inp.name.find(':')]]

    np.random.seed(2701)
    inputData = np.random.standard_normal([1, 3, 300, 300]).astype(np.float32)

    cvNet.setInput(inputData)
    cvNet.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
    outDNN = cvNet.forward(out_nodes)

    outTF = sess.run([mbox_loc, mbox_conf_flatten], feed_dict={inp: inputData.transpose(0, 2, 3, 1)})

    print('Max diff @ locations: %e' % np.max(np.abs(outDNN[0] - outTF[0])))
    print('Max diff @ confidence: %e' % np.max(np.abs(outDNN[1] - outTF[1])))
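
    # Sanity note: both frameworks run the same weights on the same input, so the reported
    # differences are expected to stay small (fp16 or quantized graphs will drift somewhat more).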

    # Save a graph
    graph_def = sess.graph.as_graph_def()

    # Freeze graph. Replaces variables with constants.
    graph_def = tf.graph_util.convert_variables_to_constants(sess, graph_def, out_nodes)
    # Optimize graph. Removes training-only ops, unused nodes.
    graph_def = optimize_for_inference_lib.optimize_for_inference(graph_def, inp_nodes, out_nodes, dtype.as_datatype_enum)

    # Fuse constant operations.
    transforms = ["fold_constants(ignore_errors=True)"]
    if args.quantize:
        transforms += ["quantize_weights(minimum_size=0)"]
    transforms += ["sort_by_execution_order"]
    graph_def = TransformGraph(graph_def, inp_nodes, out_nodes, transforms)

    # By default, float16 weights are stored in the tensor's repeated `half_val` field,
    # which has type int32 with the unused leading bytes set to zero. Protobuf encodes
    # these values as varints (7 payload bits per byte plus a continuation bit), so a
    # float16 may take 1, 2 or 3 bytes depending on its value. To improve compression,
    # we move all `half_val` values into `tensor_content`, which uses exactly 2 bytes per value.
    for node in graph_def.node:
        if 'value' in node.attr:
            halfs = node.attr["value"].tensor.half_val
            if not node.attr["value"].tensor.tensor_content and halfs:
                node.attr["value"].tensor.tensor_content = struct.pack('H' * len(halfs), *halfs)
                node.attr["value"].tensor.ClearField('half_val')

    # Serialize
    with tf.gfile.FastGFile(args.pb, 'wb') as f:
        f.write(graph_def.SerializeToString())

################################################################################
# Write a text graph representation
################################################################################
def tensorMsg(values):
    msg = 'tensor { dtype: DT_FLOAT tensor_shape { dim { size: %d } }' % len(values)
    for value in values:
        msg += 'float_val: %f ' % value
    return msg + '}'
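
# For illustration, tensorMsg([2, 3]) returns
# 'tensor { dtype: DT_FLOAT tensor_shape { dim { size: 2 } }float_val: 2.000000 float_val: 3.000000 }'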

# Remove Const nodes and unused attributes.
for i in reversed(range(len(graph_def.node))):
    if graph_def.node[i].op in ['Const', 'Dequantize']:
        del graph_def.node[i]
    for attr in ['T', 'data_format', 'Tshape', 'N', 'Tidx', 'Tdim',
                 'use_cudnn_on_gpu', 'Index', 'Tperm', 'is_training',
                 'Tpaddings', 'Tblock_shape', 'Tcrops']:
        if attr in graph_def.node[i].attr:
            del graph_def.node[i].attr[attr]

# Append prior box generators
min_sizes = [30, 60, 111, 162, 213, 264]
max_sizes = [60, 111, 162, 213, 264, 315]
steps = [8, 16, 32, 64, 100, 300]
aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]
layers = [conv4_3_norm, fc7, conv6_2_h, conv7_2_h, conv8_2_h, conv9_2_h]
for i in range(6):
    priorBox = NodeDef()
    priorBox.name = 'PriorBox_%d' % i
    priorBox.op = 'PriorBox'
    priorBox.input.append(layers[i].name[:layers[i].name.find(':')])
    priorBox.input.append(inp_nodes[0])  # data

    text_format.Merge('i: %d' % min_sizes[i], priorBox.attr["min_size"])
    text_format.Merge('i: %d' % max_sizes[i], priorBox.attr["max_size"])
    text_format.Merge('b: true', priorBox.attr["flip"])
    text_format.Merge('b: false', priorBox.attr["clip"])
    text_format.Merge(tensorMsg(aspect_ratios[i]), priorBox.attr["aspect_ratio"])
    text_format.Merge(tensorMsg([0.1, 0.1, 0.2, 0.2]), priorBox.attr["variance"])
    text_format.Merge('f: %f' % steps[i], priorBox.attr["step"])
    text_format.Merge('f: 0.5', priorBox.attr["offset"])
    graph_def.node.extend([priorBox])

# Concatenate prior boxes
concat = NodeDef()
concat.name = 'mbox_priorbox'
concat.op = 'ConcatV2'
for i in range(6):
    concat.input.append('PriorBox_%d' % i)
concat.input.append('mbox_loc/axis')
graph_def.node.extend([concat])

# DetectionOutput layer
detectionOut = NodeDef()
detectionOut.name = 'detection_out'
detectionOut.op = 'DetectionOutput'

detectionOut.input.append('mbox_loc')
detectionOut.input.append('mbox_conf_flatten')
detectionOut.input.append('mbox_priorbox')

text_format.Merge('i: 2', detectionOut.attr['num_classes'])
text_format.Merge('b: true', detectionOut.attr['share_location'])
text_format.Merge('i: 0', detectionOut.attr['background_label_id'])
text_format.Merge('f: 0.45', detectionOut.attr['nms_threshold'])
text_format.Merge('i: 400', detectionOut.attr['top_k'])
text_format.Merge('s: "CENTER_SIZE"', detectionOut.attr['code_type'])
text_format.Merge('i: 200', detectionOut.attr['keep_top_k'])
text_format.Merge('f: 0.01', detectionOut.attr['confidence_threshold'])
graph_def.node.extend([detectionOut])

# Replace the L2Normalization subgraph with a single node.
for i in reversed(range(len(graph_def.node))):
    if graph_def.node[i].name in ['conv4_3_norm/l2_normalize/Square',
                                  'conv4_3_norm/l2_normalize/Sum',
                                  'conv4_3_norm/l2_normalize/Maximum',
                                  'conv4_3_norm/l2_normalize/Rsqrt']:
        del graph_def.node[i]
for node in graph_def.node:
    if node.name == 'conv4_3_norm/l2_normalize':
        node.op = 'L2Normalize'
        node.input.pop()
        node.input.pop()
        node.input.append(layer_256_1_relu1.name)
        node.input.append('conv4_3_norm/l2_normalize/Sum/reduction_indices')
        break

softmaxShape = NodeDef()
softmaxShape.name = 'reshape_before_softmax'
softmaxShape.op = 'Const'
text_format.Merge(
    'tensor {'
    ' dtype: DT_INT32'
    ' tensor_shape { dim { size: 3 } }'
    ' int_val: 0'
    ' int_val: -1'
    ' int_val: 2'
    '}', softmaxShape.attr["value"])
graph_def.node.extend([softmaxShape])

for node in graph_def.node:
    if node.name == 'mbox_conf_reshape':
        node.input[1] = softmaxShape.name
    elif node.name == 'mbox_conf_softmax':
        text_format.Merge('i: 2', node.attr['axis'])
    elif node.name in flattenLayersNames:
        node.op = 'Flatten'
        inpName = node.input[0]
        node.input.pop()
        node.input.pop()
        node.input.append(inpName)

tf.train.write_graph(graph_def, "", args.pbtxt, as_text=True)
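
# The frozen .pb together with the generated .pbtxt can then be loaded back with OpenCV,
# for example (output names are whatever was passed on the command line):
#   net = cv.dnn.readNetFromTensorflow(args.pb, args.pbtxt)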