rnn_example.py

# -*- coding: utf-8 -*-
import numpy as np
import tensorflow as tf
# The following two lines work around the error:
# _tkinter.TclError: no display name and no $DISPLAY environment variable
# import matplotlib as mpl
# mpl.use("Agg")
import matplotlib.pyplot as plt


def rnn_forward_prop():
    # unroll a single-layer vanilla RNN over a two-step input by hand
    x = [1, 2]
    state = [0.0, 0.0]
    # recurrent (cell) weights and bias
    w_cell_state = np.asarray([[0.1, 0.2], [0.3, 0.4]])
    w_cell_input = np.asarray([0.5, 0.6])
    b_cell = np.asarray([0.1, -0.1])
    # output weights and bias
    w_output = np.asarray([1.0, 2.0])
    b_output = 0.1
    for i in range(len(x)):
        before_activation = np.dot(state, w_cell_state) + x[i] * w_cell_input + b_cell
        # tanh as the activation function
        state = np.tanh(before_activation)
        final_output = np.dot(state, w_output) + b_output
        print("before activation: ", before_activation)
        print("state: ", state)
        print("output: ", final_output, "\n")


def lstm_structure():
    # structural sketch, not runnable as-is: current_input, fully_connected,
    # calc_loss and expected_output below are placeholders
    lstm_hidden_size = 1
    batch_size = 10
    num_steps = 10  # length of the unrolled sequence
    num_of_layers = 5
    # multi-layer recurrent neural network with dropout; each layer needs
    # its own cell instance
    stacked_lstm = tf.nn.rnn_cell.MultiRNNCell(
        [tf.nn.rnn_cell.DropoutWrapper(tf.nn.rnn_cell.BasicLSTMCell(lstm_hidden_size))
         for _ in range(num_of_layers)]
    )
    # state is an LSTMStateTuple per layer with two tensors, state.c and state.h
    state = stacked_lstm.zero_state(batch_size, tf.float32)
    loss = 0.0
    for i in range(num_steps):
        # create variables in the first time step, reuse them in the following steps
        if i > 0:
            tf.get_variable_scope().reuse_variables()
        # current_input represents x_t; the incoming state carries h_{t-1} and c_{t-1}
        # lstm_output can be fed to other layers; state is used in the next time step
        lstm_output, state = stacked_lstm(current_input, state)
        # connect a fully connected layer to generate the final output
        final_output = fully_connected(lstm_output)
        # accumulate the loss of the current step
        loss += calc_loss(final_output, expected_output)
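

# lstm_structure() only shows the wiring. A minimal runnable variant (our own
# construction, assuming TensorFlow 1.x) that pushes one batch of random data
# through a stacked LSTM for a single time step:
def lstm_structure_demo():
    hidden_size, batch_size, input_size = 4, 2, 3
    cell = tf.nn.rnn_cell.MultiRNNCell(
        [tf.nn.rnn_cell.BasicLSTMCell(hidden_size) for _ in range(2)])
    state = cell.zero_state(batch_size, tf.float32)
    inputs = tf.placeholder(tf.float32, [batch_size, input_size])
    output, state = cell(inputs, state)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        print(sess.run(output, feed_dict={inputs: np.random.rand(batch_size, input_size)}))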


HIDDEN_SIZE = 30  # number of hidden nodes
NUM_LAYERS = 2
TIMESTEPS = 10
TRAINING_STEPS = 10000
BATCH_SIZE = 32
TRAINING_EXAMPLES = 10000
TEST_EXAMPLES = 1000
SAMPLE_GAP = 0.01  # sampling interval


def generate_data(seq):
    x = []
    y = []
    # the input is samples i .. i+TIMESTEPS-1 and the label is sample
    # i+TIMESTEPS, i.e. TIMESTEPS consecutive samples predict the next one
    for i in range(len(seq) - TIMESTEPS):
        x.append([seq[i:i + TIMESTEPS]])
        y.append([seq[i + TIMESTEPS]])
    return np.array(x, dtype=np.float32), np.array(y, dtype=np.float32)
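

# Quick shape check for generate_data (our own illustration): with
# TIMESTEPS = 10, a length-100 sequence yields 90 samples, each carrying its
# 10-sample history as a single "time step" of 10 features for dynamic_rnn.
def generate_data_shape_demo():
    seq = np.sin(np.linspace(0, 10, 100, dtype=np.float32))
    x, y = generate_data(seq)
    print(x.shape, y.shape)  # (90, 1, 10) (90, 1)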


def lstm_model(x, y, is_training):
    with tf.name_scope("rnn"):
        cell = tf.nn.rnn_cell.MultiRNNCell([
            tf.nn.rnn_cell.BasicLSTMCell(HIDDEN_SIZE) for _ in range(NUM_LAYERS)
        ])
        # outputs holds every time step, shape = [batch_size, time, HIDDEN_SIZE];
        # only the last time step is used for the prediction
        outputs, _ = tf.nn.dynamic_rnn(cell, x, dtype=tf.float32)
        output = outputs[:, -1, :]
        # add a fully connected layer for the final output
        predictions = tf.contrib.layers.fully_connected(output, 1, activation_fn=None)
    # at inference time only the prediction is needed
    if not is_training:
        return predictions, None, None
    with tf.name_scope("loss"):
        loss = tf.losses.mean_squared_error(labels=y, predictions=predictions)
    # create the model optimizer
    with tf.name_scope("train"):
        train_op = tf.contrib.layers.optimize_loss(
            loss, tf.train.get_global_step(),
            optimizer="Adagrad", learning_rate=0.1
        )
    return predictions, loss, train_op
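

# dynamic_rnn returns outputs for every time step; outputs[:, -1, :] keeps only
# the last one. A tiny shape sketch (ours, assuming TensorFlow 1.x):
def dynamic_rnn_shape_demo():
    cell = tf.nn.rnn_cell.BasicLSTMCell(HIDDEN_SIZE)
    fake_x = tf.zeros([BATCH_SIZE, 1, TIMESTEPS])  # same layout as generate_data
    outputs, _ = tf.nn.dynamic_rnn(cell, fake_x, dtype=tf.float32)
    print(outputs.shape)            # (32, 1, 30)
    print(outputs[:, -1, :].shape)  # (32, 30)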


def train(sess, train_x, train_y, writer):
    # build the dataset pipeline
    dataset = tf.data.Dataset.from_tensor_slices((train_x, train_y))
    dataset = dataset.repeat().shuffle(1000).batch(BATCH_SIZE)
    x, y = dataset.make_one_shot_iterator().get_next()
    with tf.variable_scope("model"):
        predictions, loss, train_op = lstm_model(x, y, True)
    # register a loss summary; without at least one summary,
    # tf.summary.merge_all() returns None and the sess.run below would fail
    tf.summary.scalar("loss", loss)
    sess.run(tf.global_variables_initializer())
    merged = tf.summary.merge_all()
    for i in range(TRAINING_STEPS):
        if i % 500 == 0:
            # collect full runtime statistics for this step
            run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            # proto that receives the runtime metadata
            run_metadata = tf.RunMetadata()
            summary, _, l = sess.run([merged, train_op, loss],
                                     options=run_options, run_metadata=run_metadata)
            writer.add_run_metadata(run_metadata, 'step%03d' % i)
            writer.add_summary(summary, i)
            print("train step: " + str(i) + ", loss: " + str(l))
        else:
            _, l = sess.run([train_op, loss])


def run_eval(sess, test_x, test_y):
    # build the dataset pipeline, one example per batch
    dataset = tf.data.Dataset.from_tensor_slices((test_x, test_y))
    dataset = dataset.batch(1)
    x, y = dataset.make_one_shot_iterator().get_next()
    with tf.variable_scope("model", reuse=True):
        # no real y value is needed at inference time
        prediction, _, _ = lstm_model(x, [0.0], False)
    predictions = []
    labels = []
    for i in range(TEST_EXAMPLES):
        p, l = sess.run([prediction, y])
        predictions.append(p)
        labels.append(l)
    predictions = np.array(predictions).squeeze()
    labels = np.array(labels).squeeze()
    # root mean squared error
    rmse = np.sqrt(((predictions - labels) ** 2).mean(axis=0))
    print("RMSE: %.4f" % rmse)
    # plot the predicted curve against the real one
    plt.figure(1)
    plt.plot(predictions, label="predictions")
    plt.plot(labels, label="real_sin")
    plt.legend()
    plt.show()


def sample_generator(data):
    # superimpose three sine waves with different amplitudes and phases
    amps = [0.6, 0.2, 0.8]
    phases = [0.1, 0.4, 0.7]
    return (amps[0] * np.sin(data + np.pi * phases[0])
            + amps[1] * np.sin(data + np.pi * phases[1])
            + amps[2] * np.sin(data + np.pi * phases[2]))  # + np.random.rand(len(data)) * 0.02
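

# Small helper (ours, not part of the original example) to plot the target
# signal produced by sample_generator before training:
def plot_target_signal():
    t = np.linspace(0, 20, 2000, dtype=np.float32)
    plt.plot(t, sample_generator(t))
    plt.title("target: sum of three sines")
    plt.show()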


def rnn_example():
    # training samples cover [0, test_start), test samples [test_start, test_end)
    test_start = (TRAINING_EXAMPLES + TIMESTEPS) * SAMPLE_GAP
    test_end = test_start + (TEST_EXAMPLES + TIMESTEPS) * SAMPLE_GAP
    train_x, train_y = generate_data(
        sample_generator(np.linspace(0, test_start, TRAINING_EXAMPLES + TIMESTEPS, dtype=np.float32)))
    test_x, test_y = generate_data(
        sample_generator(np.linspace(test_start, test_end, TEST_EXAMPLES + TIMESTEPS, dtype=np.float32)))
    # writer = tf.summary.FileWriter("log", tf.get_default_graph())
    # with tf.device("/gpu:0"):
    with tf.Session() as sess:
        writer = tf.summary.FileWriter("log", sess.graph)
        print("train")
        train(sess, train_x, train_y, writer)
        print("eval")
        run_eval(sess, test_x, test_y)
        writer.close()


def main():
    # rnn_forward_prop()
    rnn_example()


if __name__ == "__main__":
    main()