Prechádzať zdrojové kódy

Implement the LeNet-5 structure using a dataset read from TFRecord files, plus code debugging. Images are sometimes stored as uint8 and sometimes as float32, so the stored dtype must be kept in mind when using the dataset. Some image preprocessing functions, such as those that modify saturation, hue and contrast, may require 3-channel images rather than grayscale images.

youchen 5 rokov pred
rodič
commit
448ad3851d

+ 1 - 0
.gitignore

@@ -1,5 +1,6 @@
 inceptionv3/model/*
 inceptionv3/preprocess/*
+*.tfrecord
 
 # Byte-compiled / optimized / DLL files
 __pycache__/

+ 0 - 0
__init__.py


+ 0 - 0
img_proc/__init__.py


+ 126 - 0
img_proc/mnist_eval.py

@@ -0,0 +1,126 @@
+# -*- coding: utf8 -*-
+import time
+import tensorflow as tf
+from tensorflow.examples.tutorials.mnist import input_data
+import matplotlib.pyplot as plt
+import numpy as np
+import mnist_inference
+import mnist_train
+from numpy.random import RandomState
+import os
+
+# generate new random dataset for test in 3 secs after close figure window manually
+EVAL_INTERVAL_SECS = 3
+NUMBER_OF_SAMPLES = 36
+FIG_ROWS = 3
+
+
+# display images and recognition result rather than accuracy diagram
+def evaluation(mnist):
+    """Repeatedly restore the latest checkpoint and visualise its predictions.
+
+    Each round takes NUMBER_OF_SAMPLES consecutive images starting at a random
+    offset in the validation set, runs them through the network with the
+    moving-average variable values restored, prints predicted vs. correct
+    labels, saves the figure to images/mnist_result_evaluation.jpg and shows
+    it. The next round starts EVAL_INTERVAL_SECS seconds after plt.show()
+    returns. The function returns as soon as no checkpoint can be found.
+
+    Args:
+        mnist: dataset object returned by input_data.read_data_sets with
+            one-hot labels.
+    """
+    image_shape = (NUMBER_OF_SAMPLES,
+                   mnist_inference.IMAGE_SIZE,
+                   mnist_inference.IMAGE_SIZE,
+                   mnist_inference.NUM_CHANNELS)
+    # Ceiling division: add_subplot needs integer grid sizes, and "/" yields a
+    # float on Python 3.
+    fig_cols = -(-NUMBER_OF_SAMPLES // FIG_ROWS)
+
+    with tf.Graph().as_default():
+        x = tf.placeholder(tf.float32, list(image_shape), name='x-input')
+        y_ = tf.placeholder(tf.float32, [None, mnist_inference.OUTPUT_NODE], name='input-y')
+
+        # report the recognised digits themselves instead of an accuracy figure
+        y = mnist_inference.inference(x, False, None)
+        indices = tf.argmax(y, 1)
+        correct_indices = tf.argmax(y_, 1)
+
+        # restore the moving-average (shadow) values into the model variables
+        variable_averages = tf.train.ExponentialMovingAverage(mnist_train.MOVING_AVERAGE_DECAY)
+        saver = tf.train.Saver(variable_averages.variables_to_restore())
+
+        # configure TF to allocate GPU memory on demand rather than consume all of it;
+        # the config does not change between rounds, so build it once
+        config = tf.ConfigProto(allow_soft_placement=True)
+        config.gpu_options.allow_growth = True
+
+        while True:
+            with tf.Session(config=config) as sess:
+                ckpt = tf.train.get_checkpoint_state(mnist_train.MODEL_SAVE_PATH)
+                if not (ckpt and ckpt.model_checkpoint_path):
+                    print("no checkpoint file found")
+                    return
+                saver.restore(sess, ckpt.model_checkpoint_path)
+
+                rdm = RandomState(int(time.time()))
+                sample_index = rdm.randint(0, mnist.validation.num_examples - NUMBER_OF_SAMPLES)
+                sample_slice = slice(sample_index, sample_index + NUMBER_OF_SAMPLES)
+                xs = mnist.validation.images[sample_slice]
+                validation_feed = {
+                    x: np.reshape(xs, image_shape),
+                    y_: mnist.validation.labels[sample_slice]}
+
+                # get global step from file name
+                global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
+
+                indices_score, correct_indices_score = sess.run(
+                    [indices, correct_indices], feed_dict=validation_feed)
+                print("after %s training step(s), validation result = \n%s\n, correct answer: \n%s" \
+                      % (global_step, indices_score, correct_indices_score))
+
+                fig = plt.figure(1)
+                fig.set_size_inches(15, 6)
+                for n, image in enumerate(xs):
+                    fig.add_subplot(FIG_ROWS, fig_cols, n + 1)
+                    plt.title("predict: [%s]\nanswer: [%s]"
+                              % (indices_score[n], correct_indices_score[n]))
+                    plt.imshow(image.reshape(mnist_inference.IMAGE_SIZE, mnist_inference.IMAGE_SIZE))
+                plt.subplots_adjust(
+                    top=0.95, bottom=0.05, left=0.05, right=0.95, hspace=0.35, wspace=0.6)
+
+                # exist_ok avoids a bare except that would also hide real failures
+                os.makedirs('images/', exist_ok=True)
+                plt.savefig('images/mnist_result_evaluation.jpg', format='jpg')
+                plt.show()
+                # drop the figure so the next round starts from an empty canvas
+                plt.close(fig)
+
+            time.sleep(EVAL_INTERVAL_SECS)
+
+
+def main(argv=None):
+    """Load MNIST with one-hot labels, print the split sizes and start the evaluation loop."""
+    data_sets = input_data.read_data_sets('../MNIST_data', one_hot=True)
+    print("basic information of mnist dataset")
+    size_report = (
+        ("mnist training data size: ", data_sets.train),
+        ("mnist validating data size: ", data_sets.validation),
+        ("mnist testing data size: ", data_sets.test),
+    )
+    for caption, split in size_report:
+        print(caption, split.num_examples)
+    evaluation(data_sets)
+
+
+if __name__ == '__main__':
+    tf.app.run()

+ 82 - 0
img_proc/mnist_inference.py

@@ -0,0 +1,82 @@
+# -*- coding: utf8 -*-
+import tensorflow as tf
+
+# define basic params
+INPUT_NODE = 784
+OUTPUT_NODE = 10
+
+IMAGE_SIZE = 28
+NUM_CHANNELS = 1
+NUM_LABELS = 10
+
+CONV1_DEPTH = 6
+CONV1_SIZE = 5
+
+CONV2_DEPTH = 16
+CONV2_SIZE = 5
+
+FC_SIZE = 84
+
+
+def inference(input_tensor, train, regularizer):
+    # Forward pass of a LeNet-5 style network: conv -> pool -> conv -> pool -> fc -> fc.
+    #   input_tensor: 4-D image batch; its last dimension must equal NUM_CHANNELS
+    #                 (the first convolution kernel is built with that input depth).
+    #   train:        when truthy, dropout (argument 0.5) is applied after the first fc layer.
+    #   regularizer:  optional callable; when not None it is applied to both fc weight
+    #                 matrices and the results are added to the 'losses' collection.
+    # Returns the un-normalised logits with NUM_LABELS columns (no softmax is applied here).
+    # NOTE(review): variable/scope names are inconsistent ('weight' vs 'weights',
+    # 'layer4-poll2'); they are presumably baked into existing checkpoints, so confirm
+    # before renaming anything.
+    # print(input_tensor.get_shape())
+    # define layer1 forward propagation
+    with tf.variable_scope('layer1-conv1'):
+        conv1_weights = tf.get_variable(
+            "weight", [CONV1_SIZE, CONV1_SIZE, NUM_CHANNELS, CONV1_DEPTH],
+            initializer=tf.truncated_normal_initializer(stddev=0.1)
+        )
+        conv1_biases = tf.get_variable("bias", [CONV1_DEPTH], initializer=tf.constant_initializer(0.0))
+        # the two middle entries of strides are the steps along height and width (both 1)
+        conv1 = tf.nn.conv2d(input_tensor, conv1_weights, strides=[1, 1, 1, 1], padding='SAME')
+        relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_biases))
+    # define layer2 forward propagation, max pooling, size 2*2, step 2*2, all 0 filling
+    with tf.variable_scope('layer2-pool1'):
+        pool1 = tf.nn.max_pool(relu1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
+    # print(pool1.get_shape())
+    with tf.variable_scope('layer3-conv2'):
+        conv2_weights = tf.get_variable(
+            "weight", [CONV2_SIZE, CONV2_SIZE, CONV1_DEPTH, CONV2_DEPTH],
+            initializer=tf.truncated_normal_initializer(stddev=0.1)
+        )
+        conv2_biases = tf.get_variable("bias", [CONV2_DEPTH], initializer=tf.constant_initializer(0.0))
+        # kernel CONV2_SIZE*CONV2_SIZE, depth CONV2_DEPTH, step 1, all 0 filling
+        conv2 = tf.nn.conv2d(pool1, conv2_weights, strides=[1, 1, 1, 1], padding='SAME')
+        relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_biases))
+
+    with tf.variable_scope('layer4-poll2'):
+        pool2 = tf.nn.max_pool(relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
+    # print(pool2.get_shape())
+    # pool_shape[0] means the num of data from a batch, get_shape->[num, width, height, depth]
+    pool_shape = pool2.get_shape().as_list()
+    # flattened feature count per sample (dimensions 1..3 must be statically known)
+    nodes = pool_shape[1] * pool_shape[2] * pool_shape[3]
+    # the batch dimension is read dynamically so a placeholder with batch size None also works
+    reshaped = tf.reshape(pool2, [tf.shape(pool2)[0], nodes])
+    # print(reshaped.get_shape())
+    with tf.variable_scope('layer5-fc1'):
+        fc1_weights = tf.get_variable(
+            'weights',
+            [nodes, FC_SIZE],
+            initializer=tf.truncated_normal_initializer(stddev=0.1)
+        )
+        # fc layer regularize
+        if regularizer is not None:
+            tf.add_to_collection('losses', regularizer(fc1_weights))
+        fc1_biases = tf.get_variable('bias', [FC_SIZE], initializer=tf.constant_initializer(0.1))
+
+        fc1 = tf.nn.relu(tf.matmul(reshaped, fc1_weights) + fc1_biases)
+        # dropout is only part of the graph when it is built for training
+        if train:
+            fc1 = tf.nn.dropout(fc1, 0.5)
+
+    with tf.variable_scope('layer6-fc2'):
+        fc2_weight = tf.get_variable(
+            'weight',
+            [FC_SIZE, NUM_LABELS],
+            initializer=tf.truncated_normal_initializer(stddev=0.1)
+        )
+        if regularizer is not None:
+            tf.add_to_collection('losses', regularizer(fc2_weight))
+        fc2_biases = tf.get_variable('bias', [NUM_LABELS], initializer=tf.constant_initializer(0.1))
+
+        logit = tf.matmul(fc1, fc2_weight) + fc2_biases
+
+    return logit

+ 140 - 0
img_proc/mnist_train.py

@@ -0,0 +1,140 @@
+# -*- coding: utf8 -*-
+import os
+
+import tensorflow as tf
+from tensorflow.examples.tutorials.mnist import input_data
+import numpy as np
+import matplotlib.pyplot as plt
+import mnist_inference
+
+# define input, output, batch and training params
+
+BATCH_SIZE = 50
+LEARNING_RATE_BASE = 0.8
+LEARNING_RATE_DECAY = 0.99
+REGULARIZATION_RATE = 0.0001
+TRAINING_STEPS = 10000
+MOVING_AVERAGE_DECAY = 0.99
+
+MODEL_SAVE_PATH = "model/"
+MODEL_NAME = "model.ckpt"
+score_filename = "accuracy_score_cnn.txt"
+
+
+# train a convolutional neural network
+def train(mnist, continue_train=False):
+    """Train the convolutional network on MNIST, checkpointing every 1000 steps.
+
+    Args:
+        mnist: dataset object returned by input_data.read_data_sets with
+            one-hot labels.
+        continue_train: when True, resume from the newest checkpoint in
+            MODEL_SAVE_PATH if one exists. When False, or when no checkpoint
+            is found, training starts from freshly initialised variables.
+    """
+    batch_shape = (BATCH_SIZE,
+                   mnist_inference.IMAGE_SIZE,
+                   mnist_inference.IMAGE_SIZE,
+                   mnist_inference.NUM_CHANNELS)
+    x = tf.placeholder(tf.float32, list(batch_shape), name='x-input')
+    y_ = tf.placeholder(tf.float32, [None, mnist_inference.OUTPUT_NODE], name='y-input')
+
+    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
+    y = mnist_inference.inference(x, True, regularizer)
+    global_step = tf.Variable(0, trainable=False)
+
+    # moving average, cross entropy, loss function with regularization and learning rate
+    variable_average = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
+    variable_average_op = variable_average.apply(tf.trainable_variables())
+    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
+    cross_entropy_mean = tf.reduce_mean(cross_entropy)
+    loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'))
+    learning_rate = tf.train.exponential_decay(
+        LEARNING_RATE_BASE,
+        global_step,
+        mnist.train.num_examples / BATCH_SIZE,
+        LEARNING_RATE_DECAY
+    )
+
+    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
+    # one op that both updates the weights and refreshes their moving averages
+    with tf.control_dependencies([train_step, variable_average_op]):
+        train_op = tf.no_op(name='train')
+
+    # define accuracy (tf.argmax replaces the deprecated tf.arg_max alias)
+    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
+    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
+
+    # initialize persistence class
+    saver = tf.train.Saver()
+
+    # The evaluation batch never changes, so build its feed once instead of per step.
+    # NOTE: it is taken from the test split although the log message says "validation".
+    test_feed = {
+        x: np.reshape(mnist.test.images[0:BATCH_SIZE], batch_shape),
+        y_: mnist.test.labels[0:BATCH_SIZE]}
+
+    config = tf.ConfigProto(allow_soft_placement=True)
+    config.gpu_options.allow_growth = True
+    with tf.Session(config=config) as sess:
+        restored = False
+        if continue_train:
+            ckpt = tf.train.get_checkpoint_state(MODEL_SAVE_PATH)
+            if ckpt and ckpt.model_checkpoint_path:
+                saver.restore(sess, ckpt.model_checkpoint_path)
+                restored = True
+        if not restored:
+            # Also covers continue_train=True with no checkpoint on disk, which
+            # previously left every variable uninitialised.
+            sess.run(tf.global_variables_initializer())
+
+        # create directory; exist_ok replaces a bare except that hid real errors
+        os.makedirs(MODEL_SAVE_PATH, exist_ok=True)
+
+        # one accuracy entry per 1000 training steps, aligned with dispImg's x axis
+        test_result = [0.0] * (TRAINING_STEPS // 1000)
+
+        for i in range(TRAINING_STEPS):
+            xs, ys = mnist.train.next_batch(BATCH_SIZE)
+            reshaped_xs = np.reshape(xs, batch_shape)
+
+            _, loss_value, step = sess.run([train_op, loss, global_step], feed_dict={x: reshaped_xs, y_: ys})
+
+            if i % 1000 == 0:
+                # evaluate only when the result is reported, not on every step
+                accuracy_score = sess.run(accuracy, feed_dict=test_feed)
+                test_result[i // 1000] = accuracy_score
+                print("after %d training step(s), loss on training batch is %g , validation accuracy = %g" % (
+                    step, loss_value, accuracy_score))
+                saver.save(
+                    sess, os.path.join(MODEL_SAVE_PATH, MODEL_NAME), global_step=global_step
+                )
+
+    # np.savetxt(score_filename, test_result, fmt="%0.4f")
+    #
+    # dispImg(test_result, 'accuracy_score')
+    # plt.show()
+
+
+def dispImg(test_result, filename):
+    """Plot accuracy scores and save the figure as images/<filename>.png.
+
+    Args:
+        test_result: sequence with one accuracy value per 1000 training
+            steps, i.e. TRAINING_STEPS // 1000 entries.
+        filename: base name (without extension) of the PNG written under
+            images/.
+    """
+    # draw a graph of accuracy using matplotlib
+    iteration_count = range(0, TRAINING_STEPS, 1000)
+    plt.figure(num=1, figsize=(15, 8))
+    plt.title("Plot accuracy", size=20)
+    plt.xlabel("iteration count", size=14)
+    plt.ylabel("accuracy/%", size=14)
+    # integer division: a float is not a valid sequence index on Python 3
+    last_index = TRAINING_STEPS // 1000 - 1
+    test_note = [TRAINING_STEPS - 1000, test_result[last_index]]
+    plt.annotate('test-' + str(test_note), xy=(test_note[0], test_note[1]),
+                 xytext=(test_note[0] + 1000, test_note[1] - 0.07), arrowprops=dict(facecolor='black', shrink=0.05))
+    plt.grid(True)
+    plt.plot(iteration_count, test_result, linestyle='-.', marker='X', label='test data')
+    plt.legend(loc="upper left")
+    # exist_ok replaces a bare except that reported every failure as "already exist"
+    os.makedirs('images/', exist_ok=True)
+    plt.savefig('images/%s.png' % filename, format='png')
+
+
+def main(argv=None):
+    """Entry point for tf.app.run: load MNIST with one-hot labels and train, resuming if possible."""
+    data_sets = input_data.read_data_sets("../MNIST_data", one_hot=True)
+    print("start")
+    train(data_sets, continue_train=True)
+
+
+if __name__ == '__main__':
+    tf.app.run()

+ 3 - 0
img_proc/model/checkpoint

@@ -0,0 +1,3 @@
+model_checkpoint_path: "model.ckpt-55000"
+all_model_checkpoint_paths: "model.ckpt-5500"
+all_model_checkpoint_paths: "model.ckpt-55000"

BIN
img_proc/model/model.ckpt-5500.data-00000-of-00001


BIN
img_proc/model/model.ckpt-5500.index


BIN
img_proc/model/model.ckpt-5500.meta


BIN
img_proc/model/model.ckpt-55000.data-00000-of-00001


BIN
img_proc/model/model.ckpt-55000.index


BIN
img_proc/model/model.ckpt-55000.meta


+ 465 - 1
img_proc/multiThread.py

@@ -2,6 +2,10 @@ import tensorflow as tf
 import numpy as np
 import threading
 import time
+import os
+import preprocessing
+import mnist_inference
+import matplotlib.pyplot as plt
 
 
 # ********** queue operation ***********
@@ -68,5 +72,465 @@ def threads_mgmt():
     coord.join(threads)
 
 
+def _int64_feature(value):
+    """Wrap a single integer in a tf.train.Feature backed by an Int64List."""
+    int64_list = tf.train.Int64List(value=[value])
+    return tf.train.Feature(int64_list=int64_list)
+
+
+def _bytes_feature(value):
+    """Wrap a single bytes value in a tf.train.Feature backed by a BytesList."""
+    bytes_list = tf.train.BytesList(value=[value])
+    return tf.train.Feature(bytes_list=bytes_list)
+
+
+# simulate big data situation
+def generate_files():
+    """Write a few small TFRecord shards under record/ to simulate a sharded dataset.
+
+    Each record holds two int64 features: 'i' (shard number) and 'j' (index of
+    the record inside its shard).
+    """
+    # how many files to write
+    num_shard = 3
+    # how much data in a file
+    instances_per_shard = 6
+    record_path = "record/"
+    # exist_ok replaces a bare except that reported every failure as "already exist"
+    os.makedirs(record_path, exist_ok=True)
+
+    # data 0000n-of-0000m, n means file No., m means how many files the data has been stored as
+    for i in range(num_shard):
+        filename = os.path.join(record_path, "data.tfrecords-%.5d-of-%.5d" % (i, num_shard))
+        # the context manager closes the writer even if a write fails
+        with tf.python_io.TFRecordWriter(filename) as writer:
+            for j in range(instances_per_shard):
+                example = tf.train.Example(features=tf.train.Features(feature={
+                    'i': _int64_feature(i),
+                    'j': _int64_feature(j)
+                }))
+                writer.write(example.SerializeToString())
+
+
+def read_files():
+    # Read the sharded demo records under record/ through the queue-based input
+    # pipeline, print 18 (i, j) pairs and return the parsed feature tensors.
+    # get the list of files
+    record_path = "record/"
+    files = tf.train.match_filenames_once(os.path.join(record_path, "data.tfrecords-*"))
+
+    # 1 epochs means 1 cycle
+    filename_queue = tf.train.string_input_producer(files, num_epochs=1, shuffle=True)
+
+    reader = tf.TFRecordReader()
+    _, serialized_example = reader.read(filename_queue)
+    features = tf.parse_single_example(
+        serialized_example,
+        features={
+            'i': tf.FixedLenFeature([], tf.int64),
+            'j': tf.FixedLenFeature([], tf.int64),
+        }
+    )
+
+    with tf.Session() as sess:
+        # match_filename_once() needs to be initialized
+        tf.local_variables_initializer().run()
+        print(sess.run(files))
+
+        coord = tf.train.Coordinator()
+        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
+        # 18 matches the 3 shards x 6 records that generate_files writes;
+        # presumably reading more would fail because num_epochs=1 - TODO confirm
+        for i in range(18):
+            print(sess.run([features['i'], features['j']]))
+        coord.request_stop()
+        coord.join(threads)
+
+    # the returned tensors belong to the default graph and can feed further ops
+    return features
+
+
+def batch_example():
+    # Demo: combine the single-record tensors from read_files() into shuffled
+    # batches of 3 and print 6 batches.
+    # NOTE: read_files() itself opens a session and prints the records before returning.
+    features = read_files()
+
+    print("____ end of read files _____")
+
+    example, label = features['i'], features['j']
+    batch_size = 3
+    # queue capacity, larger means more memory usage, smaller means can be blocked and less efficient
+    capacity = 1000 + 3 * batch_size
+    # example_batch, label_batch = tf.train.batch([example, label], batch_size=batch_size, capacity=capacity)
+    # min_after_dequeue represent the num of data needed for dequeue operation which is blocked when the num inadequate
+    example_batch, label_batch = tf.train.shuffle_batch([example, label], batch_size=batch_size, capacity=capacity,
+                                                        min_after_dequeue=6)
+
+    with tf.Session() as sess:
+        # local variables are initialised again because this is a new session
+        tf.local_variables_initializer().run()
+        coord = tf.train.Coordinator()
+        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
+
+        # combine
+        for i in range(6):
+            curr_exp_b, curr_lab_b = sess.run([example_batch, label_batch])
+            print(curr_exp_b, curr_lab_b, "lll")
+
+        coord.request_stop()
+        coord.join(threads)
+
+
+# ************* use inceptionV3 data to generate data for training **************
+def write_record(name, image, label):
+    """Serialise images and their labels into a single TFRecord file.
+
+    Args:
+        name: path of the TFRecord file to create.
+        image: sequence of array-like images; each must provide tobytes() and
+            a shape with at least three entries (read as height, width,
+            channels).
+        label: sequence of integer labels, indexed in parallel with image.
+    """
+    # the context manager closes the writer even if serialisation fails part-way
+    with tf.python_io.TFRecordWriter(name) as writer:
+        for index, img in enumerate(image):
+            height, width, channels = img.shape[0], img.shape[1], img.shape[2]
+            # convert img to str
+            image_raw = img.tobytes()
+            print(label[index])
+            print(height)
+            print(width)
+            print(channels)
+            # create Example Protocol Buffer
+            example = tf.train.Example(features=tf.train.Features(feature={
+                'image': _bytes_feature(image_raw),
+                'label': _int64_feature(label[index]),
+                'height': _int64_feature(height),
+                'width': _int64_feature(width),
+                'channels': _int64_feature(channels),
+            }))
+            writer.write(example.SerializeToString())
+
+
+def generate_record(output_filename="output_flower.tfrecords"):
+    # Convert two pre-processed .npy flower datasets into TFRecord files in the
+    # current working directory and print how many samples each contains.
+    # NOTE(review): output_filename is never used; the output names are hard-coded below.
+    # NOTE(review): the "training" data is loaded from validation_flower.npy and the
+    # "validation" data from test_flower.npy - confirm this mapping is intended.
+    input_data = "../inceptionv3/preprocess/validation_flower.npy"
+    # each .npy file is indexed as [0] = images, [1] = labels
+    processed_data = np.load(input_data, allow_pickle=True)
+    training_images = processed_data[0]
+    training_labels = processed_data[1]
+
+    input_data = "../inceptionv3/preprocess/test_flower.npy"
+    processed_data = np.load(input_data, allow_pickle=True)
+    validation_images = processed_data[0]
+    validation_labels = processed_data[1]
+
+    # NOTE(review): these files are written without the "record/" prefix that
+    # read_record's default pattern expects - verify against the caller.
+    write_record("output_flower_train.tfrecord", training_images, training_labels)
+    write_record("output_flower_validation.tfrecord", validation_images, validation_labels)
+
+    print("training_images: " + str(len(training_labels)))
+    print("validation_images: " + str(len(validation_labels)))
+
+
+def read_record(file_regex="record/output_flower_*.tfrecord"):
+    # Build queue-based ops that read one example at a time from the TFRecord
+    # files matching file_regex.
+    # Returns (decoded_img, label): the image bytes decoded as float32 and reshaped
+    # to [height, width, channels], and the label cast to int32.
+    # The caller must run the local-variable initializer and start queue runners.
+    files = tf.train.match_filenames_once(file_regex)
+    filename_queue = tf.train.string_input_producer(files, shuffle=False)
+
+    reader = tf.TFRecordReader()
+    _, serialized_example = reader.read(filename_queue)
+    features = tf.parse_single_example(
+        serialized_example,
+        features={
+            'image': tf.FixedLenFeature([], tf.string),
+            'label': tf.FixedLenFeature([], tf.int64),
+            'height': tf.FixedLenFeature([], tf.int64),
+            'width': tf.FixedLenFeature([], tf.int64),
+            'channels': tf.FixedLenFeature([], tf.int64)
+        })
+
+    image, label = features['image'], tf.cast(features['label'], tf.int32)
+    height, width = tf.cast(features['height'], tf.int32), tf.cast(features['width'], tf.int32)
+    channels = tf.cast(features['channels'], tf.int32)
+
+    # image decoding
+    # NOTE: decodes as float32, whereas tfrecord_parser decodes as uint8; the dtype
+    # must match how the bytes were written or the reshape below fails.
+    decoded_img = tf.decode_raw(image, tf.float32)
+    # decoded_img.set_shape(268203)
+    decoded_img = tf.reshape(decoded_img,
+                             shape=[height, width, channels])
+    return decoded_img, label
+
+
+def tfrecord_parser(record):
+    """Parse one serialised Example into (image, label) for use with Dataset.map.
+
+    The image bytes are decoded as uint8 and reshaped to
+    [height, width, channels] using the sizes stored in the record; the label
+    is cast to int32.
+    """
+    feature_spec = {
+        'image': tf.FixedLenFeature([], tf.string),
+        'label': tf.FixedLenFeature([], tf.int64),
+        'height': tf.FixedLenFeature([], tf.int64),
+        'width': tf.FixedLenFeature([], tf.int64),
+        'channels': tf.FixedLenFeature([], tf.int64)
+    }
+    parsed = tf.parse_single_example(record, features=feature_spec)
+
+    label = tf.cast(parsed['label'], tf.int32)
+    # the stored sizes drive the reshape of the flat byte buffer
+    image_shape = [tf.cast(parsed[key], tf.int32) for key in ('height', 'width', 'channels')]
+
+    # image decoding
+    flat_pixels = tf.decode_raw(parsed['image'], tf.uint8)
+    decoded_img = tf.reshape(flat_pixels, shape=image_shape)
+    return decoded_img, label
+
+
+# ** Decoding with the wrong image dtype (e.g. float32 for data stored as uint8, or vice versa) causes
+#    "Input to reshape is a tensor with xxx values, but the requested shape has xxx".
+#    float32 is usually used for training, whereas uint8 is more likely to be used for image storage.
+# ** A "must have 3 channels but has 1 channel" error is caused by image preprocessing that expects RGB input.
+def process_data(doTrain=True):
+    # Train (doTrain=True) or spot-check (doTrain=False) the mnist_inference network
+    # on MNIST data read from TFRecord files through the tf.data API.
+    #   doTrain=True : train until the training dataset is exhausted, print validation
+    #                  accuracy every 200 iterations, evaluate on the test set and save
+    #                  a checkpoint under model/.
+    #   doTrain=False: restore the newest checkpoint from model/ and print predictions
+    #                  for 10 consecutive test samples at a random offset.
+    # NOTE(review): num_of_labels, min_after_dequeue and training_rounds are only
+    # referenced by commented-out code below.
+    image_size = 28
+    num_channels = 1
+    num_of_labels = 10
+    min_after_dequeue = 2000
+    shuffle_buffer = 10000
+    num_epochs = 50  # same effect as training_rounds
+    batch_size = 500
+    training_rounds = 5000
+    training_images = 55000  # 362
+    validation_images = 5000  # 367
+    test_images = 10000
+    train_files = tf.train.match_filenames_once("record/mnist_train.tfrecord")
+    validation_files = tf.train.match_filenames_once("record/mnist_validation.tfrecord")
+    test_files = tf.train.match_filenames_once("record/mnist_test.tfrecord")
+
+    # ********** define neural network structure and forward propagation **********
+    learning_rate_base = 0.8
+    learning_rate_decay = 0.99
+    regularization_rate = 0.0001
+    moving_average_decay = 0.99
+    x = tf.placeholder(tf.float32, [None,
+                                    image_size,
+                                    image_size,
+                                    num_channels], name='x-input')
+    # labels are fed as plain class indices (not one-hot) and cast where needed
+    y_ = tf.placeholder(tf.float32, [None], name='y-input')
+    regularizer = tf.contrib.layers.l2_regularizer(regularization_rate)
+    # NOTE(review): the graph is built with train=True (dropout enabled in
+    # mnist_inference) and the same output is used for validation/test predictions.
+    y = mnist_inference.inference(x, True, regularizer)
+
+    global_step = tf.Variable(0, trainable=False)
+
+    # moving average, cross entropy, loss function with regularization and learning rate
+    variable_average = tf.train.ExponentialMovingAverage(moving_average_decay, global_step)
+    variable_average_op = variable_average.apply(tf.trainable_variables())
+    # calc loss
+    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.cast(y_, tf.int32))
+    cross_entropy_mean = tf.reduce_mean(cross_entropy)
+    loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'))
+    learning_rate = tf.train.exponential_decay(
+        learning_rate_base,
+        global_step,
+        training_images / batch_size,
+        learning_rate_decay
+    )
+
+    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
+    # one op that both updates the weights and refreshes their moving averages
+    with tf.control_dependencies([train_step, variable_average_op]):
+        train_op = tf.no_op(name='train')
+
+    # define accuracy
+    prediction = tf.argmax(y, 1)
+    answer = tf.cast(y_, tf.int64)
+    correct_prediction = tf.equal(tf.argmax(y, 1), tf.cast(y_, tf.int64))
+    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
+    # test_result = list(range(int(training_rounds / 500)))
+
+    # # ********** original tfrecord data operator **********
+    # decoded_img, label = read_record("record/mnist_train.tfrecord")
+    # # img preprocessing
+    # # distorted_img = tf.image.resize_images(decoded_img, [image_size, image_size], method=0)
+    # distorted_img = preprocessing.process_for_train(decoded_img, image_size, image_size, None, 1)
+    # distorted_img.set_shape([image_size, image_size, num_channels])
+    # # print(distorted_img.shape)
+    #
+    # # create batch
+    # total_sample = training_images + validation_images
+    # capacity = min_after_dequeue + batch_size * 3
+    # image_batch, label_batch = tf.train.shuffle_batch([distorted_img, label], batch_size=batch_size,
+    #                                               capacity=capacity, num_threads=64,
+    #                                               min_after_dequeue=min_after_dequeue)
+
+    # ********** tfrecord dataset **********
+    dataset = tf.data.TFRecordDataset(train_files)
+    dataset = dataset.map(tfrecord_parser)
+    # convert the uint8 image to float32, then apply the training-time preprocessing
+    # with channels=1
+    dataset = dataset.map(
+        lambda image, label: (
+            preprocessing.process_for_train(tf.image.convert_image_dtype(image, dtype=tf.float32), image_size,
+                                            image_size, None, 1), label
+        # tf.image.resize_images(tf.image.convert_image_dtype(image, dtype=tf.float32), [image_size, image_size]), label
+        ))
+    dataset = dataset.shuffle(shuffle_buffer).batch(batch_size)
+    dataset = dataset.repeat(num_epochs)
+    # match_filename_once has similar mechanism as placeholder
+    iterator = dataset.make_initializable_iterator()
+    image_batch, label_batch = iterator.get_next()
+
+    # ********** validation dataset **********
+    # no augmentation here: only dtype conversion and a resize to image_size
+    validation_dataset = tf.data.TFRecordDataset(validation_files)
+    validation_dataset = validation_dataset.map(tfrecord_parser).map(
+        lambda image, label: (
+            tf.image.resize_images(tf.image.convert_image_dtype(image, dtype=tf.float32), [image_size, image_size]),
+            label
+        ))
+    validation_dataset = validation_dataset.batch(validation_images)
+    # repeat(None): the validation batch can be fetched any number of times
+    validation_dataset = validation_dataset.repeat(None)
+    validation_iterator = validation_dataset.make_initializable_iterator()
+    validation_image_batch, validation_label_batch = validation_iterator.get_next()
+
+    # ********** test dataset **********
+    test_dataset = tf.data.TFRecordDataset(test_files)
+    test_dataset = test_dataset.map(tfrecord_parser).map(
+        lambda image, label: (
+            tf.image.resize_images(tf.image.convert_image_dtype(image, dtype=tf.float32), [image_size, image_size]),
+            label
+        ))
+    # not repeated, so the test iterator raises OutOfRangeError once it is exhausted
+    test_dataset = test_dataset.batch(test_images)
+    test_iterator = test_dataset.make_initializable_iterator()
+    test_image_batch, test_label_batch = test_iterator.get_next()
+
+    # logit = inference(image_batch)
+    # loss = calc_loss(logit, label_batch)
+    # train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
+
+    # initialize persistence class
+    saver = tf.train.Saver()
+    config = tf.ConfigProto(allow_soft_placement=True)
+    config.gpu_options.allow_growth = True
+    with tf.Session(config=config) as sess:
+        sess.run(tf.global_variables_initializer())
+        # local variables include the file lists created by match_filenames_once
+        sess.run(tf.local_variables_initializer())
+
+        # print(sess.run(tf.cast(features['label'], tf.int32)))
+        # NOTE(review): the active code above builds no queue-based input ops, so
+        # this coordinator/queue-runner pair presumably has nothing to manage - confirm.
+        coord = tf.train.Coordinator()
+        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
+        print("start training........")
+        # for i in range(training_rounds):
+        i = 0
+        step = 0
+        if doTrain:
+            sess.run(iterator.initializer)
+            sess.run(validation_iterator.initializer)
+            # train until the repeated training dataset runs out
+            while True:
+                # NOTE(review): i is incremented before the fetch that finally raises
+                # OutOfRangeError, so the total printed later is one more than the
+                # number of completed training steps.
+                i += 1
+                try:
+                    # img = sess.run(distorted_img)
+                    # plt.imshow(img)
+                    # plt.show()
+
+                    # pull a batch out of the dataset, then feed it through the placeholders
+                    xs, ys = sess.run([image_batch, label_batch])
+                    # print(xs.shape)
+                    # print(ys.shape)
+                    _, loss_value, step = sess.run([train_op, loss, global_step], feed_dict={x: xs, y_: ys})
+
+                    if i % 200 == 0:
+                        vxs, vys = sess.run([validation_image_batch, validation_label_batch])
+                        p, a, accuracy_score = sess.run([prediction, answer, accuracy], feed_dict={x: vxs, y_: vys})
+                        print("prediction: \t%s, \nanswer: \t\t%s" % (p[0:10], a[0:10]))
+                        print("after %d steps, loss: %.3f, accuracy: %.3f" % (step, loss_value, accuracy_score))
+                except tf.errors.OutOfRangeError:
+                    # i = step
+                    break
+            # evaluate on the whole test set, accumulating predictions and answers
+            sess.run(test_iterator.initializer)
+            tp = []
+            ta = []
+            while True:
+                try:
+                    txs, tys = sess.run([test_image_batch, test_label_batch])
+                    p, a = sess.run([prediction, answer], feed_dict={x: txs, y_: tys})
+                    tp.extend(p)
+                    ta.extend(a)
+                except tf.errors.OutOfRangeError:
+                    break
+
+            correct = [float(y == y_) for (y, y_) in zip(tp, ta)]
+            accuracy_score = sum(correct) / len(correct)
+            print("in total %d steps, total accuracy: %.3f" % (i, accuracy_score))
+            # NOTE(review): the bare except reports every mkdir failure as
+            # "directory already exist", including e.g. permission errors.
+            try:
+                os.mkdir("model/")
+            except:
+                print("directory already exist")
+            saver.save(
+                sess, os.path.join("model/", "model.ckpt"), global_step=global_step
+            )
+
+        else:
+
+            ckpt = tf.train.get_checkpoint_state("model/")
+            if ckpt and ckpt.model_checkpoint_path:
+                sess.run(test_iterator.initializer)
+                saver.restore(sess, ckpt.model_checkpoint_path)
+                # random offset between one third and one half of the test set
+                start = np.random.randint(int(test_images/3), int(test_images/2))
+                length = 10
+                # the whole test batch is fetched and then sliced down to `length` samples
+                txs, tys = sess.run([test_image_batch, test_label_batch])
+                p, a = sess.run([prediction, answer], feed_dict={x: txs[start:start+length], y_: tys[start:start+length]})
+                print("prediction: \t%s, \nanswer: \t\t%s" % (p, a))
+
+            else:
+                print("model not exist")
+        coord.request_stop()
+        coord.join(threads)
+
+
+# ************* dataset operation **************
+def parser(record):
+    """Parse one serialised Example and return its 'feat1' and 'feat2' int64 scalars."""
+    feature_spec = {
+        'feat1': tf.FixedLenFeature([], tf.int64),
+        'feat2': tf.FixedLenFeature([], tf.int64),
+    }
+    parsed = tf.parse_single_example(record, features=feature_spec)
+    feat1, feat2 = parsed['feat1'], parsed['feat2']
+    return feat1, feat2
+
+
+def dataset_basic_test():
+    # Walk through the basic tf.data usage patterns: dataset from an in-memory list,
+    # from TFRecord files, from a placeholder-fed file list, and the high-level
+    # map/shuffle/batch/repeat chain.
+    # NOTE(review): "file1"/"file2" look like placeholder file names; the sessions
+    # below presumably fail unless such TFRecord files exist - confirm.
+    # build a dataset from a tensor
+    input_data = [1, 2, 3, 5, 8]
+    dataset = tf.data.Dataset.from_tensor_slices(input_data)
+    # traverse dataset
+    iterator = dataset.make_one_shot_iterator()
+    x = iterator.get_next()
+    y = x * x
+
+    # build a dataset from text files
+    # input_files = ["file1", "file2"]
+    # dataset = tf.data.TextLineDataset(input_files)
+
+    # build a dataset from tfrecord files
+    input_files = ["file1", "file2"]
+    dataset = tf.data.TFRecordDataset(input_files)
+    # call parser and replace each element with returned value
+    dataset = dataset.map(parser)
+    # make_one_shot_iterator requires every parameter to be fixed; with a placeholder use initializable_iterator
+    # reinitializable_iterator, initialize multiple times for different data source
+    # feedable_iterator, use feed_dict to assign iterators to run
+    iterator = dataset.make_one_shot_iterator()
+    feat1, feat2 = iterator.get_next()
+
+    with tf.Session() as sess:
+        # for i in range(len(input_data)):
+        #     print(sess.run(y))
+
+        for i in range(10):
+            f1, f2 = sess.run([feat1, feat2])
+
+    # build a dataset from tfrecord files, placeholder
+    input_files = tf.placeholder(tf.string)
+    dataset = tf.data.TFRecordDataset(input_files)
+    dataset = dataset.map(parser)
+    iterator = dataset.make_initializable_iterator()
+    feat1, feat2 = iterator.get_next()
+
+    with tf.Session() as sess:
+        # the file list is only supplied when the iterator is initialised
+        sess.run(iterator.initializer, feed_dict={
+            input_files: ["file1", "file2"]
+        })
+        # read until the dataset is exhausted
+        while True:
+            try:
+                sess.run([feat1, feat2])
+            except tf.errors.OutOfRangeError:
+                break
+
+    # dataset high level API
+    image_size = 299
+    buffer_size = 1000  # min_after_dequeue
+    batch_size = 100
+    N = 10  # num_epoch
+    # NOTE(review): at this point the dataset yields (feat1, feat2) pairs, while the
+    # lambda takes a single argument and passes it on as an image - looks like
+    # illustrative code only; confirm before running.
+    dataset = dataset.map(
+        lambda x: preprocessing.process_for_train(x, image_size, image_size, None)
+    )
+    dataset = dataset.shuffle(buffer_size=buffer_size)
+    dataset = dataset.batch(batch_size=batch_size)
+    dataset = dataset.repeat(N)
+
+
 if __name__ == '__main__':
-    threads_mgmt()
+    # threads_mgmt()
+    # generate_files()
+    # read_files()
+    # batch_example()
+    # process_data()
+    # generate_record()
+    process_data(doTrain=False)
+    # dataset_basic_test()

BIN
img_proc/output_mnist.tfrecords


+ 7 - 3
img_proc/preprocessing.py

@@ -101,7 +101,7 @@ def distort_color(image, color_ordering=0):
     return tf.clip_by_value(image, 0.0, 1.0)
 
 
-def process_for_train(image, height, width, bbox):
+def process_for_train(image, height, width, bbox, channels=3):
     if bbox is None:
         bbox = tf.constant([0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4])
 
@@ -119,7 +119,11 @@ def process_for_train(image, height, width, bbox):
     # flip img
     distorted_img = tf.image.random_flip_left_right(distorted_img)
     distorted_img = tf.image.random_flip_up_down(distorted_img)
-    distorted_img = distort_color(distorted_img, np.random.randint(3))
+    if channels == 3:
+        distorted_img = distort_color(distorted_img, np.random.randint(3))
+    # distorted_img = tf.image.convert_image_dtype(distorted_img, dtype=tf.uint8)
+    # print(distorted_img.shape)
+    distorted_img.set_shape([height, width, channels])
     return distorted_img
 
 
@@ -132,7 +136,7 @@ def main():
 
         for i in range(6):
             plt.figure(i)
-            result = process_for_train(img_data, 500,300,boxes)
+            result = process_for_train(img_data, 500, 300, boxes)
             plt.imshow(result.eval())
 
         plt.show()

+ 0 - 0
img_proc/record/__init__.py


BIN
img_proc/record/data.tfrecords-00000-of-00003


BIN
img_proc/record/data.tfrecords-00001-of-00003


BIN
img_proc/record/data.tfrecords-00002-of-00003


+ 32 - 9
img_proc/tfRecordExample.py

@@ -18,23 +18,44 @@ def _bytes_feature(value):
     return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
 
 
-def save_mnist_record(output_filename="output_mnist.tfrecords"):
+def save_mnist_record(dataset=0, output_filename="record/output_mnist.tfrecords"):
     mnist = input_data.read_data_sets("../MNIST_data", dtype=tf.uint8, one_hot=True)
-    images = mnist.train.images
-    labels = mnist.train.labels
+    images = []
+    labels = []
+    num_examples=0
+    if dataset == 0:
+        images = mnist.train.images
+        labels = mnist.train.labels
+        num_examples = mnist.train.num_examples
+    elif dataset == 1:
+        images = mnist.validation.images
+        labels = mnist.validation.labels
+        num_examples = mnist.validation.num_examples
+    elif dataset == 2:
+        images = mnist.test.images
+        labels = mnist.test.labels
+        num_examples = mnist.test.num_examples
+    print(num_examples)
     # define resolution
-    pixels = images.shape[1]
-    num_examples = mnist.train.num_examples
+    # pixels = images.shape[1]
+    # print(images[0].shape)
 
     writer = tf.python_io.TFRecordWriter(output_filename)
     for index in range(num_examples):
         # convert img to str
         image_raw = images[index].tostring()
         # create Example Protocol Buffer
+        # example = tf.train.Example(features=tf.train.Features(feature={
+        #     'pixels': _int64_feature(pixels),
+        #     'label': _int64_feature(np.argmax(labels[index])),
+        #     'image_raw': _bytes_feature(image_raw)
+        # }))
         example = tf.train.Example(features=tf.train.Features(feature={
-            'pixels': _int64_feature(pixels),
+            'image': _bytes_feature(image_raw),
             'label': _int64_feature(np.argmax(labels[index])),
-            'image_raw': _bytes_feature(image_raw)
+            'height': _int64_feature(28),
+            'width': _int64_feature(28),
+            'channels': _int64_feature(1),
         }))
         writer.write(example.SerializeToString())
     writer.close()
@@ -74,8 +95,10 @@ def read_mnist_record(input_filename="output_mnist.tfrecords"):
 
 
 def main():
-    # save_mnist_record()
-    read_mnist_record()
+    save_mnist_record(0, "record/mnist_train.tfrecord")
+    save_mnist_record(1, "record/mnist_validation.tfrecord")
+    save_mnist_record(2, "record/mnist_test.tfrecord")
+    # read_mnist_record()
 
 
 if __name__ == '__main__':