# -*- coding: utf-8 -*-
import glob
import os.path

import numpy as np
import tensorflow as tf
from tensorflow.python.platform import gfile

# input directory of the raw flower_photos dataset and output file
# for the preprocessed numpy arrays
INPUT_DATA = "../../dataset/flower_photos"
OUTPUT_FILE = "preprocess/flower_processed_data.npy"

# percentage of images held out for validation and for testing
VALIDATION_PERCENTAGE = 10
TEST_PERCENTAGE = 10

def create_image_lists(sess, testing_percentage, validation_percentage):
    # os.walk returns the root directory first, e.g.
    # ['../../dataset/flower_photos', '../../dataset/flower_photos/daisy',
    #  '../../dataset/flower_photos/tulips', '../../dataset/flower_photos/dandelion',
    #  '../../dataset/flower_photos/sunflowers', '../../dataset/flower_photos/roses']
    subdirs = [x[0] for x in os.walk(INPUT_DATA)]
    # print(subdirs)
    is_root_dir = True
    count = 0

    # initialize the datasets
    training_images = []
    training_labels = []
    testing_images = []
    testing_labels = []
    validation_images = []
    validation_labels = []
    current_label = 0

    # read all sub-directories; each one holds the images of one class
    for sub_dir in subdirs:
        # the first entry returned by os.walk is the root directory itself
        if is_root_dir:
            is_root_dir = False
            continue

        # find all image files in the sub-directory
        extensions = ['jpg', 'jpeg', 'JPG', 'JPEG']
        file_list = []
        dir_name = os.path.basename(sub_dir)
        # print(dir_name)
        for extension in extensions:
            file_glob = os.path.join(INPUT_DATA, dir_name, '*.' + extension)
            file_list.extend(glob.glob(file_glob))
        if not file_list:
            continue

        # decode, convert and resize every image of this class
        for file_name in file_list:
            print(str(current_label) + "\t" + file_name + "\t\t" + str(count))
            count += 1
            image_raw_data = gfile.FastGFile(file_name, 'rb').read()
            image = tf.image.decode_jpeg(image_raw_data)
            if image.dtype != tf.float32:
                image = tf.image.convert_image_dtype(image, dtype=tf.float32)
            image = tf.image.resize_images(image, [299, 299])
            image_value = sess.run(image)

            # split the dataset randomly
            chance = np.random.randint(100)
            if chance < validation_percentage:
                validation_images.append(image_value)
                validation_labels.append(current_label)
            elif chance < (validation_percentage + testing_percentage):
                testing_images.append(image_value)
                testing_labels.append(current_label)
            else:
                training_images.append(image_value)
                training_labels.append(current_label)
        current_label += 1

    # shuffle images and labels with the same random state so that
    # image/label pairs stay aligned
    state = np.random.get_state()
    np.random.shuffle(training_images)
    np.random.set_state(state)
    np.random.shuffle(training_labels)

    # dtype=object is needed because the six lists have different lengths
    return np.asarray(
        [training_images, training_labels, validation_images, validation_labels,
         testing_images, testing_labels], dtype=object)

def main():
    with tf.Session() as sess:
        # preprocess all images and save the six arrays into one .npy file
        processed_data = create_image_lists(sess, TEST_PERCENTAGE, VALIDATION_PERCENTAGE)
        # make sure the output directory exists before saving
        os.makedirs(os.path.dirname(OUTPUT_FILE), exist_ok=True)
        np.save(OUTPUT_FILE, processed_data)


if __name__ == '__main__':
    main()
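
# A minimal sketch (an assumption, not part of the original script) of how a
# training script could load the saved file back. allow_pickle=True is
# required in newer NumPy versions because the saved array has dtype=object.
# processed_data = np.load(OUTPUT_FILE, allow_pickle=True)
# training_images, training_labels = processed_data[0], processed_data[1]
# validation_images, validation_labels = processed_data[2], processed_data[3]
# testing_images, testing_labels = processed_data[4], processed_data[5]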

# quick demo: shuffling two lists with the same random state keeps them aligned
# a = [1, 2, 3, 4, 5, 6, 7, 8, 9]
# b = [9, 8, 7, 6, 5, 4, 3, 2, 1]
# state = np.random.get_state()
# np.random.shuffle(a)
# np.random.set_state(state)
# np.random.shuffle(b)
# print(a)
# print(b)
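
# An equivalent alternative (just a sketch for comparison, not used above):
# shuffle via one permutation of indices instead of get_state/set_state.
# perm = np.random.permutation(len(a))
# a = [a[i] for i in perm]
# b = [b[i] for i in perm]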