python - Simple Autoencoder on TensorFlow using Pokémon images
I have implemented a simple autoencoder, following this reference closely: https://hackernoon.com/how-to-autoencode-your-pok%c3%a9mon-6b0f5c7b7d97 . The results are different from what I expected (as I am unable to post images here, please check them out on GitHub: https://github.com/notha99y/autoencoder).
Does anyone know why the model does not seem to be learning?
Thanks in advance.
My implementation is:
1) Flatten out the images: (None, 64, 64, 3) => (None, 12288)
2) Normalize by subtracting the mean and dividing by the standard deviation (see the sketch after this list)
3) Encoding: 12288 => 1024 => 64 => 4. Decoding: 4 => 64 => 1024 => 12288
4) Multiply by the standard deviation and add the mean back
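For reference, a minimal NumPy sketch of the flatten/normalize/denormalize round trip described in steps 1, 2 and 4. The batch shape and the random `images` array here are just placeholders, and the small epsilon is an addition for this sketch only (my actual code below divides by the raw stdev):

import numpy as np

# Placeholder batch: 100 RGB images of 64x64 pixels with random pixel values
images = np.random.randint(0, 256, size=(100, 64, 64, 3)).astype(np.float32)

# Step 2: per-pixel normalization using the batch mean and stdev
mean = np.mean(images, axis=0)
stdev = np.std(images, axis=0) + 1e-8   # epsilon added here to avoid division by zero
normalized = (images - mean) / stdev

# Step 1: flatten (100, 64, 64, 3) => (100, 12288) before feeding the network
flat = normalized.reshape(-1, 64 * 64 * 3)

# Step 4: undo the normalization on a reconstruction of the same shape
recon = flat.reshape(-1, 64, 64, 3)     # stand-in for the decoder output
restored = recon * stdev + mean
assert np.allclose(restored, images, atol=1e-2)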
Code:
import numpy as np
import matplotlib.pyplot as plt
import os
from scipy.misc import imread
import time
from sklearn.utils import shuffle
import tensorflow as tf

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

def getimages(batchsz=100, random=False):
    '''Accesses the directory holding the Pokemon and returns the images as an
    array of shape (batchsz, width, breadth, channels).
    If random is set to True, the function takes a randomly selected set of images.
    '''
    directory = 'data/pokemon/jpeg/'
    names = os.listdir(directory)
    if random == True:
        np.random.shuffle(names)
    filenames = [os.path.join(directory, name) for name in names if '.jpg' in name]
    if batchsz == 'all':
        labels = [x.replace('.jpg', '') for x in names]
        images = np.array([plt.imread(x) for x in filenames])
    else:
        labels = [x.replace('.jpg', '') for x in names[:batchsz]]
        images = np.array([plt.imread(x) for x in filenames[:batchsz]])
    return images, labels

def montage(images, labels, title=None):
    '''Plots a montage of the first 100 images with their corresponding labels.'''
    f, axs = plt.subplots(10, 10, sharex='all', sharey='all')
    f.suptitle(title)
    axs = axs.ravel()
    for i in range(100):
        axs[i].imshow(images[i].astype(np.uint8))
        axs[i].set_title(labels[i], fontsize='6')
        axs[i].axis('off')

def meanimage(images):
    '''Takes in a set of images and plots the mean image.'''
    mean = np.mean(images, axis=0).astype(np.uint8)
    plt.figure()
    plt.imshow(mean)

def stdimage(images):
    '''Takes in a set of images and plots the stdev image.'''
    stdev = np.std(images, axis=0).astype(np.uint8)
    plt.figure()
    plt.imshow(stdev)

def preprocess(images, labels, allimages):
    '''Preprocesses images by subtracting the mean and dividing by the stdev.'''
    mean = np.mean(allimages, axis=0)
    stdev = np.std(allimages, axis=0)
    newimages = (images - mean) / stdev
    return newimages

def postprocess(allimages, labels, normimag, plot=False, epoch=None):
    '''Postprocesses images by multiplying by the stdev and adding back the mean.'''
    mean = np.mean(allimages, axis=0)
    stdev = np.std(allimages, axis=0)
    newimages = normimag * stdev + mean
    # clipped = np.clip(newimages, 0, 255)
    if plot == True:
        f, axs = plt.subplots(10, 10, sharex='all', sharey='all')
        f.suptitle('Epoch: {}'.format(epoch))
        axs = axs.ravel()
        for i in range(100):
            axs[i].imshow(newimages[i].astype(np.uint8))
            axs[i].set_title(labels[i], fontsize='6')
            axs[i].axis('off')
    return newimages

def autoencoder(allimages, alllabels):
    '''12288 => 1024 => 64 => 4 => 64 => 1024 => 12288'''
    # Placeholders
    x = tf.placeholder(tf.float32, shape=[None, 12288], name='inputs')

    # Encoding weights (Xavier initialization)
    w1 = tf.get_variable(name='encoder1', shape=[12288, 1024], dtype=tf.float32,
                         initializer=tf.contrib.layers.xavier_initializer())
    w2 = tf.get_variable(name='encoder2', shape=[1024, 64], dtype=tf.float32,
                         initializer=tf.contrib.layers.xavier_initializer())
    w3 = tf.get_variable(name='encoder3', shape=[64, 4], dtype=tf.float32,
                         initializer=tf.contrib.layers.xavier_initializer())
    # w1 = tf.Variable(tf.truncated_normal(shape=[12288, 1024], stddev=0.1, name='encoder1'))
    # w2 = tf.Variable(tf.truncated_normal(shape=[1024, 64], stddev=0.1, name='encoder2'))
    # w3 = tf.Variable(tf.truncated_normal(shape=[64, 4], stddev=0.1, name='encoder3'))
    b1 = tf.Variable(tf.constant(0.0, shape=[1024], name='encoderbias1'))
    b2 = tf.Variable(tf.constant(0.0, shape=[64], name='encoderbias2'))
    b3 = tf.Variable(tf.constant(0.0, shape=[4], name='encoderbias3'))

    # Decoding weights (tied to the encoder weights by transposition)
    w4 = tf.transpose(w3, name='decoder1')
    w5 = tf.transpose(w2, name='decoder2')
    w6 = tf.transpose(w1, name='decoder3')
    b4 = tf.Variable(tf.constant(0.0, shape=[64], name='decoderbias1'))
    b5 = tf.Variable(tf.constant(0.0, shape=[1024], name='decoderbias2'))
    b6 = tf.Variable(tf.constant(0.0, shape=[12288], name='decoderbias3'))

    # Operations
    initop = tf.global_variables_initializer()
    encode1 = tf.nn.relu(tf.matmul(x, w1) + b1, name='reluencode1')        # 12288 => 1024
    encode2 = tf.nn.relu(tf.matmul(encode1, w2) + b2, name='reluencode2')  # 1024 => 64
    encode3 = tf.nn.relu(tf.matmul(encode2, w3) + b3, name='reluencode3')  # 64 => 4
    # encode1 = tf.matmul(x, w1) + b1        # 12288 => 1024 (linear variant)
    # encode2 = tf.matmul(encode1, w2) + b2  # 1024 => 64
    # encode3 = tf.matmul(encode2, w3) + b3  # 64 => 4
    decode1 = tf.nn.relu(tf.add(tf.matmul(encode3, w4), b4), name='reludecode1')  # 4 => 64
    decode2 = tf.nn.relu(tf.add(tf.matmul(decode1, w5), b5), name='reludecode2')  # 64 => 1024
    y = tf.nn.relu(tf.add(tf.matmul(decode2, w6), b6), name='outputs')            # 1024 => 12288

    # Checking shape
    print('Shape of w1: ', w1.shape)

    # Training
    cost = tf.reduce_mean(tf.squared_difference(x, y), name='costfunction')
    train = tf.train.GradientDescentOptimizer(learning_rate=10e-3).minimize(cost)

    # Session
    with tf.Session() as sess:
        tic = time.time()
        sess.run(initop)
        # Sanity check that the tied decoder weights really are the transposed encoder weights
        assert np.allclose(w1.eval(), tf.transpose(w6).eval()), \
            'w1: {} w6T: {}'.format(w1.eval(), tf.transpose(w6).eval())
        err = []
        epoch = 3001
        firstbatch = getimages(100, True)
        for i in range(epoch):
            images, labels = getimages(100, True)
            inputimages = preprocess(images, labels, allimages).reshape([-1, 12288])
            error = cost.eval(feed_dict={x: inputimages})
            err.append(error)
            if i % 100 == 0:
                toc = time.time()
                print('Epoch: ', i, 'Error: ', error, 'Time elapsed: ', toc - tic)
                recon = y.eval(feed_dict={x: firstbatch[0].reshape([-1, 12288])}).reshape([-1, 64, 64, 3])
                output = postprocess(allimages, firstbatch[1], recon, True, i)
            sess.run(train, feed_dict={x: inputimages})

        originalplot = montage(firstbatch[0], firstbatch[1], 'Original')
        plt.show()
        toc = time.time()

    plt.plot(err)
    plt.xlabel('Epoch')
    plt.ylabel('Mean squared error')
    plt.title('Autoencoder')
    plt.show()

allimages, alllabels = getimages('all')
testautoencoder = autoencoder(allimages, alllabels)