Official tutorial: https://www.tensorflow.org/tutorials/
Learning notebook for Udacity's deep learning course.
Install TensorFlow: https://www.tensorflow.org/install/
Implementation
* scikit-learn implementation
* notMNIST dataset: http://yaroslavvb.blogspot.com/2011/09/notmnist-dataset.html
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from six.moves import cPickle as pickle
from sklearn.metrics import accuracy_score, classification_report
%matplotlib inline
pickle_file = 'notMNIST.pickle'
with open(pickle_file, 'rb') as f:
    save = pickle.load(f)
    train_dataset = save['train_dataset']
    train_labels = save['train_labels']
    valid_dataset = save['valid_dataset']
    valid_labels = save['valid_labels']
    test_dataset = save['test_dataset']
    test_labels = save['test_labels']
    del save  # hint to help gc free up memory
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)
plt.figure(figsize=(10,10))
print (train_labels[:12])
for i in range(12):
    plt.subplot(1, 12, i + 1)
    plt.imshow(train_dataset[i], cmap='gray')
    plt.axis('off')
Reformat into a shape that's more adapted to the models we're going to train:
valid_labels
image_size = 28
num_labels = 10
def reformat(dataset, labels):
    dataset = dataset.reshape((-1, image_size * image_size)).astype(np.float32)
    # Map 0 to [1.0, 0.0, 0.0 ...], 1 to [0.0, 1.0, 0.0 ...]
    labels = pd.get_dummies(labels).values.astype(np.float32)
    # labels = (np.arange(num_labels) == labels[:, None]).astype(np.float32)
    return dataset, labels
train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)
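As a quick sanity check of the one-hot encoding, the same mapping can be applied to a toy label array (a minimal sketch; the values here are illustrative, not from the dataset):
toy_labels = np.array([0, 2, 1])
print(pd.get_dummies(toy_labels).values.astype(np.float32))
# [[1. 0. 0.]
#  [0. 0. 1.]
#  [0. 1. 0.]]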
plt.figure(figsize=(10,10))
plt.imshow(plt.imread('2-1.jpg'))
graph = tf.Graph()
### Variables
The difference is that with tf.Variable you have to provide an initial value when you declare it; a placeholder is instead fed actual data at run time.

### Training computation
The loss function is the mean cross-entropy across all training examples.

### TensorFlow functions
tf.nn.softmax_cross_entropy_with_logits(labels=..., logits=...) computes the softmax cross entropy between the logits and the labels.
tf.nn.softmax(logits) turns the logits into predicted probabilities.

### Graph
Operations are added to the default graph inside a with block:
with graph.as_default():
    pass
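A small sketch of these pieces in isolation (TensorFlow 1.x style, matching the code below; the variable names and toy values are made up for illustration):
demo_graph = tf.Graph()
with demo_graph.as_default():
    w = tf.Variable(tf.zeros([3]))                    # a Variable needs an initial value
    x = tf.placeholder(tf.float32, shape=(None, 3))   # a placeholder is fed via feed_dict at run time
    y = tf.placeholder(tf.float32, shape=(None, 3))
    demo_logits = x + w
    xent = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=demo_logits)
    probs = tf.nn.softmax(demo_logits)
with tf.Session(graph=demo_graph) as s:
    tf.global_variables_initializer().run()
    print(s.run([xent, probs], feed_dict={x: [[1., 2., 3.]], y: [[0., 0., 1.]]}))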
batch_size = 128
beta = 0.001
with graph.as_default():
    # Input data.
    # For the training data, we use a placeholder that will be fed at run time.
    tf_train_dataset = tf.placeholder(tf.float32, shape=(None, image_size * image_size))
    tf_train_labels = tf.placeholder(tf.float32, shape=(None, num_labels))
    # Load the validation data into constants that are attached to the graph.
    tf_valid_dataset = tf.constant(valid_dataset)
    # Load the test data into constants that are attached to the graph.
    tf_test_dataset = tf.constant(test_dataset)

    # Variables.
    # These are the parameters that we are going to be training.
    # The weight matrix will be initialized using random values following a (truncated) normal distribution.
    # The biases get initialized to zero.
    weights = tf.Variable(tf.truncated_normal(shape=[image_size * image_size, num_labels]))
    biases = tf.Variable(tf.zeros(shape=[num_labels]))

    # Training computation.
    # We multiply the inputs with the weight matrix, and add biases.
    # We compute the softmax and cross-entropy
    # (it's one operation in TensorFlow, because it's very common, and it can be optimized).
    # We take the average of this cross-entropy across all training examples: that's our loss.
    # tf.matmul(t1, t2): matrix multiply
    # add dropout
    def kernel(in_put):
        # global weights, biases
        # Note: tf.nn.dropout with keep_prob=1 is a no-op, so this is effectively matmul + bias.
        out_put = tf.matmul(in_put, weights) + tf.nn.dropout(biases, 1)
        return out_put

    logits = kernel(tf_train_dataset)
    # tf.reduce_mean(t): computes the mean of elements across dimensions of a tensor.
    # tf.nn.softmax_cross_entropy_with_logits(labels=, logits=): computes softmax cross entropy between logits and labels.
    # An L2 penalty on the weights and biases, scaled by beta, is added for regularization.
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=logits)
        + beta * (tf.nn.l2_loss(weights) + tf.nn.l2_loss(biases))
    )

    # Optimizer.
    # We are going to find the minimum of this loss using gradient descent.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(loss)

    # Predictions for the training, validation, and test data.
    # These are not part of training, but merely here so that we can report accuracy figures as we train.
    train_prediction = tf.nn.softmax(logits)
    valid_prediction = tf.nn.softmax(kernel(tf_valid_dataset))
    test_prediction = tf.nn.softmax(kernel(tf_test_dataset))
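Since the dropout call above is applied to the biases with keep_prob=1 (which does nothing), here is the more common wiring for reference (a sketch only, not part of the graph above; hidden and keep_prob are hypothetical names):
with tf.Graph().as_default():
    keep_prob = tf.placeholder(tf.float32)
    hidden = tf.placeholder(tf.float32, shape=(None, 1024))   # stand-in for hidden-layer activations
    hidden_dropped = tf.nn.dropout(hidden, keep_prob)          # zeroes units at random, scales survivors by 1/keep_prob
    # At training time feed keep_prob=0.5; at validation/test time feed keep_prob=1.0.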
# def accuracy(predictions, labels):
#     return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
#             / predictions.shape[0])
# # accuracy_score?
def accuracy(labels, predictions):
    from sklearn.metrics import accuracy_score
    percent = accuracy_score(
        np.argmax(labels, 1), np.argmax(predictions, 1)
    ) * 100
    return percent
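A quick check of the helper on toy one-hot arrays (illustrative values only):
toy_true = np.array([[1., 0.], [0., 1.], [1., 0.]])
toy_pred = np.array([[0.9, 0.1], [0.2, 0.8], [0.3, 0.7]])
print(accuracy(toy_true, toy_pred))  # 2 of 3 argmaxes match -> 66.67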
Then you can run the operations on this graph as many times as you want by calling session.run(), providing it outputs to fetch from the graph that get returned. This runtime operation is all contained in the block below:
with tf.Session(graph=graph) as session:
    ...
tf.placeholder creates a Placeholder node which will be fed actual data at every call of session.run(), and tf.global_variables_initializer().run() initializes the variables once the session starts.

### session.run(fetches, feed_dict)
run() executes every Operation and evaluates every Tensor in fetches, substituting the values in feed_dict for the corresponding input values.

The fetches argument can contain:
* an Operation: the corresponding fetched value will be None.
* a Tensor: the corresponding fetched value will be a numpy ndarray containing the value of that tensor.
* a SparseTensor: the corresponding fetched value will be a SparseTensorValue containing the value of that sparse tensor.
* a get_tensor_handle op: the corresponding fetched value will be a numpy ndarray containing the handle of that tensor.
* a string which is the name of a tensor or operation in the graph.

The value returned by run() has the same shape as the fetches argument.
train_dataset.shape[0]
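A small sketch of what the fetch types return, run against the graph defined above (the printed loss value depends on the random initialization, so no output is shown):
with tf.Session(graph=graph) as demo_session:
    init_fetch = demo_session.run(tf.global_variables_initializer())   # an Operation -> None
    print(init_fetch)
    loss_fetch = demo_session.run(loss, feed_dict={tf_train_dataset: train_dataset[:8],
                                                   tf_train_labels: train_labels[:8]})
    print(type(loss_fetch), loss_fetch)                                # a Tensor -> a numpy value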
batch_size = 128
num_steps = 10001
for i in range(num_steps):
    c = (i * batch_size) % (train_labels.shape[0] - batch_size)
    if i % 1000 == 0:
        print(c, end=',')
print(c)
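Since the offset wraps around modulo train_labels.shape[0] - batch_size, roughly this many steps cover the training set once (a rough estimate based on the shapes printed earlier):
steps_per_epoch = (train_labels.shape[0] - batch_size) // batch_size
print(steps_per_epoch)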
batch_size = 128
num_steps = 40001
from time import time
start = time()

with tf.Session(graph=graph) as session:
    tf.global_variables_initializer().run()
    print("Initialized")
    for step in range(num_steps):
        # Pick an offset within the training data, which has been randomized.
        # Note: we could use better randomization across epochs.
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        # Generate a minibatch.
        batch_data = train_dataset[offset:(offset + batch_size), :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        # Prepare a dictionary telling the session where to feed the minibatch.
        # The key of the dictionary is the placeholder node of the graph to be fed,
        # and the value is the numpy array to feed to it.
        feed_dict = {tf_train_dataset: batch_data,
                     tf_train_labels: batch_labels}
        _, l, predictions = session.run([optimizer, loss, train_prediction], feed_dict=feed_dict)
        if step % 5000 == 0:
            print("Minibatch loss at step %d: %f" % (step, l))
            print("Minibatch accuracy: %.1f%%" % accuracy(batch_labels, predictions))
            print("Validation accuracy: %.1f%%" % accuracy(valid_labels, valid_prediction.eval()))
    print('\ntesting')
    pred, real = test_prediction.eval(), test_labels
    print("Test accuracy: %.1f%%" % accuracy(real, pred))
print('\ntotal time: %.2f s' % (time() - start))
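classification_report was imported above but never used; a possible per-class summary of the test predictions (a sketch, run after the session above has produced pred and real):
print(classification_report(real.argmax(1), pred.argmax(1)))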
batch_size = 128
num_steps = 8001
letters = list('ABCDEFGHIJ')  # notMNIST has 10 classes, the letters A through J
letters
letters[0]
dic = {i: letters[i] for i in range(len(letters))}
print(dic)
plt.figure(figsize=(10, 10))
for i in range(12):
    plt.subplot(1, 12, i + 1)
    plt.imshow(test_dataset[i].reshape(28, 28), cmap='gray')
    plt.axis('off')
for i in pd.Series(pred.argmax(1)).map(dic)[:12]: print(i, end=' ')
for i in pd.Series(real.argmax(1)).map(dic)[:12]: print(i, end=' ')
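To flag where the predicted letters disagree with the true ones among these 12 samples (a small sketch using pred, real, and dic from above):
mismatch = np.where(pred.argmax(1)[:12] != real.argmax(1)[:12])[0]
print('\nmismatched positions:', mismatch)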
MNIST dataset exploration