Tensorflow 1.9.0

1.
Building Tensorflow

Building Tensorflow from source allows use of CPU enhancements like SSE. GCC must be ≤ 5 for CUDA, and ≤ 5.4 for Tensorflow with AVX; 4.8 is what's available, so 4.8 is what we get. To get the Nvidia CUDA libraries we must set the environment variable NEXTJOURNAL_MOUNT_CUDA in the runtime configuration.
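
Since the whole build depends on that mount, it's worth confirming the toolkit is visible before anything else. A quick optional check, assuming the toolkit lands at /usr/local/cuda as used throughout this article:

# Confirm the CUDA mount is visible and report the toolkit version.
/usr/local/cuda/bin/nvcc --version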

apt-get update >/dev/null
apt-get install -y \
  pkg-config zlib1g-dev git zip xutils-dev gnupg \
  gcc-4.8 g++-4.8

update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-7 25
update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-7 25
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-4.8 50
update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-4.8 50

ldconfig
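
To confirm the 4.8 toolchain is now the system default:

# Both should report version 4.8.x after the update-alternatives calls above.
gcc --version | head -n1
g++ --version | head -n1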

Install Bazel. This is very persnickety, and new Bazel versions frequently break compatibility with Tensorflow.

export BAZEL_VERSION=0.14.1 # 0.11.1 worked for 1.8.0-rc1
export BAZEL_FILE=bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh
wget --progress=dot:giga \
  https://github.com/bazelbuild/bazel/releases/download/$BAZEL_VERSION/$BAZEL_FILE
chmod +x $BAZEL_FILE
./$BAZEL_FILE
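
A quick check that the expected Bazel landed on the PATH:

# Should print "Build label: 0.14.1".
bazel version | head -n1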

Clone the source and switch to the right version, one that predates a change which interfered with compiling under nvcc.

git clone https://github.com/tensorflow/tensorflow
cd tensorflow
git checkout v1.9.0

Tensorflow's configure script reads environment variables, which lets us do the configuration non-interactively.

cd /tensorflow

export TF_ROOT="/opt/tensorflow"

export PYTHON_BIN_PATH="/opt/conda/bin/python"
export PYTHON_LIB_PATH="$($PYTHON_BIN_PATH -c 'import site; print(site.getsitepackages()[0])')"
export PYTHONPATH=${TF_ROOT}/lib
export PYTHON_ARG=${TF_ROOT}/lib
export CUDA_TOOLKIT_PATH=/usr/local/cuda
export CUDNN_INSTALL_PATH=/usr/local/cuda

export TF_NEED_GCP=1
export TF_NEED_S3=0
export TF_NEED_KAFKA=0
export TF_NEED_GDR=0
export TF_NEED_CUDA=1
export TF_CUDA_VERSION="$($CUDA_TOOLKIT_PATH/bin/nvcc --version | sed -n 's/^.*release \(.*\),.*/\1/p')"
export TF_CUDA_COMPUTE_CAPABILITIES=6.1,5.2,3.5
export TF_NEED_HDFS=0
export TF_NEED_OPENCL=0
export TF_NEED_OPENCL_SYCL=0
export TF_NEED_JEMALLOC=1
export TF_ENABLE_XLA=0
export TF_NEED_VERBS=0
export TF_CUDA_CLANG=0
export TF_CUDNN_VERSION="$(sed -n 's/^#define CUDNN_MAJOR\s*\(.*\).*/\1/p' $CUDNN_INSTALL_PATH/include/cudnn.h)"
export TF_NEED_MKL=0
export TF_DOWNLOAD_MKL=0
export TF_NEED_MPI=0
export TF_NEED_TENSORRT=0
export TF_NCCL_VERSION=1.3.5
export TF_SET_ANDROID_WORKSPACE=0

export GCC_HOST_COMPILER_PATH=$(which gcc)
export CC_OPT_FLAGS="-march=corei7"

./configure
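
The configure script records its answers in .tf_configure.bazelrc in the source root; inspecting it is a cheap way to verify the settings took effect before committing to a multi-hour build. (A hedged check; the exact file layout varies between Tensorflow versions.)

# Each answer above should appear as a build setting here.
cat /tensorflow/.tf_configure.bazelrc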

Finally, the build. This takes over two hours.

export LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/nvidia/lib64"
export CUDNN_INCLUDE_DIR="/usr/local/cuda/include"
export CUDNN_LIBRARY="/usr/local/cuda/lib64/libcudnn.so"

cd /tensorflow
bazel build --config=opt --config=cuda --verbose_failures --jobs 5 \
  --action_env="LD_LIBRARY_PATH=${LD_LIBRARY_PATH}" \
  --action_env="CUDNN_INCLUDE_DIR=${CUDNN_INCLUDE_DIR}" \
  --action_env="CUDNN_LIBRARY=${CUDNN_LIBRARY}" \
  //tensorflow/tools/pip_package:build_pip_package 

We'll export this environment in case anyone wants to play with the compiled result, but the important part here is the creation of a .whl wheel file, which can be installed via pip.

cd /tensorflow
bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg
cp /tmp/tensorflow_pkg/tensorflow*.whl /results/

The build produces tensorflow-1.9.0-cp36-cp36m-linux_x86_64.whl in /results.

Finally, we'll install the package we created in a clean environment.

# preemptively install deps
conda install -y -c anaconda -c intel \
  absl-py astor gast grpcio markdown protobuf termcolor werkzeug \
  cython

# pip needs a specific filename format
TF_FILE="tensorflow-1.9.0-cp36-cp36m-linux_x86_64.whl"
cp tensorflow-1.9.0-cp36-cp36m-linux_x86_64.whl \
  $TF_FILE
pip install $TF_FILE
rm $TF_FILE

ldconfig
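
A minimal smoke test to confirm the freshly built wheel imports and sees the GPU:

# Print the version and whether a CUDA device is usable.
python -c "import tensorflow as tf; print(tf.__version__); print(tf.test.is_gpu_available())"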

2.
Use Case

We'll follow the deep convolutional generative adversarial networks (DCGAN) example by Aymeric Damien, from the Tensorflow Examples project, to generate digit images from a noise distribution.

Reference paper: Unsupervised representation learning with deep convolutional generative adversarial networks. A Radford, L Metz, S Chintala. arXiv:1511.06434.

First, parameters.

# Training Params
num_steps = 5000
batch_size = 32

# Network Params
image_dim = 784 # 28*28 pixels * 1 channel
gen_hidden_dim = 256
disc_hidden_dim = 256
noise_dim = 200 # Noise data points

Define networks.

# Generator Network
# Input: Noise, Output: Image
def generator(x, reuse=False):
    with tf.variable_scope('Generator', reuse=reuse):
        # TensorFlow Layers automatically create variables and calculate their
        # shape, based on the input.
        x = tf.layers.dense(x, units=6 * 6 * 128)
        x = tf.nn.tanh(x)
        # Reshape to a 4-D array of images: (batch, height, width, channels)
        # New shape: (batch, 6, 6, 128)
        x = tf.reshape(x, shape=[-1, 6, 6, 128])
        # Deconvolution, image shape: (batch, 14, 14, 64)
        x = tf.layers.conv2d_transpose(x, 64, 4, strides=2)
        # Deconvolution, image shape: (batch, 28, 28, 1)
        x = tf.layers.conv2d_transpose(x, 1, 2, strides=2)
        # Apply sigmoid to clip values between 0 and 1
        x = tf.nn.sigmoid(x)
        return x

# Discriminator Network
# Input: Image, Output: Prediction Real/Fake Image
def discriminator(x, reuse=False):
    with tf.variable_scope('Discriminator', reuse=reuse):
        # Typical convolutional neural network to classify images.
        x = tf.layers.conv2d(x, 64, 5)
        x = tf.nn.tanh(x)
        x = tf.layers.average_pooling2d(x, 2, 2)
        x = tf.layers.conv2d(x, 128, 5)
        x = tf.nn.tanh(x)
        x = tf.layers.average_pooling2d(x, 2, 2)
        x = tf.contrib.layers.flatten(x)
        x = tf.layers.dense(x, 1024)
        x = tf.nn.tanh(x)
        # Output 2 classes: Real and Fake images
        x = tf.layers.dense(x, 2)
    return x
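
The shape comments in the generator follow from the transposed-convolution size formula: with the default 'valid' padding, the output size is (input − 1) × stride + kernel. A small sanity check of the two upsampling steps above:

# (6 - 1) * 2 + 4 = 14, then (14 - 1) * 2 + 2 = 28
for size, kernel, stride in [(6, 4, 2), (14, 2, 2)]:
    print((size - 1) * stride + kernel)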

Network setup.

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

# Import MNIST data (http://yann.lecun.com/exdb/mnist/)
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

# Build Networks
# Network Inputs
noise_input = tf.placeholder(tf.float32, shape=[None, noise_dim])
real_image_input = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])

# Build Generator Network
gen_sample = generator(noise_input)

# Build 2 Discriminator Networks (one from real image input, one from generated samples)
disc_real = discriminator(real_image_input)
disc_fake = discriminator(gen_sample, reuse=True)
disc_concat = tf.concat([disc_real, disc_fake], axis=0)

# Build the stacked generator/discriminator
stacked_gan = discriminator(gen_sample, reuse=True)

# Build Targets (real or fake images)
disc_target = tf.placeholder(tf.int32, shape=[None])
gen_target = tf.placeholder(tf.int32, shape=[None])

# Build Loss
disc_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
    logits=disc_concat, labels=disc_target))
gen_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
    logits=stacked_gan, labels=gen_target))

# Build Optimizers
optimizer_gen = tf.train.AdamOptimizer(learning_rate=0.001)
optimizer_disc = tf.train.AdamOptimizer(learning_rate=0.001)

# Training Variables for each optimizer
# By default in TensorFlow, all variables are updated by each optimizer, so we
# need to specify for each optimizer the exact variables it should update.
# Generator Network Variables
gen_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='Generator')
# Discriminator Network Variables
disc_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='Discriminator')

# Create training operations
train_gen = optimizer_gen.minimize(gen_loss, var_list=gen_vars)
train_disc = optimizer_disc.minimize(disc_loss, var_list=disc_vars)

# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()

Finally, training.

# Start training
sess = tf.Session()

# Run the initializer
sess.run(init)

for step in range(1, num_steps+1):

	# Prepare Input Data
	# Get the next batch of MNIST data (only images are needed, not labels)
	batch_x, _ = mnist.train.next_batch(batch_size)
	batch_x = np.reshape(batch_x, newshape=[-1, 28, 28, 1])
	# Generate noise to feed to the generator
	z = np.random.uniform(-1., 1., size=[batch_size, noise_dim])

	# Prepare Targets (Real image: 1, Fake image: 0)
	# The first half of the data fed to the discriminator are real images,
	# the other half are fake images (coming from the generator).
	batch_disc_y = np.concatenate(
		[np.ones([batch_size]), np.zeros([batch_size])], axis=0)
	# Generator tries to fool the discriminator, thus targets are 1.
	batch_gen_y = np.ones([batch_size])

	# Training
	feed_dict = {real_image_input: batch_x, noise_input: z,
				 disc_target: batch_disc_y, gen_target: batch_gen_y}
	_, _, gl, dl = sess.run([train_gen, train_disc, gen_loss, disc_loss],
							feed_dict=feed_dict)
	if step % 1000 == 0 or step == 1:
		print('Step %i: Generator Loss: %f, Discriminator Loss: %f' % (step, gl, dl))
		
		# Generate images from noise, using the generator network.
		f, a = plt.subplots(4, 10, figsize=(10, 4))
		for i in range(10):
			# Noise input.
			z = np.random.uniform(-1., 1., size=[4, noise_dim])
			g = sess.run(gen_sample, feed_dict={noise_input: z})
			for j in range(4):
				# Generate image from noise. Extend to 3 channels for matplot figure.
				img = np.reshape(np.repeat(g[j][:, :, np.newaxis], 3, axis=2),
					newshape=(28, 28, 3))
				a[j][i].imshow(img)
				
		#f.show()
		plt.suptitle("Step {}".format(step))
		plt.savefig("/results/step-{}.svg".format(step))
		plt.close()