"""These are a series of functions used to score models based on how
distant their false positives and false negatives over some data set are
from the model's decision boundary. The only function used externally is
combined_false_score, which discusses how the scoring is done. All other
functions in this file are only used internally by combined_false_score
and the functions that it calls.
"""
import tensorflow as tf
import numpy as np
[docs]
def combined_false_score(model, ds, dist = tf.constant(0.05)):
    """Score a model by how far its misclassified points lie from the
    decision boundary.

    The false positives and false negatives of ``model`` on ``ds`` are
    collected with get_false_pos_and_neg_tf. Each group is copied into
    tf.Variable objects and handed to _find_accurate_points, which
    deforms the copies in place into nearby points on the other side
    of the decision boundary. The angle (in radians) between every
    misclassified point and its deformed copy is then computed as the
    arccosine of their row-wise dot product.

    Parameters
    ----------
    model : tf.keras.Model
    ds : tf.data.Dataset
        A labelled Tensorflow dataset of sets of quartic potential
        coefficients.
    dist : tf.float32
        The largest angular distance (radians) between a misclassified
        point and the decision boundary that the caller considers
        acceptable. ``dist/50.`` is also forwarded to
        _find_accurate_points as its ``init_rot`` argument.

    Returns
    -------
    tuple of lists
        Two lists, the first for false positives and the second for
        false negatives. Each list holds, in order: the mean, the
        standard deviation and the maximum of the angular distances,
        followed by the number of points whose angular distance is
        greater than ``dist``.

    Notes
    -----
    The arccosine of the plain dot product is only an angle if the
    rows are unit vectors; this function does not normalise them
    itself.
    """
    def _boundary_angles(misclassified, is_false_pos):
        # ``moved`` is deformed across the decision boundary, while
        # ``anchor`` starts as an identical copy and serves as the
        # counterpoint during the bisection refinement.
        moved = tf.Variable(tf.identity(misclassified))
        anchor = tf.Variable(tf.identity(misclassified))
        _find_accurate_points(
            model,
            moved,
            anchor,
            false_pos = tf.constant(is_false_pos),
            maxiter = tf.constant(10000),
            init_rot = dist/50.,
        )
        # Row-wise dot product, clipped so that rounding error cannot
        # push the argument of acos outside [-1, 1].
        cosines = tf.math.reduce_sum(misclassified*moved, axis = 1)
        return tf.acos(tf.clip_by_value(cosines, -1., 1.))

    def _summarize(angles):
        # One index row per entry of ``angles`` that exceeds ``dist``.
        beyond_tolerance = tf.where(tf.math.greater(angles, dist))
        return [
            tf.math.reduce_mean(angles).numpy(),
            tf.math.reduce_std(angles).numpy(),
            tf.math.reduce_max(angles).numpy(),
            tf.shape(beyond_tolerance)[0].numpy(),
        ]

    # Split the dataset's misclassified points into the two groups.
    false_positives, false_negatives = get_false_pos_and_neg_tf(model, ds)

    # False positives are processed before false negatives.
    angles_fp = _boundary_angles(false_positives, True)
    angles_fn = _boundary_angles(false_negatives, False)

    return _summarize(angles_fp), _summarize(angles_fn)
def _tf_flatten(t):
    """Collapse a tensor of any rank into a one-dimensional tensor.

    Parameters
    ----------
    t : tf.Tensor

    Returns
    -------
    tf.Tensor
        A 1-D tensor holding the same elements as ``t``.
    """
    # A single -1 lets reshape infer the total number of elements.
    flat_shape = [-1]
    return tf.reshape(t, flat_shape)
@tf.function(jit_compile = True)
def _model_grad(model, lams_var):
    """Evaluate the model on a batch of inputs and differentiate the
    output with respect to those inputs.

    Parameters
    ----------
    model : tf.keras.Model
        The model to evaluate; it is called as ``model(lams_var)``.
    lams_var : tf.Variable
        A tf.Variable object which holds a 2-D tensor representing
        sets of quartic potential coefficients.

    Returns
    -------
    tuple of tf.Tensor
        The model output evaluated at ``lams_var``, followed by the
        gradient of that output with respect to ``lams_var``.
    """
    # Automatic variable watching is switched off so that only the
    # inputs are tracked by the tape.
    with tf.GradientTape(watch_accessed_variables=False) as input_tape:
        input_tape.watch(lams_var)
        prediction = model(lams_var)
    input_gradient = input_tape.gradient(prediction, lams_var)
    return prediction, input_gradient
def _rand_rotate(lams, rot_dist):
    """Rotate every row of ``lams`` by the angle ``rot_dist`` in an
    independently chosen random direction.

    Parameters
    ----------
    lams : tf.Variable
        A 2-D collection of sets of quartic potential coefficients,
        one set per row. The construction below assumes each row has
        unit norm.
    rot_dist : float
        The rotation angle in radians applied to every row.

    Returns
    -------
    tf.Tensor
        ``tangent*sin(rot_dist) + lams*cos(rot_dist)``, where
        ``tangent`` is a random unit vector per row with its component
        along that row removed.
    """
    # Draw one random direction per row and scale it to unit length.
    noise = tf.random.normal(tf.shape(lams))
    noise = noise / tf.norm(noise, axis = 1, keepdims = True)
    # Remove the component of each direction that lies along its row,
    # then rescale what remains to unit length.
    overlap = tf.math.reduce_sum(noise * lams, axis = 1, keepdims = True)
    tangent = noise - overlap * lams
    tangent = tangent / tf.norm(tangent, axis = 1, keepdims = True)
    # Combine the row and its tangent direction with sine/cosine weights.
    return tangent * tf.math.sin(rot_dist) + lams * tf.math.cos(rot_dist)
def _one_step_validation(lams_var, grad_array, stepsize, false_pos, mask):
    """Take one gradient step on the selected rows of ``lams_var`` and
    renormalise every row.

    Rows selected by ``mask`` are moved against the gradient when
    ``false_pos`` is true (lowering the model prediction) and along the
    gradient otherwise (raising it). Afterwards every row of
    ``lams_var`` is divided by its norm. ``lams_var`` is modified in
    place and nothing is returned.

    Parameters
    ----------
    lams_var : tf.Variable
        A tf.Variable object which holds a 2-D tensor representing
        sets of quartic potential coefficients.
    grad_array : tf.Tensor
        The gradient of the model output with respect to the input
        quartic coefficients.
    stepsize : tf.Tensor(tf.float32)
        The step length for each row, as produced by
        _estimate_step_size.
    false_pos : bool
        If True, the rows are treated as lying near false positives
        and the step is subtracted; otherwise they are treated as
        lying near false negatives and the step is added.
    mask : tf.Tensor(bool)
        A 1-dimensional tensor of booleans. Only rows of ``lams_var``
        where ``mask`` is True receive a step.
    """
    # Locate the selected rows once and gather their scaled gradients.
    active_rows = tf.where(mask)
    scaled_step = stepsize * grad_array
    row_updates = tf.gather(scaled_step, _tf_flatten(active_rows))

    if false_pos:
        lams_var.scatter_nd_sub(active_rows, row_updates)
    else:
        lams_var.scatter_nd_add(active_rows, row_updates)

    # Project all rows back onto the unit hypersphere.
    current = lams_var.value()
    row_norms = tf.norm(lams_var.value(), axis = 1, keepdims = True)
    lams_var.assign(current / row_norms)
def _find_prediction_boundary(model, lams_var, false_pos, converged, maxiter, init_rot):
    """Push misclassified points across the decision boundary by
    repeated gradient steps.

    Each iteration (1) calls _fix_zero_gradients on the points that are
    not yet converged, (2) evaluates the model and its input gradient,
    (3) recomputes which points are now on the desired side of 0.5,
    (4) estimates a step size with _estimate_step_size and (5) applies
    one step to the remaining points with _one_step_validation.
    ``lams_var`` is updated in place.

    Parameters
    ----------
    model : tf.keras.Model
    lams_var : tf.Variable
        A tf.Variable which represents sets of quartic potential
        coefficients in the vicinity of points that the neural network
        labels as either false positives or false negatives.
    false_pos : bool
        If True, a point counts as converged once the model output is
        below 0.5; if False, once it is above 0.5.
    converged : tf.Tensor(bool)
        The convergence flags on entry, one per row of ``lams_var``.
        They decide whether the loop runs at all and which points are
        passed to the first _fix_zero_gradients call.
    maxiter : int
        The maximum number of iterations to perform.
    init_rot : float
        Forwarded to _fix_zero_gradients as the initial rotation angle.

    Returns
    -------
    tf.Tensor(bool)
        The convergence flags from the last iteration, or the input
        flags unchanged if the loop body never ran.
    """
    iteration = tf.constant(0)
    while tf.logical_and(tf.reduce_any(tf.logical_not(converged)), tf.less(iteration, maxiter)):
        # Unconverged points with vanishing gradient are randomly
        # perturbed until gradient steps can make progress.
        _fix_zero_gradients(model, lams_var, init_rot, tf.logical_not(converged))

        predictions, gradients = _model_grad(model, lams_var)

        # Refresh the convergence flags from the current predictions.
        flat_predictions = _tf_flatten(predictions)
        if false_pos:
            converged = tf.math.less(flat_predictions, 0.5)
        else:
            converged = tf.math.greater(flat_predictions, 0.5)

        # Step only the points that are still on the wrong side.
        pending = tf.logical_not(converged)
        step = _estimate_step_size(model, lams_var, predictions, gradients, false_pos, pending)
        _one_step_validation(lams_var, gradients, step, false_pos, pending)

        iteration = iteration + tf.constant(1)
    return converged
def _tf_scatter_nd_mask(tensor, mask, update):
    """Return a copy of ``tensor`` in which the entries selected by
    ``mask`` are replaced by the matching entries of ``update``.

    Parameters
    ----------
    tensor : tf.Tensor
        Some input tensor.
    mask : tf.Tensor(bool)
        True at the positions whose values should be taken from
        ``update``. The indices of the True entries are flattened and
        used to gather along the first axis of ``update``, so a 1-D
        mask over the leading axis is expected.
    update : tf.Tensor
        A tensor of the same shape and dtype as ``tensor``.

    Returns
    -------
    tf.Tensor
        A tensor with the values of ``update`` where ``mask`` is True
        and the values of ``tensor`` where ``mask`` is False.
    """
    selected = tf.where(mask)
    replacements = tf.gather(update, _tf_flatten(selected))
    return tf.tensor_scatter_nd_update(tensor, selected, replacements)
def _estimate_step_size(model, lams_var, model_val, grad_array, false_pos, mask):
    """Choose a per-point gradient step length by backtracking.

    Every point starts with a step of 1. While any masked point fails
    the sufficient-change test (and for at most 25 rounds), the step
    of each failing point is halved. The plain unconstrained form of
    backtracking is used rather than the projected-gradient variant,
    on the stated basis that the network itself projects its input
    onto the unit hypersphere before evaluation.

    Parameters
    ----------
    model : tf.keras.Model
    lams_var : tf.Variable
        A tf.Variable which represents sets of quartic potential
        coefficients in the vicinity of points that the neural network
        labels as either false positives or false negatives.
    model_val : tf.Tensor(tf.float32)
        The model predictions at the current value of ``lams_var``.
    grad_array : tf.Tensor
        The gradient of the model output with respect to the current
        value of ``lams_var``.
    false_pos : bool
        If True, trial steps are taken against the gradient and must
        decrease the prediction; otherwise they are taken along the
        gradient and must increase it.
    mask : tf.Tensor(bool)
        Points where ``mask`` is False are treated as already
        satisfied, so their step is never halved.

    Returns
    -------
    tf.Tensor(tf.float32)
        Twice the final backtracked step for each point, clipped to
        the interval [1e-8, 0.01].
    """
    grad_norm_sq = tf.math.square(tf.linalg.norm(grad_array, axis = 1, keepdims = True))
    excluded = tf.logical_not(mask)

    def _step_accepted(trial_step):
        # Sufficient-change test: the prediction must move in the
        # wanted direction by at least half of step * |gradient|^2.
        # Points outside the mask always count as accepted.
        if false_pos:
            change = model(lams_var - trial_step*grad_array) - model_val
            passed = tf.math.less_equal(change, -0.5*trial_step*grad_norm_sq)
        else:
            change = model(lams_var + trial_step*grad_array) - model_val
            passed = tf.math.greater_equal(change, 0.5*trial_step*grad_norm_sq)
        return tf.logical_or(excluded, _tf_flatten(passed))

    stepsize = tf.ones(shape=(tf.shape(lams_var)[0],1), dtype = tf.float32)
    converged = _step_accepted(stepsize)
    i = 0
    while tf.logical_and(tf.reduce_any(~converged), i < 25):
        # Halve the step only for the points that failed the test.
        stepsize = _tf_scatter_nd_mask(stepsize, ~converged, 0.5*stepsize)
        converged = _step_accepted(stepsize)
        i += 1
    return tf.clip_by_value(2.*stepsize, 1e-8, 0.01)
def _fix_zero_gradients(model, lams_var, init_rot, mask):
    """Randomly rotate points whose model gradient is numerically zero
    until the gradient becomes usable.

    A point is treated as having zero gradient when the norm of its
    gradient is below 1e-7. The same tolerance is used both to flag
    points before the loop and to decide, after each random rotation,
    whether a point has been fixed. (Previously the in-loop check
    tested for a norm of exactly 0, so a point flagged by the initial
    check could be accepted while its gradient was still below the
    flagging tolerance.)

    ``lams_var`` is updated in place and nothing is returned. The loop
    has no iteration limit; it ends only when no masked point has a
    gradient norm below the tolerance.

    Parameters
    ----------
    model : tf.keras.Model
    lams_var : tf.Variable
        Represents sets of quartic potential coefficients in the
        vicinity of points that the neural network labels as either
        false positives or false negatives.
    init_rot : float
        The initial rotation angle for points with zero gradient. The
        angle is doubled every 1000 iterations for as long as some
        point is still not fixed.
    mask : tf.Tensor(bool)
        Selects which points may be rotated. Points where ``mask`` is
        False are never touched, which protects points that have
        already been deformed past the decision boundary.
    """
    # Gradient norms below this value are treated as zero.
    zero_tol = 1e-7

    # Remember the starting positions so that failed rotations can be
    # undone before the next attempt.
    init_lams = tf.identity(lams_var.value())
    _, grads = _model_grad(model, lams_var)
    zero_grad = tf.logical_and(tf.math.less(tf.norm(grads, axis = 1), zero_tol), mask)
    rot_dist = init_rot
    i = 0
    while tf.math.reduce_any(zero_grad):
        # Widen the rotation if many attempts have gone by without
        # fixing every flagged point.
        if(tf.logical_and(tf.math.greater(i, 0), tf.math.equal(tf.math.floormod(i, 1000), tf.constant(0)))):
            rot_dist = tf.constant(2.)*rot_dist
        # Randomly rotate the points that are currently flagged.
        flagged_rows = tf.where(zero_grad)
        lams_var.scatter_nd_update(flagged_rows, _rand_rotate(tf.gather(lams_var, _tf_flatten(flagged_rows)), rot_dist))
        # Re-test with the same tolerance that flagged the points.
        _, grads = _model_grad(model, lams_var)
        zero_grad = tf.logical_and(tf.math.less(tf.norm(grads, axis = 1), zero_tol), mask)
        # Points that are still flagged go back to their initial
        # values so the next iteration rotates them afresh.
        still_flagged_rows = tf.where(zero_grad)
        lams_var.scatter_nd_update(still_flagged_rows, tf.gather(init_lams, _tf_flatten(still_flagged_rows)))
        # Count the completed attempts.
        i += 1
def _random_rot_search(model, lams_var, false_pos, init_rot):
    """Fallback search: randomly rotate points until each one lies on
    the desired side of the decision boundary.

    Intended for points that the gradient descent/ascent strategy did
    not manage to move across the boundary. ``lams_var`` is updated in
    place and nothing is returned. The loop has no iteration limit.

    Parameters
    ----------
    model : tf.keras.Model
    lams_var : tf.Variable
        A tf.Variable which represents sets of quartic potential
        coefficients in the vicinity of points that the neural network
        labels as either false positives or false negatives.
    false_pos : bool
        If True, a point is accepted once the model output is below
        0.5; if False, once it is above 0.5.
    init_rot : float
        The initial rotation angle. It is doubled every 1000
        iterations for as long as some point has not been accepted.
    """
    def _across_boundary():
        # Evaluate the model on the current value of lams_var and
        # compare against the 0.5 threshold in the wanted direction.
        outputs = _tf_flatten(model(lams_var))
        if false_pos:
            accepted = tf.less(outputs, 0.5)
        else:
            accepted = tf.greater(outputs, 0.5)
        return accepted

    # Snapshot of the positions on entry, used to undo failed rotations.
    init_lams = tf.identity(lams_var.value())
    converged = _across_boundary()
    rot_dist = init_rot
    i = 0
    while tf.math.reduce_any(~converged):
        # Widen the rotation if many attempts have gone by without
        # every point being accepted.
        if(tf.logical_and(tf.math.greater(i, 0), tf.math.equal(tf.math.floormod(i, 1000), tf.constant(0)))):
            rot_dist = tf.constant(2.)*rot_dist
        # Randomly rotate the points that are not yet across the
        # decision boundary.
        pending_rows = tf.where(~converged)
        rotated = _rand_rotate(tf.gather(lams_var, _tf_flatten(pending_rows)), rot_dist)
        lams_var.scatter_nd_update(pending_rows, rotated)
        # See which points are across the boundary after this attempt.
        converged = _across_boundary()
        # Points that still failed are restored to their entry values
        # so that the next attempt starts from the same place.
        failed_rows = tf.where(~converged)
        lams_var.scatter_nd_update(failed_rows, tf.gather(init_lams, _tf_flatten(failed_rows)))
        # Count the completed attempts.
        i += 1
@tf.function
def _find_accurate_points(model, lams_var, lams_init, false_pos, maxiter = tf.constant(10000), init_rot = tf.constant(1e-3)):
    """Deform misclassified points into nearby points just across the
    model's decision boundary.

    The work is done in four stages: points with zero gradient are
    perturbed (_fix_zero_gradients), gradient descent/ascent moves
    points across the boundary (_find_prediction_boundary), any points
    that are still on the wrong side are handled by random rotations
    (_random_rot_search), and finally bisection between ``lams_var``
    and ``lams_init`` pulls the result close to the boundary
    (_bisection_method). Both variables are modified in place and
    nothing is returned.

    Parameters
    ----------
    model : tf.keras.Model
    lams_var : tf.Variable
        A tf.Variable which represents sets of quartic potential
        coefficients that the neural network labels as either false
        positives or false negatives. On exit it holds the deformed
        points.
    lams_init : tf.Variable
        Another tf.Variable that initially carries the same values as
        ``lams_var``. It is updated during the bisection stage.
    false_pos : bool
        If True, the points are treated as false positives and the
        search looks for points with model output below 0.5. Otherwise
        they are treated as false negatives and the search looks for
        output above 0.5.
    maxiter : int
        Upper limit on the number of gradient iterations, forwarded to
        _find_prediction_boundary.
    init_rot : float
        The initial rotation angle used by _fix_zero_gradients and
        _random_rot_search. One tenth of it is passed to
        _bisection_method as its tolerance.
    """
    # Every point is eligible for the initial zero-gradient repair.
    all_points = tf.ones(tf.shape(lams_var)[0], dtype = bool)
    _fix_zero_gradients(model, lams_var, init_rot, all_points)

    # A point is "crossed" when the model already places it on the
    # opposite side of 0.5 from its erroneous label.
    initial_outputs = model(lams_var)
    if false_pos:
        crossed = _tf_flatten(tf.math.less(initial_outputs, 0.5))
    else:
        crossed = _tf_flatten(tf.math.greater(initial_outputs, 0.5))

    # Gradient-based search; the returned flags report which points
    # made it across.
    crossed = _find_prediction_boundary(model, lams_var, false_pos, crossed, maxiter, init_rot)

    # Fall back to random rotations for whatever is left.
    if tf.reduce_any(tf.logical_not(crossed)):
        _random_rot_search(model, lams_var, false_pos, init_rot)

    # Tighten the deformed points towards the boundary by bisection,
    # using a tolerance ten times smaller than init_rot.
    bisection_tol = init_rot/10.
    _bisection_method(model, lams_var, lams_init, false_pos, bisection_tol)
def _bisection_method(model, lams_var, lams_init, false_pos, tol, max_iter = 100):
    """Refine points towards the decision boundary by repeated
    bisection between ``lams_var`` and ``lams_init``.

    Each iteration forms the midpoint of every (lams_var, lams_init)
    pair, rescales it to unit norm, evaluates the model there once,
    and replaces whichever endpoint lies on the same side of 0.5 as
    the midpoint. Only pairs whose ``lams_var`` prediction is not yet
    within ``tol`` of 0.5 are updated. Both variables are modified in
    place and nothing is returned.

    The loop stops when every ``lams_var`` prediction is within
    ``tol`` of 0.5, or after ``max_iter`` iterations. The cap prevents
    an endless loop when the two endpoints can no longer be told apart
    in floating point before the tolerance is met. Points that hit the
    cap keep their most recent value, which is only ever assigned from
    a midpoint on the ``lams_var`` side of the boundary.

    Parameters
    ----------
    model : tf.keras.Model
    lams_var : tf.Variable
        A tf.Variable which represents sets of quartic potential
        coefficients in the vicinity of points that the neural network
        labels as either false positives or false negatives.
    lams_init : tf.Variable
        A tf.Variable which represents sets of quartic potential
        coefficients that ARE false positives or false negatives.
    false_pos : bool
        If True, ``lams_init`` is updated with midpoints whose
        prediction is above 0.5 and ``lams_var`` with midpoints whose
        prediction is at or below 0.5. If False, the roles are
        swapped.
    tol : float
        The bisection stops updating a point once the absolute
        difference between its model prediction and 0.5 is below this
        value.
    max_iter : int, optional
        Upper limit on the number of bisection iterations.
    """
    # Points already within tol of the decision boundary need no work.
    converged = tf.math.less(tf.math.abs(_tf_flatten(model(lams_var)) - 0.5), tol)
    i = 0
    while tf.logical_and(tf.reduce_any(~converged), i < max_iter):
        # Midpoint of each pair, projected onto the unit hypersphere.
        bisection = lams_var + ((lams_init - lams_var)/2.)
        bisection = bisection / tf.norm(bisection, axis = 1, keepdims = True)
        # Evaluate the model at the midpoints once per iteration and
        # reuse the result for both endpoint updates.
        mid_pred = _tf_flatten(model(bisection))
        mid_above = tf.greater(mid_pred, 0.5)
        mid_at_or_below = tf.less_equal(mid_pred, 0.5)
        # Each midpoint replaces the endpoint on its own side of 0.5;
        # converged pairs are left alone.
        if(false_pos):
            init_update = tf.where(tf.logical_and(mid_above, ~converged))
            var_update = tf.where(tf.logical_and(mid_at_or_below, ~converged))
        else:
            init_update = tf.where(tf.logical_and(mid_at_or_below, ~converged))
            var_update = tf.where(tf.logical_and(mid_above, ~converged))
        lams_init.scatter_nd_update(init_update, tf.gather(bisection, _tf_flatten(init_update)))
        lams_var.scatter_nd_update(var_update, tf.gather(bisection, _tf_flatten(var_update)))
        # Re-test convergence on the updated lams_var.
        converged = tf.math.less(tf.math.abs(_tf_flatten(model(lams_var)) - 0.5), tol)
        i += 1
[docs]
def get_false_pos_and_neg_tf(model, ds):
    """Collect the inputs in a labelled dataset that the model
    classifies incorrectly.

    A false positive is an input whose prediction is greater than 0.5
    while its label is False (incorrectly classified as
    bounded-from-below). A false negative is an input whose prediction
    is at most 0.5 while its label is True (incorrectly classified as
    NOT bounded-from-below). The model is called with
    ``training=False``.

    Parameters
    ----------
    model : tf.keras.Model
    ds : tf.data.Dataset
        A Tensorflow dataset representing labelled sets of quartic
        potential coefficients. It is iterated as (inputs, labels)
        pairs; the labels must be boolean because they are negated
        and combined with logical operations.

    Returns
    -------
    tuple of tf.Tensors
        Two tensors holding the false positive and the false negative
        inputs, respectively, concatenated over all batches along the
        first axis.
    """
    threshold = tf.constant(0.5)
    fp_batches = []
    fn_batches = []
    for inputs, labels in ds:
        scores = _tf_flatten(model(inputs, training=False))
        # Predicted positive but labelled negative.
        fp_mask = tf.logical_and(tf.math.greater(scores, threshold), ~labels)
        # Predicted negative but labelled positive.
        fn_mask = tf.logical_and(tf.math.less_equal(scores, threshold), labels)
        fp_batches.append(tf.gather(inputs, _tf_flatten(tf.where(fp_mask))))
        fn_batches.append(tf.gather(inputs, _tf_flatten(tf.where(fn_mask))))
    return tf.concat(fp_batches, axis = 0), tf.concat(fn_batches, axis = 0)