"""A module containing the DataManager class, which handles the generation
and labelling of training and validation data for the BFBLearner class.
"""
from bfbrain.Jax_Oracle import label_func, test_labeller
import jax
import sympy as sym
from sympy import lambdify, re
import tensorflow as tf
import numpy as np
from numpy.random import default_rng
from numpy.random import SeedSequence
from bfbrain.Hypersphere_Formulas import rand_nsphere, convert_from_polar_sym
class labeller_wrapper:
    """Wrapper for the labelling function which serves as the active
    learning oracle.

    Attributes
    ----------
    func : callable
        A numeric function for the potential. This is a numeric function
        generated by the class DataManager in its init method. It will
        take numeric arrays (of a format depending on the DataManager
        class) representing a scalar vev and a set of quartic potential
        coefficients and return the numerical value of the quartic part
        of the potential function and its gradient with respect to the vev.
    phi_len : int
        The number of real parameters necessary to uniquely specify a vev
        in the model.
    lam_len : int
        The number of independent real quartic coefficients in the model.
    rng : np.random.Generator
        The random number generator which governs any random processes
        that the oracle may use.
    polar : bool
        If True, then the analysis of the potential will be conducted with
        a polar coordinate parameterization of the vev parameters.
        If False, then Cartesian coordinates will be used, albeit with the
        vev parameters restricted to a phi_len-dimensional unit
        hypersphere.
    label_fn : callable, optional
        A function that takes a 2-D NumPy array of quartic coefficients
        and returns a list of Boolean labels for them. This is for
        implementing customized oracle functions. Must have the signature
        (func: Callable, phi_len: int, polar: bool, rng: NumpyGenerator, lam: np.array(np.float, np.float), **kwargs) -> np.array(bool)
        If this argument is not specified, the default oracle
        BFBrain.Jax_Oracle.label_func is used.
    label_check : callable, optional
        A function that can be used to test the reliability of a custom
        oracle given by label_fn, or if label_fn is None, the default
        oracle BFBrain.Jax_Oracle.label_func. Must have the same signature
        as label_fn, up to additional keyword arguments. If this argument
        is not specified, a tester for the default oracle is used:
        BFBrain.Jax_Oracle.test_labeller
    label_kwargs : dict, optional
        A dictionary of additional keyword arguments needed for the
        labelling function label_fn. If not specified (None), a fresh
        ``dict(niter=250)`` is created for this instance; that value is
        applicable to the default oracle function,
        BFBrain.Jax_Oracle.label_func. A dictionary that is passed in
        explicitly is stored by reference, not copied.
    """

    def __init__(self, func, phi_len, lam_len, rng, polar, label_fn=None, label_check=None, label_kwargs=None):
        self.func = func
        self.phi_len = phi_len
        self.lam_len = lam_len
        self.rng = rng
        self.polar = polar
        # A dict literal in the signature would be shared by every
        # instance constructed with the default; build it per instance.
        self.label_kwargs = dict(niter=250) if label_kwargs is None else label_kwargs
        # Fall back on the default oracle / tester only when no custom
        # callable is supplied.
        self.label_fn = label_func if label_fn is None else label_fn
        self.label_check = test_labeller if label_check is None else label_check

    def do_labelling(self, lam, label_kwargs=None):
        """Performs labelling using the class's oracle function.

        Parameters
        ----------
        lam : np.array(np.float32, np.float32)
            A 2-D NumPy array of sets of quartic potential coefficients.
        label_kwargs : dict, optional
            An optional alternative set of oracle keyword arguments. If
            not specified, the class instance's label_kwargs
            attribute is used.

        Returns
        -------
        np.array(bool)
            A 1-D NumPy array of labels for lam, for which points that
            are bounded from below are labelled "True" and points which
            are not are labelled "False".
        """
        # An explicitly supplied dict (even an empty one) fully replaces
        # the instance's keyword arguments; the two are never merged.
        kwargs = self.label_kwargs if label_kwargs is None else label_kwargs
        return self.label_fn(self.func, self.phi_len, self.polar, self.rng, lam, **kwargs)

    def check_labeller(self, lam, **tester_kwargs):
        """Tests the reliability of the labelling-- calls
        self.label_check. Depending on the methodology of label_fn,
        this function may or may not be useful. For example, a rigorous
        computation of boundedness-from-below based on resultants would
        not require any consistency or reliability checks.

        Parameters
        ----------
        lam : np.array(np.float32, np.float32)
            A 2-D NumPy array of sets of quartic potential coefficients.
        tester_kwargs : dict
            A set of keyword arguments for self.label_check. These are
            merged with the instance's label_kwargs; on a key collision
            the value given here wins.

        Returns
        -------
        Any
            Will return what self.label_check returns.
        """
        merged_kwargs = self.label_kwargs | tester_kwargs
        return self.label_check(self.func, self.phi_len, self.polar, self.rng, lam, **merged_kwargs)
class np_data:
    """Holds labelled sets of quartic coefficients in CPU memory in a
    format that's easy to save, load, and manipulate.

    Attributes
    ----------
    pos : np.array(np.float32, np.float32)
        A 2-D NumPy array of sets of quartic coefficients in the
        potential, which the labeller has determined are
        bounded-from-below.
    neg : np.array(np.float32, np.float32)
        A 2-D NumPy array of sets of quartic coefficients in the
        potential, which the labeller has determined are NOT
        bounded-from-below.
    """

    def __init__(self, pos, neg):
        self.pos = pos
        self.neg = neg

    @classmethod
    def from_file(cls, path):
        """A constructor for loading an np_data object from a .npz file
        (see NumPy documentation), likely created in a previous BFBrain
        analysis.

        Parameters
        ----------
        path : str
            A string with a file name. '.npz' is appended to the end of
            the string, and should not be included in path.

        Returns
        -------
        np_data
        """
        # np.load returns a lazily-read NpzFile that keeps the archive
        # open; copy the arrays out and close it deterministically.
        with np.load(path + '.npz') as npz_data:
            return cls(pos=np.copy(npz_data['pos']), neg=np.copy(npz_data['neg']))

    def save_data(self, path):
        """Saves the data object to the filepath specified as an npz
        object (see NumPy documentation)

        Parameters
        ----------
        path : str
            A string with a file name. If .npz is not at the end of the
            string, it is appended to it (by np.savez).
        """
        np.savez(path, pos=self.pos, neg=self.neg)

    @staticmethod
    def _concat_rows(existing, incoming):
        """Concatenate incoming onto existing along axis 0.

        An empty existing array whose rank differs from incoming (for
        example the 1-D placeholder ``np.array([])``) is first reshaped
        to zero rows of incoming's row shape, so that it can be
        concatenated instead of raising a dimension-mismatch error.
        """
        existing = np.asarray(existing)
        incoming = np.asarray(incoming)
        if existing.size == 0 and existing.ndim != incoming.ndim:
            existing = existing.reshape((0,) + incoming.shape[1:])
        return np.concatenate((existing, incoming), axis=0)

    def append_data(self, new_data):
        """Given another np_data object, appends its data to this
        object in place.

        Empty arrays in new_data are skipped, so an empty placeholder of
        any shape is harmless there. An empty placeholder held by this
        object is likewise accepted.

        Parameters
        ----------
        new_data: np_data
        """
        if len(new_data.pos) > 0:
            self.pos = self._concat_rows(self.pos, new_data.pos)
        if len(new_data.neg) > 0:
            self.neg = self._concat_rows(self.neg, new_data.neg)

    def n_elements(self):
        """
        Computes the total number of sets of quartic coefficients in
        the object (both bounded-from-below and not bounded-from-below)

        Returns
        -------
        int
            The total number of sets of quartic coefficients in the
            np_data object.
        """
        return len(self.pos) + len(self.neg)
class DataManager:
    """A class containing methods which process and generate data.

    Note that this class contains all the random number generation that's
    not specifically associated with the neural network and its optimizer.
    Generally one should use the ``from_func`` constructor rather than
    constructing from the base initialization method.

    Attributes
    ----------
    phi_len : int
        The number of independent real parameters needed to uniquely
        specify a vev in the model.
    lam_len : int
        The number of independent real quartic potential coefficients
        in the model.
    rng : list of numpy.random.Generator
        A list of NumPy random number generators which control all the
        random generation related to the generation and labelling of data.
        ``from_func`` creates 6 generators, each differently seeded using
        NumPy's SeedSequence.spawn method. The methods of this class use
        them as follows:

        * ``rng[0]`` : uniform draws on the unit hypersphere for training
          data (also used for the random directions in ``generate_L``).
        * ``rng[1]`` : uniform draws on the unit hypersphere for
          validation data.
        * ``rng[2]`` : choosing which existing points to build new points
          from (``generate_L`` and ``_create_new_positives``).
        * ``rng[3]`` : rotation angles in ``generate_L`` and interpolation
          parameters in ``_create_new_positives``.
        * ``rng[4]`` : shuffling data for training.
        * ``rng[5]`` : handed to the labeller for any random number
          generation associated with labelling.
    polar : bool
        If true, the potential is analyzed with the vev coordinates
        converted to a polar form. If false, they are analyzed in their
        Cartesian form.
    sym_expr : SymPy expression
        Represents the potential function in a form that is both picklable
        and can easily be used to generate the gradient symbolically.
    sym_grad_expr : SymPy expression
        Represents the gradient of the potential function with respect to
        the vev parameters in a picklable symbolic form.
    phisym_var : sympy.Array
        The symbols representing the vev parameters in sym_expr (polar
        angles if polar is True, Cartesian components otherwise).
    lamsym : sympy.Array
        The symbols representing the quartic potential coefficients in
        sym_expr.
    lambdify_mode : {'jax', 'numpy', 'scipy', 'math', 'mpmath', 'numexpr', 'sympy', 'tensorflow'}
        Passed directly as the argument "modules" in sympy.lambdify, the
        function used to generate numerical functions from the symbolic
        expression for the scalar potential. Default value is 'jax',
        consistent with the default oracle function,
        BFBrain.Jax_Oracle.label_func
    label_fn : callable, optional
        A function that takes a 2-D NumPy array of quartic coefficients
        and returns a list of Boolean labels for them. This is for
        implementing customized oracle functions. Must have the signature
        (func: Callable, phi_len: int, polar: bool, rng: NumpyGenerator, lam: np.array(np.float, np.float), **kwargs) -> np.array(bool)
        If this argument is not specified, the default oracle
        BFBrain.Jax_Oracle.label_func is used.
    label_check : callable, optional
        A function that can be used to test the reliability of a custom
        oracle given by label_fn, or if label_fn is None, the default
        oracle BFBrain.Jax_Oracle.label_func. Must have the same signature
        as label_fn, up to additional keyword arguments. If this argument
        is not specified, a tester for the default oracle is used:
        BFBrain.Jax_Oracle.test_labeller
    **label_kwargs : dict, optional
        A dictionary of additional keyword arguments needed for the
        labelling function label_fn.
    labeller : labeller_wrapper
        The oracle wrapper built from the attributes above. It is not
        pickled; it is rebuilt when the object is unpickled.
    """

    def __init__(self, phi_len, lam_len, rng, polar, sym_expr, sym_grad_expr, phisym_var, lamsym, lambdify_mode='jax', label_fn=None, label_check=None, **label_kwargs):
        self.phi_len = phi_len
        self.lam_len = lam_len
        self.rng = rng
        self.polar = polar
        self.sym_expr = sym_expr
        self.sym_grad_expr = sym_grad_expr
        self.phisym_var = phisym_var
        self.lamsym = lamsym
        self.lambdify_mode = lambdify_mode
        self.label_fn = label_fn
        self.label_check = label_check
        self.label_kwargs = label_kwargs
        self._init_labeller()

    def _build_min_func(self):
        """Create the numeric function ``(phi, lam) -> (value, gradient)``
        for the quartic part of the potential from the symbolic
        expressions, using the backend named by lambdify_mode.
        """
        mode = self.lambdify_mode
        sym_args = [self.phisym_var, self.lamsym]
        raw_func = lambdify(sym_args, self.sym_expr, mode)
        raw_grad = lambdify(sym_args, self.sym_grad_expr, mode)

        if mode == 'jax':
            num_func = jax.jit(raw_func)
            num_grad = jax.jit(raw_grad)

            @jax.jit
            def min_func(phi, lam):
                return num_func(phi, lam), num_grad(phi, lam)
        elif mode == 'tensorflow':
            num_func = tf.function(raw_func)
            num_grad = tf.function(raw_grad)

            @tf.function
            def min_func(phi, lam):
                # tf.stack packs what num_grad returns into one tensor.
                return num_func(phi, lam), tf.stack(num_grad(phi, lam))
        else:
            def min_func(phi, lam):
                return raw_func(phi, lam), raw_grad(phi, lam)
        return min_func

    def _init_labeller(self):
        """(Re)build self.labeller from the current attributes. Shared by
        __init__ and __setstate__ so both construct it identically.
        """
        self.labeller = labeller_wrapper(self._build_min_func(), self.phi_len, self.lam_len, self.rng[5], self.polar, self.label_fn, self.label_check, self.label_kwargs)

    @classmethod
    def from_func(cls, sym_func, phi_len, lam_len, seed=None, polar=False, lambdify_mode='jax', label_fn=None, label_check=None, **label_kwargs):
        """Preferred constructor for initializing DataManager.

        Parameters
        ----------
        sym_func : SymPy function.
            A SymPy function that expresses the quartic part of the
            potential. Must have the signature
            (sympy.Array, sympy.Array) -> sympy.Expr, where the first
            sympy.Array object corresponds to the vev configuration
            and the second corresponds to the quartic coefficients
            in the potential.
        phi_len : int
            The number of real parameters needed to uniquely specify the
            vev in the model.
        lam_len : int
            The number of independent real quartic coupling coefficients
            in the model's potential function.
        seed : int, optional
            A random number seed. Used to spawn a sequence of random
            generators with SeedSequence.
        polar : bool, default=False
            If true, the potential is analyzed with the vev coordinates
            converted to a polar form. If false, they are analyzed in
            their Cartesian form.
        lambdify_mode : {'jax', 'numpy', 'scipy', 'math', 'mpmath', 'numexpr', 'sympy', 'tensorflow'}
            The "module" input to sympy.lambdify, used to extract
            numerical expressions from the symbolic SymPy function.
            See SymPy documentation for details.
        label_fn : callable, optional
            A function that takes a 2-D NumPy array of quartic
            coefficients and returns a list of Boolean labels for them.
            This is for implementing customized oracle functions. Must have the signature
            (func: Callable, phi_len: int, polar: bool, rng: numpy.random.Generator, lam: np.array(np.float, np.float), **kwargs) -> np.array(bool)
            If this argument is not specified, the default oracle
            BFBrain.Jax_Oracle.label_func is used.
        label_check : callable, optional
            A function that can be used to test the reliability of a
            custom oracle given by label_fn, or if label_fn is None, the
            default oracle BFBrain.Jax_Oracle.label_func. Must have the
            same signature as label_fn, up to additional keyword arguments.
            If this argument is not specified, a tester for the
            default oracle is used: BFBrain.Jax_Oracle.test_labeller
        **label_kwargs : dict, optional
            A dictionary of additional keyword arguments needed for the
            labelling function label_fn.

        Returns
        -------
        DataManager
        """
        # Create the symbols for some symbolic manipulation:
        phisym = sym.Array(sym.symbols('phi:' + str(phi_len), real=True))
        lamsym = sym.Array(sym.symbols('lambda:' + str(lam_len), real=True))
        # Initialize the random number generators, one independent
        # stream per task:
        rng = [default_rng(child) for child in SeedSequence(seed).spawn(6)]
        # Now create a symbolic expression for the quartic part of the
        # potential (and its gradient) from the symbolic function:
        if polar:
            # A point on the unit hypersphere needs one fewer angle than
            # it has Cartesian components.
            phisym_var = sym.Array(sym.symbols('theta:' + str(phi_len - 1), real=True))
            potential = sym_func(phisym, lamsym).subs(zip(phisym, convert_from_polar_sym(phisym_var)))
            # Differentiate with respect to each coefficient, simplify
            # every derivative separately, then recombine with the
            # coefficients. NOTE(review): this reproduces the potential
            # only if it is linear in the coefficients -- confirm.
            sym_expr = re(potential.diff(lamsym)).applyfunc(sym.simplify).dot(lamsym)
        else:
            phisym_var = phisym
            sym_expr = re(sym_func(phisym, lamsym))
        sym_grad_expr = sym_expr.diff(phisym_var).applyfunc(sym.simplify)
        return cls(phi_len, lam_len, rng, polar, sym_expr, sym_grad_expr, phisym_var, lamsym, lambdify_mode, label_fn, label_check, **label_kwargs)

    def __getstate__(self):
        """Modify pickle's saving of this class to avoid unpicklable
        objects.
        """
        state = self.__dict__.copy()
        # The labeller holds compiled numeric functions; it is rebuilt
        # in __setstate__ instead of being stored. pop() tolerates an
        # instance on which the labeller was never built.
        state.pop('labeller', None)
        return state

    def __setstate__(self, state):
        """Modify pickle's loading of this class to reconstruct
        unpicklable objects.
        """
        self.__dict__.update(state)
        self._init_labeller()

    def create_random_lambdas(self, nlams, validation=False):
        """Create a list of random sets of quartic potential coefficients
        (but don't label them yet). Use independent uncorrelated rng's for
        the generation of a validation and training set. Notice that these
        lambdas are Cartesian coordinates that uniformly sample the
        unit hypersphere.

        Parameters
        ----------
        nlams : int
            The number of sets of quartic coefficients to generate
            randomly.
        validation: bool, default=False
            A flag denoting which random number generator to use,
            ensuring independently-generated validation and training sets.
            If True, use the random number generator for the
            validation set, while if False, use the random number
            generator for the training set.

        Returns
        -------
        np.array(np.float32, np.float32)
            A 2-D NumPy array representing a list of sets of quartic
            coefficients for the potential.
        """
        generator = self.rng[1] if validation else self.rng[0]
        return rand_nsphere(nlams, self.lam_len, generator).astype(np.float32)

    def check_labeller(self, nlams, **tester_kwargs):
        """A wrapper for calling the labeller's check_labeller function.
        Generates sample quartic coefficients randomly (with the
        validation random number generator) before running
        labeller.check_labeller on them.

        Parameters
        ----------
        nlams : int
            The number of sets of quartic coefficients to randomly
            generate for testing the labeller function's consistency.
        tester_kwargs : dict
            Additional keyword arguments required by the check_labeller
            function. If the default oracle and tester are used, possible
            keyword arguments are niter_step, count_success, max_iter,
            verbose. See BFBrain.Jax_Oracle.test_labeller for details.

        Returns
        -------
        Same type as labeller.check_labeller, which may be a
        user-written function. If the default oracle and tester are
        used, this function will be BFBrain.Jax_Oracle.test_labeller.
        """
        lams = self.create_random_lambdas(nlams, validation=True)
        return self.labeller.check_labeller(lams, **tester_kwargs)

    def checklam_all(self, lams, truth_label_fn=None, label_kwargs=None):
        """Labels sets of quartic potential coefficients with True
        (for bounded from below) or False (not bounded from below).

        Parameters
        ----------
        lams : np.array(np.float32, np.float32)
            A 2-D NumPy array of quartic coefficients of the potential.
            Each entry along the 0 axis corresponds to a single set of
            quartic potential coefficients specifying a potential
            function.
        truth_label_fn : callable, optional
            Must take a 1-D NumPy array representing a single set of
            quartic coefficients and return a Boolean True if the
            potential they describe is bounded from below, False
            otherwise. If this argument is specified, the method
            will use this callable to label lams instead of the labeller
            class. This is used in specific instances when a fast symbolic
            expression for the bounded-from-below constraints is known,
            and the performance of the classifier training loop can be
            evaluated in the absence of noise due to the approximate
            labeller. Obviously the use case of the classifier is for
            potentials where such a symbolic expression is NOT known,
            so the real-world model building usefulness of this option
            is limited.
        label_kwargs : dict, optional
            If these are specified, the oracle will use the keyword
            arguments given here instead of the keyword arguments
            specified in the DataManager constructor.

        Returns
        -------
        np.array(bool)
            A Boolean NumPy array of labels for each set of coefficients
            in lams.
        """
        if truth_label_fn is not None:
            return np.array([truth_label_fn(lam) for lam in lams])
        return self.labeller.do_labelling(lams, label_kwargs)

    def create_data(self, lams, truth_label_fn=None, label_kwargs=None):
        """Given an unlabelled 2-D NumPy array of sets of quartic
        coefficients, label them and return an np_data object.

        Parameters
        ----------
        lams : np.array(np.float32, np.float32)
            A 2-D NumPy array of quartic coefficients of the potential.
            Each entry along the 0 axis corresponds to a single set of
            quartic potential coefficients specifying a
            potential function.
        truth_label_fn : callable, optional
            Must take a 1-D NumPy array representing a single set of
            quartic coefficients and return a Boolean True if the
            potential they describe is bounded from below, False
            otherwise. If this argument is specified, the method will use
            this callable to label lams instead of the labeller class.
            This is used in specific instances when a fast symbolic
            expression for the bounded-from-below constraints is known,
            and the performance of the classifier training loop can be
            evaluated in the absence of noise due to the approximate
            oracle. Obviously the use case of the classifier is for
            potentials where such a symbolic expression is NOT known,
            so the real-world model building usefulness of this option
            is limited.
        label_kwargs : dict, optional
            If these are specified, the oracle will use the keyword
            arguments given here instead of the keyword arguments
            specified in the DataManager constructor.

        Returns
        -------
        np_data
            An np_data object representing the labelled contents of the
            input array lams.
        """
        is_bfb = self.checklam_all(lams, truth_label_fn, label_kwargs)
        # Boolean-mask lams into its positive and negative subsets.
        return np_data(lams[is_bfb], lams[~is_bfb])

    def create_random_data(self, nlams, validation=False, truth_label_fn=None, label_kwargs=None):
        """Creates a random sample of Cartesian lambda coefficients and
        labels them, then storing the results in an np_data object.

        Parameters
        ----------
        nlams : int
            The number of sets of quartic potential coefficients to
            generate.
        validation : bool, default=False
            If True, use the validation random number generator to
            generate the random coefficients. If False, use the training
            random number generator.
        truth_label_fn : callable, optional
            Must take a 1-D NumPy array representing a single set of
            quartic coefficients and return a Boolean True if the
            potential they describe is bounded from below, False
            otherwise. If this argument is specified, the method will use
            this callable to label lams instead of the labeller class.
            This is used in specific instances when a fast symbolic
            expression for the bounded-from-below constraints is known,
            and the performance of the classifier training loop can be
            evaluated in the absence of noise due to the approximate
            labeller. Obviously the use case of the classifier is for
            potentials where such a symbolic expression is NOT known,
            so the real-world model building usefulness of this option
            is limited.
        label_kwargs : dict, optional
            If these are specified, the oracle will use the keyword
            arguments given here instead of the keyword arguments
            specified in the DataManager constructor.

        Returns
        -------
        np_data
            An np_data object that represents the labelled sets of
            quartic coefficients that was randomly generated.
        """
        lams = self.create_random_lambdas(nlams, validation)
        return self.create_data(lams, truth_label_fn, label_kwargs)

    def check_accuracy_with_better_labeller(self, in_data, label_kwargs):
        """A method for evaluating the accuracy of a labeller which is
        capable of mislabelling some False points as True, like the
        default oracle, which is based on global minimization of the
        quartic part of the potential.

        Parameters
        ----------
        in_data : np_data
            An np_data object, labelled with an oracle that can
            mislabel some False points as True (but not the reverse).
        label_kwargs : dict
            A dictionary which specifies the keyword arguments for the
            oracle, which must be selected to yield significantly more
            accurate labels than the ones specified by in_data.

        Returns
        -------
        float
            The precision (fraction of positively labelled points that
            are true positives) of the oracle which originally labelled
            in_data. If in_data contains no positively labelled points
            the precision is undefined and NaN is returned (the oracle
            is not called).
        """
        lams = in_data.pos
        if len(lams) == 0:
            # No positives to re-check: avoid a division by zero.
            return float('nan')
        labels = self.checklam_all(lams, label_kwargs=label_kwargs)
        # Points the better oracle now rejects were false positives.
        return 1. - len(labels[~labels]) / len(lams)

    def balance_array(self, data):
        """Given an np_data object that has more negative (not
        bounded-from-below) points than positives (bounded-from-below),
        rebalance data to include new positive points generated by
        leveraging the convexity of the space of bounded-from-below points.
        If there are as many or more positive points as negative points
        in the np_data object, or if there are no positive points to
        build from, leaves the np_data object unmodified.

        Parameters
        ----------
        data : np_data
            The np_data object that has many more negatively-labelled
            points (that is, points which are not bounded-from-below)
            than positives. Modified in place.
        """
        pos = len(data.pos)
        neg = len(data.neg)
        # Nothing to do without positive seeds, or when positives are
        # not outnumbered (this also covers the case of no negatives).
        if pos == 0 or pos >= neg:
            return
        # Generate new points from the positive points that are already
        # in the data, and append them in place.
        new_lams = self._create_new_positives(data.pos, neg - pos)
        data.append_data(np_data(new_lams, np.array([])))

    def _create_new_positives(self, lams, nlams):
        """Generates new bounded-from-below points from existing known
        ones. Called by balance_array.

        Parameters
        ----------
        lams : np.array(np.float32, np.float32)
            A 2-D NumPy array of quartic coefficients of the potential.
            Each entry along the 0 axis corresponds to a single set of
            quartic potential coefficients specifying a potential function.
            Important that all elements of lams are bounded-from-below.
        nlams : int
            The number of new points to generate

        Returns
        -------
        np.array(np.float32, np.float32)
            A 2-D NumPy array representing nlams sets of new quartic
            coefficients which are bounded-from-below, since they are
            sampled along line segments between existing
            bounded-from-below points (and then rescaled to unit norm).
            If lams holds a single point, every segment is degenerate and
            the output consists of normalized copies of that point.
        """
        # Two distinct points cannot be drawn from fewer than two, so
        # only then allow the same point to be drawn twice.
        allow_repeat = len(lams) < 2
        new_lams = np.empty((nlams, self.lam_len))
        # Drawn one pair at a time (not vectorized) so the random
        # streams are consumed in a fixed, reproducible order.
        for i in range(nlams):
            pair = self.rng[2].choice(lams, size=2, replace=allow_repeat)
            t = self.rng[3].random()
            new_lams[i] = t * pair[0] + (1. - t) * pair[1]
        norms = np.linalg.norm(new_lams, axis=1, keepdims=True)
        return (new_lams / norms).astype(np.float32)

    def create_dataset(self, data, validation=False):
        """Given an np_data object, creates a Tensorflow dataset object
        for training.

        Parameters
        ----------
        data : np_data
        validation : bool, default=False
            If True, don't shuffle the dataset. Useful for keeping track
            of agreement on the validation set for the model after
            successive active learning rounds.

        Returns
        -------
        tf.data.Dataset
            This dataset is NOT batched, but is randomly shuffled on the
            CPU (unless validation is True). To use for training, it is
            necessary to batch the dataset object with tf.data.Dataset's
            batch method.
        """
        n_pos = len(data.pos)
        n_neg = len(data.neg)
        # Create NumPy arrays of the quartic coefficient sets (lams)
        # and their labels (labs): positives first, then negatives.
        lams = np.concatenate((data.pos, data.neg), axis=0)
        labs = np.concatenate((np.ones(shape=n_pos, dtype=bool), np.zeros(shape=n_neg, dtype=bool)), axis=0)
        if not validation:
            # Apply one shared permutation so labels stay aligned.
            shuff = self.rng[4].permutation(len(labs))
            lams = lams[shuff]
            labs = labs[shuff]
        return tf.data.Dataset.from_tensor_slices((lams, labs))

    def generate_L(self, nL, lams, hop_dist, probs=None, rand_fraction=0.):
        """Randomly generate a sample of new points in the vicinity of
        some existing points. Given some set of existing points, samples
        new points by making random hops of an angle given by a draw from
        a normal distribution in a random direction along the
        unit hypersphere in quartic coefficient space.

        Parameters
        ----------
        nL : int
            The number of new sets of quartic coefficients to generate.
        lams : np.array(np.float32, np.float32)
            A 2-D NumPy array representing sets of quartic potential
            coefficients. New points will be sampled in the vicinity
            of these.
        hop_dist : float
            The distance scale for sampling around the coefficients in
            lams. Newly-generated points are taken from input points
            by randomly rotating points in lams by an angle taken from
            a normal distribution with standard deviation hop_dist.
        probs : np.array(np.float32), optional
            A 1-D NumPy array, should be an array of nonnegative floats
            which sum to 1, representing the probability of the function
            selecting each index of lams to generate new points around.
            If not specified, a uniform selection probability for all
            points in lams is used.
        rand_fraction : float, optional
            Must be a non-negative float between 0 and 1. If specified,
            then the method will sample that fraction of its points as
            uniformly distributed draws from the surface of the unit
            hypersphere in quartic coefficient space, instead of sampling
            in the vicinity of points in lams. If not specified, all
            generated points will be sampled in the vicinity of points
            in lams.

        Returns
        -------
        np.array(np.float32, np.float32)
            A 2-D NumPy array representing a list of sets of quartic
            coefficients for the potential. The points sampled around
            lams come first, followed by any uniformly sampled points.
        """
        n_uniform = int(np.rint(nL * rand_fraction))
        n_local = nL - n_uniform
        # Draw (with replacement) the points to hop away from.
        picked_lams = self.rng[2].choice(lams, size=n_local, replace=True, p=probs, axis=0)
        # Generate random unit vectors orthogonal to picked_lams:
        # subtract from each random vector its projection onto the
        # picked point, then renormalize.
        # NOTE(review): the projection assumes rows of lams have unit
        # norm -- confirm against callers.
        orth_rands = self.create_random_lambdas(n_local)
        overlap = np.sum(orth_rands * picked_lams, axis=1)[:, np.newaxis]
        orth_rands = orth_rands - overlap * picked_lams
        orth_norm = np.sqrt(np.sum(orth_rands * orth_rands, axis=1)[:, np.newaxis])
        orth_rands = (orth_rands / orth_norm).astype(np.float32)
        # Create a list of random rotation angles to use here.
        rot_rands = self.rng[3].normal(loc=0., scale=hop_dist, size=n_local).astype(np.float32)
        # Rotate picked_lams in the direction of orth_rands by a random
        # angle.
        L_array = orth_rands * (np.sin(rot_rands)[:, np.newaxis]) + picked_lams * (np.cos(rot_rands)[:, np.newaxis])
        if n_uniform == 0:
            return L_array
        rand_array = self.create_random_lambdas(n_uniform)
        return np.concatenate((L_array, rand_array), axis=0)