Source code for bfbrain.Data_Manager

"""A module containing the DataManager class, which handles the generation 
and labelling of training and validation data for the BFBLearner class.
"""

from bfbrain.Jax_Oracle import label_func, test_labeller
import jax
import sympy as sym
from sympy import lambdify, re

import tensorflow as tf
import numpy as np

from numpy.random import default_rng
from numpy.random import SeedSequence
from bfbrain.Hypersphere_Formulas import rand_nsphere, convert_from_polar_sym

class labeller_wrapper:
    """Wrapper for the labelling function which serves as the active learning oracle.

    Attributes
    ----------
    func : callable
        A numeric function for the potential. This is a numeric function
        generated by the class DataManager in its init method. It will take
        numeric arrays (of a format depending on the DataManager class)
        representing a scalar vev and a set of quartic potential coefficients
        and return the numerical value of the quartic part of the potential
        function and its gradient with respect to the vev.

    phi_len : int
        The number of real parameters necessary to uniquely specify a vev in
        the model.

    lam_len : int
        The number of independent real quartic coefficients in the model.

    rng : np.random.Generator
        The random number generator which governs any random processes that
        the oracle may use.

    polar : bool
        If True, then the analysis of the potential will be conducted with a
        polar coordinate parameterization of the vev parameters. If False,
        then Cartesian coordinates will be used, albeit with the vev
        parameters restricted to a phi_len-dimensional unit hypersphere.

    label_fn : callable, optional
        A function that takes a 2-D NumPy array of quartic coefficients and
        returns a list of Boolean labels for them. This is for implementing
        customized oracle functions. Must have the signature
        (func: Callable, phi_len: int, polar: bool, rng: NumpyGenerator,
        lam: np.array(np.float, np.float), **kwargs) -> np.array(bool)
        If this argument is not specified, the default oracle
        BFBrain.Jax_Oracle.label_func is used.

    label_check : callable, optional
        A function that can be used to test the reliability of a custom
        oracle given by label_fn, or if label_fn is None, the default oracle
        BFBrain.Jax_Oracle.label_func. Must have the same signature as
        label_fn, up to additional keyword arguments. If this argument is
        not specified, a tester for the default oracle is used:
        BFBrain.Jax_Oracle.test_labeller

    label_kwargs : dict, default=dict(niter = 250)
        A dictionary of additional keyword arguments needed for the labelling
        function label_func. The default values are applicable for the
        default oracle function, BFBrain.Jax_Oracle.label_func
    """

    def __init__(self, func, phi_len, lam_len, rng, polar, label_fn=None,
                 label_check=None, label_kwargs=None):
        self.func = func
        self.phi_len = phi_len
        self.lam_len = lam_len
        self.rng = rng
        self.polar = polar
        # Build the default keyword dictionary per instance. A dict literal in
        # the signature would be shared by every wrapper constructed without
        # explicit kwargs, so mutating one would silently change the others.
        # A dictionary supplied by the caller is stored as-is (not copied).
        self.label_kwargs = dict(niter=250) if label_kwargs is None else label_kwargs
        # Fall back to the package's default oracle and tester when no
        # custom callables are supplied.
        self.label_fn = label_func if label_fn is None else label_fn
        self.label_check = test_labeller if label_check is None else label_check

    def do_labelling(self, lam, label_kwargs=None):
        """Performs labelling using the class's oracle function.

        Parameters
        ----------
        lam : np.array(np.float32, np.float32)
            A 2-D NumPy array of sets of quartic potential coefficients.

        label_kwargs : dict, optional
            An optional alternative set of oracle keyword arguments. If not
            specified, the class instance's label_kwargs attribute is used.

        Returns
        -------
        np.array(bool)
            A 1-D NumPy array of labels for lam, for which points that are
            bounded from below are labelled "True" and points which are not
            are labelled "False".
        """
        kwargs = self.label_kwargs if label_kwargs is None else label_kwargs
        return self.label_fn(self.func, self.phi_len, self.polar, self.rng, lam, **kwargs)

    def check_labeller(self, lam, **tester_kwargs):
        """Tests the reliability of the labelling-- calls self.label_check.

        Depending on the methodology of label_func, this function may or may
        not be useful. For example, a rigorous computation of
        boundedness-from-below based on resultants would not require any
        consistency or reliability checks.

        Parameters
        ----------
        lam : np.array(np.float32, np.float32)
            A 2-D NumPy array of sets of quartic potential coefficients.

        tester_kwargs : dict
            A set of keyword arguments for self.label_check. These are merged
            on top of the instance's label_kwargs, so a key given here
            overrides the same key in label_kwargs.

        Returns
        -------
        Any
            Will return what self.label_check returns.
        """
        merged_kwargs = {**self.label_kwargs, **tester_kwargs}
        return self.label_check(self.func, self.phi_len, self.polar, self.rng, lam, **merged_kwargs)
class np_data:
    """Holds labelled sets of quartic coefficients in CPU memory in a format
    that's easy to save, load, and manipulate.

    Attributes
    ----------
    pos : np.array(np.float32, np.float32)
        A 2-D NumPy array of sets of quartic coefficients in the potential,
        which the labeller has determined are bounded-from-below.

    neg : np.array(np.float32, np.float32)
        A 2-D NumPy array of sets of quartic coefficients in the potential,
        which the labeller has determined are NOT bounded-from-below.
    """

    def __init__(self, pos, neg):
        self.pos = pos
        self.neg = neg

    @classmethod
    def from_file(cls, path):
        """A constructor for loading an np_data object from a .npz file (see
        NumPy documentation), likely created in a previous BFBrain analysis.

        Parameters
        ----------
        path : str
            A string with a file name. '.npz' is appended to the end of the
            string, and should not be included in path.

        Returns
        -------
        np_data
        """
        # np.load returns a lazily-read archive that holds the file open; the
        # context manager closes it once both arrays have been copied out.
        with np.load(path + '.npz') as npz_data:
            return cls(pos=np.copy(npz_data['pos']), neg=np.copy(npz_data['neg']))

    def save_data(self, path):
        """Saves the data object to the filepath specified as an npz object
        (see NumPy documentation)

        Parameters
        ----------
        path : str
            A string with a file name. If .npz is not at the end of the
            string, it is appended to it.
        """
        np.savez(path, pos=self.pos, neg=self.neg)

    @staticmethod
    def _append_rows(existing, incoming):
        """Return existing with the rows of incoming appended along axis 0.

        If incoming has no rows, existing is returned unchanged. If existing
        is an empty placeholder whose row shape differs from incoming's (for
        example the 1-D np.array([])), it is treated as an empty array of
        incoming's row shape and dtype so that the concatenation succeeds.
        """
        if len(incoming) == 0:
            return existing
        incoming = np.asarray(incoming)
        existing = np.asarray(existing)
        if existing.size == 0 and existing.shape[1:] != incoming.shape[1:]:
            existing = np.empty((0,) + incoming.shape[1:], dtype=incoming.dtype)
        return np.concatenate((existing, incoming), axis=0)

    def append_data(self, new_data):
        """Given another np_data object, appends its data to this object in
        place.

        Parameters
        ----------
        new_data: np_data
            The object whose pos and neg entries are appended to this
            object's pos and neg arrays. Empty entries are skipped.
        """
        self.pos = self._append_rows(self.pos, new_data.pos)
        self.neg = self._append_rows(self.neg, new_data.neg)

    def n_elements(self):
        """Computes the total number of sets of quartic coefficients in the
        object (both bounded-from-below and not bounded-from-below)

        Returns
        -------
        int
            The total number of sets of quartic coefficients in the np_data
            object.
        """
        return len(self.pos) + len(self.neg)
class DataManager:
    """A class containing methods which process and generate data.

    Note that this class contains all the random number generation that's
    not specifically associated with the neural network and its optimizer.
    Generally one should use the 'from_func' constructor rather than
    constructing from the base initialization method.

    Attributes
    ----------
    phi_len : int
        The number of independent real parameters needed to uniquely specify
        a vev in the model.

    lam_len : int
        The number of independent real quartic potential coefficients in the
        model.

    rng : list of numpy.random.Generator
        A list of NumPy random number generators which control all the random
        generation related to the generation and labelling of data. The
        'from_func' constructor creates 6 generators, each differently seeded
        using NumPy's SeedSequence.spawn method. Their roles in this class:

        - rng[0]: uniform draws on the unit hypersphere for training data
          (create_random_lambdas with validation=False; generate_L also draws
          its random directions and its uniformly sampled fraction from it).
        - rng[1]: uniform draws on the unit hypersphere for validation data.
        - rng[2]: selecting which existing points to start from
          (generate_L and _create_new_positives).
        - rng[3]: rotation angles in generate_L and interpolation parameters
          in _create_new_positives.
        - rng[4]: shuffling data in create_dataset.
        - rng[5]: handed to the labeller for any random number generation
          associated with labelling.

    polar : bool
        If true, the potential is analyzed with the vev coordinates converted
        to a polar form. If false, they are analyzed in their Cartesian form.

    sym_expr : SymPy expression
        Represents the potential function in a form that is both picklable
        and can easily be used to generate the gradient symbolically.

    sym_grad_expr : SymPy expression
        Represents the gradient of the potential function with respect to
        the vev parameters in a picklable symbolic form.

    phisym_var : sympy.Array
        The symbols representing the vev parameters in sym_expr (the polar
        angles when polar is True, the Cartesian components otherwise).

    lamsym : sympy.Array
        The symbols representing the quartic potential coefficients in
        sym_expr.

    lambdify_mode : {'jax', 'numpy', 'scipy', 'math', 'mpmath', 'numexpr', 'sympy', 'tensorflow'}
        Passed directly as the argument "modules" in sympy.lambdify, the
        function used to generate numerical functions from the symbolic
        expression for the scalar potential. Default value is 'jax',
        consistent with the default oracle function,
        BFBrain.Jax_Oracle.label_func

    label_fn : callable, optional
        A function that takes a 2-D NumPy array of quartic coefficients and
        returns a list of Boolean labels for them. This is for implementing
        customized oracle functions. Must have the signature
        (func: Callable, phi_len: int, polar: bool, rng: NumpyGenerator,
        lam: np.array(np.float, np.float), **kwargs) -> np.array(bool)
        If this argument is not specified, the default oracle
        BFBrain.Jax_Oracle.label_func is used.

    label_check : callable, optional
        A function that can be used to test the reliability of a custom
        oracle given by label_fn, or if label_fn is None, the default oracle
        BFBrain.Jax_Oracle.label_func. Must have the same signature as
        label_fn, up to additional keyword arguments. If this argument is
        not specified, a tester for the default oracle is used:
        BFBrain.Jax_Oracle.test_labeller

    **label_kwargs : dict, optional
        A dictionary of additional keyword arguments needed for the labelling
        function label_func.
    """

    def __init__(self, phi_len, lam_len, rng, polar, sym_expr, sym_grad_expr,
                 phisym_var, lamsym, lambdify_mode='jax', label_fn=None,
                 label_check=None, **label_kwargs):
        self.phi_len = phi_len
        self.lam_len = lam_len
        self.rng = rng
        self.polar = polar
        self.sym_expr = sym_expr
        self.sym_grad_expr = sym_grad_expr
        self.phisym_var = phisym_var
        self.lamsym = lamsym
        self.lambdify_mode = lambdify_mode
        self.label_fn = label_fn
        self.label_check = label_check
        self.label_kwargs = label_kwargs

        # Initialize the labeller from the attributes stored above.
        self.labeller = self._build_labeller()

    def _build_labeller(self):
        """Create the numeric potential/gradient function and wrap it in a
        labeller_wrapper.

        Shared by __init__ and __setstate__ so the compiled (unpicklable)
        functions are always rebuilt the same way from the stored symbolic
        expressions.

        Returns
        -------
        labeller_wrapper
        """
        # Create numerical functions for the value and gradient of the
        # quartic part of the potential from the symbolic function.
        sym_args = [self.phisym_var, self.lamsym]
        num_func = lambdify(sym_args, self.sym_expr, self.lambdify_mode)
        num_grad = lambdify(sym_args, self.sym_grad_expr, self.lambdify_mode)

        if self.lambdify_mode == 'jax':
            num_func = jax.jit(num_func)
            num_grad = jax.jit(num_grad)

            @jax.jit
            def min_func(phi, lam):
                return num_func(phi, lam), num_grad(phi, lam)
        elif self.lambdify_mode == 'tensorflow':
            num_func = tf.function(num_func)
            num_grad = tf.function(num_grad)

            @tf.function
            def min_func(phi, lam):
                return num_func(phi, lam), tf.stack(num_grad(phi, lam))
        else:
            def min_func(phi, lam):
                return num_func(phi, lam), num_grad(phi, lam)

        return labeller_wrapper(min_func, self.phi_len, self.lam_len, self.rng[5],
                                self.polar, self.label_fn, self.label_check,
                                self.label_kwargs)

    @classmethod
    def from_func(cls, sym_func, phi_len, lam_len, seed=None, polar=False,
                  lambdify_mode='jax', label_fn=None, label_check=None,
                  **label_kwargs):
        """Preferred constructor for initializing DataManager.

        Parameters
        ----------
        sym_func : SymPy function.
            A SymPy function that expresses the quartic part of the
            potential. Must have the signature
            (sympy.Array, sympy.Array) -> sympy.Expr, where the first
            sympy.Array object corresponds to the vev configuration and the
            second corresponds to the quartic coefficients in the potential.

        phi_len : int
            The number of real parameters needed to uniquely specify the vev
            in the model.

        lam_len : int
            The number of independent real quartic coupling coefficients in
            the model's potential function.

        seed : int, optional
            A random number seed. Used to spawn a sequence of random
            generators with SeedSequence.

        polar : bool, default=False
            If true, the potential is analyzed with the vev coordinates
            converted to a polar form. If false, they are analyzed in their
            Cartesian form.

        lambdify_mode : {'jax', 'numpy', 'scipy', 'math', 'mpmath', 'numexpr', 'sympy', 'tensorflow'}
            The "module" input to sympy.lambdify, used to extract numerical
            expressions from the symbolic SymPy function. See SymPy
            documentation for details.

        label_fn : callable, optional
            A function that takes a 2-D NumPy array of quartic coefficients
            and returns a list of Boolean labels for them. This is for
            implementing customized oracle functions. Must have the signature
            (func: Callable, phi_len: int, polar: bool,
            rng: numpy.random.Generator,
            lam: np.array(np.float, np.float), **kwargs) -> np.array(bool)
            If this argument is not specified, the default oracle
            BFBrain.Jax_Oracle.label_func is used.

        label_check : callable, optional
            A function that can be used to test the reliability of a custom
            oracle given by label_fn, or if label_fn is None, the default
            oracle BFBrain.Jax_Oracle.label_func. Must have the same
            signature as label_fn, up to additional keyword arguments. If
            this argument is not specified, a tester for the default oracle
            is used: BFBrain.Jax_Oracle.test_labeller

        **label_kwargs : dict, optional
            A dictionary of additional keyword arguments needed for the
            labelling function label_func.

        Returns
        -------
        DataManager
        """
        # Create the symbols for some symbolic manipulation:
        phisym = sym.Array(sym.symbols('phi:'+str(phi_len), real = True))
        lamsym = sym.Array(sym.symbols('lambda:'+str(lam_len), real = True))

        # Initialize the random number generators:
        seeds = SeedSequence(seed).spawn(6)
        rng = [default_rng(seeds[i]) for i in range(6)]

        # Now create a simplified symbolic expression for the quartic part of
        # the potential from the symbolic function:
        if(polar):
            # In polar form the vev is described by phi_len - 1 angle symbols,
            # which replace the Cartesian symbols via convert_from_polar_sym.
            phisym_pol = sym.Array(sym.symbols('theta:'+str(phi_len-1), real = True))
            sym_expr = (re(sym_func(phisym, lamsym).subs(zip(phisym, convert_from_polar_sym(phisym_pol))).diff(lamsym))).applyfunc(sym.simplify).dot(lamsym)
            sym_grad_expr = (sym_expr.diff(phisym_pol).applyfunc(sym.simplify))
            return cls(phi_len, lam_len, rng, polar, sym_expr, sym_grad_expr,
                       phisym_pol, lamsym, lambdify_mode, label_fn,
                       label_check, **label_kwargs)
        else:
            sym_expr = re(sym_func(phisym, lamsym))
            sym_grad_expr = (sym_expr.diff(phisym).applyfunc(sym.simplify))
            return cls(phi_len, lam_len, rng, polar, sym_expr, sym_grad_expr,
                       phisym, lamsym, lambdify_mode, label_fn, label_check,
                       **label_kwargs)

    def __getstate__(self):
        """Modify pickle's saving of this class to avoid unpicklable objects.

        The labeller is dropped from the saved state; it is rebuilt from the
        stored symbolic expressions in __setstate__.
        """
        state = self.__dict__.copy()
        del state['labeller']
        return state

    def __setstate__(self, state):
        """Modify pickle's loading of this class to reconstruct unpicklable
        objects.
        """
        self.__dict__.update(state)
        # Recreate the numerical functions and the labeller.
        self.labeller = self._build_labeller()

    def create_random_lambdas(self, nlams, validation=False):
        """Create a list of random sets of quartic potential coefficients
        (but don't label them yet).

        Use independent uncorrelated rng's for the generation of a validation
        and training set. Notice that these lambdas are Cartesian coordinates
        that uniformly sample the unit hypersphere.

        Parameters
        ----------
        nlams : int
            The number of sets of quartic coefficients to generate randomly.

        validation: bool, default=False
            A flag denoting which random number generator to use, ensuring
            independently-generated validation and training sets. If True,
            use the random number generator for the validation set, while if
            False, use the random number generator for the training set.

        Returns
        -------
        np.array(np.float32, np.float32)
            A 2-D NumPy array representing a list of sets of quartic
            coefficients for the potential.
        """
        generator = self.rng[1] if validation else self.rng[0]
        return rand_nsphere(nlams, self.lam_len, generator).astype(np.float32)

    def check_labeller(self, nlams, **tester_kwargs):
        """A wrapper for calling the labeller's check_labeller function.

        Generates sample quartic coefficients randomly (with the validation
        random number generator) before running labeller.check_labeller on
        them.

        Parameters
        ----------
        nlams : int
            The number of sets of quartic coefficients to randomly generate
            for testing the labeller function's consistency.

        tester_kwargs : dict
            Additional keyword arguments required by the check_labeller
            function. If the default oracle and tester are used, possible
            keyword arguments are niter_step, count_success, max_iter,
            verbose. See BFBrain.Jax_Oracle.test_labeller for details.

        Returns
        -------
        Same type as labeller.check_labeller, which may be a user-written
        function. If the default oracle and tester are used, this function
        will be BFBrain.Jax_Oracle.test_labeller.
        """
        lams = self.create_random_lambdas(nlams, validation=True)
        return self.labeller.check_labeller(lams, **tester_kwargs)

    def checklam_all(self, lams, truth_label_fn=None, label_kwargs=None):
        """Labels sets of quartic potential coefficients with True (for
        bounded from below) or False (not bounded from below).

        Parameters
        ----------
        lams : np.array(np.float32, np.float32)
            A 2-D NumPy array of quartic coefficients of the potential. Each
            entry along the 0 axis corresponds to a single set of quartic
            potential coefficients specifying a potential function.

        truth_label_fn : callable, optional
            Must take a 1-D NumPy array representing a single set of quartic
            coefficients and return a Boolean True if the potential they
            describe is bounded from below, False otherwise. If this argument
            is specified, the method will use this callable to label lams
            instead of the labeller class. This is used in specific instances
            when a fast symbolic expression for the bounded-from-below
            constraints is known, and the performance of the classifier
            training loop can be evaluated in the absence of noise due to the
            approximate labeller. Obviously the use case of the classifier is
            for potentials where such a symbolic expression is NOT known, so
            the real-world model building usefulness of this option is
            limited.

        label_kwargs : dict, optional
            If these are specified, the oracle will use the keyword arguments
            given here instead of the keyword arguments specified in the
            DataManager constructor.

        Returns
        -------
        np.array(bool)
            A Boolean NumPy array of labels for each set of coefficients in
            lams. When the labeller is used, this is whatever the oracle
            function returns.
        """
        if truth_label_fn is not None:
            return np.array([truth_label_fn(lam) for lam in lams])
        return self.labeller.do_labelling(lams, label_kwargs)

    def create_data(self, lams, truth_label_fn=None, label_kwargs=None):
        """Given an unlabelled 2-D NumPy array of sets of quartic
        coefficients, label them and return an np_data object.

        Parameters
        ----------
        lams : np.array(np.float32, np.float32)
            A 2-D NumPy array of quartic coefficients of the potential. Each
            entry along the 0 axis corresponds to a single set of quartic
            potential coefficients specifying a potential function.

        truth_label_fn : callable, optional
            Must take a 1-D NumPy array representing a single set of quartic
            coefficients and return a Boolean True if the potential they
            describe is bounded from below, False otherwise. If this argument
            is specified, the method will use this callable to label lams
            instead of the labeller class. See checklam_all for when this is
            useful.

        label_kwargs : dict, optional
            If these are specified, the oracle will use the keyword arguments
            given here instead of the keyword arguments specified in the
            DataManager constructor.

        Returns
        -------
        np_data
            An np_data object representing the labelled contents of the input
            array lams.
        """
        labels = self.checklam_all(lams, truth_label_fn, label_kwargs)
        # Oracle functions are documented as possibly returning a plain list
        # of Booleans; coerce to a Boolean array so that both mask indexing
        # and the ~ negation below are well defined.
        mask = np.asarray(labels, dtype=bool)
        return np_data(lams[mask], lams[~mask])

    def create_random_data(self, nlams, validation=False, truth_label_fn=None,
                           label_kwargs=None):
        """Creates a random sample of Cartesian lambda coefficients and
        labels them, then storing the results in an np_data object.

        Parameters
        ----------
        nlams : int
            The number of sets of quartic potential coefficients to generate.

        validation : bool, default=False
            If True, use the validation random number generator to generate
            the random coefficients. If False, use the training random number
            generator.

        truth_label_fn : callable, optional
            Must take a 1-D NumPy array representing a single set of quartic
            coefficients and return a Boolean True if the potential they
            describe is bounded from below, False otherwise. If this argument
            is specified, the method will use this callable to label lams
            instead of the labeller class. See checklam_all for when this is
            useful.

        label_kwargs : dict, optional
            If these are specified, the oracle will use the keyword arguments
            given here instead of the keyword arguments specified in the
            DataManager constructor.

        Returns
        -------
        np_data
            An np_data object that represents the labelled sets of quartic
            coefficients that was randomly generated.
        """
        lams = self.create_random_lambdas(nlams, validation)
        return self.create_data(lams, truth_label_fn, label_kwargs)

    def check_accuracy_with_better_labeller(self, in_data, label_kwargs):
        """A method for evaluating the accuracy of a labeller which is
        capable of mislabelling some False points as True, like the default
        oracle, which is based on global minimization of the quartic part of
        the potential.

        Parameters
        ----------
        in_data : np_data
            An np_data object, labelled with an oracle that can mislabel some
            False points as True (but not the reverse).

        label_kwargs : dict
            A dictionary which specifies the keyword arguments for the
            oracle, which must be selected to yield significantly more
            accurate labels than the ones specified by in_data.

        Returns
        -------
        float
            The precision (fraction of positively labelled points that are
            true positives) of the oracle which originally labelled in_data.

        Raises
        ------
        ZeroDivisionError
            If in_data contains no positively labelled points.
        """
        lams = in_data.pos
        # Coerce to a Boolean array so that oracles returning a plain list of
        # Booleans can be negated with ~.
        labels = np.asarray(self.checklam_all(lams, label_kwargs=label_kwargs), dtype=bool)
        n_relabelled_false = np.count_nonzero(~labels)
        return 1. - n_relabelled_false / len(lams)

    def balance_array(self, data):
        """Given an np_data object that has more negative (not
        bounded-from-below) points than positives (bounded-from-below),
        rebalance data to include new positive points generated by leveraging
        the convexity of the space of bounded-from-below points.

        If there are as many or more positive points as negative points in
        the np_data object, or fewer than two positive points to interpolate
        between, leaves the np_data object unmodified.

        Parameters
        ----------
        data : np_data
            The np_data object that has many more negatively-labelled points
            (that is, points which are not bounded-from-below) than
            positives. Modified in place.
        """
        # Get the positive and negative data sets.
        lam_pos = data.pos
        lam_neg = data.neg
        pos = len(lam_pos)
        neg = len(lam_neg)

        # New positives are sampled between two distinct existing positives,
        # so at least two are required; with fewer there is nothing to
        # interpolate. Also do nothing if there are no negative points or if
        # there are already at least as many positives as negatives.
        if pos < 2 or neg == 0 or pos >= neg:
            return

        # Generate new points from the positive points that are already
        # in the data.
        new_lams = self._create_new_positives(lam_pos, neg - pos)
        new_data = np_data(new_lams, np.array([]))

        # Append new data to the np_data object data.
        data.append_data(new_data)

    def _create_new_positives(self, lams, nlams):
        """Generates new bounded-from-below points from existing known ones.
        Called by balance_array.

        Parameters
        ----------
        lams : np.array(np.float32, np.float32)
            A 2-D NumPy array of quartic coefficients of the potential. Each
            entry along the 0 axis corresponds to a single set of quartic
            potential coefficients specifying a potential function.
            Important that all elements of lams are bounded-from-below, and
            there must be at least two of them since pairs are drawn without
            replacement.

        nlams : int
            The number of new points to generate

        Returns
        -------
        np.array(np.float32, np.float32)
            A 2-D NumPy array representing nlams sets of new quartic
            coefficients which are bounded-from-below, since they are sampled
            along line segments between existing bounded-from-below points
            and then rescaled to unit norm.
        """
        new_lams = np.empty((nlams, self.lam_len))
        # The draws are made one pair at a time (rather than in a batch) so
        # that the sequence of values consumed from each generator, and hence
        # the result for a given seed, is unchanged.
        for i in range(nlams):
            pair = self.rng[2].choice(lams, size=2, replace=False)
            t = self.rng[3].random()
            new_lams[i] = t*pair[0] + (1.-t)*pair[1]
        # Project the interpolated points back onto the unit hypersphere.
        return (new_lams / np.linalg.norm(new_lams, axis=1, keepdims=True)).astype(np.float32)

    def create_dataset(self, data, validation=False):
        """Given an np_data object, creates a Tensorflow dataset object for
        training.

        Parameters
        ----------
        data : np_data

        validation : bool, default=False
            If True, don't shuffle the dataset. Useful for keeping track of
            agreement on the validation set for the model after successive
            active learning rounds.

        Returns
        -------
        tf.data.Dataset
            This dataset is NOT batched, but is randomly shuffled on the CPU
            (unless validation is True). To use for training, it is necessary
            to batch the dataset object with tf.data.Dataset's batch method.
        """
        # Retrieve the members of the np_data object.
        lam_pos = data.pos
        lam_neg = data.neg
        pos = len(lam_pos)
        neg = len(lam_neg)

        # Create NumPy arrays of the quartic coefficient sets (lams)
        # and their labels (labs): positives first, then negatives.
        lams = np.concatenate((lam_pos, lam_neg), axis=0)
        labs = np.concatenate((np.ones(shape=pos, dtype=bool),
                               np.zeros(shape=neg, dtype=bool)), axis=0)

        # Shuffle the order of lams and labs with the same permutation.
        if not validation:
            shuff = self.rng[4].permutation(len(labs))
            lams = lams[shuff]
            labs = labs[shuff]

        # Return the tf.data.Dataset object created from the coefficient
        # sets and their labels.
        return tf.data.Dataset.from_tensor_slices((lams, labs))

    def generate_L(self, nL, lams, hop_dist, probs=None, rand_fraction=0.):
        """Randomly generate a sample of new points in the vicinity of some
        existing points.

        Given some set of existing points, samples new points by making
        random hops of an angle given by a draw from a normal distribution in
        a random direction along the unit hypersphere in quartic coefficient
        space.

        Parameters
        ----------
        nL : int
            The number of new sets of quartic coefficients to generate.

        lams : np.array(np.float32, np.float32)
            A 2-D NumPy array representing sets of quartic potential
            coefficients. New points will be sampled in the vicinity of
            these.

        hop_dist : float
            The distance scale for sampling around the coefficients in lams.
            Newly-generated points are taken from input points by randomly
            rotating points in lams by an angle taken from a normal
            distribution with standard deviation hop_dist.

        probs : np.array(np.float32), optional
            A 1-D NumPy array, should be an array of nonnegative floats which
            sum to 1, representing the probability of the function selecting
            each index of lams to generate new points around. If not
            specified, a uniform selection probability for all points in lams
            is used.

        rand_fraction : float, optional
            Must be a non-negative float between 0 and 1. If specified, then
            the method will sample that fraction of its points as uniformly
            distributed draws from the surface of the unit hypersphere in
            quartic coefficient space, instead of sampling in the vicinity of
            points in lams. If not specified, all generated points will be
            sampled in the vicinity of points in lams.

        Returns
        -------
        np.array(np.float32, np.float32)
            A 2-D NumPy array representing a list of sets of quartic
            coefficients for the potential.
        """
        len_rands = int(np.rint(nL*rand_fraction))
        n_hops = nL - len_rands

        # Create a list of random draws (with replacement) from lams.
        picked_lams = self.rng[2].choice(lams, size=n_hops, replace=True, p=probs, axis=0)

        # Generate an ensemble of random unit vectors that are orthogonal to
        # picked_lams: subtract the component along picked_lams, then
        # renormalize. (The subtraction is an exact projection only if the
        # rows of lams have unit norm.)
        orth_rands = self.create_random_lambdas(n_hops)
        orth_rands = (orth_rands - np.sum(orth_rands * picked_lams, axis=1)[:,np.newaxis]*picked_lams)
        orth_rands = (orth_rands/(np.sqrt(np.sum(orth_rands * orth_rands, axis=1)[:,np.newaxis]))).astype(np.float32)

        # Create a list of random rotation angles to use here.
        rot_rands = self.rng[3].normal(loc=0., scale=hop_dist, size=n_hops).astype(np.float32)

        # Rotate picked_lams in the direction of orth_rands by a random angle.
        L_array = orth_rands*(np.sin(rot_rands)[:, np.newaxis]) + picked_lams*(np.cos(rot_rands)[:, np.newaxis])

        if len_rands == 0:
            return L_array
        # Fill the requested fraction with uniform draws from the hypersphere.
        rand_array = self.create_random_lambdas(len_rands)
        return np.concatenate((L_array, rand_array), axis=0)