diff --git a/CHANGELOG.md b/CHANGELOG.md index b70609e33..fbb08efe2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,8 @@ - Removed output layer L2 Penalty parameter from MLP Learners - Remove Network interface - RBX Serializer only tracks major library version number + - Convert NeuralNet classes to use NDArray instead of Matrix + - Turn back Network interface - 2.5.0 - Added Vantage Point Spatial tree diff --git a/src/Classifiers/LogisticRegression.php b/src/Classifiers/LogisticRegression.php index 8f5f4c2c0..3d749dab7 100644 --- a/src/Classifiers/LogisticRegression.php +++ b/src/Classifiers/LogisticRegression.php @@ -2,6 +2,7 @@ namespace Rubix\ML\Classifiers; +use Rubix\ML\NeuralNet\FeedForward; use Rubix\ML\Online; use Rubix\ML\Learner; use Rubix\ML\Verbose; @@ -289,7 +290,7 @@ public function train(Dataset $dataset) : void $classes = $dataset->possibleOutcomes(); - $this->network = new Network( + $this->network = new FeedForward( new Placeholder1D($dataset->numFeatures()), [new Dense(1, $this->l2Penalty, true, new Xavier1())], new Binary($classes, $this->costFn), diff --git a/src/Classifiers/MultilayerPerceptron.php b/src/Classifiers/MultilayerPerceptron.php index e296915af..34e3d8fe6 100644 --- a/src/Classifiers/MultilayerPerceptron.php +++ b/src/Classifiers/MultilayerPerceptron.php @@ -2,6 +2,7 @@ namespace Rubix\ML\Classifiers; +use Rubix\ML\NeuralNet\FeedForward; use Rubix\ML\Online; use Rubix\ML\Learner; use Rubix\ML\Verbose; @@ -370,7 +371,7 @@ public function train(Dataset $dataset) : void $hiddenLayers[] = new Dense(count($classes), 0.0, true, new Xavier1()); - $this->network = new Network( + $this->network = new FeedForward( new Placeholder1D($dataset->numFeatures()), $hiddenLayers, new Multiclass($classes, $this->costFn), diff --git a/src/Classifiers/SoftmaxClassifier.php b/src/Classifiers/SoftmaxClassifier.php index 3b8581771..560000671 100644 --- a/src/Classifiers/SoftmaxClassifier.php +++ b/src/Classifiers/SoftmaxClassifier.php @@ -2,6 
+2,7 @@ namespace Rubix\ML\Classifiers; +use Rubix\ML\NeuralNet\FeedForward; use Rubix\ML\Online; use Rubix\ML\Learner; use Rubix\ML\Verbose; @@ -285,7 +286,7 @@ public function train(Dataset $dataset) : void $classes = $dataset->possibleOutcomes(); - $this->network = new Network( + $this->network = new FeedForward( new Placeholder1D($dataset->numFeatures()), [new Dense(count($classes), $this->l2Penalty, true, new Xavier1())], new Multiclass($classes, $this->costFn), diff --git a/src/Datasets/Generators/SwissRoll/SwissRoll.php b/src/Datasets/Generators/SwissRoll/SwissRoll.php new file mode 100644 index 000000000..c965ef865 --- /dev/null +++ b/src/Datasets/Generators/SwissRoll/SwissRoll.php @@ -0,0 +1,188 @@ + + */ +class SwissRoll implements Generator +{ + /** + * The center vector of the swiss roll. + * + * @var list + */ + protected array $center; + + /** + * The scaling factor of the swiss roll. + * + * @var float + */ + protected float $scale; + + /** + * The depth of the swiss roll i.e the scale of the y dimension. + * + * @var float + */ + protected float $depth; + + /** + * The standard deviation of the gaussian noise. + * + * @var float + */ + protected float $noise; + + /** + * @param float $x + * @param float $y + * @param float $z + * @param float $scale + * @param float $depth + * @param float $noise + * @throws InvalidArgumentException + */ + public function __construct( + float $x = 0.0, + float $y = 0.0, + float $z = 0.0, + float $scale = 1.0, + float $depth = 21.0, + float $noise = 0.1 + ) { + if ($scale < 0.0) { + throw new InvalidArgumentException('Scale must be' + . " greater than 0, $scale given."); + } + + if ($depth < 0) { + throw new InvalidArgumentException('Depth must be' + . " greater than 0, $depth given."); + } + + if ($noise < 0.0) { + throw new InvalidArgumentException('Noise factor cannot be less' + . 
" than 0, $noise given."); + } + + $this->center = [$x, $y, $z]; + $this->scale = $scale; + $this->depth = $depth; + $this->noise = $noise; + } + + /** + * Return the dimensionality of the data this generates. + * + * @internal + * + * @return int<0,max> + */ + public function dimensions() : int + { + return 3; + } + + /** + * Generate n data points. + * + * @param int<0,max> $n + * @return Labeled + */ + public function generate(int $n) : Labeled + { + $range = M_PI + HALF_PI; + + $t = []; + $y = []; + $coords = []; + + for ($i = 0; $i < $n; ++$i) { + $u = mt_rand() / mt_getrandmax(); + $ti = (($u * 2.0) + 1.0) * $range; + $t[] = $ti; + + $uy = mt_rand() / mt_getrandmax(); + $y[] = $uy * $this->depth; + + $coords[] = [ + $ti * cos($ti), + $y[$i], + $ti * sin($ti), + ]; + } + + $noise = []; + + if ($this->noise > 0.0) { + for ($i = 0; $i < $n; ++$i) { + $row = []; + + for ($j = 0; $j < 3; ++$j) { + $u1 = mt_rand() / mt_getrandmax(); + $u2 = mt_rand() / mt_getrandmax(); + $u1 = $u1 > 0.0 ? $u1 : 1e-12; + + $z0 = sqrt(-2.0 * log($u1)) * cos(2.0 * M_PI * $u2); + + $row[] = $z0 * $this->noise; + } + + $noise[] = $row; + } + } else { + for ($i = 0; $i < $n; ++$i) { + $noise[] = [0.0, 0.0, 0.0]; + } + } + + $center = []; + + for ($i = 0; $i < $n; ++$i) { + $center[] = $this->center; + } + + $coords = NumPower::array($coords); + $noise = NumPower::array($noise); + $center = NumPower::array($center); + + $samples = NumPower::add( + NumPower::add( + NumPower::multiply($coords, $this->scale), + $center + ), + $noise + ); + + return Labeled::quick($samples->toArray(), $t); + } +} diff --git a/src/NeuralNet/FeedForward.php b/src/NeuralNet/FeedForward.php index 5cffe79b1..4849f1681 100644 --- a/src/NeuralNet/FeedForward.php +++ b/src/NeuralNet/FeedForward.php @@ -27,7 +27,7 @@ * @package Rubix/ML * @author Andrew DalPino */ -class FeedForward extends Network +class FeedForward implements Network { /** * The input layer to the network. 
diff --git a/src/NeuralNet/Network.php b/src/NeuralNet/Network.php index 57e7cfd25..26a57e9d4 100644 --- a/src/NeuralNet/Network.php +++ b/src/NeuralNet/Network.php @@ -2,270 +2,24 @@ namespace Rubix\ML\NeuralNet; -use Tensor\Matrix; -use Rubix\ML\Encoding; -use Rubix\ML\Datasets\Dataset; -use Rubix\ML\Datasets\Labeled; -use Rubix\ML\NeuralNet\Layers\Input; -use Rubix\ML\NeuralNet\Layers\Output; -use Rubix\ML\NeuralNet\Layers\Parametric; -use Rubix\ML\NeuralNet\Optimizers\Adaptive; -use Rubix\ML\NeuralNet\Optimizers\Optimizer; use Traversable; -use function array_reverse; - /** * Network * - * A neural network implementation consisting of an input and output layer and any number - * of intermediate hidden layers. - * * @internal * * @category Machine Learning * @package Rubix/ML * @author Andrew DalPino + * @author Samuel Akopyan */ -class Network +interface Network { /** - * The input layer to the network. - * - * @var Input - */ - protected Input $input; - - /** - * The hidden layers of the network. - * - * @var list - */ - protected array $hidden = [ - // - ]; - - /** - * The pathing of the backward pass through the hidden layers. - * - * @var list - */ - protected array $backPass = [ - // - ]; - - /** - * The output layer of the network. - * - * @var Output - */ - protected Output $output; - - /** - * The gradient descent optimizer used to train the network. - * - * @var Optimizer - */ - protected Optimizer $optimizer; - - /** - * @param Input $input - * @param Layers\Hidden[] $hidden - * @param Output $output - * @param Optimizer $optimizer - */ - public function __construct(Input $input, array $hidden, Output $output, Optimizer $optimizer) - { - $hidden = array_values($hidden); - - $backPass = array_reverse($hidden); - - $this->input = $input; - $this->hidden = $hidden; - $this->output = $output; - $this->optimizer = $optimizer; - $this->backPass = $backPass; - } - - /** - * Return the input layer. 
- * - * @return Input - */ - public function input() : Input - { - return $this->input; - } - - /** - * Return an array of hidden layers indexed left to right. - * - * @return list - */ - public function hidden() : array - { - return $this->hidden; - } - - /** - * Return the output layer. - * - * @return Output - */ - public function output() : Output - { - return $this->output; - } - - /** - * Return all the layers in the network. + * Return the layers of the network. * * @return Traversable */ - public function layers() : Traversable - { - yield $this->input; - - yield from $this->hidden; - - yield $this->output; - } - - /** - * Return the number of trainable parameters in the network. - * - * @return int - */ - public function numParams() : int - { - $numParams = 0; - - foreach ($this->layers() as $layer) { - if ($layer instanceof Parametric) { - foreach ($layer->parameters() as $parameter) { - $numParams += $parameter->param()->size(); - } - } - } - - return $numParams; - } - - /** - * Initialize the parameters of the layers and warm the optimizer cache. - */ - public function initialize() : void - { - $fanIn = 1; - - foreach ($this->layers() as $layer) { - $fanIn = $layer->initialize($fanIn); - } - - if ($this->optimizer instanceof Adaptive) { - foreach ($this->layers() as $layer) { - if ($layer instanceof Parametric) { - foreach ($layer->parameters() as $param) { - $this->optimizer->warm($param); - } - } - } - } - } - - /** - * Run an inference pass and return the activations at the output layer. - * - * @param Dataset $dataset - * @return Matrix - */ - public function infer(Dataset $dataset) : Matrix - { - $input = Matrix::quick($dataset->samples())->transpose(); - - foreach ($this->layers() as $layer) { - $input = $layer->infer($input); - } - - return $input->transpose(); - } - - /** - * Perform a forward and backward pass of the network in one call. Returns - * the loss from the backward pass. 
- * - * @param Labeled $dataset - * @return float - */ - public function roundtrip(Labeled $dataset) : float - { - $input = Matrix::quick($dataset->samples())->transpose(); - - $this->feed($input); - - $loss = $this->backpropagate($dataset->labels()); - - return $loss; - } - - /** - * Feed a batch through the network and return a matrix of activations at the output later. - * - * @param Matrix $input - * @return Matrix - */ - public function feed(Matrix $input) : Matrix - { - foreach ($this->layers() as $layer) { - $input = $layer->forward($input); - } - - return $input; - } - - /** - * Backpropagate the gradient of the cost function and return the loss. - * - * @param list $labels - * @return float - */ - public function backpropagate(array $labels) : float - { - [$gradient, $loss] = $this->output->back($labels, $this->optimizer); - - foreach ($this->backPass as $layer) { - $gradient = $layer->back($gradient, $this->optimizer); - } - - return $loss; - } - - /** - * Export the network architecture as a graph in dot format. - * - * @return Encoding - */ - public function exportGraphviz() : Encoding - { - $dot = 'digraph Tree {' . PHP_EOL; - $dot .= ' node [shape=box, fontname=helvetica];' . PHP_EOL; - - $layerNum = 0; - - foreach ($this->layers() as $layer) { - ++$layerNum; - - $dot .= " N$layerNum [label=\"$layer\",style=\"rounded\"]" . PHP_EOL; - - if ($layerNum > 1) { - $parentId = $layerNum - 1; - - $dot .= " N{$parentId} -> N{$layerNum};" . PHP_EOL; - } - } - - $dot .= '}'; - - return new Encoding($dot); - } + public function layers() : Traversable; } diff --git a/src/NeuralNet/Networks/Base/Contracts/Network.php b/src/NeuralNet/Networks/Base/Contracts/Network.php new file mode 100644 index 000000000..c6f34abbf --- /dev/null +++ b/src/NeuralNet/Networks/Base/Contracts/Network.php @@ -0,0 +1,49 @@ + + */ +interface Network +{ + /** + * Return the layers of the network. 
+ * + * @return Traversable + */ + public function layers() : Traversable; + + /** + * Return the input layer. + * + * @return Input + */ + public function input() : Input; + + /** + * Return an array of hidden layers indexed left to right. + * + * @return list + */ + public function hidden() : array; + + /** + * Return the output layer. + * + * @return Output + */ + public function output() : Output; +} diff --git a/src/NeuralNet/FeedForwards/FeedForward.php b/src/NeuralNet/Networks/FeedForward/FeedForward.php similarity index 79% rename from src/NeuralNet/FeedForwards/FeedForward.php rename to src/NeuralNet/Networks/FeedForward/FeedForward.php index aea7fe6ed..3d7014439 100644 --- a/src/NeuralNet/FeedForwards/FeedForward.php +++ b/src/NeuralNet/Networks/FeedForward/FeedForward.php @@ -1,22 +1,21 @@ */ -class FeedForward extends Network +class FeedForward implements Network { /** * The input layer to the network. @@ -73,6 +72,13 @@ class FeedForward extends Network */ protected Optimizer $optimizer; + /** + * Whether to normalize the samples. 
+ * + * @var bool + */ + private bool $normalizeSamples; + /** * @param Input $input * @param Hidden[] $hidden @@ -90,6 +96,8 @@ public function __construct(Input $input, array $hidden, Output $output, Optimiz $this->output = $output; $this->optimizer = $optimizer; $this->backPass = $backPass; + + $this->normalizeSamples = false; } /** @@ -186,15 +194,46 @@ public function initialize() : void */ public function infer(Dataset $dataset) : NDArray { - $input = NumPower::transpose(NumPower::array($dataset->samples()), [1, 0]); + if ($this->normalizeSamples) { + if ($dataset->empty()) { + return NumPower::array([]); + } - foreach ($this->layers() as $layer) { - $input = $layer->infer($input); + $normalizedSamples = $this->normalizeSamples($dataset->samples()); + $input = NumPower::transpose(NumPower::array($normalizedSamples), [1, 0]); + + foreach ($this->layers() as $layer) { + $input = $layer->infer($input); + } + + $shape = $input->shape(); + + if (count($shape) === 1) { + $input = NumPower::reshape($input, [1, $shape[0]]); + } + } else { + $input = NumPower::transpose(NumPower::array($dataset->samples()), [1, 0]); + + foreach ($this->layers() as $layer) { + $input = $layer->infer($input); + } } return NumPower::transpose($input, [1, 0]); } + /** + * Normalize samples to a strict list-of-lists with sequential numeric keys. + * NumPower's C extension expects packed arrays and can error or behave unpredictably + * when given arrays with non-sequential keys (e.g. after randomize/take/fold operations). + * @param array $samples + * @return array + */ + private function normalizeSamples(array $samples) : array + { + return array_map('array_values', array_values($samples)); + } + /** * Perform a forward and backward pass of the network in one call. Returns * the loss from the backward pass. 
diff --git a/src/NeuralNet/Networks/Network.php b/src/NeuralNet/Networks/Network.php deleted file mode 100644 index 6554940b3..000000000 --- a/src/NeuralNet/Networks/Network.php +++ /dev/null @@ -1,275 +0,0 @@ - - */ -class Network -{ - /** - * The input layer to the network. - * - * @var Input - */ - protected Input $input; - - /** - * The hidden layers of the network. - * - * @var list - */ - protected array $hidden = [ - // - ]; - - /** - * The pathing of the backward pass through the hidden layers. - * - * @var list - */ - protected array $backPass = [ - // - ]; - - /** - * The output layer of the network. - * - * @var Output - */ - protected Output $output; - - /** - * The gradient descent optimizer used to train the network. - * - * @var Optimizer - */ - protected Optimizer $optimizer; - - /** - * @param Input $input - * @param Hidden[] $hidden - * @param Output $output - * @param Optimizer $optimizer - */ - public function __construct(Input $input, array $hidden, Output $output, Optimizer $optimizer) - { - $hidden = array_values($hidden); - - $backPass = array_reverse($hidden); - - $this->input = $input; - $this->hidden = $hidden; - $this->output = $output; - $this->optimizer = $optimizer; - $this->backPass = $backPass; - } - - /** - * Return the input layer. - * - * @return Input - */ - public function input() : Input - { - return $this->input; - } - - /** - * Return an array of hidden layers indexed left to right. - * - * @return list - */ - public function hidden() : array - { - return $this->hidden; - } - - /** - * Return the output layer. - * - * @return Output - */ - public function output() : Output - { - return $this->output; - } - - /** - * Return all the layers in the network. - * - * @return Traversable - */ - public function layers() : Traversable - { - yield $this->input; - - yield from $this->hidden; - - yield $this->output; - } - - /** - * Return the number of trainable parameters in the network. 
- * - * @return int - */ - public function numParams() : int - { - $numParams = 0; - - foreach ($this->layers() as $layer) { - if ($layer instanceof Parametric) { - foreach ($layer->parameters() as $parameter) { - $numParams += $parameter->param()->size(); - } - } - } - - return $numParams; - } - - /** - * Initialize the parameters of the layers and warm the optimizer cache. - */ - public function initialize() : void - { - $fanIn = 1; - - foreach ($this->layers() as $layer) { - $fanIn = $layer->initialize($fanIn); - } - - if ($this->optimizer instanceof Adaptive) { - foreach ($this->layers() as $layer) { - if ($layer instanceof Parametric) { - foreach ($layer->parameters() as $param) { - $this->optimizer->warm($param); - } - } - } - } - } - - /** - * Run an inference pass and return the activations at the output layer. - * - * @param Dataset $dataset - * @return NDArray - */ - public function infer(Dataset $dataset) : NDArray - { - $input = NumPower::transpose(NumPower::array($dataset->samples()), [1, 0]); - - foreach ($this->layers() as $layer) { - $input = $layer->infer($input); - } - - return NumPower::transpose($input, [1, 0]); - } - - /** - * Perform a forward and backward pass of the network in one call. Returns - * the loss from the backward pass. - * - * @param Labeled $dataset - * @return float - */ - public function roundtrip(Labeled $dataset) : float - { - $input = NumPower::transpose(NumPower::array($dataset->samples()), [1, 0]); - - $this->feed($input); - - $loss = $this->backpropagate($dataset->labels()); - - return $loss; - } - - /** - * Feed a batch through the network and return a matrix of activations at the output later. - * - * @param NDArray $input - * @return NDArray - */ - public function feed(NDArray $input) : NDArray - { - foreach ($this->layers() as $layer) { - $input = $layer->forward($input); - } - - return $input; - } - - /** - * Backpropagate the gradient of the cost function and return the loss. 
- * - * @param list $labels - * @return float - */ - public function backpropagate(array $labels) : float - { - [$gradient, $loss] = $this->output->back($labels, $this->optimizer); - - foreach ($this->backPass as $layer) { - $gradient = $layer->back($gradient, $this->optimizer); - } - - return $loss; - } - - /** - * Export the network architecture as a graph in dot format. - * - * @return Encoding - */ - public function exportGraphviz() : Encoding - { - $dot = 'digraph Tree {' . PHP_EOL; - $dot .= ' node [shape=box, fontname=helvetica];' . PHP_EOL; - - $layerNum = 0; - - foreach ($this->layers() as $layer) { - ++$layerNum; - - $dot .= " N$layerNum [label=\"$layer\",style=\"rounded\"]" . PHP_EOL; - - if ($layerNum > 1) { - $parentId = $layerNum - 1; - - $dot .= " N{$parentId} -> N{$layerNum};" . PHP_EOL; - } - } - - $dot .= '}'; - - return new Encoding($dot); - } -} diff --git a/src/NeuralNet/Parameters/Parameter.php b/src/NeuralNet/Parameters/Parameter.php index 0cef2e87a..6741a0e49 100644 --- a/src/NeuralNet/Parameters/Parameter.php +++ b/src/NeuralNet/Parameters/Parameter.php @@ -90,9 +90,14 @@ public function update(NDArray $gradient, Optimizer $optimizer) : void /** * Perform a deep copy of the object upon cloning. + * + * Cloning an NDArray directly may trigger native memory corruption in some + * NumPower builds (e.g. heap corruption/segfaults when parameters are + * snapshotted during training). To make cloning deterministic and stable we + * deep-copy through a PHP array roundtrip: NDArray -> PHP array -> NDArray. 
*/ public function __clone() : void { - $this->param = clone $this->param; + $this->param = NumPower::array($this->param->toArray()); } } diff --git a/src/NeuralNet/Snapshots/Snapshot.php b/src/NeuralNet/Snapshots/Snapshot.php index 033224d5c..c4bd33f72 100644 --- a/src/NeuralNet/Snapshots/Snapshot.php +++ b/src/NeuralNet/Snapshots/Snapshot.php @@ -2,9 +2,9 @@ namespace Rubix\ML\NeuralNet\Snapshots; -use Rubix\ML\NeuralNet\Layers\Base\Contracts\Parametric; use Rubix\ML\Exceptions\InvalidArgumentException; -use Rubix\ML\NeuralNet\Networks\Network; +use Rubix\ML\NeuralNet\Layers\Base\Contracts\Parametric; +use Rubix\ML\NeuralNet\Networks\Base\Contracts\Network; use Rubix\ML\NeuralNet\Parameters\Parameter; /** diff --git a/src/Regressors/Adaline.php b/src/Regressors/Adaline.php index 22e8201d8..40940a1f0 100644 --- a/src/Regressors/Adaline.php +++ b/src/Regressors/Adaline.php @@ -2,6 +2,7 @@ namespace Rubix\ML\Regressors; +use Rubix\ML\NeuralNet\FeedForward; use Rubix\ML\Online; use Rubix\ML\Learner; use Rubix\ML\Verbose; @@ -277,7 +278,7 @@ public function train(Dataset $dataset) : void { DatasetIsNotEmpty::with($dataset)->check(); - $this->network = new Network( + $this->network = new FeedForward( new Placeholder1D($dataset->numFeatures()), [new Dense(1, $this->l2Penalty, true, new Xavier2())], new Continuous($this->costFn), diff --git a/src/Regressors/MLPRegressor.php b/src/Regressors/MLPRegressor.php index 710e83f76..769eee4f9 100644 --- a/src/Regressors/MLPRegressor.php +++ b/src/Regressors/MLPRegressor.php @@ -2,6 +2,7 @@ namespace Rubix\ML\Regressors; +use Rubix\ML\NeuralNet\FeedForward; use Rubix\ML\Online; use Rubix\ML\Learner; use Rubix\ML\Verbose; @@ -356,7 +357,7 @@ public function train(Dataset $dataset) : void $hiddenLayers[] = new Dense(1, 0.0, true, new Xavier2()); - $this->network = new Network( + $this->network = new FeedForward( new Placeholder1D($dataset->numFeatures()), $hiddenLayers, new Continuous($this->costFn), diff --git 
a/src/Regressors/MLPRegressor/MLPRegressor.php b/src/Regressors/MLPRegressor/MLPRegressor.php new file mode 100644 index 000000000..d82f63a4f --- /dev/null +++ b/src/Regressors/MLPRegressor/MLPRegressor.php @@ -0,0 +1,561 @@ + + */ +class MLPRegressor implements Estimator, Learner, Online, Verbose, Persistable +{ + use AutotrackRevisions, LoggerAware; + + /** + * An array composing the user-specified hidden layers of the network in order. + * + * @var Hidden[] + */ + protected array $hiddenLayers = [ + // + ]; + + /** + * The number of training samples to process at a time. + * + * @var positive-int + */ + protected int $batchSize; + + /** + * The gradient descent optimizer used to update the network parameters. + * + * @var Optimizer + */ + protected Optimizer $optimizer; + + /** + * The maximum number of training epochs. i.e. the number of times to iterate before terminating. + * + * @var int<0,max> + */ + protected int $epochs; + + /** + * The minimum change in the training loss necessary to continue training. + * + * @var float + */ + protected float $minChange; + + /** + * The number of epochs to train before evaluating the model with the holdout set. + * + * @var int + */ + protected $evalInterval; + + /** + * The number of epochs without improvement in the validation score to wait before considering an early stop. + * + * @var positive-int + */ + protected int $window; + + /** + * The proportion of training samples to use for validation and progress monitoring. + * + * @var float + */ + protected float $holdOut; + + /** + * The function that computes the loss associated with an erroneous activation during training. + * + * @var RegressionLoss + */ + protected RegressionLoss $costFn; + + /** + * The metric used to score the generalization performance of the model during training. + * + * @var Metric + */ + protected Metric $metric; + + /** + * The underlying neural network instance. 
+ * + * @var Network|null + */ + protected ?Network $network = null; + + /** + * The validation scores at each epoch from the last training session. + * + * @var float[]|null + */ + protected ?array $scores = null; + + /** + * The loss at each epoch from the last training session. + * + * @var float[]|null + */ + protected ?array $losses = null; + + /** + * @param Hidden[] $hiddenLayers + * @param int $batchSize + * @param Optimizer|null $optimizer + * @param int $epochs + * @param float $minChange + * @param int $evalInterval + * @param int $window + * @param float $holdOut + * @param RegressionLoss|null $costFn + * @param Metric|null $metric + * @throws InvalidArgumentException + */ + public function __construct( + array $hiddenLayers = [], + int $batchSize = 128, + ?Optimizer $optimizer = null, + int $epochs = 1000, + float $minChange = 1e-4, + int $evalInterval = 3, + int $window = 5, + float $holdOut = 0.1, + ?RegressionLoss $costFn = null, + ?Metric $metric = null + ) { + foreach ($hiddenLayers as $layer) { + if (!$layer instanceof Hidden) { + throw new InvalidArgumentException('Hidden layer' + . ' must implement the Hidden interface.'); + } + } + + if ($batchSize < 1) { + throw new InvalidArgumentException('Batch size must be' + . " greater than 0, $batchSize given."); + } + + if ($epochs < 0) { + throw new InvalidArgumentException('Number of epochs' + . " must be greater than 0, $epochs given."); + } + + if ($minChange < 0.0) { + throw new InvalidArgumentException('Minimum change must be' + . " greater than 0, $minChange given."); + } + + if ($evalInterval < 1) { + throw new InvalidArgumentException('Eval interval must be' + . " greater than 0, $evalInterval given."); + } + + if ($window < 1) { + throw new InvalidArgumentException('Window must be' + . " greater than 0, $window given."); + } + + if ($holdOut < 0.0 or $holdOut > 0.5) { + throw new InvalidArgumentException('Hold out ratio must be' + . 
" between 0 and 0.5, $holdOut given."); + } + + if ($metric) { + EstimatorIsCompatibleWithMetric::with($this, $metric)->check(); + } + + $this->hiddenLayers = $hiddenLayers; + $this->batchSize = $batchSize; + $this->optimizer = $optimizer ?? new Adam(); + $this->epochs = $epochs; + $this->minChange = $minChange; + $this->evalInterval = $evalInterval; + $this->window = $window; + $this->holdOut = $holdOut; + $this->costFn = $costFn ?? new LeastSquares(); + $this->metric = $metric ?? new RMSE(); + } + + /** + * Return the estimator type. + * + * @internal + * + * @return EstimatorType + */ + public function type() : EstimatorType + { + return EstimatorType::regressor(); + } + + /** + * Return the data types that the estimator is compatible with. + * + * @internal + * + * @return list + */ + public function compatibility() : array + { + return [ + DataType::continuous(), + ]; + } + + /** + * Return the settings of the hyper-parameters in an associative array. + * + * @internal + * + * @return mixed[] + */ + public function params() : array + { + return [ + 'hidden layers' => $this->hiddenLayers, + 'batch size' => $this->batchSize, + 'optimizer' => $this->optimizer, + 'epochs' => $this->epochs, + 'min change' => $this->minChange, + 'eval interval' => $this->evalInterval, + 'window' => $this->window, + 'hold out' => $this->holdOut, + 'cost fn' => $this->costFn, + 'metric' => $this->metric, + ]; + } + + /** + * Has the learner been trained? + * + * @return bool + */ + public function trained() : bool + { + return isset($this->network); + } + + /** + * Return an iterable progress table with the steps from the last training session. + * + * @return Generator + */ + public function steps() : Generator + { + if (!$this->losses) { + return; + } + + foreach ($this->losses as $epoch => $loss) { + yield [ + 'epoch' => $epoch, + 'score' => $this->scores[$epoch] ?? null, + 'loss' => $loss, + ]; + } + } + + /** + * Return the validation score at each epoch. 
+ * + * @return float[]|null + */ + public function scores() : ?array + { + return $this->scores; + } + + /** + * Return the training loss at each epoch. + * + * @return float[]|null + */ + public function losses() : ?array + { + return $this->losses; + } + + /** + * Return the underlying neural network instance or null if not trained. + * + * @return Network|null + */ + public function network() : ?Network + { + return $this->network; + } + + /** + * Train the estimator with a dataset. + * + * @param \Rubix\ML\Datasets\Labeled $dataset + */ + public function train(Dataset $dataset) : void + { + DatasetIsNotEmpty::with($dataset)->check(); + + $hiddenLayers = $this->hiddenLayers; + + $hiddenLayers[] = new Dense(1, 0.0, true, new XavierUniform()); + + $this->network = new FeedForward( + new Placeholder1D($dataset->numFeatures()), + $hiddenLayers, + new Continuous($this->costFn), + $this->optimizer + ); + + $this->network->initialize(); + + $this->partial($dataset); + } + + /** + * Train the network using mini-batch gradient descent with backpropagation. 
+ * + * @param \Rubix\ML\Datasets\Labeled $dataset + * @throws RuntimeException + */ + public function partial(Dataset $dataset) : void + { + if (!$this->network) { + $this->train($dataset); + + return; + } + + SpecificationChain::with([ + new DatasetIsLabeled($dataset), + new DatasetIsNotEmpty($dataset), + new SamplesAreCompatibleWithEstimator($dataset, $this), + new LabelsAreCompatibleWithLearner($dataset, $this), + new DatasetHasDimensionality($dataset, $this->network->input()->width()), + ])->check(); + + if ($this->logger) { + $this->logger->info("Training $this"); + + $numParams = number_format($this->network->numParams()); + + $this->logger->info("{$numParams} trainable parameters"); + } + + [$testing, $training] = $dataset->randomize()->split($this->holdOut); + + [$minScore, $maxScore] = $this->metric->range()->list(); + + $bestScore = $minScore; + $bestEpoch = $numWorseEpochs = 0; + $loss = 0.0; + $score = $snapshot = null; + $prevLoss = INF; + + $this->scores = $this->losses = []; + + for ($epoch = 1; $epoch <= $this->epochs; ++$epoch) { + $batches = $training->randomize()->batch($this->batchSize); + + $loss = 0.0; + + foreach ($batches as $batch) { + $loss += $this->network->roundtrip($batch); + } + + $loss /= count($batches); + + $lossChange = abs($prevLoss - $loss); + + $this->losses[$epoch] = $loss; + + if (is_nan($loss)) { + if ($this->logger) { + $this->logger->warning('Numerical instability detected'); + } + + break; + } + + if ($epoch % $this->evalInterval === 0 && !$testing->empty()) { + $predictions = $this->predict($testing); + + $score = $this->metric->score($predictions, $testing->labels()); + + $this->scores[$epoch] = $score; + } + + if ($this->logger) { + $message = "Epoch: $epoch, {$this->costFn}: $loss"; + + if (isset($score)) { + $message .= ", {$this->metric}: $score"; + } + + $this->logger->info($message); + } + + if (isset($score)) { + if ($score >= $maxScore) { + break; + } + + if ($score > $bestScore) { + $bestScore = $score; + 
$bestEpoch = $epoch; + + $snapshot = Snapshot::take($this->network); + + $numWorseEpochs = 0; + } else { + ++$numWorseEpochs; + } + + if ($numWorseEpochs >= $this->window) { + break; + } + + unset($score); + } + + if ($lossChange < $this->minChange) { + break; + } + + $prevLoss = $loss; + } + + if ($snapshot and (end($this->scores) < $bestScore or is_nan($loss))) { + $snapshot->restore(); + + if ($this->logger) { + $this->logger->info("Model state restored to epoch $bestEpoch"); + } + } + + if ($this->logger) { + $this->logger->info('Training complete'); + } + } + + /** + * Feed a sample through the network and make a prediction based on the + * activation of the output neuron. + * + * @param Dataset $dataset + * @throws RuntimeException + * @return list + */ + public function predict(Dataset $dataset) : array + { + if (!$this->network) { + throw new RuntimeException('Estimator has not been trained.'); + } + + DatasetHasDimensionality::with($dataset, $this->network->input()->width())->check(); + + $activations = $this->network->infer($dataset); + + $activations = array_column($activations->toArray(), 0); + + return $activations; + } + + /** + * Export the network architecture as a graph in dot format. + * + * @throws RuntimeException + * @return Encoding + */ + public function exportGraphviz() : Encoding + { + if (!$this->network) { + throw new RuntimeException('Must train network first.'); + } + + return $this->network->exportGraphviz(); + } + + /** + * Return an associative array containing the data used to serialize the object. + * + * @return mixed[] + */ + public function __serialize() : array + { + $properties = get_object_vars($this); + + unset($properties['losses'], $properties['scores'], $properties['logger']); + + return $properties; + } + + /** + * Return the string representation of the object. + * + * @internal + * + * @return string + */ + public function __toString() : string + { + return 'MLP Regressor (' . Params::stringify($this->params()) . 
')'; + } +} diff --git a/tests/Datasets/Generators/SwissRoll/SwissRollTest.php b/tests/Datasets/Generators/SwissRoll/SwissRollTest.php new file mode 100644 index 000000000..437604c21 --- /dev/null +++ b/tests/Datasets/Generators/SwissRoll/SwissRollTest.php @@ -0,0 +1,47 @@ +generator = new SwissRoll(x: 0.0, y: 0.0, z: 0.0, scale: 1.0, depth: 12.0, noise: 0.3); + } + + #[Test] + #[TestDox('Dimensions returns 3')] + public function testDimensions() : void + { + self::assertEquals(3, $this->generator->dimensions()); + } + + #[Test] + #[TestDox('Generate returns a labeled dataset of the requested size')] + public function testGenerate() : void + { + $dataset = $this->generator->generate(self::DATASET_SIZE); + + self::assertInstanceOf(Labeled::class, $dataset); + self::assertInstanceOf(Dataset::class, $dataset); + + self::assertCount(self::DATASET_SIZE, $dataset); + } +} diff --git a/tests/NeuralNet/Layers/Swish/SwishTest.php b/tests/NeuralNet/Layers/Swish/SwishTest.php index 5f8d55503..f0b2bc2be 100644 --- a/tests/NeuralNet/Layers/Swish/SwishTest.php +++ b/tests/NeuralNet/Layers/Swish/SwishTest.php @@ -73,7 +73,7 @@ public static function initializeForwardBackInferProvider() : array 'backExpected' => [ [0.2319176, 0.7695808, 0.0450083], [0.2749583, 0.1099833, 0.0108810], - [0.1252499, -0.0012326, 0.2314345], + [0.1252493, -0.0012326, 0.2314345], ], 'inferExpected' => [ [0.7306671, 2.3094806, -0.0475070], diff --git a/tests/NeuralNet/NetworkTest.php b/tests/NeuralNet/NetworkTest.php index 1421c0a35..fed2bb57d 100644 --- a/tests/NeuralNet/NetworkTest.php +++ b/tests/NeuralNet/NetworkTest.php @@ -7,6 +7,7 @@ use PHPUnit\Framework\Attributes\CoversClass; use PHPUnit\Framework\Attributes\Group; use Rubix\ML\Datasets\Labeled; +use Rubix\ML\NeuralNet\FeedForward; use Rubix\ML\NeuralNet\Layers\Hidden; use Rubix\ML\NeuralNet\Layers\Input; use Rubix\ML\NeuralNet\Network; @@ -63,7 +64,7 @@ classes: ['yes', 'no', 'maybe'], costFn: new CrossEntropy() ); - $this->network = new 
Network( + $this->network = new FeedForward( input: $this->input, hidden: $this->hidden, output: $this->output, diff --git a/tests/NeuralNet/FeedForwards/FeedForwardTest.php b/tests/NeuralNet/Networks/FeedForward/FeedForwardTest.php similarity index 65% rename from tests/NeuralNet/FeedForwards/FeedForwardTest.php rename to tests/NeuralNet/Networks/FeedForward/FeedForwardTest.php index 84226fc70..71bca4c25 100644 --- a/tests/NeuralNet/FeedForwards/FeedForwardTest.php +++ b/tests/NeuralNet/Networks/FeedForward/FeedForwardTest.php @@ -1,26 +1,27 @@ network = new FeedForward($this->input, $this->hidden, $this->output, new Adam(0.001)); } + #[Test] + #[TestDox('Layers iterator yields all layers')] + public function testLayers() : void + { + $count = 0; + + foreach ($this->network->layers() as $item) { + ++$count; + } + + self::assertSame(7, $count); + } + + #[Test] + #[TestDox('Input layer is Placeholder1D')] + public function testInput() : void + { + self::assertInstanceOf(Placeholder1D::class, $this->network->input()); + } + + #[Test] + #[TestDox('Hidden layers count')] + public function testHidden() : void + { + self::assertCount(5, $this->network->hidden()); + } + + #[Test] + #[TestDox('Num params')] + public function testNumParams() : void + { + $this->network->initialize(); + + self::assertEquals(103, $this->network->numParams()); + } + #[Test] #[TestDox('Builds a feed-forward network instance')] public function build() : void @@ -130,4 +167,30 @@ public function roundtrip() : void self::assertIsFloat($loss); } + + + #[Test] + #[TestDox('Normalize samples returns packed list-of-lists for NumPower')] + public function testNormalizeSamplesReturnsPackedListOfLists() : void + { + $samples = [ + 10 => [2 => 1.0, 5 => 2.0, 9 => 10], + 20 => [2 => 3.0, 7 => 4.0, 1 => 1.0], + ]; + + $method = new ReflectionMethod(FeedForward::class, 'normalizeSamples'); + $method->setAccessible(true); + + /** @var array $normalized */ + $normalized = $method->invoke($this->network, 
$samples); + + self::assertTrue(array_is_list($normalized)); + self::assertCount(2, $normalized); + + foreach ($normalized as $row) { + self::assertTrue(array_is_list($row)); + } + + self::assertSame([[1.0, 2.0, 10], [3.0, 4.0, 1.0]], $normalized); + } } diff --git a/tests/NeuralNet/Networks/NetworkTest.php b/tests/NeuralNet/Networks/NetworkTest.php deleted file mode 100644 index 0197c225d..000000000 --- a/tests/NeuralNet/Networks/NetworkTest.php +++ /dev/null @@ -1,101 +0,0 @@ -dataset = Labeled::quick( - samples: [ - [1.0, 2.5], - [0.1, 0.0], - [0.002, -6.0], - ], - labels: ['yes', 'no', 'maybe'] - ); - - $this->input = new Placeholder1D(2); - - $this->hidden = [ - new Dense(neurons: 10), - new Activation(new ReLU()), - new Dense(neurons: 5), - new Activation(new ReLU()), - new Dense(neurons: 3), - ]; - - $this->output = new Multiclass( - classes: ['yes', 'no', 'maybe'], - costFn: new CrossEntropy() - ); - - $this->network = new Network( - input: $this->input, - hidden: $this->hidden, - output: $this->output, - optimizer: new Adam(0.001) - ); - } - - public function testLayers() : void - { - $count = 0; - - foreach ($this->network->layers() as $item) { - ++$count; - } - - self::assertSame(7, $count); - } - - public function testInput() : void - { - self::assertInstanceOf(Placeholder1D::class, $this->network->input()); - } - - public function testHidden() : void - { - self::assertCount(5, $this->network->hidden()); - } - - public function testNumParams() : void - { - $this->network->initialize(); - - self::assertEquals(103, $this->network->numParams()); - } -} diff --git a/tests/NeuralNet/NumPower/NumPowerTest.php b/tests/NeuralNet/NumPower/NumPowerTest.php new file mode 100644 index 000000000..20a2ee602 --- /dev/null +++ b/tests/NeuralNet/NumPower/NumPowerTest.php @@ -0,0 +1,50 @@ +shape()); + + $a = $t->toArray(); + + self::assertEqualsWithDelta(0.0, (float) $a[0][0], 1e-12); + self::assertEqualsWithDelta(1000.0, (float) $a[0][1], 1e-12); + 
self::assertEqualsWithDelta(2000.0, (float) $a[0][2], 1e-12); + + self::assertEqualsWithDelta(255.0, (float) $a[255][0], 1e-12); + self::assertEqualsWithDelta(1255.0, (float) $a[255][1], 1e-12); + self::assertEqualsWithDelta(2255.0, (float) $a[255][2], 1e-12); + + self::assertEqualsWithDelta(42.0, (float) $a[42][0], 1e-12); + self::assertEqualsWithDelta(1042.0, (float) $a[42][1], 1e-12); + self::assertEqualsWithDelta(2042.0, (float) $a[42][2], 1e-12); + } +} diff --git a/tests/NeuralNet/SnapshotTest.php b/tests/NeuralNet/SnapshotTest.php index bdf41829e..5cad02e30 100644 --- a/tests/NeuralNet/SnapshotTest.php +++ b/tests/NeuralNet/SnapshotTest.php @@ -6,6 +6,7 @@ use PHPUnit\Framework\Attributes\CoversClass; use PHPUnit\Framework\Attributes\Group; +use Rubix\ML\NeuralNet\FeedForward; use Rubix\ML\NeuralNet\Snapshot; use Rubix\ML\NeuralNet\Network; use Rubix\ML\NeuralNet\Layers\Dense; @@ -27,7 +28,7 @@ class SnapshotTest extends TestCase public function testTake() : void { - $network = new Network( + $network = new FeedForward( input: new Placeholder1D(1), hidden: [ new Dense(10), diff --git a/tests/NeuralNet/Snapshots/SnapshotTest.php b/tests/NeuralNet/Snapshots/SnapshotTest.php index ecde317e3..1aa4e2c7e 100644 --- a/tests/NeuralNet/Snapshots/SnapshotTest.php +++ b/tests/NeuralNet/Snapshots/SnapshotTest.php @@ -7,11 +7,11 @@ use PHPUnit\Framework\Attributes\CoversClass; use PHPUnit\Framework\Attributes\Group; use Rubix\ML\Exceptions\InvalidArgumentException; +use Rubix\ML\NeuralNet\Layers\Activation\Activation; +use Rubix\ML\NeuralNet\Layers\Binary\Binary; +use Rubix\ML\NeuralNet\Networks\FeedForward\FeedForward; use Rubix\ML\NeuralNet\Snapshots\Snapshot; -use Rubix\ML\NeuralNet\Networks\Network; use Rubix\ML\NeuralNet\Layers\Dense\Dense; -use Rubix\ML\NeuralNet\Layers\Binary\Binary; -use Rubix\ML\NeuralNet\Layers\Activation\Activation; use Rubix\ML\NeuralNet\Layers\Placeholder1D\Placeholder1D; use Rubix\ML\NeuralNet\Optimizers\Stochastic\Stochastic; use 
Rubix\ML\NeuralNet\ActivationFunctions\ELU\ELU; @@ -24,7 +24,7 @@ class SnapshotTest extends TestCase { protected Snapshot $snapshot; - protected Network $network; + protected FeedForward $network; public function testConstructorThrowsWithWrongParameters() : void { @@ -39,7 +39,7 @@ public function testConstructorThrowsWithWrongParameters() : void public function testTake() : void { - $network = new Network( + $network = new FeedForward( input: new Placeholder1D(1), hidden: [ new Dense(10), diff --git a/tests/Regressors/MLPRegressors/MLPRegressorTest.php b/tests/Regressors/MLPRegressors/MLPRegressorTest.php new file mode 100644 index 000000000..26299b3b1 --- /dev/null +++ b/tests/Regressors/MLPRegressors/MLPRegressorTest.php @@ -0,0 +1,318 @@ +generator = new SwissRoll(x: 4.0, y: -7.0, z: 0.0, scale: 1.0, depth: 21.0, noise: 0.5); + + $this->estimator = new MLPRegressor( + hiddenLayers: [ + new Dense(32), + new Activation(new SiLU()), + new Dense(16), + new Activation(new SiLU()), + new Dense(8), + new Activation(new SiLU()), + ], + batchSize: 32, + optimizer: new Adam(0.01), + epochs: 100, + minChange: 1e-4, + evalInterval: 3, + window: 5, + holdOut: 0.1, + costFn: new LeastSquares(), + metric: new RMSE() + ); + + $this->metric = new RSquared(); + + $this->estimator->setLogger(new BlackHole()); + + srand(self::RANDOM_SEED); + } + + #[Test] + #[TestDox('Assert pre conditions')] + public function testAssertPreConditions() : void + { + self::assertFalse($this->estimator->trained()); + } + + #[Test] + #[TestDox('Bad batch size')] + public function testBadBatchSize() : void + { + $this->expectException(InvalidArgumentException::class); + + new MLPRegressor(hiddenLayers: [], batchSize: -100); + } + + #[Test] + #[TestDox('Type')] + public function testType() : void + { + self::assertEquals(EstimatorType::regressor(), $this->estimator->type()); + } + + #[Test] + #[TestDox('Compatibility')] + public function testCompatibility() : void + { + $expected = [ + 
DataType::continuous(), + ]; + + self::assertEquals($expected, $this->estimator->compatibility()); + } + + #[Test] + #[TestDox('Params')] + public function testParams() : void + { + $expected = [ + 'hidden layers' => [ + new Dense(32), + new Activation(new SiLU()), + new Dense(16), + new Activation(new SiLU()), + new Dense(8), + new Activation(new SiLU()), + ], + 'batch size' => 32, + 'optimizer' => new Adam(0.01), + 'epochs' => 100, + 'min change' => 1e-4, + 'eval interval' => 3, + 'window' => 5, + 'hold out' => 0.1, + 'cost fn' => new LeastSquares(), + 'metric' => new RMSE(), + ]; + + self::assertEquals($expected, $this->estimator->params()); + } + + #[Test] + #[TestDox('Train partial predict')] + public function testTrainPartialPredict() : void + { + $dataset = $this->generator->generate(self::TRAIN_SIZE + self::TEST_SIZE); + + $dataset->apply(new ZScaleStandardizer()); + + $testing = $dataset->randomize()->take(self::TEST_SIZE); + + $folds = $dataset->fold(3); + + $this->estimator->train($folds[0]); + $this->estimator->partial($folds[1]); + $this->estimator->partial($folds[2]); + + self::assertTrue($this->estimator->trained()); + + $dot = $this->estimator->exportGraphviz(); + + // Graphviz::dotToImage($dot)->saveTo(new Filesystem('test.png')); + + self::assertStringStartsWith('digraph Tree {', (string) $dot); + + $losses = $this->estimator->losses(); + + self::assertIsArray($losses); + self::assertContainsOnlyFloat($losses); + + $scores = $this->estimator->scores(); + + self::assertIsArray($scores); + self::assertContainsOnlyFloat($scores); + + $predictions = $this->estimator->predict($testing); + + /** @var list $labels */ + $labels = $testing->labels(); + $score = $this->metric->score( + predictions: $predictions, + labels: $labels + ); + + self::assertGreaterThanOrEqual(self::MIN_SCORE, $score); + } + + #[Test] + #[TestDox('Predict count matches number of samples')] + public function testPredictCountMatchesNumberOfSamples() : void + { + [$testing] = 
$this->trainEstimatorAndGetTestingSet(); + + $predictions = $this->estimator->predict($testing); + + self::assertCount($testing->numSamples(), $predictions); + } + + #[Test] + #[TestDox('Predict returns numeric finite values')] + public function testPredictReturnsNumericFiniteValues() : void + { + [$testing] = $this->trainEstimatorAndGetTestingSet(); + + $predictions = $this->estimator->predict($testing); + + self::assertCount($testing->numSamples(), $predictions); + + foreach ($predictions as $prediction) { + self::assertIsNumeric($prediction); + self::assertFalse(is_nan((float) $prediction)); + self::assertTrue(is_finite((float) $prediction)); + } + } + + #[Test] + #[TestDox('Predict is repeatable for same model and dataset')] + public function testPredictIsRepeatableForSameModelAndDataset() : void + { + [$testing] = $this->trainEstimatorAndGetTestingSet(); + + $predictions1 = $this->estimator->predict($testing); + $predictions2 = $this->estimator->predict($testing); + + self::assertCount($testing->numSamples(), $predictions1); + self::assertCount($testing->numSamples(), $predictions2); + + foreach ($predictions1 as $i => $prediction) { + self::assertEqualsWithDelta((float) $prediction, (float) $predictions2[$i], 1e-12); + } + } + + #[Test] + #[TestDox('Predict does not mutate dataset samples or labels')] + public function testPredictDoesNotMutateDataset() : void + { + [$testing] = $this->trainEstimatorAndGetTestingSet(); + + $samplesBefore = $testing->samples(); + $labelsBefore = $testing->labels(); + + $predictions = $this->estimator->predict($testing); + + self::assertCount($testing->numSamples(), $predictions); + self::assertEquals($samplesBefore, $testing->samples()); + self::assertEquals($labelsBefore, $testing->labels()); + } + + #[Test] + #[TestDox('Serialization preserves predict output')] + public function testSerializationPreservesPredictOutput() : void + { + [$testing] = $this->trainEstimatorAndGetTestingSet(); + + $predictionsBefore = 
$this->estimator->predict($testing); + + $copy = unserialize(serialize($this->estimator)); + + self::assertInstanceOf(MLPRegressor::class, $copy); + self::assertTrue($copy->trained()); + + $predictionsAfter = $copy->predict($testing); + + self::assertCount($testing->numSamples(), $predictionsAfter); + + foreach ($predictionsAfter as $i => $prediction) { + self::assertEqualsWithDelta((float) $predictionsBefore[$i], (float) $prediction, 1e-8); + } + } + + /** + * @return array{0: Labeled} + */ + private function trainEstimatorAndGetTestingSet() : array + { + $dataset = $this->generator->generate(self::TRAIN_SIZE + self::TEST_SIZE); + + $dataset->apply(new ZScaleStandardizer()); + + $testing = $dataset->randomize()->take(self::TEST_SIZE); + + $folds = $dataset->fold(3); + + $this->estimator->train($folds[0]); + $this->estimator->partial($folds[1]); + $this->estimator->partial($folds[2]); + + return [$testing]; + } + + #[Test] + #[TestDox('Train incompatible')] + public function testTrainIncompatible() : void + { + $this->expectException(InvalidArgumentException::class); + + $this->estimator->train(Labeled::quick(samples: [['bad']], labels: [2])); + } + + #[Test] + #[TestDox('Predict untrained')] + public function testPredictUntrained() : void + { + $this->expectException(RuntimeException::class); + + $this->estimator->predict(Unlabeled::quick()); + } +}