From c80d2784968b8f7a015af1dd8cf24bc0e8889260 Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Sat, 14 Feb 2026 17:16:30 +0200 Subject: [PATCH 1/8] ML-396 Converted MLPRegressor to work with NumPower/NDArray related classes --- .../Generators/SwissRoll/SwissRoll.php | 188 ++++++ src/NeuralNet/Networks/Network.php | 76 ++- src/NeuralNet/Parameters/Parameter.php | 7 +- src/Regressors/MLPRegressor/MLPRegressor.php | 561 ++++++++++++++++++ .../Generators/SwissRoll/SwissRollTest.php | 47 ++ tests/NeuralNet/Layers/Swish/SwishTest.php | 2 +- tests/NeuralNet/Networks/NetworkTest.php | 51 ++ .../MLPRegressors/MLPRegressorTest.php | 216 +++++++ 8 files changed, 1144 insertions(+), 4 deletions(-) create mode 100644 src/Datasets/Generators/SwissRoll/SwissRoll.php create mode 100644 src/Regressors/MLPRegressor/MLPRegressor.php create mode 100644 tests/Datasets/Generators/SwissRoll/SwissRollTest.php create mode 100644 tests/Regressors/MLPRegressors/MLPRegressorTest.php diff --git a/src/Datasets/Generators/SwissRoll/SwissRoll.php b/src/Datasets/Generators/SwissRoll/SwissRoll.php new file mode 100644 index 000000000..c965ef865 --- /dev/null +++ b/src/Datasets/Generators/SwissRoll/SwissRoll.php @@ -0,0 +1,188 @@ + + */ +class SwissRoll implements Generator +{ + /** + * The center vector of the swiss roll. + * + * @var list + */ + protected array $center; + + /** + * The scaling factor of the swiss roll. + * + * @var float + */ + protected float $scale; + + /** + * The depth of the swiss roll i.e the scale of the y dimension. + * + * @var float + */ + protected float $depth; + + /** + * The standard deviation of the gaussian noise. + * + * @var float + */ + protected float $noise; + + /** + * @param float $x + * @param float $y + * @param float $z + * @param float $scale + * @param float $depth + * @param float $noise + * @throws InvalidArgumentException + */ + public function __construct( + float $x = 0.0, + float $y = 0.0, + float $z = 0.0, + float $scale = 1.0, + float $depth = 21.0, + float $noise = 0.1 + ) { + if ($scale < 0.0) { + throw new InvalidArgumentException('Scale must be' + . " greater than 0, $scale given."); + } + + if ($depth < 0) { + throw new InvalidArgumentException('Depth must be' + . " greater than 0, $depth given."); + } + + if ($noise < 0.0) { + throw new InvalidArgumentException('Noise factor cannot be less' + . " than 0, $noise given."); + } + + $this->center = [$x, $y, $z]; + $this->scale = $scale; + $this->depth = $depth; + $this->noise = $noise; + } + + /** + * Return the dimensionality of the data this generates. + * + * @internal + * + * @return int<0,max> + */ + public function dimensions() : int + { + return 3; + } + + /** + * Generate n data points. + * + * @param int<0,max> $n + * @return Labeled + */ + public function generate(int $n) : Labeled + { + $range = M_PI + HALF_PI; + + $t = []; + $y = []; + $coords = []; + + for ($i = 0; $i < $n; ++$i) { + $u = mt_rand() / mt_getrandmax(); + $ti = (($u * 2.0) + 1.0) * $range; + $t[] = $ti; + + $uy = mt_rand() / mt_getrandmax(); + $y[] = $uy * $this->depth; + + $coords[] = [ + $ti * cos($ti), + $y[$i], + $ti * sin($ti), + ]; + } + + $noise = []; + + if ($this->noise > 0.0) { + for ($i = 0; $i < $n; ++$i) { + $row = []; + + for ($j = 0; $j < 3; ++$j) { + $u1 = mt_rand() / mt_getrandmax(); + $u2 = mt_rand() / mt_getrandmax(); + $u1 = $u1 > 0.0 ? $u1 : 1e-12; + + $z0 = sqrt(-2.0 * log($u1)) * cos(2.0 * M_PI * $u2); + + $row[] = $z0 * $this->noise; + } + + $noise[] = $row; + } + } else { + for ($i = 0; $i < $n; ++$i) { + $noise[] = [0.0, 0.0, 0.0]; + } + } + + $center = []; + + for ($i = 0; $i < $n; ++$i) { + $center[] = $this->center; + } + + $coords = NumPower::array($coords); + $noise = NumPower::array($noise); + $center = NumPower::array($center); + + $samples = NumPower::add( + NumPower::add( + NumPower::multiply($coords, $this->scale), + $center + ), + $noise + ); + + return Labeled::quick($samples->toArray(), $t); + } +} diff --git a/src/NeuralNet/Networks/Network.php b/src/NeuralNet/Networks/Network.php index 6554940b3..df51a1a78 100644 --- a/src/NeuralNet/Networks/Network.php +++ b/src/NeuralNet/Networks/Network.php @@ -17,6 +17,7 @@ use Traversable; use function array_reverse; +use function array_is_list; /** * Network @@ -185,12 +186,22 @@ public function initialize() : void */ public function infer(Dataset $dataset) : NDArray { - $input = NumPower::transpose(NumPower::array($dataset->samples()), [1, 0]); + if ($dataset->empty()) { + return NumPower::array([]); + } + + $input = NumPower::transpose($this->samplesToInput($dataset->samples()), [1, 0]); foreach ($this->layers() as $layer) { $input = $layer->infer($input); } + $shape = $input->shape(); + + if (count($shape) === 1) { + $input = NumPower::reshape($input, [1, $shape[0]]); + } + return NumPower::transpose($input, [1, 0]); } @@ -203,7 +214,11 @@ public function infer(Dataset $dataset) : NDArray */ public function roundtrip(Labeled $dataset) : float { - $input = NumPower::transpose(NumPower::array($dataset->samples()), [1, 0]); + if ($dataset->empty()) { + return 0.0; + } + + $input = NumPower::transpose($this->samplesToInput($dataset->samples()), [1, 0]); $this->feed($input); @@ -272,4 +287,61 @@ public function exportGraphviz() : Encoding return new Encoding($dot); } + + /** + * Convert dataset samples (row-major PHP arrays) to a stable 2D NDArray. + * + * This method exists because dataset samples originate as PHP arrays and are + * not guaranteed to be in a form that NumPower can always infer as a dense + * 2D numeric matrix. For example: + * + * - PHP arrays can have non-packed keys (e.g. 3, 7, 8 instead of 0, 1, 2). + * - Rows can have non-packed keys (e.g. 1, 2 instead of 0, 1). + * - In some edge cases (such as a single row/column), NumPower may infer a + * rank-1 array. + * + * If the resulting NDArray is not rank-2, calling NumPower::transpose(..., [1, 0]) + * will throw "axes don't match array". To make transpose stable we: + * + * - Reindex the outer and inner arrays with array_values() to force packed + * row/column ordering. + * - Ensure the NDArray is 2D by reshaping rank-1 arrays to [1, n]. + * + * The returned NDArray is row-major with shape [nSamples, nFeatures]. + * + * @param list $samples + * @return NDArray + */ + protected function samplesToInput(array $samples) : NDArray + { + $packed = array_is_list($samples); + + if ($packed) { + foreach ($samples as $sample) { + if (!array_is_list($sample)) { + $packed = false; + + break; + } + } + } + + if (!$packed) { + $samples = array_values($samples); + + foreach ($samples as $i => $sample) { + $samples[$i] = array_values($sample); + } + } + + $input = NumPower::array($samples); + + $shape = $input->shape(); + + if (count($shape) === 1) { + $input = NumPower::reshape($input, [1, $shape[0]]); + } + + return $input; + } } diff --git a/src/NeuralNet/Parameters/Parameter.php b/src/NeuralNet/Parameters/Parameter.php index 0cef2e87a..6741a0e49 100644 --- a/src/NeuralNet/Parameters/Parameter.php +++ b/src/NeuralNet/Parameters/Parameter.php @@ -90,9 +90,14 @@ public function update(NDArray $gradient, Optimizer $optimizer) : void /** * Perform a deep copy of the object upon cloning. + * + * Cloning an NDArray directly may trigger native memory corruption in some + * NumPower builds (e.g. heap corruption/segfaults when parameters are + * snapshotted during training). To make cloning deterministic and stable we + * deep-copy through a PHP array roundtrip: NDArray -> PHP array -> NDArray. */ public function __clone() : void { - $this->param = clone $this->param; + $this->param = NumPower::array($this->param->toArray()); } } diff --git a/src/Regressors/MLPRegressor/MLPRegressor.php b/src/Regressors/MLPRegressor/MLPRegressor.php new file mode 100644 index 000000000..b95fe7e49 --- /dev/null +++ b/src/Regressors/MLPRegressor/MLPRegressor.php @@ -0,0 +1,561 @@ + + */ +class MLPRegressor implements Estimator, Learner, Online, Verbose, Persistable +{ + use AutotrackRevisions, LoggerAware; + + /** + * An array composing the user-specified hidden layers of the network in order. + * + * @var Hidden[] + */ + protected array $hiddenLayers = [ + // + ]; + + /** + * The number of training samples to process at a time. + * + * @var positive-int + */ + protected int $batchSize; + + /** + * The gradient descent optimizer used to update the network parameters. + * + * @var Optimizer + */ + protected Optimizer $optimizer; + + /** + * The maximum number of training epochs. i.e. the number of times to iterate before terminating. + * + * @var int<0,max> + */ + protected int $epochs; + + /** + * The minimum change in the training loss necessary to continue training. + * + * @var float + */ + protected float $minChange; + + /** + * The number of epochs to train before evaluating the model with the holdout set. + * + * @var int + */ + protected $evalInterval; + + /** + * The number of epochs without improvement in the validation score to wait before considering an early stop. + * + * @var positive-int + */ + protected int $window; + + /** + * The proportion of training samples to use for validation and progress monitoring. + * + * @var float + */ + protected float $holdOut; + + /** + * The function that computes the loss associated with an erroneous activation during training. + * + * @var RegressionLoss + */ + protected RegressionLoss $costFn; + + /** + * The metric used to score the generalization performance of the model during training. + * + * @var Metric + */ + protected Metric $metric; + + /** + * The underlying neural network instance. + * + * @var Network|null + */ + protected ?Network $network = null; + + /** + * The validation scores at each epoch from the last training session. + * + * @var float[]|null + */ + protected ?array $scores = null; + + /** + * The loss at each epoch from the last training session. + * + * @var float[]|null + */ + protected ?array $losses = null; + + /** + * @param Hidden[] $hiddenLayers + * @param int $batchSize + * @param Optimizer|null $optimizer + * @param int $epochs + * @param float $minChange + * @param int $evalInterval + * @param int $window + * @param float $holdOut + * @param RegressionLoss|null $costFn + * @param Metric|null $metric + * @throws InvalidArgumentException + */ + public function __construct( + array $hiddenLayers = [], + int $batchSize = 128, + ?Optimizer $optimizer = null, + int $epochs = 1000, + float $minChange = 1e-4, + int $evalInterval = 3, + int $window = 5, + float $holdOut = 0.1, + ?RegressionLoss $costFn = null, + ?Metric $metric = null + ) { + foreach ($hiddenLayers as $layer) { + if (!$layer instanceof Hidden) { + throw new InvalidArgumentException('Hidden layer' + . ' must implement the Hidden interface.'); + } + } + + if ($batchSize < 1) { + throw new InvalidArgumentException('Batch size must be' + . " greater than 0, $batchSize given."); + } + + if ($epochs < 0) { + throw new InvalidArgumentException('Number of epochs' + . " must be greater than 0, $epochs given."); + } + + if ($minChange < 0.0) { + throw new InvalidArgumentException('Minimum change must be' + . " greater than 0, $minChange given."); + } + + if ($evalInterval < 1) { + throw new InvalidArgumentException('Eval interval must be' + . " greater than 0, $evalInterval given."); + } + + if ($window < 1) { + throw new InvalidArgumentException('Window must be' + . " greater than 0, $window given."); + } + + if ($holdOut < 0.0 or $holdOut > 0.5) { + throw new InvalidArgumentException('Hold out ratio must be' + . " between 0 and 0.5, $holdOut given."); + } + + if ($metric) { + EstimatorIsCompatibleWithMetric::with($this, $metric)->check(); + } + + $this->hiddenLayers = $hiddenLayers; + $this->batchSize = $batchSize; + $this->optimizer = $optimizer ?? new Adam(); + $this->epochs = $epochs; + $this->minChange = $minChange; + $this->evalInterval = $evalInterval; + $this->window = $window; + $this->holdOut = $holdOut; + $this->costFn = $costFn ?? new LeastSquares(); + $this->metric = $metric ?? new RMSE(); + } + + /** + * Return the estimator type. + * + * @internal + * + * @return EstimatorType + */ + public function type() : EstimatorType + { + return EstimatorType::regressor(); + } + + /** + * Return the data types that the estimator is compatible with. + * + * @internal + * + * @return list + */ + public function compatibility() : array + { + return [ + DataType::continuous(), + ]; + } + + /** + * Return the settings of the hyper-parameters in an associative array. + * + * @internal + * + * @return mixed[] + */ + public function params() : array + { + return [ + 'hidden layers' => $this->hiddenLayers, + 'batch size' => $this->batchSize, + 'optimizer' => $this->optimizer, + 'epochs' => $this->epochs, + 'min change' => $this->minChange, + 'eval interval' => $this->evalInterval, + 'window' => $this->window, + 'hold out' => $this->holdOut, + 'cost fn' => $this->costFn, + 'metric' => $this->metric, + ]; + } + + /** + * Has the learner been trained? + * + * @return bool + */ + public function trained() : bool + { + return isset($this->network); + } + + /** + * Return an iterable progress table with the steps from the last training session. + * + * @return Generator + */ + public function steps() : Generator + { + if (!$this->losses) { + return; + } + + foreach ($this->losses as $epoch => $loss) { + yield [ + 'epoch' => $epoch, + 'score' => $this->scores[$epoch] ?? null, + 'loss' => $loss, + ]; + } + } + + /** + * Return the validation score at each epoch. + * + * @return float[]|null + */ + public function scores() : ?array + { + return $this->scores; + } + + /** + * Return the training loss at each epoch. + * + * @return float[]|null + */ + public function losses() : ?array + { + return $this->losses; + } + + /** + * Return the underlying neural network instance or null if not trained. + * + * @return Network|null + */ + public function network() : ?Network + { + return $this->network; + } + + /** + * Train the estimator with a dataset. + * + * @param \Rubix\ML\Datasets\Labeled $dataset + */ + public function train(Dataset $dataset) : void + { + DatasetIsNotEmpty::with($dataset)->check(); + + $hiddenLayers = $this->hiddenLayers; + + $hiddenLayers[] = new Dense(1, 0.0, true, new XavierUniform()); + + $this->network = new Network( + new Placeholder1D($dataset->numFeatures()), + $hiddenLayers, + new Continuous($this->costFn), + $this->optimizer + ); + + $this->network->initialize(); + + $this->partial($dataset); + } + + /** + * Train the network using mini-batch gradient descent with backpropagation. + * + * @param \Rubix\ML\Datasets\Labeled $dataset + * @throws RuntimeException + */ + public function partial(Dataset $dataset) : void + { + if (!$this->network) { + $this->train($dataset); + + return; + } + + SpecificationChain::with([ + new DatasetIsLabeled($dataset), + new DatasetIsNotEmpty($dataset), + new SamplesAreCompatibleWithEstimator($dataset, $this), + new LabelsAreCompatibleWithLearner($dataset, $this), + new DatasetHasDimensionality($dataset, $this->network->input()->width()), + ])->check(); + + if ($this->logger) { + $this->logger->info("Training $this"); + + $numParams = number_format($this->network->numParams()); + + $this->logger->info("{$numParams} trainable parameters"); + } + + [$testing, $training] = $dataset->randomize()->split($this->holdOut); + + [$minScore, $maxScore] = $this->metric->range()->list(); + + $bestScore = $minScore; + $bestEpoch = $numWorseEpochs = 0; + $loss = 0.0; + $score = $snapshot = null; + $prevLoss = INF; + + $this->scores = $this->losses = []; + + for ($epoch = 1; $epoch <= $this->epochs; ++$epoch) { + $batches = $training->randomize()->batch($this->batchSize); + + $loss = 0.0; + + foreach ($batches as $batch) { + $loss += $this->network->roundtrip($batch); + } + + $loss /= count($batches); + + $lossChange = abs($prevLoss - $loss); + + $this->losses[$epoch] = $loss; + + if (is_nan($loss)) { + if ($this->logger) { + $this->logger->warning('Numerical instability detected'); + } + + break; + } + + if ($epoch % $this->evalInterval === 0 && !$testing->empty()) { + $predictions = $this->predict($testing); + + $score = $this->metric->score($predictions, $testing->labels()); + + $this->scores[$epoch] = $score; + } + + if ($this->logger) { + $message = "Epoch: $epoch, {$this->costFn}: $loss"; + + if (isset($score)) { + $message .= ", {$this->metric}: $score"; + } + + $this->logger->info($message); + } + + if (isset($score)) { + if ($score >= $maxScore) { + break; + } + + if ($score > $bestScore) { + $bestScore = $score; + $bestEpoch = $epoch; + + $snapshot = Snapshot::take($this->network); + + $numWorseEpochs = 0; + } else { + ++$numWorseEpochs; + } + + if ($numWorseEpochs >= $this->window) { + break; + } + + unset($score); + } + + if ($lossChange < $this->minChange) { + break; + } + + $prevLoss = $loss; + } + + if ($snapshot and (end($this->scores) < $bestScore or is_nan($loss))) { + $snapshot->restore(); + + if ($this->logger) { + $this->logger->info("Model state restored to epoch $bestEpoch"); + } + } + + if ($this->logger) { + $this->logger->info('Training complete'); + } + } + + /** + * Feed a sample through the network and make a prediction based on the + * activation of the output neuron. + * + * @param Dataset $dataset + * @throws RuntimeException + * @return list + */ + public function predict(Dataset $dataset) : array + { + if (!$this->network) { + throw new RuntimeException('Estimator has not been trained.'); + } + + DatasetHasDimensionality::with($dataset, $this->network->input()->width())->check(); + + $activations = $this->network->infer($dataset); + + $activations = array_column($activations->toArray(), 0); + + return $activations; + } + + /** + * Export the network architecture as a graph in dot format. + * + * @throws RuntimeException + * @return Encoding + */ + public function exportGraphviz() : Encoding + { + if (!$this->network) { + throw new RuntimeException('Must train network first.'); + } + + return $this->network->exportGraphviz(); + } + + /** + * Return an associative array containing the data used to serialize the object. + * + * @return mixed[] + */ + public function __serialize() : array + { + $properties = get_object_vars($this); + + unset($properties['losses'], $properties['scores'], $properties['logger']); + + return $properties; + } + + /** + * Return the string representation of the object. + * + * @internal + * + * @return string + */ + public function __toString() : string + { + return 'MLP Regressor (' . Params::stringify($this->params()) . ')'; + } +} diff --git a/tests/Datasets/Generators/SwissRoll/SwissRollTest.php b/tests/Datasets/Generators/SwissRoll/SwissRollTest.php new file mode 100644 index 000000000..437604c21 --- /dev/null +++ b/tests/Datasets/Generators/SwissRoll/SwissRollTest.php @@ -0,0 +1,47 @@ +generator = new SwissRoll(x: 0.0, y: 0.0, z: 0.0, scale: 1.0, depth: 12.0, noise: 0.3); + } + + #[Test] + #[TestDox('Dimensions returns 3')] + public function testDimensions() : void + { + self::assertEquals(3, $this->generator->dimensions()); + } + + #[Test] + #[TestDox('Generate returns a labeled dataset of the requested size')] + public function testGenerate() : void + { + $dataset = $this->generator->generate(self::DATASET_SIZE); + + self::assertInstanceOf(Labeled::class, $dataset); + self::assertInstanceOf(Dataset::class, $dataset); + + self::assertCount(self::DATASET_SIZE, $dataset); + } +} diff --git a/tests/NeuralNet/Layers/Swish/SwishTest.php b/tests/NeuralNet/Layers/Swish/SwishTest.php index 5f8d55503..f0b2bc2be 100644 --- a/tests/NeuralNet/Layers/Swish/SwishTest.php +++ b/tests/NeuralNet/Layers/Swish/SwishTest.php @@ -73,7 +73,7 @@ public static function initializeForwardBackInferProvider() : array 'backExpected' => [ [0.2319176, 0.7695808, 0.0450083], [0.2749583, 0.1099833, 0.0108810], - [0.1252499, -0.0012326, 0.2314345], + [0.1252493, -0.0012326, 0.2314345], ], 'inferExpected' => [ [0.7306671, 2.3094806, -0.0475070], diff --git a/tests/NeuralNet/Networks/NetworkTest.php b/tests/NeuralNet/Networks/NetworkTest.php index 0197c225d..0406193cb 100644 --- a/tests/NeuralNet/Networks/NetworkTest.php +++ b/tests/NeuralNet/Networks/NetworkTest.php @@ -6,6 +6,8 @@ use PHPUnit\Framework\Attributes\CoversClass; use PHPUnit\Framework\Attributes\Group; +use PHPUnit\Framework\Attributes\Test; +use PHPUnit\Framework\Attributes\TestDox; use Rubix\ML\Datasets\Labeled; use Rubix\ML\NeuralNet\Layers\Base\Contracts\Hidden; use Rubix\ML\NeuralNet\Layers\Base\Contracts\Input; @@ -19,6 +21,7 @@ use Rubix\ML\NeuralNet\ActivationFunctions\ReLU\ReLU; use Rubix\ML\NeuralNet\CostFunctions\CrossEntropy\CrossEntropy; use PHPUnit\Framework\TestCase; +use ReflectionMethod; #[Group('NeuralNet')] #[CoversClass(Network::class)] @@ -71,6 +74,8 @@ classes: ['yes', 'no', 'maybe'], ); } + #[Test] + #[TestDox('Layers iterator yields all layers')] public function testLayers() : void { $count = 0; @@ -82,20 +87,66 @@ public function testLayers() : void self::assertSame(7, $count); } + #[Test] + #[TestDox('Input layer is Placeholder1D')] public function testInput() : void { self::assertInstanceOf(Placeholder1D::class, $this->network->input()); } + #[Test] + #[TestDox('Hidden layers count')] public function testHidden() : void { self::assertCount(5, $this->network->hidden()); } + #[Test] + #[TestDox('Num params')] public function testNumParams() : void { $this->network->initialize(); self::assertEquals(103, $this->network->numParams()); } + + #[Test] + #[TestDox('samplesToInput normalizes samples into 2D NDArray')] + public function testSamplesToInput() : void + { + $method = new ReflectionMethod(Network::class, 'samplesToInput'); + $method->setAccessible(true); + + $input = $method->invoke($this->network, $this->dataset->samples()); + + self::assertEquals([3, 2], $input->shape()); + + $samples = [ + 3 => [ + 1 => 1.0, + 2 => 2.5, + ], + 7 => [ + 1 => 0.1, + 2 => 0.0, + ], + 8 => [ + 1 => 0.002, + 2 => -6.0, + ], + ]; + + $input = $method->invoke($this->network, $samples); + + self::assertEquals([3, 2], $input->shape()); + + $samples = [ + [1.0], + [2.5], + ]; + + $input = $method->invoke($this->network, $samples); + + self::assertEquals([2, 1], $input->shape()); + } } diff --git a/tests/Regressors/MLPRegressors/MLPRegressorTest.php b/tests/Regressors/MLPRegressors/MLPRegressorTest.php new file mode 100644 index 000000000..5366c806e --- /dev/null +++ b/tests/Regressors/MLPRegressors/MLPRegressorTest.php @@ -0,0 +1,216 @@ +generator = new SwissRoll(x: 4.0, y: -7.0, z: 0.0, scale: 1.0, depth: 21.0, noise: 0.5); + + $this->estimator = new MLPRegressor( + hiddenLayers: [ + new Dense(32), + new Activation(new SiLU()), + new Dense(16), + new Activation(new SiLU()), + new Dense(8), + new Activation(new SiLU()), + ], + batchSize: 32, + optimizer: new Adam(0.01), + epochs: 100, + minChange: 1e-4, + evalInterval: 3, + window: 5, + holdOut: 0.1, + costFn: new LeastSquares(), + metric: new RMSE() + ); + + $this->metric = new RSquared(); + + $this->estimator->setLogger(new BlackHole()); + + srand(self::RANDOM_SEED); + } + + #[Test] + #[TestDox('Assert pre conditions')] + public function testAssertPreConditions() : void + { + self::assertFalse($this->estimator->trained()); + } + + #[Test] + #[TestDox('Bad batch size')] + public function testBadBatchSize() : void + { + $this->expectException(InvalidArgumentException::class); + + new MLPRegressor(hiddenLayers: [], batchSize: -100); + } + + #[Test] + #[TestDox('Type')] + public function testType() : void + { + self::assertEquals(EstimatorType::regressor(), $this->estimator->type()); + } + + #[Test] + #[TestDox('Compatibility')] + public function testCompatibility() : void + { + $expected = [ + DataType::continuous(), + ]; + + self::assertEquals($expected, $this->estimator->compatibility()); + } + + #[Test] + #[TestDox('Params')] + public function testParams() : void + { + $expected = [ + 'hidden layers' => [ + new Dense(32), + new Activation(new SiLU()), + new Dense(16), + new Activation(new SiLU()), + new Dense(8), + new Activation(new SiLU()), + ], + 'batch size' => 32, + 'optimizer' => new Adam(0.01), + 'epochs' => 100, + 'min change' => 1e-4, + 'eval interval' => 3, + 'window' => 5, + 'hold out' => 0.1, + 'cost fn' => new LeastSquares(), + 'metric' => new RMSE(), + ]; + + self::assertEquals($expected, $this->estimator->params()); + } + + #[Test] + #[TestDox('Train partial predict')] + public function testTrainPartialPredict() : void + { + $dataset = $this->generator->generate(self::TRAIN_SIZE + self::TEST_SIZE); + + $dataset->apply(new ZScaleStandardizer()); + + $testing = $dataset->randomize()->take(self::TEST_SIZE); + + $folds = $dataset->fold(3); + + $this->estimator->train($folds[0]); + $this->estimator->partial($folds[1]); + $this->estimator->partial($folds[2]); + + self::assertTrue($this->estimator->trained()); + + $dot = $this->estimator->exportGraphviz(); + + // Graphviz::dotToImage($dot)->saveTo(new Filesystem('test.png')); + + self::assertStringStartsWith('digraph Tree {', (string) $dot); + + $losses = $this->estimator->losses(); + + self::assertIsArray($losses); + self::assertContainsOnlyFloat($losses); + + $scores = $this->estimator->scores(); + + self::assertIsArray($scores); + self::assertContainsOnlyFloat($scores); + + $predictions = $this->estimator->predict($testing); + + /** @var list $labels */ + $labels = $testing->labels(); + $score = $this->metric->score( + predictions: $predictions, + labels: $labels + ); + + self::assertGreaterThanOrEqual(self::MIN_SCORE, $score); + } + + #[Test] + #[TestDox('Train incompatible')] + public function testTrainIncompatible() : void + { + $this->expectException(InvalidArgumentException::class); + + $this->estimator->train(Labeled::quick(samples: [['bad']], labels: [2])); + } + + #[Test] + #[TestDox('Predict untrained')] + public function testPredictUntrained() : void + { + $this->expectException(RuntimeException::class); + + $this->estimator->predict(Unlabeled::quick()); + } +} From 13acae649e0d8449ffb7d548ea53563fb85ea0d5 Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Sat, 14 Feb 2026 17:24:14 +0200 Subject: [PATCH 2/8] ML-396 removed unneeded export function --- tests/Regressors/MLPRegressors/MLPRegressorTest.php | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/Regressors/MLPRegressors/MLPRegressorTest.php b/tests/Regressors/MLPRegressors/MLPRegressorTest.php index 5366c806e..839711455 100644 --- a/tests/Regressors/MLPRegressors/MLPRegressorTest.php +++ b/tests/Regressors/MLPRegressors/MLPRegressorTest.php @@ -26,7 +26,6 @@ use Rubix\ML\Exceptions\InvalidArgumentException; use Rubix\ML\Exceptions\RuntimeException; use PHPUnit\Framework\TestCase; -use function Apphp\PrettyPrint\pp; #[Group('Regressors')] #[CoversClass(MLPRegressor::class)] From 3b65a47049dc2ca121800fcb47a4ef77bd38b00c Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Sat, 14 Feb 2026 17:56:10 +0200 Subject: [PATCH 3/8] ML-396 added test for NumPower --- tests/NeuralNet/NumPower/NumPowerTest.php | 50 +++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 tests/NeuralNet/NumPower/NumPowerTest.php diff --git a/tests/NeuralNet/NumPower/NumPowerTest.php b/tests/NeuralNet/NumPower/NumPowerTest.php new file mode 100644 index 000000000..20a2ee602 --- /dev/null +++ b/tests/NeuralNet/NumPower/NumPowerTest.php @@ -0,0 +1,50 @@ +shape()); + + $a = $t->toArray(); + + self::assertEqualsWithDelta(0.0, (float) $a[0][0], 1e-12); + self::assertEqualsWithDelta(1000.0, (float) $a[0][1], 1e-12); + self::assertEqualsWithDelta(2000.0, (float) $a[0][2], 1e-12); + + self::assertEqualsWithDelta(255.0, (float) $a[255][0], 1e-12); + self::assertEqualsWithDelta(1255.0, (float) $a[255][1], 1e-12); + self::assertEqualsWithDelta(2255.0, (float) $a[255][2], 1e-12); + + self::assertEqualsWithDelta(42.0, (float) $a[42][0], 1e-12); + self::assertEqualsWithDelta(1042.0, (float) $a[42][1], 1e-12); + self::assertEqualsWithDelta(2042.0, (float) $a[42][2], 1e-12); + } +} From d7404f81ef8629b4095f0dfc7f10c3aea60e6756 Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Sat, 14 Feb 2026 18:44:28 +0200 Subject: [PATCH 4/8] ML-396 added USE_NUMPOWER_TRANSPOSE option to Network --- src/NeuralNet/Networks/Network.php | 93 ++++----- tests/NeuralNet/Networks/NetworkTest.php | 40 ---- .../MLPRegressors/MLPRegressorTest.php | 182 ++++++++++++++++++ 3 files changed, 231 insertions(+), 84 deletions(-) diff --git a/src/NeuralNet/Networks/Network.php b/src/NeuralNet/Networks/Network.php index df51a1a78..929813652 100644 --- a/src/NeuralNet/Networks/Network.php +++ b/src/NeuralNet/Networks/Network.php @@ -73,6 +73,8 @@ class Network */ protected Optimizer $optimizer; + protected const USE_NUMPOWER_TRANSPOSE = false; + /** * @param Input $input * @param Hidden[] $hidden @@ -190,7 +192,11 @@ public function infer(Dataset $dataset) : NDArray return NumPower::array([]); } - $input = NumPower::transpose($this->samplesToInput($dataset->samples()), [1, 0]); + if (self::USE_NUMPOWER_TRANSPOSE) { + $input = NumPower::transpose(NumPower::array($dataset->samples()), [1, 0]); + } else { + $input = NumPower::array($this->rowsToColumns($dataset->samples())); + } foreach ($this->layers() as $layer) { $input = $layer->infer($input); @@ -202,7 +208,11 @@ public function infer(Dataset $dataset) : NDArray $input = NumPower::reshape($input, [1, $shape[0]]); } - return NumPower::transpose($input, [1, 0]); + if (self::USE_NUMPOWER_TRANSPOSE) { + return NumPower::transpose($input, [1, 0]); + } else { + return NumPower::array($this->columnsToRows($input->toArray())); + } } /** @@ -218,7 +228,11 @@ public function roundtrip(Labeled $dataset) : float return 0.0; } - $input = NumPower::transpose($this->samplesToInput($dataset->samples()), [1, 0]); + if (self::USE_NUMPOWER_TRANSPOSE) { + $input = NumPower::transpose(NumPower::array($dataset->samples()), [1, 0]); + } else { + $input = NumPower::array($this->rowsToColumns($dataset->samples())); + } $this->feed($input); @@ -289,59 +303,50 @@ public function exportGraphviz() : Encoding } /** - * Convert dataset samples (row-major PHP arrays) to a stable 2D NDArray. - * - * This method exists because dataset samples originate as PHP arrays and are - * not guaranteed to be in a form that NumPower can always infer as a dense - * 2D numeric matrix. For example: - * - * - PHP arrays can have non-packed keys (e.g. 3, 7, 8 instead of 0, 1, 2). - * - Rows can have non-packed keys (e.g. 1, 2 instead of 0, 1). - * - In some edge cases (such as a single row/column), NumPower may infer a - * rank-1 array. - * - * If the resulting NDArray is not rank-2, calling NumPower::transpose(..., [1, 0]) - * will throw "axes don't match array". To make transpose stable we: - * - * - Reindex the outer and inner arrays with array_values() to force packed - * row/column ordering. - * - Ensure the NDArray is 2D by reshaping rank-1 arrays to [1, n]. - * - * The returned NDArray is row-major with shape [nSamples, nFeatures]. - * - * @param list $samples - * @return NDArray + * @param list> $rows + * @return list> */ - protected function samplesToInput(array $samples) : NDArray + private function rowsToColumns(array $rows) : array { - $packed = array_is_list($samples); + $numSamples = count($rows); + $numFeatures = isset($rows[0]) && is_array($rows[0]) ? count($rows[0]) : 0; - if ($packed) { - foreach ($samples as $sample) { - if (!array_is_list($sample)) { - $packed = false; + $columns = []; - break; - } + for ($j = 0; $j < $numFeatures; ++$j) { + $column = []; + + for ($i = 0; $i < $numSamples; ++$i) { + $column[] = $rows[$i][$j]; } + + $columns[] = $column; } - if (!$packed) { - $samples = array_values($samples); + return $columns; + } - foreach ($samples as $i => $sample) { - $samples[$i] = array_values($sample); - } - } + /** + * @param list> $columns + * @return list> + */ + private function columnsToRows(array $columns) : array + { + $numFeatures = count($columns); + $numSamples = isset($columns[0]) && is_array($columns[0]) ? count($columns[0]) : 0; - $input = NumPower::array($samples); + $rows = []; - $shape = $input->shape(); + for ($i = 0; $i < $numSamples; ++$i) { + $row = []; - if (count($shape) === 1) { - $input = NumPower::reshape($input, [1, $shape[0]]); + for ($j = 0; $j < $numFeatures; ++$j) { + $row[] = $columns[$j][$i]; + } + + $rows[] = $row; } - return $input; + return $rows; } } diff --git a/tests/NeuralNet/Networks/NetworkTest.php b/tests/NeuralNet/Networks/NetworkTest.php index 0406193cb..199f1e9f4 100644 --- a/tests/NeuralNet/Networks/NetworkTest.php +++ b/tests/NeuralNet/Networks/NetworkTest.php @@ -109,44 +109,4 @@ public function testNumParams() : void self::assertEquals(103, $this->network->numParams()); } - - #[Test] - #[TestDox('samplesToInput normalizes samples into 2D NDArray')] - public function testSamplesToInput() : void - { - $method = new ReflectionMethod(Network::class, 'samplesToInput'); - $method->setAccessible(true); - - $input = $method->invoke($this->network, $this->dataset->samples()); - - self::assertEquals([3, 2], $input->shape()); - - $samples = [ - 3 => [ - 1 => 1.0, - 2 => 2.5, - ], - 7 => [ - 1 => 0.1, - 2 => 0.0, - ], - 8 => [ - 1 => 0.002, - 2 => -6.0, - ], - ]; - - $input = $method->invoke($this->network, $samples); - - self::assertEquals([3, 2], $input->shape()); - - $samples = [ - [1.0], - [2.5], - ]; - - $input = $method->invoke($this->network, $samples); - - self::assertEquals([2, 1], $input->shape()); - } } diff --git a/tests/Regressors/MLPRegressors/MLPRegressorTest.php b/tests/Regressors/MLPRegressors/MLPRegressorTest.php index 839711455..ddd633628 100644 --- a/tests/Regressors/MLPRegressors/MLPRegressorTest.php +++ b/tests/Regressors/MLPRegressors/MLPRegressorTest.php @@ -26,6 +26,7 @@ use Rubix\ML\Exceptions\InvalidArgumentException; use Rubix\ML\Exceptions\RuntimeException; use PHPUnit\Framework\TestCase; +use function Apphp\PrettyPrint\pp; #[Group('Regressors')] #[CoversClass(MLPRegressor::class)] @@ -159,9 +160,15 @@ public function testTrainPartialPredict() : void $testing = $dataset->randomize()->take(self::TEST_SIZE); + $testingSamplesBefore = $testing->samples(); + $testingLabelsBefore = $testing->labels(); + $folds = $dataset->fold(3); $this->estimator->train($folds[0]); + + $predictionsBefore = $this->estimator->predict($testing); + $this->estimator->partial($folds[1]); $this->estimator->partial($folds[2]); @@ -177,14 +184,69 @@ public function testTrainPartialPredict() : void self::assertIsArray($losses); self::assertContainsOnlyFloat($losses); + self::assertNotEmpty($losses); + + foreach ($losses as $epoch => $loss) { + self::assertIsInt($epoch); + self::assertGreaterThanOrEqual(1, $epoch); + self::assertFalse(is_nan($loss)); + self::assertTrue(is_finite($loss)); + } $scores = $this->estimator->scores(); self::assertIsArray($scores); self::assertContainsOnlyFloat($scores); + self::assertNotEmpty($scores); + + foreach ($scores as $epoch => $value) { + self::assertIsInt($epoch); + self::assertGreaterThanOrEqual(1, $epoch); + self::assertFalse(is_nan($value)); + self::assertTrue(is_finite($value)); + self::assertSame(0, $epoch % 3); + } $predictions = $this->estimator->predict($testing); + self::assertCount($testing->numSamples(), $predictions); + + foreach ($predictions as $prediction) { + self::assertIsNumeric($prediction); + self::assertFalse(is_nan((float) $prediction)); + self::assertTrue(is_finite((float) $prediction)); + } + + $predictions2 = $this->estimator->predict($testing); + + self::assertCount($testing->numSamples(), $predictions2); + + foreach ($predictions2 as $i => $prediction) { + self::assertEqualsWithDelta((float) $predictions[$i], (float) $prediction, 1e-12); + } + + self::assertEquals($testingSamplesBefore, $testing->samples()); + self::assertEquals($testingLabelsBefore, $testing->labels()); + + $delta = 0.0; + + foreach ($predictions as $i => $prediction) { + $delta += abs((float) $prediction - (float) $predictionsBefore[$i]); + } + + self::assertGreaterThan(0.0, $delta); + + $min = (float) $predictions[0]; + $max = (float) $predictions[0]; + + foreach ($predictions as $prediction) { + $p = (float) $prediction; + $min = min($min, $p); + $max = max($max, $p); + } + + self::assertGreaterThan(0.0, $max - $min); + /** @var list $labels */ $labels = $testing->labels(); $score = $this->metric->score( @@ -192,9 +254,129 @@ public function testTrainPartialPredict() : void labels: $labels ); + self::assertFalse(is_nan($score)); + self::assertTrue(is_finite($score)); + self::assertGreaterThan(-10.0, $score); + + $copy = unserialize(serialize($this->estimator)); + + self::assertInstanceOf(MLPRegressor::class, $copy); + self::assertTrue($copy->trained()); + + $predictionsAfter = $copy->predict($testing); + + self::assertCount($testing->numSamples(), $predictionsAfter); + + foreach ($predictionsAfter as $i => $prediction) { + self::assertEqualsWithDelta((float) $predictions[$i], (float) $prediction, 1e-8); + } + self::assertGreaterThanOrEqual(self::MIN_SCORE, $score); } + #[Test] + #[TestDox('Predict count matches number of samples')] + public function testPredictCountMatchesNumberOfSamples() : void + { + [$testing] = $this->trainEstimatorAndGetTestingSet(); + + $predictions = $this->estimator->predict($testing); + + self::assertCount($testing->numSamples(), $predictions); + } + + #[Test] + #[TestDox('Predict returns numeric finite values')] + public function testPredictReturnsNumericFiniteValues() : void + { + [$testing] = $this->trainEstimatorAndGetTestingSet(); + + $predictions = $this->estimator->predict($testing); + + self::assertCount($testing->numSamples(), $predictions); + + foreach ($predictions as $prediction) { + self::assertIsNumeric($prediction); + self::assertFalse(is_nan((float) $prediction)); + self::assertTrue(is_finite((float) $prediction)); + } + } + + #[Test] + #[TestDox('Predict is repeatable for same model and dataset')] + public function testPredictIsRepeatableForSameModelAndDataset() : void + { + [$testing] = $this->trainEstimatorAndGetTestingSet(); + + $predictions1 = $this->estimator->predict($testing); + $predictions2 = $this->estimator->predict($testing); + + self::assertCount($testing->numSamples(), $predictions1); + self::assertCount($testing->numSamples(), $predictions2); + + foreach ($predictions1 as $i => $prediction) { + self::assertEqualsWithDelta((float) $prediction, (float) $predictions2[$i], 1e-12); + } + } + + #[Test] + #[TestDox('Predict does not mutate dataset samples or labels')] + public function testPredictDoesNotMutateDataset() : void + { + [$testing] = $this->trainEstimatorAndGetTestingSet(); + + $samplesBefore = $testing->samples(); + $labelsBefore = $testing->labels(); + + $predictions = $this->estimator->predict($testing); + + self::assertCount($testing->numSamples(), $predictions); + self::assertEquals($samplesBefore, $testing->samples()); + self::assertEquals($labelsBefore, $testing->labels()); + } + + #[Test] + #[TestDox('Serialization preserves predict output')] + public function testSerializationPreservesPredictOutput() : void + { + [$testing] = $this->trainEstimatorAndGetTestingSet(); + + $predictionsBefore = $this->estimator->predict($testing); + + $copy = unserialize(serialize($this->estimator)); + + self::assertInstanceOf(MLPRegressor::class, $copy); + self::assertTrue($copy->trained()); + + $predictionsAfter = $copy->predict($testing); + + self::assertCount($testing->numSamples(), $predictionsAfter); + + foreach ($predictionsAfter as $i => $prediction) { + self::assertEqualsWithDelta((float) $predictionsBefore[$i], (float) $prediction, 1e-8); + } + } + + /** + * @return array{0: Unlabeled} + */ + private function trainEstimatorAndGetTestingSet() : array + { + $dataset = $this->generator->generate(self::TRAIN_SIZE + self::TEST_SIZE); + + $dataset->apply(new ZScaleStandardizer()); + + $testing = $dataset->randomize()->take(self::TEST_SIZE); + + $folds = $dataset->fold(3); + + $this->estimator->train($folds[0]); + $this->estimator->partial($folds[1]); + $this->estimator->partial($folds[2]); + + return [$testing]; + } + #[Test] #[TestDox('Train incompatible')] public function testTrainIncompatible() : void From d538799498733daef3abe4945b687078550e4a79 Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Sat, 14 Feb 2026 19:01:18 +0200 Subject: [PATCH 5/8] ML-396 added USE_NUMPOWER_TRANSPOSE option to Network --- tests/Regressors/MLPRegressors/MLPRegressorTest.php | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/Regressors/MLPRegressors/MLPRegressorTest.php b/tests/Regressors/MLPRegressors/MLPRegressorTest.php index ddd633628..1198d02b5 100644 --- a/tests/Regressors/MLPRegressors/MLPRegressorTest.php +++ b/tests/Regressors/MLPRegressors/MLPRegressorTest.php @@ -26,7 +26,6 @@ use Rubix\ML\Exceptions\InvalidArgumentException; use Rubix\ML\Exceptions\RuntimeException; use PHPUnit\Framework\TestCase; -use function Apphp\PrettyPrint\pp; #[Group('Regressors')] #[CoversClass(MLPRegressor::class)] From f333c67ec7459c5c50a7b1771a891c94e0857f03 Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Sat, 14 Feb 2026 23:53:37 +0200 Subject: [PATCH 6/8] ML-396 fixed issue with samples normalization --- composer.json | 4 +- src/NeuralNet/Networks/Network.php | 34 ++++---- tests/NeuralNet/Networks/NetworkTest.php | 25 ++++++ .../MLPRegressors/MLPRegressorTest.php | 78 ------------------- 4 files changed, 45 insertions(+), 96 deletions(-) diff --git a/composer.json b/composer.json index a703df15b..f0e963cd5 100644 --- a/composer.json +++ b/composer.json @@ -38,6 +38,7 @@ "andrewdalpino/okbloomer": "^1.0", "psr/log": "^1.1|^2.0|^3.0", "rubix/tensor": "^3.0", + "rubixml/numpower": "dev-main", "symfony/polyfill-mbstring": "^1.0", "symfony/polyfill-php80": "^1.17", "symfony/polyfill-php82": "^1.27", @@ -52,7 +53,8 @@ "phpstan/phpstan": "^2.0", "phpstan/phpstan-phpunit": "^2.0", "phpunit/phpunit": "^12.0", - "swoole/ide-helper": "^5.1" + "swoole/ide-helper": "^5.1", + "apphp/pretty-print": "^0.5.1" }, "suggest": { "ext-tensor": "For fast Matrix/Vector computing", diff --git a/src/NeuralNet/Networks/Network.php b/src/NeuralNet/Networks/Network.php index 929813652..c504e43bf 100644 --- a/src/NeuralNet/Networks/Network.php +++ b/src/NeuralNet/Networks/Network.php @@ -73,8 +73,6 @@ class Network */ protected Optimizer $optimizer; - protected const USE_NUMPOWER_TRANSPOSE = false; - /** * @param Input $input * @param Hidden[] $hidden @@ -192,11 +190,8 @@ public function infer(Dataset $dataset) : NDArray return NumPower::array([]); } - if (self::USE_NUMPOWER_TRANSPOSE) { - $input = NumPower::transpose(NumPower::array($dataset->samples()), [1, 0]); - } else { - $input = NumPower::array($this->rowsToColumns($dataset->samples())); - } + $normalizedSamples = $this->normalizeSamples($dataset->samples()); + $input = NumPower::transpose(NumPower::array($normalizedSamples), [1, 0]); foreach ($this->layers() as $layer) { $input = $layer->infer($input); @@ -208,11 +203,7 @@ public function infer(Dataset $dataset) : NDArray $input = NumPower::reshape($input, [1, $shape[0]]); } - if (self::USE_NUMPOWER_TRANSPOSE) { - return NumPower::transpose($input, [1, 0]); - } else { - return NumPower::array($this->columnsToRows($input->toArray())); - } + return NumPower::transpose($input, [1, 0]); } /** @@ -228,11 +219,8 @@ public function roundtrip(Labeled $dataset) : float return 0.0; } - if (self::USE_NUMPOWER_TRANSPOSE) { - $input = NumPower::transpose(NumPower::array($dataset->samples()), [1, 0]); - } else { - $input = NumPower::array($this->rowsToColumns($dataset->samples())); - } + $normalizedSamples = $this->normalizeSamples($dataset->samples()); + $input = NumPower::transpose(NumPower::array($normalizedSamples), [1, 0]); $this->feed($input); @@ -326,6 +314,18 @@ private function rowsToColumns(array $rows) : array return $columns; } + /** + * Normalize samples to a strict list-of-lists with sequential numeric keys. + * NumPower's C extension expects packed arrays and can error or behave unpredictably + * when given arrays with non-sequential keys (e.g. after randomize/take/fold operations). + * @param array $samples + * @return array + */ + private function normalizeSamples(array $samples) : array + { + return array_map('array_values', array_values($samples)); + } + /** * @param list> $columns * @return list> diff --git a/tests/NeuralNet/Networks/NetworkTest.php b/tests/NeuralNet/Networks/NetworkTest.php index 199f1e9f4..586d1ffbc 100644 --- a/tests/NeuralNet/Networks/NetworkTest.php +++ b/tests/NeuralNet/Networks/NetworkTest.php @@ -109,4 +109,29 @@ public function testNumParams() : void self::assertEquals(103, $this->network->numParams()); } + + #[Test] + #[TestDox('Normalize samples returns packed list-of-lists for NumPower')] + public function testNormalizeSamplesReturnsPackedListOfLists() : void + { + $samples = [ + 10 => [2 => 1.0, 5 => 2.0, 9 => 10], + 20 => [2 => 3.0, 7 => 4.0, 1 => 1.0], + ]; + + $method = new ReflectionMethod(Network::class, 'normalizeSamples'); + $method->setAccessible(true); + + /** @var array $normalized */ + $normalized = $method->invoke($this->network, $samples); + + self::assertTrue(array_is_list($normalized)); + self::assertCount(2, $normalized); + + foreach ($normalized as $row) { + self::assertTrue(array_is_list($row)); + } + + self::assertSame([[1.0, 2.0, 10], [3.0, 4.0, 1.0]], $normalized); + } } diff --git a/tests/Regressors/MLPRegressors/MLPRegressorTest.php b/tests/Regressors/MLPRegressors/MLPRegressorTest.php index 1198d02b5..26299b3b1 100644 --- a/tests/Regressors/MLPRegressors/MLPRegressorTest.php +++ b/tests/Regressors/MLPRegressors/MLPRegressorTest.php @@ -159,15 +159,9 @@ public function testTrainPartialPredict() : void $testing = $dataset->randomize()->take(self::TEST_SIZE); - $testingSamplesBefore = $testing->samples(); - $testingLabelsBefore = $testing->labels(); - $folds = $dataset->fold(3); $this->estimator->train($folds[0]); - - $predictionsBefore = $this->estimator->predict($testing); - $this->estimator->partial($folds[1]); $this->estimator->partial($folds[2]); @@ -183,69 +177,14 @@ public function testTrainPartialPredict() : void self::assertIsArray($losses); self::assertContainsOnlyFloat($losses); - self::assertNotEmpty($losses); - - foreach ($losses as $epoch => $loss) { - self::assertIsInt($epoch); - self::assertGreaterThanOrEqual(1, $epoch); - self::assertFalse(is_nan($loss)); - self::assertTrue(is_finite($loss)); - } $scores = $this->estimator->scores(); self::assertIsArray($scores); self::assertContainsOnlyFloat($scores); - self::assertNotEmpty($scores); - - foreach ($scores as $epoch => $value) { - self::assertIsInt($epoch); - self::assertGreaterThanOrEqual(1, $epoch); - self::assertFalse(is_nan($value)); - self::assertTrue(is_finite($value)); - self::assertSame(0, $epoch % 3); - } $predictions = $this->estimator->predict($testing); - self::assertCount($testing->numSamples(), $predictions); - - foreach ($predictions as $prediction) { - self::assertIsNumeric($prediction); - self::assertFalse(is_nan((float) $prediction)); - self::assertTrue(is_finite((float) $prediction)); - } - - $predictions2 = $this->estimator->predict($testing); - - self::assertCount($testing->numSamples(), $predictions2); - - foreach ($predictions2 as $i => $prediction) { - self::assertEqualsWithDelta((float) $predictions[$i], (float) $prediction, 1e-12); - } - - self::assertEquals($testingSamplesBefore, $testing->samples()); - self::assertEquals($testingLabelsBefore, $testing->labels()); - - $delta = 0.0; - - foreach ($predictions as $i => $prediction) { - $delta += abs((float) $prediction - (float) $predictionsBefore[$i]); - } - - self::assertGreaterThan(0.0, $delta); - - $min = (float) $predictions[0]; - $max = (float) $predictions[0]; - - foreach ($predictions as $prediction) { - $p = (float) $prediction; - $min = min($min, $p); - $max = max($max, $p); - } - - self::assertGreaterThan(0.0, $max - $min); - /** @var list $labels */ $labels = $testing->labels(); $score = $this->metric->score( @@ -253,23 +192,6 @@ public function testTrainPartialPredict() : void labels: $labels ); - self::assertFalse(is_nan($score)); - self::assertTrue(is_finite($score)); - self::assertGreaterThan(-10.0, $score); - - $copy = unserialize(serialize($this->estimator)); - - self::assertInstanceOf(MLPRegressor::class, $copy); - self::assertTrue($copy->trained()); - - $predictionsAfter = $copy->predict($testing); - - self::assertCount($testing->numSamples(), $predictionsAfter); - - foreach ($predictionsAfter as $i => $prediction) { - self::assertEqualsWithDelta((float) $predictions[$i], (float) $prediction, 1e-8); - } - self::assertGreaterThanOrEqual(self::MIN_SCORE, $score); } From 1583ee3e4eb7a65b50383bf165f649e229aa750b Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Sat, 14 Feb 2026 23:58:25 +0200 Subject: [PATCH 7/8] ML-396 removed unneeded packages from composer --- composer.json | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/composer.json b/composer.json index f0e963cd5..d7810b2de 100644 --- a/composer.json +++ b/composer.json @@ -37,7 +37,6 @@ "amphp/parallel": "^1.3", "andrewdalpino/okbloomer": "^1.0", "psr/log": "^1.1|^2.0|^3.0", - "rubix/tensor": "^3.0", "rubixml/numpower": "dev-main", "symfony/polyfill-mbstring": "^1.0", "symfony/polyfill-php80": "^1.17", @@ -52,9 +51,7 @@ "phpstan/extension-installer": "^1.0", "phpstan/phpstan": "^2.0", "phpstan/phpstan-phpunit": "^2.0", - "phpunit/phpunit": "^12.0", - "swoole/ide-helper": "^5.1", - "apphp/pretty-print": "^0.5.1" + "phpunit/phpunit": "^12.0" }, "suggest": { "ext-tensor": "For fast Matrix/Vector computing", From 57037c623914b67fb53a8ef77101b081bb0fc12d Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Sun, 15 Feb 2026 00:00:00 +0200 Subject: [PATCH 8/8] ML-396 removed unneeded packages from composer --- composer.json | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/composer.json b/composer.json index d7810b2de..a703df15b 100644 --- a/composer.json +++ b/composer.json @@ -37,7 +37,7 @@ "amphp/parallel": "^1.3", "andrewdalpino/okbloomer": "^1.0", "psr/log": "^1.1|^2.0|^3.0", - "rubixml/numpower": "dev-main", + "rubix/tensor": "^3.0", "symfony/polyfill-mbstring": "^1.0", "symfony/polyfill-php80": "^1.17", "symfony/polyfill-php82": "^1.27", @@ -51,7 +51,8 @@ "phpstan/extension-installer": "^1.0", "phpstan/phpstan": "^2.0", "phpstan/phpstan-phpunit": "^2.0", - "phpunit/phpunit": "^12.0" + "phpunit/phpunit": "^12.0", + "swoole/ide-helper": "^5.1" }, "suggest": { "ext-tensor": "For fast Matrix/Vector computing",