From c80d2784968b8f7a015af1dd8cf24bc0e8889260 Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Sat, 14 Feb 2026 17:16:30 +0200 Subject: [PATCH 01/11] ML-396 Converted MLPRegressor to work with NumPower/NDArray related classes --- .../Generators/SwissRoll/SwissRoll.php | 188 ++++++ src/NeuralNet/Networks/Network.php | 76 ++- src/NeuralNet/Parameters/Parameter.php | 7 +- src/Regressors/MLPRegressor/MLPRegressor.php | 561 ++++++++++++++++++ .../Generators/SwissRoll/SwissRollTest.php | 47 ++ tests/NeuralNet/Layers/Swish/SwishTest.php | 2 +- tests/NeuralNet/Networks/NetworkTest.php | 51 ++ .../MLPRegressors/MLPRegressorTest.php | 216 +++++++ 8 files changed, 1144 insertions(+), 4 deletions(-) create mode 100644 src/Datasets/Generators/SwissRoll/SwissRoll.php create mode 100644 src/Regressors/MLPRegressor/MLPRegressor.php create mode 100644 tests/Datasets/Generators/SwissRoll/SwissRollTest.php create mode 100644 tests/Regressors/MLPRegressors/MLPRegressorTest.php diff --git a/src/Datasets/Generators/SwissRoll/SwissRoll.php b/src/Datasets/Generators/SwissRoll/SwissRoll.php new file mode 100644 index 000000000..c965ef865 --- /dev/null +++ b/src/Datasets/Generators/SwissRoll/SwissRoll.php @@ -0,0 +1,188 @@ + + */ +class SwissRoll implements Generator +{ + /** + * The center vector of the swiss roll. + * + * @var list + */ + protected array $center; + + /** + * The scaling factor of the swiss roll. + * + * @var float + */ + protected float $scale; + + /** + * The depth of the swiss roll i.e the scale of the y dimension. + * + * @var float + */ + protected float $depth; + + /** + * The standard deviation of the gaussian noise. + * + * @var float + */ + protected float $noise; + + /** + * @param float $x + * @param float $y + * @param float $z + * @param float $scale + * @param float $depth + * @param float $noise + * @throws InvalidArgumentException + */ + public function __construct( + float $x = 0.0, + float $y = 0.0, + float $z = 0.0, + float $scale = 1.0, + float $depth = 21.0, + float $noise = 0.1 + ) { + if ($scale < 0.0) { + throw new InvalidArgumentException('Scale must be' + . " greater than 0, $scale given."); + } + + if ($depth < 0) { + throw new InvalidArgumentException('Depth must be' + . " greater than 0, $depth given."); + } + + if ($noise < 0.0) { + throw new InvalidArgumentException('Noise factor cannot be less' + . " than 0, $noise given."); + } + + $this->center = [$x, $y, $z]; + $this->scale = $scale; + $this->depth = $depth; + $this->noise = $noise; + } + + /** + * Return the dimensionality of the data this generates. + * + * @internal + * + * @return int<0,max> + */ + public function dimensions() : int + { + return 3; + } + + /** + * Generate n data points. + * + * @param int<0,max> $n + * @return Labeled + */ + public function generate(int $n) : Labeled + { + $range = M_PI + HALF_PI; + + $t = []; + $y = []; + $coords = []; + + for ($i = 0; $i < $n; ++$i) { + $u = mt_rand() / mt_getrandmax(); + $ti = (($u * 2.0) + 1.0) * $range; + $t[] = $ti; + + $uy = mt_rand() / mt_getrandmax(); + $y[] = $uy * $this->depth; + + $coords[] = [ + $ti * cos($ti), + $y[$i], + $ti * sin($ti), + ]; + } + + $noise = []; + + if ($this->noise > 0.0) { + for ($i = 0; $i < $n; ++$i) { + $row = []; + + for ($j = 0; $j < 3; ++$j) { + $u1 = mt_rand() / mt_getrandmax(); + $u2 = mt_rand() / mt_getrandmax(); + $u1 = $u1 > 0.0 ? $u1 : 1e-12; + + $z0 = sqrt(-2.0 * log($u1)) * cos(2.0 * M_PI * $u2); + + $row[] = $z0 * $this->noise; + } + + $noise[] = $row; + } + } else { + for ($i = 0; $i < $n; ++$i) { + $noise[] = [0.0, 0.0, 0.0]; + } + } + + $center = []; + + for ($i = 0; $i < $n; ++$i) { + $center[] = $this->center; + } + + $coords = NumPower::array($coords); + $noise = NumPower::array($noise); + $center = NumPower::array($center); + + $samples = NumPower::add( + NumPower::add( + NumPower::multiply($coords, $this->scale), + $center + ), + $noise + ); + + return Labeled::quick($samples->toArray(), $t); + } +} diff --git a/src/NeuralNet/Networks/Network.php b/src/NeuralNet/Networks/Network.php index 6554940b3..df51a1a78 100644 --- a/src/NeuralNet/Networks/Network.php +++ b/src/NeuralNet/Networks/Network.php @@ -17,6 +17,7 @@ use Traversable; use function array_reverse; +use function array_is_list; /** * Network @@ -185,12 +186,22 @@ public function initialize() : void */ public function infer(Dataset $dataset) : NDArray { - $input = NumPower::transpose(NumPower::array($dataset->samples()), [1, 0]); + if ($dataset->empty()) { + return NumPower::array([]); + } + + $input = NumPower::transpose($this->samplesToInput($dataset->samples()), [1, 0]); foreach ($this->layers() as $layer) { $input = $layer->infer($input); } + $shape = $input->shape(); + + if (count($shape) === 1) { + $input = NumPower::reshape($input, [1, $shape[0]]); + } + return NumPower::transpose($input, [1, 0]); } @@ -203,7 +214,11 @@ public function infer(Dataset $dataset) : NDArray */ public function roundtrip(Labeled $dataset) : float { - $input = NumPower::transpose(NumPower::array($dataset->samples()), [1, 0]); + if ($dataset->empty()) { + return 0.0; + } + + $input = NumPower::transpose($this->samplesToInput($dataset->samples()), [1, 0]); $this->feed($input); @@ -272,4 +287,61 @@ public function exportGraphviz() : Encoding return new Encoding($dot); } + + /** + * Convert dataset samples (row-major PHP arrays) to a stable 2D NDArray. + * + * This method exists because dataset samples originate as PHP arrays and are + * not guaranteed to be in a form that NumPower can always infer as a dense + * 2D numeric matrix. For example: + * + * - PHP arrays can have non-packed keys (e.g. 3, 7, 8 instead of 0, 1, 2). + * - Rows can have non-packed keys (e.g. 1, 2 instead of 0, 1). + * - In some edge cases (such as a single row/column), NumPower may infer a + * rank-1 array. + * + * If the resulting NDArray is not rank-2, calling NumPower::transpose(..., [1, 0]) + * will throw "axes don't match array". To make transpose stable we: + * + * - Reindex the outer and inner arrays with array_values() to force packed + * row/column ordering. + * - Ensure the NDArray is 2D by reshaping rank-1 arrays to [1, n]. + * + * The returned NDArray is row-major with shape [nSamples, nFeatures]. + * + * @param list $samples + * @return NDArray + */ + protected function samplesToInput(array $samples) : NDArray + { + $packed = array_is_list($samples); + + if ($packed) { + foreach ($samples as $sample) { + if (!array_is_list($sample)) { + $packed = false; + + break; + } + } + } + + if (!$packed) { + $samples = array_values($samples); + + foreach ($samples as $i => $sample) { + $samples[$i] = array_values($sample); + } + } + + $input = NumPower::array($samples); + + $shape = $input->shape(); + + if (count($shape) === 1) { + $input = NumPower::reshape($input, [1, $shape[0]]); + } + + return $input; + } } diff --git a/src/NeuralNet/Parameters/Parameter.php b/src/NeuralNet/Parameters/Parameter.php index 0cef2e87a..6741a0e49 100644 --- a/src/NeuralNet/Parameters/Parameter.php +++ b/src/NeuralNet/Parameters/Parameter.php @@ -90,9 +90,14 @@ public function update(NDArray $gradient, Optimizer $optimizer) : void /** * Perform a deep copy of the object upon cloning. + * + * Cloning an NDArray directly may trigger native memory corruption in some + * NumPower builds (e.g. heap corruption/segfaults when parameters are + * snapshotted during training). To make cloning deterministic and stable we + * deep-copy through a PHP array roundtrip: NDArray -> PHP array -> NDArray. */ public function __clone() : void { - $this->param = clone $this->param; + $this->param = NumPower::array($this->param->toArray()); } } diff --git a/src/Regressors/MLPRegressor/MLPRegressor.php b/src/Regressors/MLPRegressor/MLPRegressor.php new file mode 100644 index 000000000..b95fe7e49 --- /dev/null +++ b/src/Regressors/MLPRegressor/MLPRegressor.php @@ -0,0 +1,561 @@ + + */ +class MLPRegressor implements Estimator, Learner, Online, Verbose, Persistable +{ + use AutotrackRevisions, LoggerAware; + + /** + * An array composing the user-specified hidden layers of the network in order. + * + * @var Hidden[] + */ + protected array $hiddenLayers = [ + // + ]; + + /** + * The number of training samples to process at a time. + * + * @var positive-int + */ + protected int $batchSize; + + /** + * The gradient descent optimizer used to update the network parameters. + * + * @var Optimizer + */ + protected Optimizer $optimizer; + + /** + * The maximum number of training epochs. i.e. the number of times to iterate before terminating. + * + * @var int<0,max> + */ + protected int $epochs; + + /** + * The minimum change in the training loss necessary to continue training. + * + * @var float + */ + protected float $minChange; + + /** + * The number of epochs to train before evaluating the model with the holdout set. + * + * @var int + */ + protected $evalInterval; + + /** + * The number of epochs without improvement in the validation score to wait before considering an early stop. + * + * @var positive-int + */ + protected int $window; + + /** + * The proportion of training samples to use for validation and progress monitoring. + * + * @var float + */ + protected float $holdOut; + + /** + * The function that computes the loss associated with an erroneous activation during training. + * + * @var RegressionLoss + */ + protected RegressionLoss $costFn; + + /** + * The metric used to score the generalization performance of the model during training. + * + * @var Metric + */ + protected Metric $metric; + + /** + * The underlying neural network instance. + * + * @var Network|null + */ + protected ?Network $network = null; + + /** + * The validation scores at each epoch from the last training session. + * + * @var float[]|null + */ + protected ?array $scores = null; + + /** + * The loss at each epoch from the last training session. + * + * @var float[]|null + */ + protected ?array $losses = null; + + /** + * @param Hidden[] $hiddenLayers + * @param int $batchSize + * @param Optimizer|null $optimizer + * @param int $epochs + * @param float $minChange + * @param int $evalInterval + * @param int $window + * @param float $holdOut + * @param RegressionLoss|null $costFn + * @param Metric|null $metric + * @throws InvalidArgumentException + */ + public function __construct( + array $hiddenLayers = [], + int $batchSize = 128, + ?Optimizer $optimizer = null, + int $epochs = 1000, + float $minChange = 1e-4, + int $evalInterval = 3, + int $window = 5, + float $holdOut = 0.1, + ?RegressionLoss $costFn = null, + ?Metric $metric = null + ) { + foreach ($hiddenLayers as $layer) { + if (!$layer instanceof Hidden) { + throw new InvalidArgumentException('Hidden layer' + . ' must implement the Hidden interface.'); + } + } + + if ($batchSize < 1) { + throw new InvalidArgumentException('Batch size must be' + . " greater than 0, $batchSize given."); + } + + if ($epochs < 0) { + throw new InvalidArgumentException('Number of epochs' + . " must be greater than 0, $epochs given."); + } + + if ($minChange < 0.0) { + throw new InvalidArgumentException('Minimum change must be' + . " greater than 0, $minChange given."); + } + + if ($evalInterval < 1) { + throw new InvalidArgumentException('Eval interval must be' + . " greater than 0, $evalInterval given."); + } + + if ($window < 1) { + throw new InvalidArgumentException('Window must be' + . " greater than 0, $window given."); + } + + if ($holdOut < 0.0 or $holdOut > 0.5) { + throw new InvalidArgumentException('Hold out ratio must be' + . " between 0 and 0.5, $holdOut given."); + } + + if ($metric) { + EstimatorIsCompatibleWithMetric::with($this, $metric)->check(); + } + + $this->hiddenLayers = $hiddenLayers; + $this->batchSize = $batchSize; + $this->optimizer = $optimizer ?? new Adam(); + $this->epochs = $epochs; + $this->minChange = $minChange; + $this->evalInterval = $evalInterval; + $this->window = $window; + $this->holdOut = $holdOut; + $this->costFn = $costFn ?? new LeastSquares(); + $this->metric = $metric ?? new RMSE(); + } + + /** + * Return the estimator type. + * + * @internal + * + * @return EstimatorType + */ + public function type() : EstimatorType + { + return EstimatorType::regressor(); + } + + /** + * Return the data types that the estimator is compatible with. + * + * @internal + * + * @return list + */ + public function compatibility() : array + { + return [ + DataType::continuous(), + ]; + } + + /** + * Return the settings of the hyper-parameters in an associative array. + * + * @internal + * + * @return mixed[] + */ + public function params() : array + { + return [ + 'hidden layers' => $this->hiddenLayers, + 'batch size' => $this->batchSize, + 'optimizer' => $this->optimizer, + 'epochs' => $this->epochs, + 'min change' => $this->minChange, + 'eval interval' => $this->evalInterval, + 'window' => $this->window, + 'hold out' => $this->holdOut, + 'cost fn' => $this->costFn, + 'metric' => $this->metric, + ]; + } + + /** + * Has the learner been trained? + * + * @return bool + */ + public function trained() : bool + { + return isset($this->network); + } + + /** + * Return an iterable progress table with the steps from the last training session. + * + * @return Generator + */ + public function steps() : Generator + { + if (!$this->losses) { + return; + } + + foreach ($this->losses as $epoch => $loss) { + yield [ + 'epoch' => $epoch, + 'score' => $this->scores[$epoch] ?? null, + 'loss' => $loss, + ]; + } + } + + /** + * Return the validation score at each epoch. + * + * @return float[]|null + */ + public function scores() : ?array + { + return $this->scores; + } + + /** + * Return the training loss at each epoch. + * + * @return float[]|null + */ + public function losses() : ?array + { + return $this->losses; + } + + /** + * Return the underlying neural network instance or null if not trained. + * + * @return Network|null + */ + public function network() : ?Network + { + return $this->network; + } + + /** + * Train the estimator with a dataset. + * + * @param \Rubix\ML\Datasets\Labeled $dataset + */ + public function train(Dataset $dataset) : void + { + DatasetIsNotEmpty::with($dataset)->check(); + + $hiddenLayers = $this->hiddenLayers; + + $hiddenLayers[] = new Dense(1, 0.0, true, new XavierUniform()); + + $this->network = new Network( + new Placeholder1D($dataset->numFeatures()), + $hiddenLayers, + new Continuous($this->costFn), + $this->optimizer + ); + + $this->network->initialize(); + + $this->partial($dataset); + } + + /** + * Train the network using mini-batch gradient descent with backpropagation. + * + * @param \Rubix\ML\Datasets\Labeled $dataset + * @throws RuntimeException + */ + public function partial(Dataset $dataset) : void + { + if (!$this->network) { + $this->train($dataset); + + return; + } + + SpecificationChain::with([ + new DatasetIsLabeled($dataset), + new DatasetIsNotEmpty($dataset), + new SamplesAreCompatibleWithEstimator($dataset, $this), + new LabelsAreCompatibleWithLearner($dataset, $this), + new DatasetHasDimensionality($dataset, $this->network->input()->width()), + ])->check(); + + if ($this->logger) { + $this->logger->info("Training $this"); + + $numParams = number_format($this->network->numParams()); + + $this->logger->info("{$numParams} trainable parameters"); + } + + [$testing, $training] = $dataset->randomize()->split($this->holdOut); + + [$minScore, $maxScore] = $this->metric->range()->list(); + + $bestScore = $minScore; + $bestEpoch = $numWorseEpochs = 0; + $loss = 0.0; + $score = $snapshot = null; + $prevLoss = INF; + + $this->scores = $this->losses = []; + + for ($epoch = 1; $epoch <= $this->epochs; ++$epoch) { + $batches = $training->randomize()->batch($this->batchSize); + + $loss = 0.0; + + foreach ($batches as $batch) { + $loss += $this->network->roundtrip($batch); + } + + $loss /= count($batches); + + $lossChange = abs($prevLoss - $loss); + + $this->losses[$epoch] = $loss; + + if (is_nan($loss)) { + if ($this->logger) { + $this->logger->warning('Numerical instability detected'); + } + + break; + } + + if ($epoch % $this->evalInterval === 0 && !$testing->empty()) { + $predictions = $this->predict($testing); + + $score = $this->metric->score($predictions, $testing->labels()); + + $this->scores[$epoch] = $score; + } + + if ($this->logger) { + $message = "Epoch: $epoch, {$this->costFn}: $loss"; + + if (isset($score)) { + $message .= ", {$this->metric}: $score"; + } + + $this->logger->info($message); + } + + if (isset($score)) { + if ($score >= $maxScore) { + break; + } + + if ($score > $bestScore) { + $bestScore = $score; + $bestEpoch = $epoch; + + $snapshot = Snapshot::take($this->network); + + $numWorseEpochs = 0; + } else { + ++$numWorseEpochs; + } + + if ($numWorseEpochs >= $this->window) { + break; + } + + unset($score); + } + + if ($lossChange < $this->minChange) { + break; + } + + $prevLoss = $loss; + } + + if ($snapshot and (end($this->scores) < $bestScore or is_nan($loss))) { + $snapshot->restore(); + + if ($this->logger) { + $this->logger->info("Model state restored to epoch $bestEpoch"); + } + } + + if ($this->logger) { + $this->logger->info('Training complete'); + } + } + + /** + * Feed a sample through the network and make a prediction based on the + * activation of the output neuron. + * + * @param Dataset $dataset + * @throws RuntimeException + * @return list + */ + public function predict(Dataset $dataset) : array + { + if (!$this->network) { + throw new RuntimeException('Estimator has not been trained.'); + } + + DatasetHasDimensionality::with($dataset, $this->network->input()->width())->check(); + + $activations = $this->network->infer($dataset); + + $activations = array_column($activations->toArray(), 0); + + return $activations; + } + + /** + * Export the network architecture as a graph in dot format. + * + * @throws RuntimeException + * @return Encoding + */ + public function exportGraphviz() : Encoding + { + if (!$this->network) { + throw new RuntimeException('Must train network first.'); + } + + return $this->network->exportGraphviz(); + } + + /** + * Return an associative array containing the data used to serialize the object. + * + * @return mixed[] + */ + public function __serialize() : array + { + $properties = get_object_vars($this); + + unset($properties['losses'], $properties['scores'], $properties['logger']); + + return $properties; + } + + /** + * Return the string representation of the object. + * + * @internal + * + * @return string + */ + public function __toString() : string + { + return 'MLP Regressor (' . Params::stringify($this->params()) . ')'; + } +} diff --git a/tests/Datasets/Generators/SwissRoll/SwissRollTest.php b/tests/Datasets/Generators/SwissRoll/SwissRollTest.php new file mode 100644 index 000000000..437604c21 --- /dev/null +++ b/tests/Datasets/Generators/SwissRoll/SwissRollTest.php @@ -0,0 +1,47 @@ +generator = new SwissRoll(x: 0.0, y: 0.0, z: 0.0, scale: 1.0, depth: 12.0, noise: 0.3); + } + + #[Test] + #[TestDox('Dimensions returns 3')] + public function testDimensions() : void + { + self::assertEquals(3, $this->generator->dimensions()); + } + + #[Test] + #[TestDox('Generate returns a labeled dataset of the requested size')] + public function testGenerate() : void + { + $dataset = $this->generator->generate(self::DATASET_SIZE); + + self::assertInstanceOf(Labeled::class, $dataset); + self::assertInstanceOf(Dataset::class, $dataset); + + self::assertCount(self::DATASET_SIZE, $dataset); + } +} diff --git a/tests/NeuralNet/Layers/Swish/SwishTest.php b/tests/NeuralNet/Layers/Swish/SwishTest.php index 5f8d55503..f0b2bc2be 100644 --- a/tests/NeuralNet/Layers/Swish/SwishTest.php +++ b/tests/NeuralNet/Layers/Swish/SwishTest.php @@ -73,7 +73,7 @@ public static function initializeForwardBackInferProvider() : array 'backExpected' => [ [0.2319176, 0.7695808, 0.0450083], [0.2749583, 0.1099833, 0.0108810], - [0.1252499, -0.0012326, 0.2314345], + [0.1252493, -0.0012326, 0.2314345], ], 'inferExpected' => [ [0.7306671, 2.3094806, -0.0475070], diff --git a/tests/NeuralNet/Networks/NetworkTest.php b/tests/NeuralNet/Networks/NetworkTest.php index 0197c225d..0406193cb 100644 --- a/tests/NeuralNet/Networks/NetworkTest.php +++ b/tests/NeuralNet/Networks/NetworkTest.php @@ -6,6 +6,8 @@ use PHPUnit\Framework\Attributes\CoversClass; use PHPUnit\Framework\Attributes\Group; +use PHPUnit\Framework\Attributes\Test; +use PHPUnit\Framework\Attributes\TestDox; use Rubix\ML\Datasets\Labeled; use Rubix\ML\NeuralNet\Layers\Base\Contracts\Hidden; use Rubix\ML\NeuralNet\Layers\Base\Contracts\Input; @@ -19,6 +21,7 @@ use Rubix\ML\NeuralNet\ActivationFunctions\ReLU\ReLU; use Rubix\ML\NeuralNet\CostFunctions\CrossEntropy\CrossEntropy; use PHPUnit\Framework\TestCase; +use ReflectionMethod; #[Group('NeuralNet')] #[CoversClass(Network::class)] @@ -71,6 +74,8 @@ classes: ['yes', 'no', 'maybe'], ); } + #[Test] + #[TestDox('Layers iterator yields all layers')] public function testLayers() : void { $count = 0; @@ -82,20 +87,66 @@ public function testLayers() : void self::assertSame(7, $count); } + #[Test] + #[TestDox('Input layer is Placeholder1D')] public function testInput() : void { self::assertInstanceOf(Placeholder1D::class, $this->network->input()); } + #[Test] + #[TestDox('Hidden layers count')] public function testHidden() : void { self::assertCount(5, $this->network->hidden()); } + #[Test] + #[TestDox('Num params')] public function testNumParams() : void { $this->network->initialize(); self::assertEquals(103, $this->network->numParams()); } + + #[Test] + #[TestDox('samplesToInput normalizes samples into 2D NDArray')] + public function testSamplesToInput() : void + { + $method = new ReflectionMethod(Network::class, 'samplesToInput'); + $method->setAccessible(true); + + $input = $method->invoke($this->network, $this->dataset->samples()); + + self::assertEquals([3, 2], $input->shape()); + + $samples = [ + 3 => [ + 1 => 1.0, + 2 => 2.5, + ], + 7 => [ + 1 => 0.1, + 2 => 0.0, + ], + 8 => [ + 1 => 0.002, + 2 => -6.0, + ], + ]; + + $input = $method->invoke($this->network, $samples); + + self::assertEquals([3, 2], $input->shape()); + + $samples = [ + [1.0], + [2.5], + ]; + + $input = $method->invoke($this->network, $samples); + + self::assertEquals([2, 1], $input->shape()); + } } diff --git a/tests/Regressors/MLPRegressors/MLPRegressorTest.php b/tests/Regressors/MLPRegressors/MLPRegressorTest.php new file mode 100644 index 000000000..5366c806e --- /dev/null +++ b/tests/Regressors/MLPRegressors/MLPRegressorTest.php @@ -0,0 +1,216 @@ +generator = new SwissRoll(x: 4.0, y: -7.0, z: 0.0, scale: 1.0, depth: 21.0, noise: 0.5); + + $this->estimator = new MLPRegressor( + hiddenLayers: [ + new Dense(32), + new Activation(new SiLU()), + new Dense(16), + new Activation(new SiLU()), + new Dense(8), + new Activation(new SiLU()), + ], + batchSize: 32, + optimizer: new Adam(0.01), + epochs: 100, + minChange: 1e-4, + evalInterval: 3, + window: 5, + holdOut: 0.1, + costFn: new LeastSquares(), + metric: new RMSE() + ); + + $this->metric = new RSquared(); + + $this->estimator->setLogger(new BlackHole()); + + srand(self::RANDOM_SEED); + } + + #[Test] + #[TestDox('Assert pre conditions')] + public function testAssertPreConditions() : void + { + self::assertFalse($this->estimator->trained()); + } + + #[Test] + #[TestDox('Bad batch size')] + public function testBadBatchSize() : void + { + $this->expectException(InvalidArgumentException::class); + + new MLPRegressor(hiddenLayers: [], batchSize: -100); + } + + #[Test] + #[TestDox('Type')] + public function testType() : void + { + self::assertEquals(EstimatorType::regressor(), $this->estimator->type()); + } + + #[Test] + #[TestDox('Compatibility')] + public function testCompatibility() : void + { + $expected = [ + DataType::continuous(), + ]; + + self::assertEquals($expected, $this->estimator->compatibility()); + } + + #[Test] + #[TestDox('Params')] + public function testParams() : void + { + $expected = [ + 'hidden layers' => [ + new Dense(32), + new Activation(new SiLU()), + new Dense(16), + new Activation(new SiLU()), + new Dense(8), + new Activation(new SiLU()), + ], + 'batch size' => 32, + 'optimizer' => new Adam(0.01), + 'epochs' => 100, + 'min change' => 1e-4, + 'eval interval' => 3, + 'window' => 5, + 'hold out' => 0.1, + 'cost fn' => new LeastSquares(), + 'metric' => new RMSE(), + ]; + + self::assertEquals($expected, $this->estimator->params()); + } + + #[Test] + #[TestDox('Train partial predict')] + public function testTrainPartialPredict() : void + { + $dataset = $this->generator->generate(self::TRAIN_SIZE + self::TEST_SIZE); + + $dataset->apply(new ZScaleStandardizer()); + + $testing = $dataset->randomize()->take(self::TEST_SIZE); + + $folds = $dataset->fold(3); + + $this->estimator->train($folds[0]); + $this->estimator->partial($folds[1]); + $this->estimator->partial($folds[2]); + + self::assertTrue($this->estimator->trained()); + + $dot = $this->estimator->exportGraphviz(); + + // Graphviz::dotToImage($dot)->saveTo(new Filesystem('test.png')); + + self::assertStringStartsWith('digraph Tree {', (string) $dot); + + $losses = $this->estimator->losses(); + + self::assertIsArray($losses); + self::assertContainsOnlyFloat($losses); + + $scores = $this->estimator->scores(); + + self::assertIsArray($scores); + self::assertContainsOnlyFloat($scores); + + $predictions = $this->estimator->predict($testing); + + /** @var list $labels */ + $labels = $testing->labels(); + $score = $this->metric->score( + predictions: $predictions, + labels: $labels + ); + + self::assertGreaterThanOrEqual(self::MIN_SCORE, $score); + } + + #[Test] + #[TestDox('Train incompatible')] + public function testTrainIncompatible() : void + { + $this->expectException(InvalidArgumentException::class); + + $this->estimator->train(Labeled::quick(samples: [['bad']], labels: [2])); + } + + #[Test] + #[TestDox('Predict untrained')] + public function testPredictUntrained() : void + { + $this->expectException(RuntimeException::class); + + $this->estimator->predict(Unlabeled::quick()); + } +} From 13acae649e0d8449ffb7d548ea53563fb85ea0d5 Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Sat, 14 Feb 2026 17:24:14 +0200 Subject: [PATCH 02/11] ML-396 removed unneeded export function --- tests/Regressors/MLPRegressors/MLPRegressorTest.php | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/Regressors/MLPRegressors/MLPRegressorTest.php b/tests/Regressors/MLPRegressors/MLPRegressorTest.php index 5366c806e..839711455 100644 --- a/tests/Regressors/MLPRegressors/MLPRegressorTest.php +++ b/tests/Regressors/MLPRegressors/MLPRegressorTest.php @@ -26,7 +26,6 @@ use Rubix\ML\Exceptions\InvalidArgumentException; use Rubix\ML\Exceptions\RuntimeException; use PHPUnit\Framework\TestCase; -use function Apphp\PrettyPrint\pp; #[Group('Regressors')] #[CoversClass(MLPRegressor::class)] From 3b65a47049dc2ca121800fcb47a4ef77bd38b00c Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Sat, 14 Feb 2026 17:56:10 +0200 Subject: [PATCH 03/11] ML-396 added test for NumPower --- tests/NeuralNet/NumPower/NumPowerTest.php | 50 +++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 tests/NeuralNet/NumPower/NumPowerTest.php diff --git a/tests/NeuralNet/NumPower/NumPowerTest.php b/tests/NeuralNet/NumPower/NumPowerTest.php new file mode 100644 index 000000000..20a2ee602 --- /dev/null +++ b/tests/NeuralNet/NumPower/NumPowerTest.php @@ -0,0 +1,50 @@ +shape()); + + $a = $t->toArray(); + + self::assertEqualsWithDelta(0.0, (float) $a[0][0], 1e-12); + self::assertEqualsWithDelta(1000.0, (float) $a[0][1], 1e-12); + self::assertEqualsWithDelta(2000.0, (float) $a[0][2], 1e-12); + + self::assertEqualsWithDelta(255.0, (float) $a[255][0], 1e-12); + self::assertEqualsWithDelta(1255.0, (float) $a[255][1], 1e-12); + self::assertEqualsWithDelta(2255.0, (float) $a[255][2], 1e-12); + + self::assertEqualsWithDelta(42.0, (float) $a[42][0], 1e-12); + self::assertEqualsWithDelta(1042.0, (float) $a[42][1], 1e-12); + self::assertEqualsWithDelta(2042.0, (float) $a[42][2], 1e-12); + } +} From d7404f81ef8629b4095f0dfc7f10c3aea60e6756 Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Sat, 14 Feb 2026 18:44:28 +0200 Subject: [PATCH 04/11] ML-396 added USE_NUMPOWER_TRANSPOSE option to Network --- src/NeuralNet/Networks/Network.php | 93 ++++----- tests/NeuralNet/Networks/NetworkTest.php | 40 ---- .../MLPRegressors/MLPRegressorTest.php | 182 ++++++++++++++++++ 3 files changed, 231 insertions(+), 84 deletions(-) diff --git a/src/NeuralNet/Networks/Network.php b/src/NeuralNet/Networks/Network.php index df51a1a78..929813652 100644 --- a/src/NeuralNet/Networks/Network.php +++ b/src/NeuralNet/Networks/Network.php @@ -73,6 +73,8 @@ class Network */ protected Optimizer $optimizer; + protected const USE_NUMPOWER_TRANSPOSE = false; + /** * @param Input $input * @param Hidden[] $hidden @@ -190,7 +192,11 @@ public function infer(Dataset $dataset) : NDArray return NumPower::array([]); } - $input = NumPower::transpose($this->samplesToInput($dataset->samples()), [1, 0]); + if (self::USE_NUMPOWER_TRANSPOSE) { + $input = NumPower::transpose(NumPower::array($dataset->samples()), [1, 0]); + } else { + $input = NumPower::array($this->rowsToColumns($dataset->samples())); + } foreach ($this->layers() as $layer) { $input = $layer->infer($input); @@ -202,7 +208,11 @@ public function infer(Dataset $dataset) : NDArray $input = NumPower::reshape($input, [1, $shape[0]]); } - return NumPower::transpose($input, [1, 0]); + if (self::USE_NUMPOWER_TRANSPOSE) { + return NumPower::transpose($input, [1, 0]); + } else { + return NumPower::array($this->columnsToRows($input->toArray())); + } } /** @@ -218,7 +228,11 @@ public function roundtrip(Labeled $dataset) : float return 0.0; } - $input = NumPower::transpose($this->samplesToInput($dataset->samples()), [1, 0]); + if (self::USE_NUMPOWER_TRANSPOSE) { + $input = NumPower::transpose(NumPower::array($dataset->samples()), [1, 0]); + } else { + $input = NumPower::array($this->rowsToColumns($dataset->samples())); + } $this->feed($input); @@ -289,59 +303,50 @@ public function exportGraphviz() : Encoding } /** - * Convert dataset samples (row-major PHP arrays) to a stable 2D NDArray. - * - * This method exists because dataset samples originate as PHP arrays and are - * not guaranteed to be in a form that NumPower can always infer as a dense - * 2D numeric matrix. For example: - * - * - PHP arrays can have non-packed keys (e.g. 3, 7, 8 instead of 0, 1, 2). - * - Rows can have non-packed keys (e.g. 1, 2 instead of 0, 1). - * - In some edge cases (such as a single row/column), NumPower may infer a - * rank-1 array. - * - * If the resulting NDArray is not rank-2, calling NumPower::transpose(..., [1, 0]) - * will throw "axes don't match array". To make transpose stable we: - * - * - Reindex the outer and inner arrays with array_values() to force packed - * row/column ordering. - * - Ensure the NDArray is 2D by reshaping rank-1 arrays to [1, n]. - * - * The returned NDArray is row-major with shape [nSamples, nFeatures]. - * - * @param list $samples - * @return NDArray + * @param list> $rows + * @return list> */ - protected function samplesToInput(array $samples) : NDArray + private function rowsToColumns(array $rows) : array { - $packed = array_is_list($samples); + $numSamples = count($rows); + $numFeatures = isset($rows[0]) && is_array($rows[0]) ? count($rows[0]) : 0; - if ($packed) { - foreach ($samples as $sample) { - if (!array_is_list($sample)) { - $packed = false; + $columns = []; - break; - } + for ($j = 0; $j < $numFeatures; ++$j) { + $column = []; + + for ($i = 0; $i < $numSamples; ++$i) { + $column[] = $rows[$i][$j]; } + + $columns[] = $column; } - if (!$packed) { - $samples = array_values($samples); + return $columns; + } - foreach ($samples as $i => $sample) { - $samples[$i] = array_values($sample); - } - } + /** + * @param list> $columns + * @return list> + */ + private function columnsToRows(array $columns) : array + { + $numFeatures = count($columns); + $numSamples = isset($columns[0]) && is_array($columns[0]) ? count($columns[0]) : 0; - $input = NumPower::array($samples); + $rows = []; - $shape = $input->shape(); + for ($i = 0; $i < $numSamples; ++$i) { + $row = []; - if (count($shape) === 1) { - $input = NumPower::reshape($input, [1, $shape[0]]); + for ($j = 0; $j < $numFeatures; ++$j) { + $row[] = $columns[$j][$i]; + } + + $rows[] = $row; } - return $input; + return $rows; } } diff --git a/tests/NeuralNet/Networks/NetworkTest.php b/tests/NeuralNet/Networks/NetworkTest.php index 0406193cb..199f1e9f4 100644 --- a/tests/NeuralNet/Networks/NetworkTest.php +++ b/tests/NeuralNet/Networks/NetworkTest.php @@ -109,44 +109,4 @@ public function testNumParams() : void self::assertEquals(103, $this->network->numParams()); } - - #[Test] - #[TestDox('samplesToInput normalizes samples into 2D NDArray')] - public function testSamplesToInput() : void - { - $method = new ReflectionMethod(Network::class, 'samplesToInput'); - $method->setAccessible(true); - - $input = $method->invoke($this->network, $this->dataset->samples()); - - self::assertEquals([3, 2], $input->shape()); - - $samples = [ - 3 => [ - 1 => 1.0, - 2 => 2.5, - ], - 7 => [ - 1 => 0.1, - 2 => 0.0, - ], - 8 => [ - 1 => 0.002, - 2 => -6.0, - ], - ]; - - $input = $method->invoke($this->network, $samples); - - self::assertEquals([3, 2], $input->shape()); - - $samples = [ - [1.0], - [2.5], - ]; - - $input = $method->invoke($this->network, $samples); - - self::assertEquals([2, 1], $input->shape()); - } } diff --git a/tests/Regressors/MLPRegressors/MLPRegressorTest.php b/tests/Regressors/MLPRegressors/MLPRegressorTest.php index 839711455..ddd633628 100644 --- a/tests/Regressors/MLPRegressors/MLPRegressorTest.php +++ b/tests/Regressors/MLPRegressors/MLPRegressorTest.php @@ -26,6 +26,7 @@ use Rubix\ML\Exceptions\InvalidArgumentException; use Rubix\ML\Exceptions\RuntimeException; use PHPUnit\Framework\TestCase; +use function Apphp\PrettyPrint\pp; #[Group('Regressors')] #[CoversClass(MLPRegressor::class)] @@ -159,9 +160,15 @@ public function testTrainPartialPredict() : void $testing = $dataset->randomize()->take(self::TEST_SIZE); + $testingSamplesBefore = $testing->samples(); + $testingLabelsBefore = $testing->labels(); + $folds = $dataset->fold(3); $this->estimator->train($folds[0]); + + $predictionsBefore = $this->estimator->predict($testing); + $this->estimator->partial($folds[1]); $this->estimator->partial($folds[2]); @@ -177,14 +184,69 @@ public function testTrainPartialPredict() : void self::assertIsArray($losses); self::assertContainsOnlyFloat($losses); + self::assertNotEmpty($losses); + + foreach ($losses as $epoch => $loss) { + self::assertIsInt($epoch); + self::assertGreaterThanOrEqual(1, $epoch); + self::assertFalse(is_nan($loss)); + self::assertTrue(is_finite($loss)); + } $scores = $this->estimator->scores(); self::assertIsArray($scores); self::assertContainsOnlyFloat($scores); + self::assertNotEmpty($scores); + + foreach ($scores as $epoch => $value) { + self::assertIsInt($epoch); + self::assertGreaterThanOrEqual(1, $epoch); + self::assertFalse(is_nan($value)); + self::assertTrue(is_finite($value)); + self::assertSame(0, $epoch % 3); + } $predictions = $this->estimator->predict($testing); + self::assertCount($testing->numSamples(), $predictions); + + foreach ($predictions as $prediction) { + self::assertIsNumeric($prediction); + self::assertFalse(is_nan((float) $prediction)); + self::assertTrue(is_finite((float) $prediction)); + } + + $predictions2 = $this->estimator->predict($testing); + + self::assertCount($testing->numSamples(), $predictions2); + + foreach ($predictions2 as $i => $prediction) { + self::assertEqualsWithDelta((float) $predictions[$i], (float) $prediction, 1e-12); + } + + self::assertEquals($testingSamplesBefore, $testing->samples()); + self::assertEquals($testingLabelsBefore, $testing->labels()); + + $delta = 0.0; + + foreach ($predictions as $i => $prediction) { + $delta += abs((float) $prediction - (float) $predictionsBefore[$i]); + } + + self::assertGreaterThan(0.0, $delta); + + $min = (float) $predictions[0]; + $max = (float) $predictions[0]; + + foreach ($predictions as $prediction) { + $p = (float) $prediction; + $min = min($min, $p); + $max = max($max, $p); + } + + self::assertGreaterThan(0.0, $max - $min); + /** @var list $labels */ $labels = $testing->labels(); $score = $this->metric->score( @@ -192,9 +254,129 @@ public function testTrainPartialPredict() : void labels: $labels ); + self::assertFalse(is_nan($score)); + self::assertTrue(is_finite($score)); + self::assertGreaterThan(-10.0, $score); + + $copy = unserialize(serialize($this->estimator)); + + self::assertInstanceOf(MLPRegressor::class, $copy); + self::assertTrue($copy->trained()); + + $predictionsAfter = $copy->predict($testing); + + self::assertCount($testing->numSamples(), $predictionsAfter); + + foreach ($predictionsAfter as $i => $prediction) { + self::assertEqualsWithDelta((float) $predictions[$i], (float) $prediction, 1e-8); + } + self::assertGreaterThanOrEqual(self::MIN_SCORE, $score); } + #[Test] + #[TestDox('Predict count matches number of samples')] + public function testPredictCountMatchesNumberOfSamples() : void + { + [$testing] = $this->trainEstimatorAndGetTestingSet(); + + $predictions = $this->estimator->predict($testing); + + self::assertCount($testing->numSamples(), $predictions); + } + + #[Test] + #[TestDox('Predict returns numeric finite values')] + public function testPredictReturnsNumericFiniteValues() : void + { + [$testing] = $this->trainEstimatorAndGetTestingSet(); + + $predictions = $this->estimator->predict($testing); + + self::assertCount($testing->numSamples(), $predictions); + + foreach ($predictions as $prediction) { + self::assertIsNumeric($prediction); + self::assertFalse(is_nan((float) $prediction)); + self::assertTrue(is_finite((float) $prediction)); + } + } + + #[Test] + #[TestDox('Predict is repeatable for same model and dataset')] + public function testPredictIsRepeatableForSameModelAndDataset() : void + { + [$testing] = $this->trainEstimatorAndGetTestingSet(); + + $predictions1 = $this->estimator->predict($testing); + $predictions2 = $this->estimator->predict($testing); + + self::assertCount($testing->numSamples(), $predictions1); + self::assertCount($testing->numSamples(), $predictions2); + + foreach ($predictions1 as $i => $prediction) { + self::assertEqualsWithDelta((float) $prediction, (float) $predictions2[$i], 1e-12); + } + } + + #[Test] + #[TestDox('Predict does not mutate dataset samples or labels')] + public function testPredictDoesNotMutateDataset() : void + { + [$testing] = $this->trainEstimatorAndGetTestingSet(); + + $samplesBefore = $testing->samples(); + $labelsBefore = $testing->labels(); + + $predictions = $this->estimator->predict($testing); + + self::assertCount($testing->numSamples(), $predictions); + self::assertEquals($samplesBefore, $testing->samples()); + self::assertEquals($labelsBefore, $testing->labels()); + } + + #[Test] + #[TestDox('Serialization preserves predict output')] + public function testSerializationPreservesPredictOutput() : void + { + [$testing] = $this->trainEstimatorAndGetTestingSet(); + + $predictionsBefore = $this->estimator->predict($testing); + + $copy = unserialize(serialize($this->estimator)); + + self::assertInstanceOf(MLPRegressor::class, $copy); + self::assertTrue($copy->trained()); + + $predictionsAfter = $copy->predict($testing); + + self::assertCount($testing->numSamples(), $predictionsAfter); + + foreach ($predictionsAfter as $i => $prediction) { + self::assertEqualsWithDelta((float) $predictionsBefore[$i], (float) $prediction, 1e-8); + } + } + + /** + * @return array{0: Unlabeled} + */ + private function trainEstimatorAndGetTestingSet() : array + { + $dataset = $this->generator->generate(self::TRAIN_SIZE + self::TEST_SIZE); + + $dataset->apply(new ZScaleStandardizer()); + + $testing = $dataset->randomize()->take(self::TEST_SIZE); + + $folds = $dataset->fold(3); + + $this->estimator->train($folds[0]); + $this->estimator->partial($folds[1]); + $this->estimator->partial($folds[2]); + + return [$testing]; + } + #[Test] #[TestDox('Train incompatible')] public function testTrainIncompatible() : void From d538799498733daef3abe4945b687078550e4a79 Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Sat, 14 Feb 2026 19:01:18 +0200 Subject: [PATCH 05/11] ML-396 added USE_NUMPOWER_TRANSPOSE option to Network --- tests/Regressors/MLPRegressors/MLPRegressorTest.php | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/Regressors/MLPRegressors/MLPRegressorTest.php b/tests/Regressors/MLPRegressors/MLPRegressorTest.php index ddd633628..1198d02b5 100644 --- a/tests/Regressors/MLPRegressors/MLPRegressorTest.php +++ b/tests/Regressors/MLPRegressors/MLPRegressorTest.php @@ -26,7 +26,6 @@ use Rubix\ML\Exceptions\InvalidArgumentException; use Rubix\ML\Exceptions\RuntimeException; use PHPUnit\Framework\TestCase; -use function Apphp\PrettyPrint\pp; #[Group('Regressors')] #[CoversClass(MLPRegressor::class)] From f333c67ec7459c5c50a7b1771a891c94e0857f03 Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Sat, 14 Feb 2026 23:53:37 +0200 Subject: [PATCH 06/11] ML-396 fixed issue with samples normalization --- composer.json | 4 +- src/NeuralNet/Networks/Network.php | 34 ++++---- tests/NeuralNet/Networks/NetworkTest.php | 25 ++++++ .../MLPRegressors/MLPRegressorTest.php | 78 ------------------- 4 files changed, 45 insertions(+), 96 deletions(-) diff --git a/composer.json b/composer.json index a703df15b..f0e963cd5 100644 --- a/composer.json +++ b/composer.json @@ -38,6 +38,7 @@ "andrewdalpino/okbloomer": "^1.0", "psr/log": "^1.1|^2.0|^3.0", "rubix/tensor": "^3.0", + "rubixml/numpower": "dev-main", "symfony/polyfill-mbstring": "^1.0", "symfony/polyfill-php80": "^1.17", "symfony/polyfill-php82": "^1.27", @@ -52,7 +53,8 @@ "phpstan/phpstan": "^2.0", "phpstan/phpstan-phpunit": "^2.0", "phpunit/phpunit": "^12.0", - "swoole/ide-helper": "^5.1" + "swoole/ide-helper": "^5.1", + "apphp/pretty-print": "^0.5.1" }, "suggest": { "ext-tensor": "For fast Matrix/Vector computing", diff --git a/src/NeuralNet/Networks/Network.php b/src/NeuralNet/Networks/Network.php index 929813652..c504e43bf 100644 --- a/src/NeuralNet/Networks/Network.php +++ b/src/NeuralNet/Networks/Network.php @@ -73,8 +73,6 @@ class Network */ protected Optimizer $optimizer; - protected const USE_NUMPOWER_TRANSPOSE = false; - /** * @param Input $input * @param Hidden[] $hidden @@ -192,11 +190,8 @@ public function infer(Dataset $dataset) : NDArray return NumPower::array([]); } - if (self::USE_NUMPOWER_TRANSPOSE) { - $input = NumPower::transpose(NumPower::array($dataset->samples()), [1, 0]); - } else { - $input = NumPower::array($this->rowsToColumns($dataset->samples())); - } + $normalizedSamples = $this->normalizeSamples($dataset->samples()); + $input = NumPower::transpose(NumPower::array($normalizedSamples), [1, 0]); foreach ($this->layers() as $layer) { $input = $layer->infer($input); @@ -208,11 +203,7 @@ public function infer(Dataset $dataset) : NDArray $input = NumPower::reshape($input, [1, $shape[0]]); } - if (self::USE_NUMPOWER_TRANSPOSE) { - return NumPower::transpose($input, [1, 0]); - } else { - return NumPower::array($this->columnsToRows($input->toArray())); - } + return NumPower::transpose($input, [1, 0]); } /** @@ -228,11 +219,8 @@ public function roundtrip(Labeled $dataset) : float return 0.0; } - if (self::USE_NUMPOWER_TRANSPOSE) { - $input = NumPower::transpose(NumPower::array($dataset->samples()), [1, 0]); - } else { - $input = NumPower::array($this->rowsToColumns($dataset->samples())); - } + $normalizedSamples = $this->normalizeSamples($dataset->samples()); + $input = NumPower::transpose(NumPower::array($normalizedSamples), [1, 0]); $this->feed($input); @@ -326,6 +314,18 @@ private function rowsToColumns(array $rows) : array return $columns; } + /** + * Normalize samples to a strict list-of-lists with sequential numeric keys. + * NumPower's C extension expects packed arrays and can error or behave unpredictably + * when given arrays with non-sequential keys (e.g. after randomize/take/fold operations). + * @param array $samples + * @return array + */ + private function normalizeSamples(array $samples) : array + { + return array_map('array_values', array_values($samples)); + } + /** * @param list> $columns * @return list> diff --git a/tests/NeuralNet/Networks/NetworkTest.php b/tests/NeuralNet/Networks/NetworkTest.php index 199f1e9f4..586d1ffbc 100644 --- a/tests/NeuralNet/Networks/NetworkTest.php +++ b/tests/NeuralNet/Networks/NetworkTest.php @@ -109,4 +109,29 @@ public function testNumParams() : void self::assertEquals(103, $this->network->numParams()); } + + #[Test] + #[TestDox('Normalize samples returns packed list-of-lists for NumPower')] + public function testNormalizeSamplesReturnsPackedListOfLists() : void + { + $samples = [ + 10 => [2 => 1.0, 5 => 2.0, 9 => 10], + 20 => [2 => 3.0, 7 => 4.0, 1 => 1.0], + ]; + + $method = new ReflectionMethod(Network::class, 'normalizeSamples'); + $method->setAccessible(true); + + /** @var array $normalized */ + $normalized = $method->invoke($this->network, $samples); + + self::assertTrue(array_is_list($normalized)); + self::assertCount(2, $normalized); + + foreach ($normalized as $row) { + self::assertTrue(array_is_list($row)); + } + + self::assertSame([[1.0, 2.0, 10], [3.0, 4.0, 1.0]], $normalized); + } } diff --git a/tests/Regressors/MLPRegressors/MLPRegressorTest.php b/tests/Regressors/MLPRegressors/MLPRegressorTest.php index 1198d02b5..26299b3b1 100644 --- a/tests/Regressors/MLPRegressors/MLPRegressorTest.php +++ b/tests/Regressors/MLPRegressors/MLPRegressorTest.php @@ -159,15 +159,9 @@ public function testTrainPartialPredict() : void $testing = $dataset->randomize()->take(self::TEST_SIZE); - $testingSamplesBefore = $testing->samples(); - $testingLabelsBefore = $testing->labels(); - $folds = $dataset->fold(3); $this->estimator->train($folds[0]); - - $predictionsBefore = $this->estimator->predict($testing); - $this->estimator->partial($folds[1]); $this->estimator->partial($folds[2]); @@ -183,69 +177,14 @@ public function testTrainPartialPredict() : void self::assertIsArray($losses); self::assertContainsOnlyFloat($losses); - self::assertNotEmpty($losses); - - foreach ($losses as $epoch => $loss) { - self::assertIsInt($epoch); - self::assertGreaterThanOrEqual(1, $epoch); - self::assertFalse(is_nan($loss)); - self::assertTrue(is_finite($loss)); - } $scores = $this->estimator->scores(); self::assertIsArray($scores); self::assertContainsOnlyFloat($scores); - self::assertNotEmpty($scores); - - foreach ($scores as $epoch => $value) { - self::assertIsInt($epoch); - self::assertGreaterThanOrEqual(1, $epoch); - self::assertFalse(is_nan($value)); - self::assertTrue(is_finite($value)); - self::assertSame(0, $epoch % 3); - } $predictions = $this->estimator->predict($testing); - self::assertCount($testing->numSamples(), $predictions); - - foreach ($predictions as $prediction) { - self::assertIsNumeric($prediction); - self::assertFalse(is_nan((float) $prediction)); - self::assertTrue(is_finite((float) $prediction)); - } - - $predictions2 = $this->estimator->predict($testing); - - self::assertCount($testing->numSamples(), $predictions2); - - foreach ($predictions2 as $i => $prediction) { - self::assertEqualsWithDelta((float) $predictions[$i], (float) $prediction, 1e-12); - } - - self::assertEquals($testingSamplesBefore, $testing->samples()); - self::assertEquals($testingLabelsBefore, $testing->labels()); - - $delta = 0.0; - - foreach ($predictions as $i => $prediction) { - $delta += abs((float) $prediction - (float) $predictionsBefore[$i]); - } - - self::assertGreaterThan(0.0, $delta); - - $min = (float) $predictions[0]; - $max = (float) $predictions[0]; - - foreach ($predictions as $prediction) { - $p = (float) $prediction; - $min = min($min, $p); - $max = max($max, $p); - } - - self::assertGreaterThan(0.0, $max - $min); - /** @var list $labels */ $labels = $testing->labels(); $score = $this->metric->score( @@ -253,23 +192,6 @@ public function testTrainPartialPredict() : void labels: $labels ); - self::assertFalse(is_nan($score)); - self::assertTrue(is_finite($score)); - self::assertGreaterThan(-10.0, $score); - - $copy = unserialize(serialize($this->estimator)); - - self::assertInstanceOf(MLPRegressor::class, $copy); - self::assertTrue($copy->trained()); - - $predictionsAfter = $copy->predict($testing); - - self::assertCount($testing->numSamples(), $predictionsAfter); - - foreach ($predictionsAfter as $i => $prediction) { - self::assertEqualsWithDelta((float) $predictions[$i], (float) $prediction, 1e-8); - } - self::assertGreaterThanOrEqual(self::MIN_SCORE, $score); } From 1583ee3e4eb7a65b50383bf165f649e229aa750b Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Sat, 14 Feb 2026 23:58:25 +0200 Subject: [PATCH 07/11] ML-396 removed unneeded packages from composer --- composer.json | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/composer.json b/composer.json index f0e963cd5..d7810b2de 100644 --- a/composer.json +++ b/composer.json @@ -37,7 +37,6 @@ "amphp/parallel": "^1.3", "andrewdalpino/okbloomer": "^1.0", "psr/log": "^1.1|^2.0|^3.0", - "rubix/tensor": "^3.0", "rubixml/numpower": "dev-main", "symfony/polyfill-mbstring": "^1.0", "symfony/polyfill-php80": "^1.17", @@ -52,9 +51,7 @@ "phpstan/extension-installer": "^1.0", "phpstan/phpstan": "^2.0", "phpstan/phpstan-phpunit": "^2.0", - "phpunit/phpunit": "^12.0", - "swoole/ide-helper": "^5.1", - "apphp/pretty-print": "^0.5.1" + "phpunit/phpunit": "^12.0" }, "suggest": { "ext-tensor": "For fast Matrix/Vector computing", From 57037c623914b67fb53a8ef77101b081bb0fc12d Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Sun, 15 Feb 2026 00:00:00 +0200 Subject: [PATCH 08/11] ML-396 removed unneeded packages from composer --- composer.json | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/composer.json b/composer.json index d7810b2de..a703df15b 100644 --- a/composer.json +++ b/composer.json @@ -37,7 +37,7 @@ "amphp/parallel": "^1.3", "andrewdalpino/okbloomer": "^1.0", "psr/log": "^1.1|^2.0|^3.0", - "rubixml/numpower": "dev-main", + "rubix/tensor": "^3.0", "symfony/polyfill-mbstring": "^1.0", "symfony/polyfill-php80": "^1.17", "symfony/polyfill-php82": "^1.27", @@ -51,7 +51,8 @@ "phpstan/extension-installer": "^1.0", "phpstan/phpstan": "^2.0", "phpstan/phpstan-phpunit": "^2.0", - "phpunit/phpunit": "^12.0" + "phpunit/phpunit": "^12.0", + "swoole/ide-helper": "^5.1" }, "suggest": { "ext-tensor": "For fast Matrix/Vector computing", From 66ce8ec4ca423d49cad363ad2ad87add52f3baaf Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Wed, 11 Mar 2026 00:41:45 +0200 Subject: [PATCH 09/11] ML-400 convert Network to interface --- CHANGELOG.md | 2 + src/Classifiers/LogisticRegression.php | 3 +- src/Classifiers/MultilayerPerceptron.php | 3 +- src/Classifiers/SoftmaxClassifier.php | 3 +- src/NeuralNet/FeedForward.php | 2 +- src/NeuralNet/Network.php | 254 +------------ .../Networks/Base/Contracts/Network.php | 25 ++ .../FeedForward}/FeedForward.php | 47 ++- src/NeuralNet/Networks/Network.php | 352 ------------------ src/NeuralNet/Snapshots/Snapshot.php | 4 +- src/Regressors/Adaline.php | 3 +- src/Regressors/MLPRegressor.php | 3 +- src/Regressors/MLPRegressor/MLPRegressor.php | 50 +-- tests/NeuralNet/NetworkTest.php | 3 +- .../FeedForward}/FeedForwardTest.php | 89 ++++- tests/NeuralNet/Networks/NetworkTest.php | 137 ------- tests/NeuralNet/SnapshotTest.php | 3 +- tests/NeuralNet/Snapshots/SnapshotTest.php | 10 +- 18 files changed, 194 insertions(+), 799 deletions(-) create mode 100644 src/NeuralNet/Networks/Base/Contracts/Network.php rename src/NeuralNet/{FeedForwards => Networks/FeedForward}/FeedForward.php (83%) delete mode 100644 src/NeuralNet/Networks/Network.php rename tests/NeuralNet/{FeedForwards => Networks/FeedForward}/FeedForwardTest.php (65%) delete mode 100644 tests/NeuralNet/Networks/NetworkTest.php diff --git a/CHANGELOG.md b/CHANGELOG.md index b70609e33..fbb08efe2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,8 @@ - Removed output layer L2 Penalty parameter from MLP Learners - Remove Network interface - RBX Serializer only tracks major library version number + - Convert NeuralNet classes to use NDArray instead of Matrix + - Turn back Network interface - 2.5.0 - Added Vantage Point Spatial tree diff --git a/src/Classifiers/LogisticRegression.php b/src/Classifiers/LogisticRegression.php index 8f5f4c2c0..3d749dab7 100644 --- a/src/Classifiers/LogisticRegression.php +++ b/src/Classifiers/LogisticRegression.php @@ -2,6 +2,7 @@ namespace Rubix\ML\Classifiers; +use Rubix\ML\NeuralNet\FeedForward; use Rubix\ML\Online; use Rubix\ML\Learner; use Rubix\ML\Verbose; @@ -289,7 +290,7 @@ public function train(Dataset $dataset) : void $classes = $dataset->possibleOutcomes(); - $this->network = new Network( + $this->network = new FeedForward( new Placeholder1D($dataset->numFeatures()), [new Dense(1, $this->l2Penalty, true, new Xavier1())], new Binary($classes, $this->costFn), diff --git a/src/Classifiers/MultilayerPerceptron.php b/src/Classifiers/MultilayerPerceptron.php index e296915af..34e3d8fe6 100644 --- a/src/Classifiers/MultilayerPerceptron.php +++ b/src/Classifiers/MultilayerPerceptron.php @@ -2,6 +2,7 @@ namespace Rubix\ML\Classifiers; +use Rubix\ML\NeuralNet\FeedForward; use Rubix\ML\Online; use Rubix\ML\Learner; use Rubix\ML\Verbose; @@ -370,7 +371,7 @@ public function train(Dataset $dataset) : void $hiddenLayers[] = new Dense(count($classes), 0.0, true, new Xavier1()); - $this->network = new Network( + $this->network = new FeedForward( new Placeholder1D($dataset->numFeatures()), $hiddenLayers, new Multiclass($classes, $this->costFn), diff --git a/src/Classifiers/SoftmaxClassifier.php b/src/Classifiers/SoftmaxClassifier.php index 3b8581771..560000671 100644 --- a/src/Classifiers/SoftmaxClassifier.php +++ b/src/Classifiers/SoftmaxClassifier.php @@ -2,6 +2,7 @@ namespace Rubix\ML\Classifiers; +use Rubix\ML\NeuralNet\FeedForward; use Rubix\ML\Online; use Rubix\ML\Learner; use Rubix\ML\Verbose; @@ -285,7 +286,7 @@ public function train(Dataset $dataset) : void $classes = $dataset->possibleOutcomes(); - $this->network = new Network( + $this->network = new FeedForward( new Placeholder1D($dataset->numFeatures()), [new Dense(count($classes), $this->l2Penalty, true, new Xavier1())], new Multiclass($classes, $this->costFn), diff --git a/src/NeuralNet/FeedForward.php b/src/NeuralNet/FeedForward.php index 5cffe79b1..4849f1681 100644 --- a/src/NeuralNet/FeedForward.php +++ b/src/NeuralNet/FeedForward.php @@ -27,7 +27,7 @@ * @package Rubix/ML * @author Andrew DalPino */ -class FeedForward extends Network +class FeedForward implements Network { /** * The input layer to the network. diff --git a/src/NeuralNet/Network.php b/src/NeuralNet/Network.php index 57e7cfd25..26a57e9d4 100644 --- a/src/NeuralNet/Network.php +++ b/src/NeuralNet/Network.php @@ -2,270 +2,24 @@ namespace Rubix\ML\NeuralNet; -use Tensor\Matrix; -use Rubix\ML\Encoding; -use Rubix\ML\Datasets\Dataset; -use Rubix\ML\Datasets\Labeled; -use Rubix\ML\NeuralNet\Layers\Input; -use Rubix\ML\NeuralNet\Layers\Output; -use Rubix\ML\NeuralNet\Layers\Parametric; -use Rubix\ML\NeuralNet\Optimizers\Adaptive; -use Rubix\ML\NeuralNet\Optimizers\Optimizer; use Traversable; -use function array_reverse; - /** * Network * - * A neural network implementation consisting of an input and output layer and any number - * of intermediate hidden layers. - * * @internal * * @category Machine Learning * @package Rubix/ML * @author Andrew DalPino + * @author Samuel Akopyan */ -class Network +interface Network { /** - * The input layer to the network. - * - * @var Input - */ - protected Input $input; - - /** - * The hidden layers of the network. - * - * @var list - */ - protected array $hidden = [ - // - ]; - - /** - * The pathing of the backward pass through the hidden layers. - * - * @var list - */ - protected array $backPass = [ - // - ]; - - /** - * The output layer of the network. - * - * @var Output - */ - protected Output $output; - - /** - * The gradient descent optimizer used to train the network. - * - * @var Optimizer - */ - protected Optimizer $optimizer; - - /** - * @param Input $input - * @param Layers\Hidden[] $hidden - * @param Output $output - * @param Optimizer $optimizer - */ - public function __construct(Input $input, array $hidden, Output $output, Optimizer $optimizer) - { - $hidden = array_values($hidden); - - $backPass = array_reverse($hidden); - - $this->input = $input; - $this->hidden = $hidden; - $this->output = $output; - $this->optimizer = $optimizer; - $this->backPass = $backPass; - } - - /** - * Return the input layer. - * - * @return Input - */ - public function input() : Input - { - return $this->input; - } - - /** - * Return an array of hidden layers indexed left to right. - * - * @return list - */ - public function hidden() : array - { - return $this->hidden; - } - - /** - * Return the output layer. - * - * @return Output - */ - public function output() : Output - { - return $this->output; - } - - /** - * Return all the layers in the network. + * Return the layers of the network. * * @return Traversable */ - public function layers() : Traversable - { - yield $this->input; - - yield from $this->hidden; - - yield $this->output; - } - - /** - * Return the number of trainable parameters in the network. - * - * @return int - */ - public function numParams() : int - { - $numParams = 0; - - foreach ($this->layers() as $layer) { - if ($layer instanceof Parametric) { - foreach ($layer->parameters() as $parameter) { - $numParams += $parameter->param()->size(); - } - } - } - - return $numParams; - } - - /** - * Initialize the parameters of the layers and warm the optimizer cache. - */ - public function initialize() : void - { - $fanIn = 1; - - foreach ($this->layers() as $layer) { - $fanIn = $layer->initialize($fanIn); - } - - if ($this->optimizer instanceof Adaptive) { - foreach ($this->layers() as $layer) { - if ($layer instanceof Parametric) { - foreach ($layer->parameters() as $param) { - $this->optimizer->warm($param); - } - } - } - } - } - - /** - * Run an inference pass and return the activations at the output layer. - * - * @param Dataset $dataset - * @return Matrix - */ - public function infer(Dataset $dataset) : Matrix - { - $input = Matrix::quick($dataset->samples())->transpose(); - - foreach ($this->layers() as $layer) { - $input = $layer->infer($input); - } - - return $input->transpose(); - } - - /** - * Perform a forward and backward pass of the network in one call. Returns - * the loss from the backward pass. - * - * @param Labeled $dataset - * @return float - */ - public function roundtrip(Labeled $dataset) : float - { - $input = Matrix::quick($dataset->samples())->transpose(); - - $this->feed($input); - - $loss = $this->backpropagate($dataset->labels()); - - return $loss; - } - - /** - * Feed a batch through the network and return a matrix of activations at the output later. - * - * @param Matrix $input - * @return Matrix - */ - public function feed(Matrix $input) : Matrix - { - foreach ($this->layers() as $layer) { - $input = $layer->forward($input); - } - - return $input; - } - - /** - * Backpropagate the gradient of the cost function and return the loss. - * - * @param list $labels - * @return float - */ - public function backpropagate(array $labels) : float - { - [$gradient, $loss] = $this->output->back($labels, $this->optimizer); - - foreach ($this->backPass as $layer) { - $gradient = $layer->back($gradient, $this->optimizer); - } - - return $loss; - } - - /** - * Export the network architecture as a graph in dot format. - * - * @return Encoding - */ - public function exportGraphviz() : Encoding - { - $dot = 'digraph Tree {' . PHP_EOL; - $dot .= ' node [shape=box, fontname=helvetica];' . PHP_EOL; - - $layerNum = 0; - - foreach ($this->layers() as $layer) { - ++$layerNum; - - $dot .= " N$layerNum [label=\"$layer\",style=\"rounded\"]" . PHP_EOL; - - if ($layerNum > 1) { - $parentId = $layerNum - 1; - - $dot .= " N{$parentId} -> N{$layerNum};" . PHP_EOL; - } - } - - $dot .= '}'; - - return new Encoding($dot); - } + public function layers() : Traversable; } diff --git a/src/NeuralNet/Networks/Base/Contracts/Network.php b/src/NeuralNet/Networks/Base/Contracts/Network.php new file mode 100644 index 000000000..b6dba2ea3 --- /dev/null +++ b/src/NeuralNet/Networks/Base/Contracts/Network.php @@ -0,0 +1,25 @@ + + */ +interface Network +{ + /** + * Return the layers of the network. + * + * @return Traversable + */ + public function layers() : Traversable; +} diff --git a/src/NeuralNet/FeedForwards/FeedForward.php b/src/NeuralNet/Networks/FeedForward/FeedForward.php similarity index 83% rename from src/NeuralNet/FeedForwards/FeedForward.php rename to src/NeuralNet/Networks/FeedForward/FeedForward.php index aea7fe6ed..e4ff715bf 100644 --- a/src/NeuralNet/FeedForwards/FeedForward.php +++ b/src/NeuralNet/Networks/FeedForward/FeedForward.php @@ -1,22 +1,21 @@ */ -class FeedForward extends Network +class FeedForward implements Network { /** * The input layer to the network. @@ -195,6 +194,40 @@ public function infer(Dataset $dataset) : NDArray return NumPower::transpose($input, [1, 0]); } + public function inferNew(Dataset $dataset) : NDArray + { + if ($dataset->empty()) { + return NumPower::array([]); + } + + $normalizedSamples = $this->normalizeSamples($dataset->samples()); + $input = NumPower::transpose(NumPower::array($normalizedSamples), [1, 0]); + + foreach ($this->layers() as $layer) { + $input = $layer->infer($input); + } + + $shape = $input->shape(); + + if (count($shape) === 1) { + $input = NumPower::reshape($input, [1, $shape[0]]); + } + + return NumPower::transpose($input, [1, 0]); + } + + /** + * Normalize samples to a strict list-of-lists with sequential numeric keys. + * NumPower's C extension expects packed arrays and can error or behave unpredictably + * when given arrays with non-sequential keys (e.g. after randomize/take/fold operations). + * @param array $samples + * @return array + */ + private function normalizeSamples(array $samples) : array + { + return array_map('array_values', array_values($samples)); + } + /** * Perform a forward and backward pass of the network in one call. Returns * the loss from the backward pass. diff --git a/src/NeuralNet/Networks/Network.php b/src/NeuralNet/Networks/Network.php deleted file mode 100644 index c504e43bf..000000000 --- a/src/NeuralNet/Networks/Network.php +++ /dev/null @@ -1,352 +0,0 @@ - - */ -class Network -{ - /** - * The input layer to the network. - * - * @var Input - */ - protected Input $input; - - /** - * The hidden layers of the network. - * - * @var list - */ - protected array $hidden = [ - // - ]; - - /** - * The pathing of the backward pass through the hidden layers. - * - * @var list - */ - protected array $backPass = [ - // - ]; - - /** - * The output layer of the network. - * - * @var Output - */ - protected Output $output; - - /** - * The gradient descent optimizer used to train the network. - * - * @var Optimizer - */ - protected Optimizer $optimizer; - - /** - * @param Input $input - * @param Hidden[] $hidden - * @param Output $output - * @param Optimizer $optimizer - */ - public function __construct(Input $input, array $hidden, Output $output, Optimizer $optimizer) - { - $hidden = array_values($hidden); - - $backPass = array_reverse($hidden); - - $this->input = $input; - $this->hidden = $hidden; - $this->output = $output; - $this->optimizer = $optimizer; - $this->backPass = $backPass; - } - - /** - * Return the input layer. - * - * @return Input - */ - public function input() : Input - { - return $this->input; - } - - /** - * Return an array of hidden layers indexed left to right. - * - * @return list - */ - public function hidden() : array - { - return $this->hidden; - } - - /** - * Return the output layer. - * - * @return Output - */ - public function output() : Output - { - return $this->output; - } - - /** - * Return all the layers in the network. - * - * @return Traversable - */ - public function layers() : Traversable - { - yield $this->input; - - yield from $this->hidden; - - yield $this->output; - } - - /** - * Return the number of trainable parameters in the network. - * - * @return int - */ - public function numParams() : int - { - $numParams = 0; - - foreach ($this->layers() as $layer) { - if ($layer instanceof Parametric) { - foreach ($layer->parameters() as $parameter) { - $numParams += $parameter->param()->size(); - } - } - } - - return $numParams; - } - - /** - * Initialize the parameters of the layers and warm the optimizer cache. - */ - public function initialize() : void - { - $fanIn = 1; - - foreach ($this->layers() as $layer) { - $fanIn = $layer->initialize($fanIn); - } - - if ($this->optimizer instanceof Adaptive) { - foreach ($this->layers() as $layer) { - if ($layer instanceof Parametric) { - foreach ($layer->parameters() as $param) { - $this->optimizer->warm($param); - } - } - } - } - } - - /** - * Run an inference pass and return the activations at the output layer. - * - * @param Dataset $dataset - * @return NDArray - */ - public function infer(Dataset $dataset) : NDArray - { - if ($dataset->empty()) { - return NumPower::array([]); - } - - $normalizedSamples = $this->normalizeSamples($dataset->samples()); - $input = NumPower::transpose(NumPower::array($normalizedSamples), [1, 0]); - - foreach ($this->layers() as $layer) { - $input = $layer->infer($input); - } - - $shape = $input->shape(); - - if (count($shape) === 1) { - $input = NumPower::reshape($input, [1, $shape[0]]); - } - - return NumPower::transpose($input, [1, 0]); - } - - /** - * Perform a forward and backward pass of the network in one call. Returns - * the loss from the backward pass. - * - * @param Labeled $dataset - * @return float - */ - public function roundtrip(Labeled $dataset) : float - { - if ($dataset->empty()) { - return 0.0; - } - - $normalizedSamples = $this->normalizeSamples($dataset->samples()); - $input = NumPower::transpose(NumPower::array($normalizedSamples), [1, 0]); - - $this->feed($input); - - $loss = $this->backpropagate($dataset->labels()); - - return $loss; - } - - /** - * Feed a batch through the network and return a matrix of activations at the output later. - * - * @param NDArray $input - * @return NDArray - */ - public function feed(NDArray $input) : NDArray - { - foreach ($this->layers() as $layer) { - $input = $layer->forward($input); - } - - return $input; - } - - /** - * Backpropagate the gradient of the cost function and return the loss. - * - * @param list $labels - * @return float - */ - public function backpropagate(array $labels) : float - { - [$gradient, $loss] = $this->output->back($labels, $this->optimizer); - - foreach ($this->backPass as $layer) { - $gradient = $layer->back($gradient, $this->optimizer); - } - - return $loss; - } - - /** - * Export the network architecture as a graph in dot format. - * - * @return Encoding - */ - public function exportGraphviz() : Encoding - { - $dot = 'digraph Tree {' . PHP_EOL; - $dot .= ' node [shape=box, fontname=helvetica];' . PHP_EOL; - - $layerNum = 0; - - foreach ($this->layers() as $layer) { - ++$layerNum; - - $dot .= " N$layerNum [label=\"$layer\",style=\"rounded\"]" . PHP_EOL; - - if ($layerNum > 1) { - $parentId = $layerNum - 1; - - $dot .= " N{$parentId} -> N{$layerNum};" . PHP_EOL; - } - } - - $dot .= '}'; - - return new Encoding($dot); - } - - /** - * @param list> $rows - * @return list> - */ - private function rowsToColumns(array $rows) : array - { - $numSamples = count($rows); - $numFeatures = isset($rows[0]) && is_array($rows[0]) ? count($rows[0]) : 0; - - $columns = []; - - for ($j = 0; $j < $numFeatures; ++$j) { - $column = []; - - for ($i = 0; $i < $numSamples; ++$i) { - $column[] = $rows[$i][$j]; - } - - $columns[] = $column; - } - - return $columns; - } - - /** - * Normalize samples to a strict list-of-lists with sequential numeric keys. - * NumPower's C extension expects packed arrays and can error or behave unpredictably - * when given arrays with non-sequential keys (e.g. after randomize/take/fold operations). - * @param array $samples - * @return array - */ - private function normalizeSamples(array $samples) : array - { - return array_map('array_values', array_values($samples)); - } - - /** - * @param list> $columns - * @return list> - */ - private function columnsToRows(array $columns) : array - { - $numFeatures = count($columns); - $numSamples = isset($columns[0]) && is_array($columns[0]) ? count($columns[0]) : 0; - - $rows = []; - - for ($i = 0; $i < $numSamples; ++$i) { - $row = []; - - for ($j = 0; $j < $numFeatures; ++$j) { - $row[] = $columns[$j][$i]; - } - - $rows[] = $row; - } - - return $rows; - } -} diff --git a/src/NeuralNet/Snapshots/Snapshot.php b/src/NeuralNet/Snapshots/Snapshot.php index 033224d5c..c4bd33f72 100644 --- a/src/NeuralNet/Snapshots/Snapshot.php +++ b/src/NeuralNet/Snapshots/Snapshot.php @@ -2,9 +2,9 @@ namespace Rubix\ML\NeuralNet\Snapshots; -use Rubix\ML\NeuralNet\Layers\Base\Contracts\Parametric; use Rubix\ML\Exceptions\InvalidArgumentException; -use Rubix\ML\NeuralNet\Networks\Network; +use Rubix\ML\NeuralNet\Layers\Base\Contracts\Parametric; +use Rubix\ML\NeuralNet\Networks\Base\Contracts\Network; use Rubix\ML\NeuralNet\Parameters\Parameter; /** diff --git a/src/Regressors/Adaline.php b/src/Regressors/Adaline.php index 22e8201d8..40940a1f0 100644 --- a/src/Regressors/Adaline.php +++ b/src/Regressors/Adaline.php @@ -2,6 +2,7 @@ namespace Rubix\ML\Regressors; +use Rubix\ML\NeuralNet\FeedForward; use Rubix\ML\Online; use Rubix\ML\Learner; use Rubix\ML\Verbose; @@ -277,7 +278,7 @@ public function train(Dataset $dataset) : void { DatasetIsNotEmpty::with($dataset)->check(); - $this->network = new Network( + $this->network = new FeedForward( new Placeholder1D($dataset->numFeatures()), [new Dense(1, $this->l2Penalty, true, new Xavier2())], new Continuous($this->costFn), diff --git a/src/Regressors/MLPRegressor.php b/src/Regressors/MLPRegressor.php index 710e83f76..769eee4f9 100644 --- a/src/Regressors/MLPRegressor.php +++ b/src/Regressors/MLPRegressor.php @@ -2,6 +2,7 @@ namespace Rubix\ML\Regressors; +use Rubix\ML\NeuralNet\FeedForward; use Rubix\ML\Online; use Rubix\ML\Learner; use Rubix\ML\Verbose; @@ -356,7 +357,7 @@ public function train(Dataset $dataset) : void $hiddenLayers[] = new Dense(1, 0.0, true, new Xavier2()); - $this->network = new Network( + $this->network = new FeedForward( new Placeholder1D($dataset->numFeatures()), $hiddenLayers, new Continuous($this->costFn), diff --git a/src/Regressors/MLPRegressor/MLPRegressor.php b/src/Regressors/MLPRegressor/MLPRegressor.php index b95fe7e49..d82f63a4f 100644 --- a/src/Regressors/MLPRegressor/MLPRegressor.php +++ b/src/Regressors/MLPRegressor/MLPRegressor.php @@ -2,45 +2,45 @@ namespace Rubix\ML\Regressors\MLPRegressor; -use Rubix\ML\Online; -use Rubix\ML\Learner; -use Rubix\ML\Verbose; +use Generator; +use Rubix\ML\CrossValidation\Metrics\Metric; +use Rubix\ML\CrossValidation\Metrics\RMSE; +use Rubix\ML\Datasets\Dataset; use Rubix\ML\DataType; use Rubix\ML\Encoding; use Rubix\ML\Estimator; -use Rubix\ML\Persistable; use Rubix\ML\EstimatorType; +use Rubix\ML\Exceptions\InvalidArgumentException; +use Rubix\ML\Exceptions\RuntimeException; use Rubix\ML\Helpers\Params; -use Rubix\ML\Datasets\Dataset; -use Rubix\ML\Traits\LoggerAware; -use Rubix\ML\NeuralNet\Snapshots\Snapshot; -use Rubix\ML\NeuralNet\Networks\Network; -use Rubix\ML\NeuralNet\Layers\Dense\Dense; +use Rubix\ML\Learner; +use Rubix\ML\NeuralNet\CostFunctions\Base\Contracts\RegressionLoss; +use Rubix\ML\NeuralNet\CostFunctions\LeastSquares\LeastSquares; +use Rubix\ML\NeuralNet\Initializers\Xavier\XavierUniform; use Rubix\ML\NeuralNet\Layers\Base\Contracts\Hidden; -use Rubix\ML\Traits\AutotrackRevisions; -use Rubix\ML\NeuralNet\Optimizers\Adam\Adam; use Rubix\ML\NeuralNet\Layers\Continuous\Continuous; -use Rubix\ML\CrossValidation\Metrics\RMSE; +use Rubix\ML\NeuralNet\Layers\Dense\Dense; use Rubix\ML\NeuralNet\Layers\Placeholder1D\Placeholder1D; +use Rubix\ML\NeuralNet\Networks\Base\Contracts\Network; +use Rubix\ML\NeuralNet\Networks\FeedForward\FeedForward; +use Rubix\ML\NeuralNet\Optimizers\Adam\Adam; use Rubix\ML\NeuralNet\Optimizers\Base\Optimizer; -use Rubix\ML\NeuralNet\Initializers\Xavier\XavierUniform; -use Rubix\ML\CrossValidation\Metrics\Metric; +use Rubix\ML\NeuralNet\Snapshots\Snapshot; +use Rubix\ML\Online; +use Rubix\ML\Persistable; +use Rubix\ML\Specifications\DatasetHasDimensionality; use Rubix\ML\Specifications\DatasetIsLabeled; use Rubix\ML\Specifications\DatasetIsNotEmpty; -use Rubix\ML\Specifications\SpecificationChain; -use Rubix\ML\NeuralNet\CostFunctions\LeastSquares\LeastSquares; -use Rubix\ML\NeuralNet\CostFunctions\Base\Contracts\RegressionLoss; -use Rubix\ML\Specifications\DatasetHasDimensionality; -use Rubix\ML\Specifications\LabelsAreCompatibleWithLearner; use Rubix\ML\Specifications\EstimatorIsCompatibleWithMetric; +use Rubix\ML\Specifications\LabelsAreCompatibleWithLearner; use Rubix\ML\Specifications\SamplesAreCompatibleWithEstimator; -use Rubix\ML\Exceptions\InvalidArgumentException; -use Rubix\ML\Exceptions\RuntimeException; -use Generator; - -use function is_nan; +use Rubix\ML\Specifications\SpecificationChain; +use Rubix\ML\Traits\AutotrackRevisions; +use Rubix\ML\Traits\LoggerAware; +use Rubix\ML\Verbose; use function count; use function get_object_vars; +use function is_nan; use function number_format; /** @@ -357,7 +357,7 @@ public function train(Dataset $dataset) : void $hiddenLayers[] = new Dense(1, 0.0, true, new XavierUniform()); - $this->network = new Network( + $this->network = new FeedForward( new Placeholder1D($dataset->numFeatures()), $hiddenLayers, new Continuous($this->costFn), diff --git a/tests/NeuralNet/NetworkTest.php b/tests/NeuralNet/NetworkTest.php index 1421c0a35..fed2bb57d 100644 --- a/tests/NeuralNet/NetworkTest.php +++ b/tests/NeuralNet/NetworkTest.php @@ -7,6 +7,7 @@ use PHPUnit\Framework\Attributes\CoversClass; use PHPUnit\Framework\Attributes\Group; use Rubix\ML\Datasets\Labeled; +use Rubix\ML\NeuralNet\FeedForward; use Rubix\ML\NeuralNet\Layers\Hidden; use Rubix\ML\NeuralNet\Layers\Input; use Rubix\ML\NeuralNet\Network; @@ -63,7 +64,7 @@ classes: ['yes', 'no', 'maybe'], costFn: new CrossEntropy() ); - $this->network = new Network( + $this->network = new FeedForward( input: $this->input, hidden: $this->hidden, output: $this->output, diff --git a/tests/NeuralNet/FeedForwards/FeedForwardTest.php b/tests/NeuralNet/Networks/FeedForward/FeedForwardTest.php similarity index 65% rename from tests/NeuralNet/FeedForwards/FeedForwardTest.php rename to tests/NeuralNet/Networks/FeedForward/FeedForwardTest.php index 84226fc70..71bca4c25 100644 --- a/tests/NeuralNet/FeedForwards/FeedForwardTest.php +++ b/tests/NeuralNet/Networks/FeedForward/FeedForwardTest.php @@ -1,26 +1,27 @@ network = new FeedForward($this->input, $this->hidden, $this->output, new Adam(0.001)); } + #[Test] + #[TestDox('Layers iterator yields all layers')] + public function testLayers() : void + { + $count = 0; + + foreach ($this->network->layers() as $item) { + ++$count; + } + + self::assertSame(7, $count); + } + + #[Test] + #[TestDox('Input layer is Placeholder1D')] + public function testInput() : void + { + self::assertInstanceOf(Placeholder1D::class, $this->network->input()); + } + + #[Test] + #[TestDox('Hidden layers count')] + public function testHidden() : void + { + self::assertCount(5, $this->network->hidden()); + } + + #[Test] + #[TestDox('Num params')] + public function testNumParams() : void + { + $this->network->initialize(); + + self::assertEquals(103, $this->network->numParams()); + } + #[Test] #[TestDox('Builds a feed-forward network instance')] public function build() : void @@ -130,4 +167,30 @@ public function roundtrip() : void self::assertIsFloat($loss); } + + + #[Test] + #[TestDox('Normalize samples returns packed list-of-lists for NumPower')] + public function testNormalizeSamplesReturnsPackedListOfLists() : void + { + $samples = [ + 10 => [2 => 1.0, 5 => 2.0, 9 => 10], + 20 => [2 => 3.0, 7 => 4.0, 1 => 1.0], + ]; + + $method = new ReflectionMethod(FeedForward::class, 'normalizeSamples'); + $method->setAccessible(true); + + /** @var array $normalized */ + $normalized = $method->invoke($this->network, $samples); + + self::assertTrue(array_is_list($normalized)); + self::assertCount(2, $normalized); + + foreach ($normalized as $row) { + self::assertTrue(array_is_list($row)); + } + + self::assertSame([[1.0, 2.0, 10], [3.0, 4.0, 1.0]], $normalized); + } } diff --git a/tests/NeuralNet/Networks/NetworkTest.php b/tests/NeuralNet/Networks/NetworkTest.php deleted file mode 100644 index 586d1ffbc..000000000 --- a/tests/NeuralNet/Networks/NetworkTest.php +++ /dev/null @@ -1,137 +0,0 @@ -dataset = Labeled::quick( - samples: [ - [1.0, 2.5], - [0.1, 0.0], - [0.002, -6.0], - ], - labels: ['yes', 'no', 'maybe'] - ); - - $this->input = new Placeholder1D(2); - - $this->hidden = [ - new Dense(neurons: 10), - new Activation(new ReLU()), - new Dense(neurons: 5), - new Activation(new ReLU()), - new Dense(neurons: 3), - ]; - - $this->output = new Multiclass( - classes: ['yes', 'no', 'maybe'], - costFn: new CrossEntropy() - ); - - $this->network = new Network( - input: $this->input, - hidden: $this->hidden, - output: $this->output, - optimizer: new Adam(0.001) - ); - } - - #[Test] - #[TestDox('Layers iterator yields all layers')] - public function testLayers() : void - { - $count = 0; - - foreach ($this->network->layers() as $item) { - ++$count; - } - - self::assertSame(7, $count); - } - - #[Test] - #[TestDox('Input layer is Placeholder1D')] - public function testInput() : void - { - self::assertInstanceOf(Placeholder1D::class, $this->network->input()); - } - - #[Test] - #[TestDox('Hidden layers count')] - public function testHidden() : void - { - self::assertCount(5, $this->network->hidden()); - } - - #[Test] - #[TestDox('Num params')] - public function testNumParams() : void - { - $this->network->initialize(); - - self::assertEquals(103, $this->network->numParams()); - } - - #[Test] - #[TestDox('Normalize samples returns packed list-of-lists for NumPower')] - public function testNormalizeSamplesReturnsPackedListOfLists() : void - { - $samples = [ - 10 => [2 => 1.0, 5 => 2.0, 9 => 10], - 20 => [2 => 3.0, 7 => 4.0, 1 => 1.0], - ]; - - $method = new ReflectionMethod(Network::class, 'normalizeSamples'); - $method->setAccessible(true); - - /** @var array $normalized */ - $normalized = $method->invoke($this->network, $samples); - - self::assertTrue(array_is_list($normalized)); - self::assertCount(2, $normalized); - - foreach ($normalized as $row) { - self::assertTrue(array_is_list($row)); - } - - self::assertSame([[1.0, 2.0, 10], [3.0, 4.0, 1.0]], $normalized); - } -} diff --git a/tests/NeuralNet/SnapshotTest.php b/tests/NeuralNet/SnapshotTest.php index bdf41829e..5cad02e30 100644 --- a/tests/NeuralNet/SnapshotTest.php +++ b/tests/NeuralNet/SnapshotTest.php @@ -6,6 +6,7 @@ use PHPUnit\Framework\Attributes\CoversClass; use PHPUnit\Framework\Attributes\Group; +use Rubix\ML\NeuralNet\FeedForward; use Rubix\ML\NeuralNet\Snapshot; use Rubix\ML\NeuralNet\Network; use Rubix\ML\NeuralNet\Layers\Dense; @@ -27,7 +28,7 @@ class SnapshotTest extends TestCase public function testTake() : void { - $network = new Network( + $network = new FeedForward( input: new Placeholder1D(1), hidden: [ new Dense(10), diff --git a/tests/NeuralNet/Snapshots/SnapshotTest.php b/tests/NeuralNet/Snapshots/SnapshotTest.php index ecde317e3..1aa4e2c7e 100644 --- a/tests/NeuralNet/Snapshots/SnapshotTest.php +++ b/tests/NeuralNet/Snapshots/SnapshotTest.php @@ -7,11 +7,11 @@ use PHPUnit\Framework\Attributes\CoversClass; use PHPUnit\Framework\Attributes\Group; use Rubix\ML\Exceptions\InvalidArgumentException; +use Rubix\ML\NeuralNet\Layers\Activation\Activation; +use Rubix\ML\NeuralNet\Layers\Binary\Binary; +use Rubix\ML\NeuralNet\Networks\FeedForward\FeedForward; use Rubix\ML\NeuralNet\Snapshots\Snapshot; -use Rubix\ML\NeuralNet\Networks\Network; use Rubix\ML\NeuralNet\Layers\Dense\Dense; -use Rubix\ML\NeuralNet\Layers\Binary\Binary; -use Rubix\ML\NeuralNet\Layers\Activation\Activation; use Rubix\ML\NeuralNet\Layers\Placeholder1D\Placeholder1D; use Rubix\ML\NeuralNet\Optimizers\Stochastic\Stochastic; use Rubix\ML\NeuralNet\ActivationFunctions\ELU\ELU; @@ -24,7 +24,7 @@ class SnapshotTest extends TestCase { protected Snapshot $snapshot; - protected Network $network; + protected FeedForward $network; public function testConstructorThrowsWithWrongParameters() : void { @@ -39,7 +39,7 @@ public function testConstructorThrowsWithWrongParameters() : void public function testTake() : void { - $network = new Network( + $network = new FeedForward( input: new Placeholder1D(1), hidden: [ new Dense(10), From 0db4266fa03860dabeaee48124e67ffa831289b6 Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Wed, 11 Mar 2026 01:53:16 +0200 Subject: [PATCH 10/11] ML-400 added normalizeSamples to FeedForward --- .../Networks/FeedForward/FeedForward.php | 48 +++++++++++-------- 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/src/NeuralNet/Networks/FeedForward/FeedForward.php b/src/NeuralNet/Networks/FeedForward/FeedForward.php index e4ff715bf..3d7014439 100644 --- a/src/NeuralNet/Networks/FeedForward/FeedForward.php +++ b/src/NeuralNet/Networks/FeedForward/FeedForward.php @@ -72,6 +72,13 @@ class FeedForward implements Network */ protected Optimizer $optimizer; + /** + * Whether to normalize the samples. + * + * @var bool + */ + private bool $normalizeSamples; + /** * @param Input $input * @param Hidden[] $hidden @@ -89,6 +96,8 @@ public function __construct(Input $input, array $hidden, Output $output, Optimiz $this->output = $output; $this->optimizer = $optimizer; $this->backPass = $backPass; + + $this->normalizeSamples = false; } /** @@ -185,32 +194,29 @@ public function initialize() : void */ public function infer(Dataset $dataset) : NDArray { - $input = NumPower::transpose(NumPower::array($dataset->samples()), [1, 0]); - - foreach ($this->layers() as $layer) { - $input = $layer->infer($input); - } - - return NumPower::transpose($input, [1, 0]); - } + if ($this->normalizeSamples) { + if ($dataset->empty()) { + return NumPower::array([]); + } - public function inferNew(Dataset $dataset) : NDArray - { - if ($dataset->empty()) { - return NumPower::array([]); - } + $normalizedSamples = $this->normalizeSamples($dataset->samples()); + $input = NumPower::transpose(NumPower::array($normalizedSamples), [1, 0]); - $normalizedSamples = $this->normalizeSamples($dataset->samples()); - $input = NumPower::transpose(NumPower::array($normalizedSamples), [1, 0]); + foreach ($this->layers() as $layer) { + $input = $layer->infer($input); + } - foreach ($this->layers() as $layer) { - $input = $layer->infer($input); - } + $shape = $input->shape(); - $shape = $input->shape(); + if (count($shape) === 1) { + $input = NumPower::reshape($input, [1, $shape[0]]); + } + } else { + $input = NumPower::transpose(NumPower::array($dataset->samples()), [1, 0]); - if (count($shape) === 1) { - $input = NumPower::reshape($input, [1, $shape[0]]); + foreach ($this->layers() as $layer) { + $input = $layer->infer($input); + } } return NumPower::transpose($input, [1, 0]); From 7454f2526f0acfa2c092d08ec53232b5a3cd38e7 Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Wed, 11 Mar 2026 01:54:01 +0200 Subject: [PATCH 11/11] ML-400 extended Network interface --- .../Networks/Base/Contracts/Network.php | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/NeuralNet/Networks/Base/Contracts/Network.php b/src/NeuralNet/Networks/Base/Contracts/Network.php index b6dba2ea3..c6f34abbf 100644 --- a/src/NeuralNet/Networks/Base/Contracts/Network.php +++ b/src/NeuralNet/Networks/Base/Contracts/Network.php @@ -2,6 +2,9 @@ namespace Rubix\ML\NeuralNet\Networks\Base\Contracts; +use Rubix\ML\NeuralNet\Layers\Base\Contracts\Hidden; +use Rubix\ML\NeuralNet\Layers\Base\Contracts\Input; +use Rubix\ML\NeuralNet\Layers\Base\Contracts\Output; use Traversable; /** @@ -22,4 +25,25 @@ interface Network * @return Traversable */ public function layers() : Traversable; + + /** + * Return the input layer. + * + * @return Input + */ + public function input() : Input; + + /** + * Return an array of hidden layers indexed left to right. + * + * @return list + */ + public function hidden() : array; + + /** + * Return the output layer. + * + * @return Output + */ + public function output() : Output; }