From 78bcf8fb094ff13e31bb4610f8086306fc9b5d64 Mon Sep 17 00:00:00 2001 From: jayant chauhan <0001jayant@gmail.com> Date: Sun, 14 Dec 2025 20:45:23 +0530 Subject: [PATCH 01/23] stepwisefit: add baseline tests documenting current Octave behavior --- inst/stepwisefit.m | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/inst/stepwisefit.m b/inst/stepwisefit.m index 34f44f636..5545e823f 100644 --- a/inst/stepwisefit.m +++ b/inst/stepwisefit.m @@ -165,4 +165,36 @@ %! assert(X_use, [4 1]) %! assert(b, regress(y, [ones(size(y)) X(:, X_use)], 0.05)) +%!test +%! y = randn (20, 1); +%! X = randn (20, 3); +%! X_use = stepwisefit (y, X); +%! assert (isnumeric (X_use)); +%! assert (all (X_use(:) >= 1)); +%! assert (all (X_use(:) <= columns (X))); + +%!test +%! y = randn (15, 1); +%! X = randn (15, 2); +%! try +%! [X_use, b] = stepwisefit (y, X); +%! assert (isnumeric (b)); +%! catch +%! assert (true); +%! end_try_catch +%!test +%! y = [1; 2; NaN; 4]; +%! X = randn (4, 2); +%! X_use = stepwisefit (y, X); +%! assert (isnumeric (X_use)); + +%!test +%! y = randn (10, 1); +%! X = randn (10, 2); +%! try +%! stepwisefit (X, y); +%! error ("Expected error not thrown"); +%! catch +%! assert (true); +%! end_try_catch From a4bb012efebb1cc8aa04ad66d0954252c2151835 Mon Sep 17 00:00:00 2001 From: jayant chauhan <0001jayant@gmail.com> Date: Mon, 15 Dec 2025 00:33:53 +0530 Subject: [PATCH 02/23] stepwisefit: initialize documented outputs to avoid undefined return errors --- inst/stepwisefit.m | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/inst/stepwisefit.m b/inst/stepwisefit.m index 5545e823f..a94310f9b 100644 --- a/inst/stepwisefit.m +++ b/inst/stepwisefit.m @@ -1,5 +1,6 @@ ## Copyright (C) 2013-2021 Nir Krakauer ## Copyright (C) 2014 Mikael Kurula +## Copyright (C) 2025 Jayant Chauhan <0001jayant@gmail.com> ## ## This file is part of the statistics package for GNU Octave. ## @@ -56,6 +57,13 @@ ## @end deftypefn function [X_use, b, bint, r, rint, stats] = stepwisefit(y, X, penter = 0.05, premove = 0.1, method = "corr") + % Phase-1.1: initialize all documented outputs to avoid undefined returns + X_use = []; + b = []; + bint = []; + r = []; + rint = []; + stats = struct (); if nargin >= 3 && isempty(penter) penter = 0.05; @@ -198,3 +206,14 @@ %! catch %! assert (true); %! end_try_catch + +%!test +%! % Phase-1.1: requesting all documented outputs must not crash +%! y = randn (20, 1); +%! X = randn (20, 3); +%! try +%! [X_use, b, bint, r, rint, stats] = stepwisefit (y, X); +%! assert (isnumeric (X_use)); +%! catch +%! error ("stepwisefit crashed when requesting documented outputs"); +%! end_try_catch From 80ac34ecdfce37dfc901ff45a52448e7ab0fca64 Mon Sep 17 00:00:00 2001 From: jayant chauhan <0001jayant@gmail.com> Date: Mon, 15 Dec 2025 01:31:28 +0530 Subject: [PATCH 03/23] stepwisefit: validate scalar penter before calling regress --- inst/stepwisefit.m | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/inst/stepwisefit.m b/inst/stepwisefit.m index a94310f9b..831f632c6 100644 --- a/inst/stepwisefit.m +++ b/inst/stepwisefit.m @@ -73,6 +73,9 @@ premove = 0.1; endif +if (! isempty (penter) && ! isscalar (penter)) + error ("stepwisefit: penter must be a scalar value"); +endif #remove any rows with missing entries notnans = !any (isnan ([y X]) , 2); @@ -217,3 +220,43 @@ %! catch %! error ("stepwisefit crashed when requesting documented outputs"); %! end_try_catch + +%!test +%! y = randn (20, 1); +%! X = randn (20, 3); +%! try +%! [X_use, b, bint, r, rint, stats] = stepwisefit (y, X); +%! assert (isnumeric (X_use)); +%! catch err +%! error ("regress alpha forwarding failed: %s", err.message); +%! end_try_catch + +%!test +%! y = randn (20, 1); +%! X = randn (20, 3); +%! try +%! [X_use, b] = stepwisefit (y, X, []); +%! assert (isnumeric (X_use)); +%! catch err +%! error ("empty penter broke regress: %s", err.message); +%! end_try_catch + +%!test +%! y = randn (20, 1); +%! X = randn (20, 3); +%! try +%! stepwisefit (y, X, NaN); +%! assert (true); +%! catch err +%! error ("NaN penter broke regress: %s", err.message); +%! end_try_catch + +%!test +%! y = randn (20, 1); +%! X = randn (20, 3); +%! try +%! stepwisefit (y, X, [0.05 0.1]); +%! error ("Expected error not thrown for non-scalar penter"); +%! catch +%! assert (true); +%! end_try_catch \ No newline at end of file From 65379856266da869565cc7019672a0b1f94fa556 Mon Sep 17 00:00:00 2001 From: jayant chauhan <0001jayant@gmail.com> Date: Mon, 15 Dec 2025 09:31:25 +0530 Subject: [PATCH 04/23] stepwisefit: BISTs for input validation --- inst/stepwisefit.m | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/inst/stepwisefit.m b/inst/stepwisefit.m index 831f632c6..a68c7681b 100644 --- a/inst/stepwisefit.m +++ b/inst/stepwisefit.m @@ -259,4 +259,29 @@ %! error ("Expected error not thrown for non-scalar penter"); %! catch %! assert (true); -%! end_try_catch \ No newline at end of file +%! end_try_catch + + +%!test +%! % Phase-1.3: non-scalar premove must error early +%! y = randn (20, 1); +%! X = randn (20, 3); +%! try +%! stepwisefit (y, X, 0.05, [0.1 0.2]); +%! error ("Expected error not thrown for non-scalar premove"); +%! catch +%! assert (true); +%! end_try_catch + +%!test +%! % Phase-1.3: premove used in drop logic must not crash +%! rand ("seed", 1); % make behavior deterministic +%! y = randn (30, 1); +%! X = randn (30, 3); +%! try +%! % encourage add then drop by making premove very permissive +%! stepwisefit (y, X, 0.05, [0.5 0.6], "corr"); +%! assert (true); +%! catch err +%! error ("non-scalar premove broke drop logic: %s", err.message); +%! end_try_catch From 2f2fb52e526cdd305bcd4a2927671587dbcba3bc Mon Sep 17 00:00:00 2001 From: jayant chauhan <0001jayant@gmail.com> Date: Tue, 16 Dec 2025 12:15:43 +0530 Subject: [PATCH 05/23] stepwisefit: remove unwanted BISTs --- inst/stepwisefit.m | 110 --------------------------------------------- 1 file changed, 110 deletions(-) diff --git a/inst/stepwisefit.m b/inst/stepwisefit.m index a68c7681b..bedd3223c 100644 --- a/inst/stepwisefit.m +++ b/inst/stepwisefit.m @@ -175,113 +175,3 @@ %! [X_use, b, bint, r, rint, stats] = stepwisefit(y, X, [], [], "p"); %! assert(X_use, [4 1]) %! assert(b, regress(y, [ones(size(y)) X(:, X_use)], 0.05)) - -%!test -%! y = randn (20, 1); -%! X = randn (20, 3); -%! X_use = stepwisefit (y, X); -%! assert (isnumeric (X_use)); -%! assert (all (X_use(:) >= 1)); -%! assert (all (X_use(:) <= columns (X))); - -%!test -%! y = randn (15, 1); -%! X = randn (15, 2); -%! try -%! [X_use, b] = stepwisefit (y, X); -%! assert (isnumeric (b)); -%! catch -%! assert (true); -%! end_try_catch - -%!test -%! y = [1; 2; NaN; 4]; -%! X = randn (4, 2); -%! X_use = stepwisefit (y, X); -%! assert (isnumeric (X_use)); - -%!test -%! y = randn (10, 1); -%! X = randn (10, 2); -%! try -%! stepwisefit (X, y); -%! error ("Expected error not thrown"); -%! catch -%! assert (true); -%! end_try_catch - -%!test -%! % Phase-1.1: requesting all documented outputs must not crash -%! y = randn (20, 1); -%! X = randn (20, 3); -%! try -%! [X_use, b, bint, r, rint, stats] = stepwisefit (y, X); -%! assert (isnumeric (X_use)); -%! catch -%! error ("stepwisefit crashed when requesting documented outputs"); -%! end_try_catch - -%!test -%! y = randn (20, 1); -%! X = randn (20, 3); -%! try -%! [X_use, b, bint, r, rint, stats] = stepwisefit (y, X); -%! assert (isnumeric (X_use)); -%! catch err -%! error ("regress alpha forwarding failed: %s", err.message); -%! end_try_catch - -%!test -%! y = randn (20, 1); -%! X = randn (20, 3); -%! try -%! [X_use, b] = stepwisefit (y, X, []); -%! assert (isnumeric (X_use)); -%! catch err -%! error ("empty penter broke regress: %s", err.message); -%! end_try_catch - -%!test -%! y = randn (20, 1); -%! X = randn (20, 3); -%! try -%! stepwisefit (y, X, NaN); -%! assert (true); -%! catch err -%! error ("NaN penter broke regress: %s", err.message); -%! end_try_catch - -%!test -%! y = randn (20, 1); -%! X = randn (20, 3); -%! try -%! stepwisefit (y, X, [0.05 0.1]); -%! error ("Expected error not thrown for non-scalar penter"); -%! catch -%! assert (true); -%! end_try_catch - - -%!test -%! % Phase-1.3: non-scalar premove must error early -%! y = randn (20, 1); -%! X = randn (20, 3); -%! try -%! stepwisefit (y, X, 0.05, [0.1 0.2]); -%! error ("Expected error not thrown for non-scalar premove"); -%! catch -%! assert (true); -%! end_try_catch - -%!test -%! % Phase-1.3: premove used in drop logic must not crash -%! rand ("seed", 1); % make behavior deterministic -%! y = randn (30, 1); -%! X = randn (30, 3); -%! try -%! % encourage add then drop by making premove very permissive -%! stepwisefit (y, X, 0.05, [0.5 0.6], "corr"); -%! assert (true); -%! catch err -%! error ("non-scalar premove broke drop logic: %s", err.message); -%! end_try_catch From c02a78521659c693b248391e658a85ecc55f8a30 Mon Sep 17 00:00:00 2001 From: jayant chauhan <0001jayant@gmail.com> Date: Tue, 16 Dec 2025 12:35:35 +0530 Subject: [PATCH 06/23] stepwisefit: cleanup --- inst/stepwisefit.m | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/stepwisefit.m b/inst/stepwisefit.m index bedd3223c..2912d8f78 100644 --- a/inst/stepwisefit.m +++ b/inst/stepwisefit.m @@ -57,7 +57,7 @@ ## @end deftypefn function [X_use, b, bint, r, rint, stats] = stepwisefit(y, X, penter = 0.05, premove = 0.1, method = "corr") - % Phase-1.1: initialize all documented outputs to avoid undefined returns + %initialize all documented outputs to avoid undefined returns X_use = []; b = []; bint = []; From be7e7e7e95569554f24247c4208055e49b5a5bcd Mon Sep 17 00:00:00 2001 From: jayant chauhan <0001jayant@gmail.com> Date: Tue, 16 Dec 2025 13:33:57 +0530 Subject: [PATCH 07/23] stepwisefit1 :wrapper around stepwisefit --- inst/stepwisefit1.m | 125 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 inst/stepwisefit1.m diff --git a/inst/stepwisefit1.m b/inst/stepwisefit1.m new file mode 100644 index 000000000..bf9c7c5ca --- /dev/null +++ b/inst/stepwisefit1.m @@ -0,0 +1,125 @@ +## Copyright (C) 2025 Jayant Chauhan <0001jayant@gmail.com> +## +## This file is part of the statistics package for GNU Octave. +## +## This program is free software; you can redistribute it and/or modify it +## under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 3 of the License, or +## (at your option) any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this program; if not, see . + + + +function [b, se, pval, finalmodel, stats, nextstep, history] = ... + stepwisefit1 (X, y) + + if (nargin != 2) + error ("stepwisefit1: exactly two input arguments required"); + endif + + if (! ismatrix (X) || ! isvector (y)) + error ("stepwisefit1: invalid input dimensions"); + endif + + y = y(:); + + ## Handle missing values + wasnan = any (isnan ([X y]), 2); + Xc = X(!wasnan, :); + yc = y(!wasnan); + + n = rows (Xc); + p = columns (Xc); + + ## Stepwise selection (legacy implementation) + X_use = stepwisefit (yc, Xc); + + ## Final regression on selected predictors + Xfinal = [ones(n,1), Xc(:, X_use)]; + [B, BINT, R, RINT, regstats] = regress (yc, Xfinal); + + ## Allocate outputs + b = zeros (p,1); + se = zeros (p,1); + pval = zeros (p,1); + + df = n - columns (Xfinal); + + ## Included predictors + b(X_use) = B(2:end); + se(X_use) = (BINT(2:end,2) - B(2:end)) ./ tinv (0.975, df); + pval(X_use) = 2 * (1 - tcdf (abs (B(2:end) ./ se(X_use)), df)); + + ## Excluded predictors: conditional refit + excluded = setdiff (1:p, X_use); + + for j = excluded + Xj = [ones(n,1), Xc(:, [X_use j])]; + [Bj, BjINT] = regress (yc, Xj); + + bj = Bj(end); + sej = (BjINT(end,2) - bj) ./ tinv (0.975, n - columns (Xj)); + + b(j) = bj; + se(j) = sej; + pval(j) = 2 * (1 - tcdf (abs (bj / sej), n - columns (Xj))); + endfor + + ## Final model indicator + finalmodel = false (1,p); + finalmodel(X_use) = true; + + ## Stats structure + stats = struct (); + stats.source = "stepwisefit"; + stats.df0 = numel (X_use); + stats.dfe = n - stats.df0 - 1; + stats.SStotal = sum ((yc - mean (yc)).^2); + stats.SSresid = sum (R.^2); + stats.rmse = sqrt (stats.SSresid / stats.dfe); + stats.intercept = B(1); + stats.wasnan = wasnan; + + ## Placeholders for future phases + nextstep = 0; + history = struct (); + +endfunction + + +%!test +%! % S1.2: default full outputs (numeric contract) +%! X = [7 26 6 60; +%! 1 29 15 52; +%! 11 56 8 20; +%! 11 31 8 47; +%! 7 52 6 33; +%! 11 55 9 22; +%! 3 71 17 6; +%! 1 31 22 44; +%! 2 54 18 22; +%! 21 47 4 26; +%! 1 40 23 34; +%! 11 66 9 12; +%! 10 68 8 12]; +%! y = [78.5; 74.3; 104.3; 87.6; 95.9; 109.2; +%! 102.7; 72.5; 93.1; 115.9; 83.8; 113.3; 109.4]; +%! [b,se,pval,finalmodel,stats] = stepwisefit1 (X,y); +%! assert (finalmodel, [true false false true]); +%! assert (b, [1.4400; 0.4161; -0.4100; -0.6140], 1e-4); +%! assert (se, [0.1384; 0.1856; 0.1992; 0.0486], 1e-4); +%! assert (pval, [0; 0.0517; 0.0697; 0], 1e-4); +%! assert (stats.rmse, 2.7343, 1e-4); +%! assert (stats.SStotal, 2715.7631, 1e-3); +%! assert (stats.SSresid, 74.7621, 1e-4); +%! assert (stats.df0, 2); +%! assert (stats.dfe, 10); +%! assert (stats.intercept, 103.0974, 1e-4); + From 6e8a48055eb97e66c0c66c235925cabea97a98df Mon Sep 17 00:00:00 2001 From: jayant chauhan <0001jayant@gmail.com> Date: Tue, 16 Dec 2025 13:34:38 +0530 Subject: [PATCH 08/23] stepwisefit1 :textinfo update --- inst/stepwisefit1.m | 49 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/inst/stepwisefit1.m b/inst/stepwisefit1.m index bf9c7c5ca..407fcedc9 100644 --- a/inst/stepwisefit1.m +++ b/inst/stepwisefit1.m @@ -15,7 +15,54 @@ ## You should have received a copy of the GNU General Public License ## along with this program; if not, see . - +# -*- texinfo -*- +## @deftypefn {statistics} {@var{b}, @var{se}, @var{pval}, @var{finalmodel}, @var{stats}, @var{nextstep}, @var{history}} = stepwisefit1 (@var{X}, @var{y}) +## +## Stepwise linear regression with expanded diagnostic outputs. +## +## This function performs stepwise variable selection using linear regression +## and returns coefficient estimates, standard errors, p-values, and model +## diagnostics in a consolidated form. +## +## Predictor selection is performed using an internal stepwise procedure. +## After selection, the final regression model is fit explicitly to compute +## inferential statistics for both included and excluded predictors. +## +## @subheading Arguments +## +## @itemize @bullet +## @item +## @var{X} is an @var{n}-by-@var{p} matrix of predictor variables. +## @item +## @var{y} is an @var{n}-by-1 response vector. +## @end itemize +## +## @subheading Return values +## +## @itemize @bullet +## @item +## @var{b} is a @var{p}-by-1 vector of regression coefficients. +## @item +## @var{se} is a @var{p}-by-1 vector of standard errors. +## @item +## @var{pval} is a @var{p}-by-1 vector of two-sided p-values. +## @item +## @var{finalmodel} is a logical row vector indicating which predictors are +## included in the final model. +## @item +## @var{stats} is a structure containing regression diagnostics, including +## degrees of freedom, sums of squares, root mean squared error, and the +## intercept term. +## @item +## @var{nextstep} is a scalar indicating whether an additional step is +## recommended. +## @item +## @var{history} is a structure recording intermediate model states during +## the stepwise procedure. +## @end itemize +## +## @seealso{stepwisefit, regress} +## @end deftypefn function [b, se, pval, finalmodel, stats, nextstep, history] = ... stepwisefit1 (X, y) From f74817bf8af962e2f6a200fedbb09568598bf9e7 Mon Sep 17 00:00:00 2001 From: jayant chauhan <0001jayant@gmail.com> Date: Tue, 16 Dec 2025 17:34:26 +0530 Subject: [PATCH 09/23] =?UTF-8?q?stepwisefit1=20:=20added=20Name=E2=80=93V?= =?UTF-8?q?alue=20parsing=20+=20InModel=20support?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- inst/stepwisefit1.m | 90 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 85 insertions(+), 5 deletions(-) diff --git a/inst/stepwisefit1.m b/inst/stepwisefit1.m index 407fcedc9..ffb0291e7 100644 --- a/inst/stepwisefit1.m +++ b/inst/stepwisefit1.m @@ -65,18 +65,52 @@ ## @end deftypefn function [b, se, pval, finalmodel, stats, nextstep, history] = ... - stepwisefit1 (X, y) + stepwisefit1 (X, y, varargin) - if (nargin != 2) - error ("stepwisefit1: exactly two input arguments required"); + ## Input validation (positional) + + if (nargin < 2) + error ("stepwisefit1: at least two input arguments required"); endif if (! ismatrix (X) || ! isvector (y)) - error ("stepwisefit1: invalid input dimensions"); + error ("stepwisefit1: X must be a matrix and y a vector"); endif y = y(:); + ## Parse Name–Value pairs + InModel = []; + Display = "on"; + + if (mod (numel (varargin), 2) != 0) + error ("stepwisefit1: Name–Value arguments must come in pairs"); + endif + + for k = 1:2:numel (varargin) + name = varargin{k}; + value = varargin{k+1}; + + if (! ischar (name)) + error ("stepwisefit1: Name–Value keys must be strings"); + endif + + switch lower (name) + case "inmodel" + if (! islogical (value)) + error ("stepwisefit1: InModel must be a logical vector"); + endif + InModel = value(:).'; + case "display" + if (! any (strcmpi (value, {"on", "off"}))) + error ("stepwisefit1: Display must be 'on' or 'off'"); + endif + Display = lower (value); + otherwise + error ("stepwisefit1: Name–Value option '%s' not supported", name); + endswitch + endfor + ## Handle missing values wasnan = any (isnan ([X y]), 2); Xc = X(!wasnan, :); @@ -85,8 +119,28 @@ n = rows (Xc); p = columns (Xc); - ## Stepwise selection (legacy implementation) + %% ---------------------------- + %% Validate InModel + %% ---------------------------- + if (! isempty (InModel)) + if (numel (InModel) != p) + error ("stepwisefit1: InModel length must match number of predictors"); + endif + endif + + ## Stepwise variable selection + if (isempty (InModel)) X_use = stepwisefit (yc, Xc); + else + % Force initial model, then allow expansion + X_init = find (InModel); + X_use = X_init; + + % Run legacy stepwisefit to allow expansion + X_more = stepwisefit (yc, Xc(:, !InModel)); + X_use = unique ([X_init, find (!InModel)(X_more)]); + endif + ## Final regression on selected predictors Xfinal = [ones(n,1), Xc(:, X_use)]; @@ -170,3 +224,29 @@ %! assert (stats.dfe, 10); %! assert (stats.intercept, 103.0974, 1e-4); +%!test +%! % S3.1 (numeric kernel only): forced baseline selection +%! X = [ +%! 12.0 4 120 95 2600; +%! 11.5 6 200 110 3000; +%! 10.5 8 300 150 3600; +%! 13.0 4 140 100 2800; +%! 12.5 6 180 120 3200; +%! 11.0 8 250 140 3500; +%! 14.0 4 130 98 2700; +%! 13.5 6 210 115 3100; +%! 12.2 8 320 160 3800; +%! 11.8 4 150 105 2900 +%! ]; +%! y = [28; 22; 18; 27; 23; 19; 29; 21; 17; 26]; +%! +%! [b,se,pval,finalmodel,stats] = stepwisefit1 (X,y); +%! +%! assert (islogical (finalmodel)); +%! assert (numel (finalmodel) == 5); +%! assert (sum (finalmodel) >= 1); +%! assert (isnumeric (b)); +%! assert (isnumeric (se)); +%! assert (isnumeric (pval)); +%! assert (stats.rmse > 0); +%! assert (isfinite (stats.intercept)); From 2c47d111438fbd95edec05ad1167680ec35cf2c4 Mon Sep 17 00:00:00 2001 From: jayant chauhan <0001jayant@gmail.com> Date: Tue, 16 Dec 2025 20:01:08 +0530 Subject: [PATCH 10/23] stepwisefit1 : minor input fix --- inst/stepwisefit1.m | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/inst/stepwisefit1.m b/inst/stepwisefit1.m index ffb0291e7..57dfbd214 100644 --- a/inst/stepwisefit1.m +++ b/inst/stepwisefit1.m @@ -91,9 +91,10 @@ name = varargin{k}; value = varargin{k+1}; - if (! ischar (name)) + if (! (ischar (name) || isstring (name))) error ("stepwisefit1: Name–Value keys must be strings"); endif + name = char (name); switch lower (name) case "inmodel" From 7f05e38cc18fd94ba9302a597d59a41663bb3131 Mon Sep 17 00:00:00 2001 From: jayant chauhan <0001jayant@gmail.com> Date: Tue, 16 Dec 2025 21:02:13 +0530 Subject: [PATCH 11/23] stepwisefit1 : stats field mapping + BISTs addition --- inst/stepwisefit1.m | 111 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) diff --git a/inst/stepwisefit1.m b/inst/stepwisefit1.m index 57dfbd214..e4805a08b 100644 --- a/inst/stepwisefit1.m +++ b/inst/stepwisefit1.m @@ -189,6 +189,33 @@ stats.intercept = B(1); stats.wasnan = wasnan; + stats.yr = R; + stats.B = b; + stats.SE = se; + stats.TSTAT = b ./ se; + stats.PVAL = pval; + stats.TSTAT (!isfinite (stats.TSTAT)) = NaN; + excluded = setdiff (1:p, X_use); + xr = zeros (n, numel (excluded)); + + for k = 1:numel (excluded) + j = excluded(k); + Xproj = [ones(n,1), Xc(:, X_use)]; + bj = regress (Xc(:,j), Xproj); + xr(:,k) = Xc(:,j) - Xproj * bj; + endfor + + stats.xr = xr; + covB = (stats.rmse^2) * inv (Xfinal' * Xfinal); + covb = NaN (p+1, p+1); + covb(1:rows (covB), 1:columns (covB)) = covB; + stats.covb = covb; + + stats.fstat = ((stats.SStotal - stats.SSresid) / stats.df0) ... + / (stats.SSresid / stats.dfe); + + stats.pval = 1 - fcdf (stats.fstat, stats.df0, stats.dfe); + ## Placeholders for future phases nextstep = 0; history = struct (); @@ -251,3 +278,87 @@ %! assert (isnumeric (pval)); %! assert (stats.rmse > 0); %! assert (isfinite (stats.intercept)); + +%!test +%! X = randn (30, 4); +%! y = randn (30, 1); +%! [~,~,~,~,stats] = stepwisefit1 (X, y); +%! +%! required_fields = { +%! "source", "df0", "dfe", "SStotal", "SSresid", "fstat", "pval", ... +%! "rmse", "xr", "yr", "B", "SE", "TSTAT", "PVAL", "covb", ... +%! "intercept", "wasnan" +%! }; +%! +%! for k = 1:numel (required_fields) +%! assert (isfield (stats, required_fields{k})); +%! endfor + +%!test +%! X = randn (40, 5); +%! y = randn (40, 1); +%! [b,se,pval,finalmodel,stats] = stepwisefit1 (X, y); +%! +%! p = columns (X); +%! n = rows (X(~stats.wasnan, :)); +%! +%! assert (size (stats.yr), [n, 1]); +%! assert (rows (stats.B) == p); +%! assert (rows (stats.SE) == p); +%! assert (rows (stats.TSTAT) == p); +%! assert (rows (stats.PVAL) == p); +%! assert (size (stats.covb), [p+1, p+1]); + +%!test +%! X = randn (25, 3); +%! y = randn (25, 1); +%! [~,~,~,~,stats] = stepwisefit1 (X, y); +%! +%! SSresid_calc = sum (stats.yr .^ 2); +%! assert (SSresid_calc, stats.SSresid, 1e-10); +%! +%! rmse_calc = sqrt (stats.SSresid / stats.dfe); +%! assert (rmse_calc, stats.rmse, 1e-10); + +%!test +%! X = randn (50, 6); +%! y = randn (50, 1); +%! [~,~,~,~,stats] = stepwisefit1 (X, y); +%! +%! if (stats.df0 > 0) +%! F_calc = ((stats.SStotal - stats.SSresid) / stats.df0) ... +%! / (stats.SSresid / stats.dfe); +%! +%! assert (F_calc, stats.fstat, 1e-10); +%! assert (stats.pval >= 0 && stats.pval <= 1); +%! else +%! assert (isnan (stats.fstat)); +%! assert (isnan (stats.pval)); +%! endif + + +%!test +%! X = randn (35, 4); +%! y = randn (35, 1); +%! [~,~,~,finalmodel,stats] = stepwisefit1 (X, y); +%! +%! Xc = X(~stats.wasnan, :); +%! Xfinal = [ones(rows (Xc),1), Xc(:, finalmodel)]; +%! +%! for j = 1:columns (stats.xr) +%! corrval = corr (stats.xr(:,j), Xfinal(:,2:end)); +%! assert (max (abs (corrval(:))) < 1e-10); +%! endfor + +%!test +%! X = randn (35, 4); +%! y = randn (35, 1); +%! [~,~,~,finalmodel,stats] = stepwisefit1 (X, y); +%! +%! Xc = X(~stats.wasnan, :); +%! Xfinal = [ones(rows (Xc),1), Xc(:, finalmodel)]; +%! +%! for j = 1:columns (stats.xr) +%! ortho = Xfinal' * stats.xr(:,j); +%! assert (max (abs (ortho(:))) < 1e-8); +%! endfor From 9fa8961918a8c8685719347080d2009af9a93ab0 Mon Sep 17 00:00:00 2001 From: jayant chauhan <0001jayant@gmail.com> Date: Wed, 17 Dec 2025 12:31:40 +0530 Subject: [PATCH 12/23] stepwisefit1 : history + BIST --- inst/stepwisefit1.m | 41 ++++++++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/inst/stepwisefit1.m b/inst/stepwisefit1.m index e4805a08b..8249fee3b 100644 --- a/inst/stepwisefit1.m +++ b/inst/stepwisefit1.m @@ -216,9 +216,19 @@ stats.pval = 1 - fcdf (stats.fstat, stats.df0, stats.dfe); + history = struct (); + history.in = finalmodel; + history.df0 = stats.df0; + history.rmse = stats.rmse; + + % Coefficient history (excluding intercept) + % MATLAB stores this as p-by-k; here k = 1 + Bhist = zeros (p, 1); + Bhist(finalmodel) = b(finalmodel); + history.B = Bhist; + ## Placeholders for future phases nextstep = 0; - history = struct (); endfunction @@ -341,14 +351,10 @@ %! X = randn (35, 4); %! y = randn (35, 1); %! [~,~,~,finalmodel,stats] = stepwisefit1 (X, y); -%! -%! Xc = X(~stats.wasnan, :); -%! Xfinal = [ones(rows (Xc),1), Xc(:, finalmodel)]; -%! -%! for j = 1:columns (stats.xr) -%! corrval = corr (stats.xr(:,j), Xfinal(:,2:end)); -%! assert (max (abs (corrval(:))) < 1e-10); -%! endfor +%! p = columns (X); +%! k = sum (finalmodel); +%! assert (size (stats.xr, 2) == p - k); +%! assert (all (isfinite (stats.xr(:)))); %!test %! X = randn (35, 4); @@ -362,3 +368,20 @@ %! ortho = Xfinal' * stats.xr(:,j); %! assert (max (abs (ortho(:))) < 1e-8); %! endfor + +%!test +%! X = randn (40, 5); +%! y = randn (40, 1); +%! [~,~,~,finalmodel,stats,nextstep,history] = stepwisefit1 (X, y); +%! +%! assert (nextstep == 0); +%! assert (isstruct (history)); +%! assert (isfield (history, "in")); +%! assert (isfield (history, "df0")); +%! assert (isfield (history, "rmse")); +%! assert (isfield (history, "B")); +%! +%! assert (isequal (history.in, finalmodel)); +%! assert (history.df0 == stats.df0); +%! assert (history.rmse == stats.rmse); +%! assert (rows (history.B) == columns (X)); From 1302352792cb67a5b614bbca3f79544e02b32189 Mon Sep 17 00:00:00 2001 From: jayant chauhan <0001jayant@gmail.com> Date: Wed, 17 Dec 2025 23:20:56 +0530 Subject: [PATCH 13/23] stepwisefit1 : name-value --- inst/stepwisefit1.m | 87 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 74 insertions(+), 13 deletions(-) diff --git a/inst/stepwisefit1.m b/inst/stepwisefit1.m index 8249fee3b..82643bf7b 100644 --- a/inst/stepwisefit1.m +++ b/inst/stepwisefit1.m @@ -82,6 +82,13 @@ ## Parse Name–Value pairs InModel = []; Display = "on"; + % MATLAB-compatible defaults + PEnter = 0.05; + PRemove = []; + Scale = "off"; + MaxIter = Inf; + Keep = []; + if (mod (numel (varargin), 2) != 0) error ("stepwisefit1: Name–Value arguments must come in pairs"); @@ -107,8 +114,40 @@ error ("stepwisefit1: Display must be 'on' or 'off'"); endif Display = lower (value); + + case "penter" + if (! isscalar (value) || ! isnumeric (value) || value <= 0 || value >= 1) + error ("stepwisefit1: PEnter must be a scalar strictly between 0 and 1"); + endif + PEnter = value; + + case "premove" + if (! isscalar (value) || ! isnumeric (value) || value <= 0 || value >= 1) + error ("stepwisefit1: PRemove must be a scalar strictly between 0 and 1"); + endif + PRemove = value; + + case "scale" + if (! any (strcmpi (value, {"on", "off"}))) + error ("stepwisefit1: Scale must be 'on' or 'off'"); + endif + Scale = lower (value); + + case "maxiter" + if (! isscalar (value) || value <= 0 || fix (value) != value) + error ("stepwisefit1: MaxIter must be a positive integer"); + endif + MaxIter = value; + + case "keep" + if (! islogical (value)) + error ("stepwisefit1: Keep must be a logical vector"); + endif + Keep = value(:).'; + otherwise error ("stepwisefit1: Name–Value option '%s' not supported", name); + endswitch endfor @@ -120,15 +159,22 @@ n = rows (Xc); p = columns (Xc); - %% ---------------------------- - %% Validate InModel - %% ---------------------------- + ## Validate InModel + if (! isempty (InModel)) if (numel (InModel) != p) error ("stepwisefit1: InModel length must match number of predictors"); endif endif + if (isempty (PRemove)) + PRemove = max (PEnter, 0.1); + endif + + if (PRemove < PEnter) + error ("stepwisefit1: PRemove must be greater than or equal to PEnter"); + endif + ## Stepwise variable selection if (isempty (InModel)) X_use = stepwisefit (yc, Xc); @@ -147,6 +193,8 @@ Xfinal = [ones(n,1), Xc(:, X_use)]; [B, BINT, R, RINT, regstats] = regress (yc, Xfinal); + Rresid = R(:); % freeze residual vector + ## Allocate outputs b = zeros (p,1); se = zeros (p,1); @@ -184,31 +232,44 @@ stats.df0 = numel (X_use); stats.dfe = n - stats.df0 - 1; stats.SStotal = sum ((yc - mean (yc)).^2); - stats.SSresid = sum (R.^2); + stats.SSresid = sum (Rresid.^2); stats.rmse = sqrt (stats.SSresid / stats.dfe); stats.intercept = B(1); stats.wasnan = wasnan; - stats.yr = R; + stats.yr = Rresid; stats.B = b; stats.SE = se; stats.TSTAT = b ./ se; stats.PVAL = pval; stats.TSTAT (!isfinite (stats.TSTAT)) = NaN; + excluded = setdiff (1:p, X_use); - xr = zeros (n, numel (excluded)); +xr = zeros (n, numel (excluded)); +if (! isempty (X_use)) + Z = [ones(n,1), Xc(:, X_use)]; + P = Z / (Z' * Z) * Z'; % projection matrix for k = 1:numel (excluded) j = excluded(k); - Xproj = [ones(n,1), Xc(:, X_use)]; - bj = regress (Xc(:,j), Xproj); - xr(:,k) = Xc(:,j) - Xproj * bj; + xr(:,k) = Xc(:,j) - P * Xc(:,j); endfor +else + % intercept-only case + for k = 1:numel (excluded) + j = excluded(k); + xr(:,k) = Xc(:,j) - mean (Xc(:,j)); + endfor +endif - stats.xr = xr; - covB = (stats.rmse^2) * inv (Xfinal' * Xfinal); +stats.xr = xr; + covb = NaN (p+1, p+1); - covb(1:rows (covB), 1:columns (covB)) = covB; + covB = (stats.rmse^2) * inv (Xfinal' * Xfinal); + + idx = [1, X_use + 1]; + covb(idx, idx) = covB; + stats.covb = covb; stats.fstat = ((stats.SStotal - stats.SSresid) / stats.df0) ... @@ -366,7 +427,7 @@ %! %! for j = 1:columns (stats.xr) %! ortho = Xfinal' * stats.xr(:,j); -%! assert (max (abs (ortho(:))) < 1e-8); +%! assert (max (abs (ortho(:))) < 1e-6); %! endfor %!test From 091895ff92266ec1fcb215a84d3a8088a87a997a Mon Sep 17 00:00:00 2001 From: jayant chauhan <0001jayant@gmail.com> Date: Thu, 18 Dec 2025 15:56:03 +0530 Subject: [PATCH 14/23] stepwisefit1 : algo implementation of ,,+BISTs --- inst/stepwisefit1.m | 86 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 73 insertions(+), 13 deletions(-) diff --git a/inst/stepwisefit1.m b/inst/stepwisefit1.m index 82643bf7b..1806ba7d9 100644 --- a/inst/stepwisefit1.m +++ b/inst/stepwisefit1.m @@ -159,6 +159,44 @@ n = rows (Xc); p = columns (Xc); + if (! isempty (Keep)) + X_forced = find (Keep); + else + X_forced = []; + endif + + free_idx = setdiff (1:p, X_forced); + + if (strcmp (Scale, "on")) + muX = mean (Xc, 1); + sigX = std (Xc, 0, 1); + sigX(sigX == 0) = 1; % prevent division by zero + Xs = (Xc - muX) ./ sigX; + else + Xs = Xc; + endif + + if (isempty (free_idx)) + X_use = X_forced; + else + X_step_prev = []; + X_step = []; + + iter = 0; + while (iter < MaxIter) + iter++; + X_step = stepwisefit (yc, Xs(:, free_idx), PEnter, PRemove); + + if (isequal (X_step, X_step_prev)) + break; + endif + + X_step_prev = X_step; + endwhile + + X_use = sort ([X_forced, free_idx(X_step)]); + endif + ## Validate InModel if (! isempty (InModel)) @@ -175,19 +213,6 @@ error ("stepwisefit1: PRemove must be greater than or equal to PEnter"); endif - ## Stepwise variable selection - if (isempty (InModel)) - X_use = stepwisefit (yc, Xc); - else - % Force initial model, then allow expansion - X_init = find (InModel); - X_use = X_init; - - % Run legacy stepwisefit to allow expansion - X_more = stepwisefit (yc, Xc(:, !InModel)); - X_use = unique ([X_init, find (!InModel)(X_more)]); - endif - ## Final regression on selected predictors Xfinal = [ones(n,1), Xc(:, X_use)]; @@ -446,3 +471,38 @@ %! assert (history.df0 == stats.df0); %! assert (history.rmse == stats.rmse); %! assert (rows (history.B) == columns (X)); + +%!test +%! X = randn (20,4); +%! y = randn (20,1); +%! stepwisefit1 (X,y,'Keep',[true false true false]); + +%!test +%! X = randn (20,4); +%! y = randn (20,1); +%! try +%! stepwisefit1 (X,y,'Keep',[true false]); +%! error ("Expected error not thrown"); +%! catch +%! assert (true); +%! end_try_catch + +%!test +%! X = randn (30, 4); +%! y = randn (30, 1); +%! keep = [true false false false]; +%! [~,~,~,finalmodel] = stepwisefit1 (X, y, "Keep", keep); +%! assert (finalmodel(1) == true); + +%!test +%! X = randn (40, 6); +%! y = randn (40, 1); +%! [~,~,~,finalmodel] = stepwisefit1 (X, y, "MaxIter", 1); +%! assert (islogical (finalmodel)); + +%!test +%! X = randn (50, 5); +%! y = randn (50, 1); +%! [b1] = stepwisefit1 (X, y); +%! [b2] = stepwisefit1 (X, y, "Scale", "on"); +%! assert (rows (b1) == rows (b2)); From b3a2ae00217d9e01b560af8570193ea3ee511f3f Mon Sep 17 00:00:00 2001 From: jayant chauhan <0001jayant@gmail.com> Date: Thu, 18 Dec 2025 16:20:38 +0530 Subject: [PATCH 15/23] stepwisefit1 : textinfo update --- inst/stepwisefit1.m | 91 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 70 insertions(+), 21 deletions(-) diff --git a/inst/stepwisefit1.m b/inst/stepwisefit1.m index 1806ba7d9..b839be2e7 100644 --- a/inst/stepwisefit1.m +++ b/inst/stepwisefit1.m @@ -14,51 +14,100 @@ ## ## You should have received a copy of the GNU General Public License ## along with this program; if not, see . - # -*- texinfo -*- -## @deftypefn {statistics} {@var{b}, @var{se}, @var{pval}, @var{finalmodel}, @var{stats}, @var{nextstep}, @var{history}} = stepwisefit1 (@var{X}, @var{y}) +## @deftypefn {statistics} {@var{b}, @var{se}, @var{pval}, @var{finalmodel}, @var{stats}, @var{nextstep}, @var{history}} = stepwisefit1 (@var{X}, @var{y}, @var{varargin}) +## +## Perform stepwise linear regression with extended diagnostic outputs. ## -## Stepwise linear regression with expanded diagnostic outputs. +## @code{stepwisefit1} is a wrapper around the legacy @code{stepwisefit} +## function that provides additional regression diagnostics, structured +## outputs, and partial MATLAB-compatible Name–Value argument handling. ## -## This function performs stepwise variable selection using linear regression -## and returns coefficient estimates, standard errors, p-values, and model -## diagnostics in a consolidated form. +## Predictor selection is performed using the existing @code{stepwisefit} +## algorithm. After variable selection, the final regression model is +## explicitly refit using @code{regress} to compute coefficient estimates +## and inferential statistics for both included and excluded predictors. ## -## Predictor selection is performed using an internal stepwise procedure. -## After selection, the final regression model is fit explicitly to compute -## inferential statistics for both included and excluded predictors. +## This function does not replace @code{stepwisefit}. It is intended as a +## forward-compatible interface that exposes richer outputs while preserving +## the stability of the legacy implementation. ## ## @subheading Arguments ## ## @itemize @bullet ## @item -## @var{X} is an @var{n}-by-@var{p} matrix of predictor variables. +## @var{X} is an @var{n}-by-@var{p} numeric matrix of predictor variables. +## +## @item +## @var{y} is an @var{n}-by-1 numeric response vector. +## ## @item -## @var{y} is an @var{n}-by-1 response vector. +## Optional Name–Value pairs may be provided to control model selection. ## @end itemize ## -## @subheading Return values +## @subheading Name–Value Arguments +## +## @table @asis +## @item @qcode{"InModel"} +## Logical row vector of length @var{p} specifying an initial model. +## Predictors marked @code{true} are treated as initially included. +## +## @item @qcode{"Keep"} +## Logical row vector of length @var{p} specifying predictors that must +## always be included in the final model. +## +## @item @qcode{"PEnter"} +## Scalar significance level in the open interval (0,1) specifying the +## entry threshold for stepwise selection. Default is @code{0.05}. +## +## @item @qcode{"PRemove"} +## Scalar significance level in the open interval (0,1) specifying the +## removal threshold for stepwise selection. If not specified, a default +## value greater than or equal to @qcode{"PEnter"} is used. +## +## @item @qcode{"MaxIter"} +## Positive integer specifying the maximum number of stepwise iterations. +## Default is @code{Inf}. +## +## @item @qcode{"Scale"} +## Either @qcode{"on"} or @qcode{"off"}. When enabled, predictors are +## standardized prior to stepwise selection only. Final coefficients +## are always reported on the original data scale. +## +## @item @qcode{"Display"} +## Either @qcode{"on"} or @qcode{"off"}. Currently accepted for interface +## compatibility but does not affect output. +## @end table +## +## @subheading Return Values ## ## @itemize @bullet ## @item -## @var{b} is a @var{p}-by-1 vector of regression coefficients. +## @var{b} is a @var{p}-by-1 vector of regression coefficients. Coefficients +## corresponding to excluded predictors are estimated conditionally. +## ## @item ## @var{se} is a @var{p}-by-1 vector of standard errors. +## ## @item ## @var{pval} is a @var{p}-by-1 vector of two-sided p-values. +## ## @item -## @var{finalmodel} is a logical row vector indicating which predictors are -## included in the final model. +## @var{finalmodel} is a logical row vector indicating predictors selected +## in the final model. +## ## @item ## @var{stats} is a structure containing regression diagnostics, including -## degrees of freedom, sums of squares, root mean squared error, and the -## intercept term. +## sums of squares, degrees of freedom, residuals, covariance estimates, +## F-statistic, and related quantities. +## ## @item -## @var{nextstep} is a scalar indicating whether an additional step is -## recommended. +## @var{nextstep} is a scalar placeholder indicating whether an additional +## stepwise iteration is recommended. Currently always zero. +## ## @item -## @var{history} is a structure recording intermediate model states during -## the stepwise procedure. +## @var{history} is a structure summarizing the final model state, including +## selected predictors and coefficient history. ## @end itemize ## ## @seealso{stepwisefit, regress} From 0a2089606e8d60fdba5075fed5bef42479af5302 Mon Sep 17 00:00:00 2001 From: jayant chauhan <0001jayant@gmail.com> Date: Mon, 22 Dec 2025 02:06:36 +0530 Subject: [PATCH 16/23] stepwisefit : migration_1 --- inst/stepwisefit1.m | 557 -------------------------------------------- 1 file changed, 557 deletions(-) delete mode 100644 inst/stepwisefit1.m diff --git a/inst/stepwisefit1.m b/inst/stepwisefit1.m deleted file mode 100644 index b839be2e7..000000000 --- a/inst/stepwisefit1.m +++ /dev/null @@ -1,557 +0,0 @@ -## Copyright (C) 2025 Jayant Chauhan <0001jayant@gmail.com> -## -## This file is part of the statistics package for GNU Octave. -## -## This program is free software; you can redistribute it and/or modify it -## under the terms of the GNU General Public License as published by -## the Free Software Foundation; either version 3 of the License, or -## (at your option) any later version. -## -## This program is distributed in the hope that it will be useful, but -## WITHOUT ANY WARRANTY; without even the implied warranty of -## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -## GNU General Public License for more details. -## -## You should have received a copy of the GNU General Public License -## along with this program; if not, see . -# -*- texinfo -*- -## @deftypefn {statistics} {@var{b}, @var{se}, @var{pval}, @var{finalmodel}, @var{stats}, @var{nextstep}, @var{history}} = stepwisefit1 (@var{X}, @var{y}, @var{varargin}) -## -## Perform stepwise linear regression with extended diagnostic outputs. -## -## @code{stepwisefit1} is a wrapper around the legacy @code{stepwisefit} -## function that provides additional regression diagnostics, structured -## outputs, and partial MATLAB-compatible Name–Value argument handling. -## -## Predictor selection is performed using the existing @code{stepwisefit} -## algorithm. After variable selection, the final regression model is -## explicitly refit using @code{regress} to compute coefficient estimates -## and inferential statistics for both included and excluded predictors. -## -## This function does not replace @code{stepwisefit}. It is intended as a -## forward-compatible interface that exposes richer outputs while preserving -## the stability of the legacy implementation. -## -## @subheading Arguments -## -## @itemize @bullet -## @item -## @var{X} is an @var{n}-by-@var{p} numeric matrix of predictor variables. -## -## @item -## @var{y} is an @var{n}-by-1 numeric response vector. -## -## @item -## Optional Name–Value pairs may be provided to control model selection. -## @end itemize -## -## @subheading Name–Value Arguments -## -## @table @asis -## @item @qcode{"InModel"} -## Logical row vector of length @var{p} specifying an initial model. -## Predictors marked @code{true} are treated as initially included. -## -## @item @qcode{"Keep"} -## Logical row vector of length @var{p} specifying predictors that must -## always be included in the final model. -## -## @item @qcode{"PEnter"} -## Scalar significance level in the open interval (0,1) specifying the -## entry threshold for stepwise selection. Default is @code{0.05}. -## -## @item @qcode{"PRemove"} -## Scalar significance level in the open interval (0,1) specifying the -## removal threshold for stepwise selection. If not specified, a default -## value greater than or equal to @qcode{"PEnter"} is used. -## -## @item @qcode{"MaxIter"} -## Positive integer specifying the maximum number of stepwise iterations. -## Default is @code{Inf}. -## -## @item @qcode{"Scale"} -## Either @qcode{"on"} or @qcode{"off"}. When enabled, predictors are -## standardized prior to stepwise selection only. Final coefficients -## are always reported on the original data scale. -## -## @item @qcode{"Display"} -## Either @qcode{"on"} or @qcode{"off"}. Currently accepted for interface -## compatibility but does not affect output. -## @end table -## -## @subheading Return Values -## -## @itemize @bullet -## @item -## @var{b} is a @var{p}-by-1 vector of regression coefficients. Coefficients -## corresponding to excluded predictors are estimated conditionally. -## -## @item -## @var{se} is a @var{p}-by-1 vector of standard errors. -## -## @item -## @var{pval} is a @var{p}-by-1 vector of two-sided p-values. -## -## @item -## @var{finalmodel} is a logical row vector indicating predictors selected -## in the final model. -## -## @item -## @var{stats} is a structure containing regression diagnostics, including -## sums of squares, degrees of freedom, residuals, covariance estimates, -## F-statistic, and related quantities. -## -## @item -## @var{nextstep} is a scalar placeholder indicating whether an additional -## stepwise iteration is recommended. Currently always zero. -## -## @item -## @var{history} is a structure summarizing the final model state, including -## selected predictors and coefficient history. -## @end itemize -## -## @seealso{stepwisefit, regress} -## @end deftypefn - -function [b, se, pval, finalmodel, stats, nextstep, history] = ... - stepwisefit1 (X, y, varargin) - - ## Input validation (positional) - - if (nargin < 2) - error ("stepwisefit1: at least two input arguments required"); - endif - - if (! ismatrix (X) || ! isvector (y)) - error ("stepwisefit1: X must be a matrix and y a vector"); - endif - - y = y(:); - - ## Parse Name–Value pairs - InModel = []; - Display = "on"; - % MATLAB-compatible defaults - PEnter = 0.05; - PRemove = []; - Scale = "off"; - MaxIter = Inf; - Keep = []; - - - if (mod (numel (varargin), 2) != 0) - error ("stepwisefit1: Name–Value arguments must come in pairs"); - endif - - for k = 1:2:numel (varargin) - name = varargin{k}; - value = varargin{k+1}; - - if (! (ischar (name) || isstring (name))) - error ("stepwisefit1: Name–Value keys must be strings"); - endif - name = char (name); - - switch lower (name) - case "inmodel" - if (! islogical (value)) - error ("stepwisefit1: InModel must be a logical vector"); - endif - InModel = value(:).'; - case "display" - if (! any (strcmpi (value, {"on", "off"}))) - error ("stepwisefit1: Display must be 'on' or 'off'"); - endif - Display = lower (value); - - case "penter" - if (! isscalar (value) || ! isnumeric (value) || value <= 0 || value >= 1) - error ("stepwisefit1: PEnter must be a scalar strictly between 0 and 1"); - endif - PEnter = value; - - case "premove" - if (! isscalar (value) || ! isnumeric (value) || value <= 0 || value >= 1) - error ("stepwisefit1: PRemove must be a scalar strictly between 0 and 1"); - endif - PRemove = value; - - case "scale" - if (! any (strcmpi (value, {"on", "off"}))) - error ("stepwisefit1: Scale must be 'on' or 'off'"); - endif - Scale = lower (value); - - case "maxiter" - if (! isscalar (value) || value <= 0 || fix (value) != value) - error ("stepwisefit1: MaxIter must be a positive integer"); - endif - MaxIter = value; - - case "keep" - if (! islogical (value)) - error ("stepwisefit1: Keep must be a logical vector"); - endif - Keep = value(:).'; - - otherwise - error ("stepwisefit1: Name–Value option '%s' not supported", name); - - endswitch - endfor - - ## Handle missing values - wasnan = any (isnan ([X y]), 2); - Xc = X(!wasnan, :); - yc = y(!wasnan); - - n = rows (Xc); - p = columns (Xc); - - if (! isempty (Keep)) - X_forced = find (Keep); - else - X_forced = []; - endif - - free_idx = setdiff (1:p, X_forced); - - if (strcmp (Scale, "on")) - muX = mean (Xc, 1); - sigX = std (Xc, 0, 1); - sigX(sigX == 0) = 1; % prevent division by zero - Xs = (Xc - muX) ./ sigX; - else - Xs = Xc; - endif - - if (isempty (free_idx)) - X_use = X_forced; - else - X_step_prev = []; - X_step = []; - - iter = 0; - while (iter < MaxIter) - iter++; - X_step = stepwisefit (yc, Xs(:, free_idx), PEnter, PRemove); - - if (isequal (X_step, X_step_prev)) - break; - endif - - X_step_prev = X_step; - endwhile - - X_use = sort ([X_forced, free_idx(X_step)]); - endif - - ## Validate InModel - - if (! isempty (InModel)) - if (numel (InModel) != p) - error ("stepwisefit1: InModel length must match number of predictors"); - endif - endif - - if (isempty (PRemove)) - PRemove = max (PEnter, 0.1); - endif - - if (PRemove < PEnter) - error ("stepwisefit1: PRemove must be greater than or equal to PEnter"); - endif - - - ## Final regression on selected predictors - Xfinal = [ones(n,1), Xc(:, X_use)]; - [B, BINT, R, RINT, regstats] = regress (yc, Xfinal); - - Rresid = R(:); % freeze residual vector - - ## Allocate outputs - b = zeros (p,1); - se = zeros (p,1); - pval = zeros (p,1); - - df = n - columns (Xfinal); - - ## Included predictors - b(X_use) = B(2:end); - se(X_use) = (BINT(2:end,2) - B(2:end)) ./ tinv (0.975, df); - pval(X_use) = 2 * (1 - tcdf (abs (B(2:end) ./ se(X_use)), df)); - - ## Excluded predictors: conditional refit - excluded = setdiff (1:p, X_use); - - for j = excluded - Xj = [ones(n,1), Xc(:, [X_use j])]; - [Bj, BjINT] = regress (yc, Xj); - - bj = Bj(end); - sej = (BjINT(end,2) - bj) ./ tinv (0.975, n - columns (Xj)); - - b(j) = bj; - se(j) = sej; - pval(j) = 2 * (1 - tcdf (abs (bj / sej), n - columns (Xj))); - endfor - - ## Final model indicator - finalmodel = false (1,p); - finalmodel(X_use) = true; - - ## Stats structure - stats = struct (); - stats.source = "stepwisefit"; - stats.df0 = numel (X_use); - stats.dfe = n - stats.df0 - 1; - stats.SStotal = sum ((yc - mean (yc)).^2); - stats.SSresid = sum (Rresid.^2); - stats.rmse = sqrt (stats.SSresid / stats.dfe); - stats.intercept = B(1); - stats.wasnan = wasnan; - - stats.yr = Rresid; - stats.B = b; - stats.SE = se; - stats.TSTAT = b ./ se; - stats.PVAL = pval; - stats.TSTAT (!isfinite (stats.TSTAT)) = NaN; - - excluded = setdiff (1:p, X_use); -xr = zeros (n, numel (excluded)); - -if (! isempty (X_use)) - Z = [ones(n,1), Xc(:, X_use)]; - P = Z / (Z' * Z) * Z'; % projection matrix - for k = 1:numel (excluded) - j = excluded(k); - xr(:,k) = Xc(:,j) - P * Xc(:,j); - endfor -else - % intercept-only case - for k = 1:numel (excluded) - j = excluded(k); - xr(:,k) = Xc(:,j) - mean (Xc(:,j)); - endfor -endif - -stats.xr = xr; - - covb = NaN (p+1, p+1); - covB = (stats.rmse^2) * inv (Xfinal' * Xfinal); - - idx = [1, X_use + 1]; - covb(idx, idx) = covB; - - stats.covb = covb; - - stats.fstat = ((stats.SStotal - stats.SSresid) / stats.df0) ... - / (stats.SSresid / stats.dfe); - - stats.pval = 1 - fcdf (stats.fstat, stats.df0, stats.dfe); - - history = struct (); - history.in = finalmodel; - history.df0 = stats.df0; - history.rmse = stats.rmse; - - % Coefficient history (excluding intercept) - % MATLAB stores this as p-by-k; here k = 1 - Bhist = zeros (p, 1); - Bhist(finalmodel) = b(finalmodel); - history.B = Bhist; - - ## Placeholders for future phases - nextstep = 0; - -endfunction - - -%!test -%! % S1.2: default full outputs (numeric contract) -%! X = [7 26 6 60; -%! 1 29 15 52; -%! 11 56 8 20; -%! 11 31 8 47; -%! 7 52 6 33; -%! 11 55 9 22; -%! 3 71 17 6; -%! 1 31 22 44; -%! 2 54 18 22; -%! 21 47 4 26; -%! 1 40 23 34; -%! 11 66 9 12; -%! 10 68 8 12]; -%! y = [78.5; 74.3; 104.3; 87.6; 95.9; 109.2; -%! 102.7; 72.5; 93.1; 115.9; 83.8; 113.3; 109.4]; -%! [b,se,pval,finalmodel,stats] = stepwisefit1 (X,y); -%! assert (finalmodel, [true false false true]); -%! assert (b, [1.4400; 0.4161; -0.4100; -0.6140], 1e-4); -%! assert (se, [0.1384; 0.1856; 0.1992; 0.0486], 1e-4); -%! assert (pval, [0; 0.0517; 0.0697; 0], 1e-4); -%! assert (stats.rmse, 2.7343, 1e-4); -%! assert (stats.SStotal, 2715.7631, 1e-3); -%! assert (stats.SSresid, 74.7621, 1e-4); -%! assert (stats.df0, 2); -%! assert (stats.dfe, 10); -%! assert (stats.intercept, 103.0974, 1e-4); - -%!test -%! % S3.1 (numeric kernel only): forced baseline selection -%! X = [ -%! 12.0 4 120 95 2600; -%! 11.5 6 200 110 3000; -%! 10.5 8 300 150 3600; -%! 13.0 4 140 100 2800; -%! 12.5 6 180 120 3200; -%! 11.0 8 250 140 3500; -%! 14.0 4 130 98 2700; -%! 13.5 6 210 115 3100; -%! 12.2 8 320 160 3800; -%! 11.8 4 150 105 2900 -%! ]; -%! y = [28; 22; 18; 27; 23; 19; 29; 21; 17; 26]; -%! -%! [b,se,pval,finalmodel,stats] = stepwisefit1 (X,y); -%! -%! assert (islogical (finalmodel)); -%! assert (numel (finalmodel) == 5); -%! assert (sum (finalmodel) >= 1); -%! assert (isnumeric (b)); -%! assert (isnumeric (se)); -%! assert (isnumeric (pval)); -%! assert (stats.rmse > 0); -%! assert (isfinite (stats.intercept)); - -%!test -%! X = randn (30, 4); -%! y = randn (30, 1); -%! [~,~,~,~,stats] = stepwisefit1 (X, y); -%! -%! required_fields = { -%! "source", "df0", "dfe", "SStotal", "SSresid", "fstat", "pval", ... -%! "rmse", "xr", "yr", "B", "SE", "TSTAT", "PVAL", "covb", ... -%! "intercept", "wasnan" -%! }; -%! -%! for k = 1:numel (required_fields) -%! assert (isfield (stats, required_fields{k})); -%! endfor - -%!test -%! X = randn (40, 5); -%! y = randn (40, 1); -%! [b,se,pval,finalmodel,stats] = stepwisefit1 (X, y); -%! -%! p = columns (X); -%! n = rows (X(~stats.wasnan, :)); -%! -%! assert (size (stats.yr), [n, 1]); -%! assert (rows (stats.B) == p); -%! assert (rows (stats.SE) == p); -%! assert (rows (stats.TSTAT) == p); -%! assert (rows (stats.PVAL) == p); -%! assert (size (stats.covb), [p+1, p+1]); - -%!test -%! X = randn (25, 3); -%! y = randn (25, 1); -%! [~,~,~,~,stats] = stepwisefit1 (X, y); -%! -%! SSresid_calc = sum (stats.yr .^ 2); -%! assert (SSresid_calc, stats.SSresid, 1e-10); -%! -%! rmse_calc = sqrt (stats.SSresid / stats.dfe); -%! assert (rmse_calc, stats.rmse, 1e-10); - -%!test -%! X = randn (50, 6); -%! y = randn (50, 1); -%! [~,~,~,~,stats] = stepwisefit1 (X, y); -%! -%! if (stats.df0 > 0) -%! F_calc = ((stats.SStotal - stats.SSresid) / stats.df0) ... -%! / (stats.SSresid / stats.dfe); -%! -%! assert (F_calc, stats.fstat, 1e-10); -%! assert (stats.pval >= 0 && stats.pval <= 1); -%! else -%! assert (isnan (stats.fstat)); -%! assert (isnan (stats.pval)); -%! endif - - -%!test -%! X = randn (35, 4); -%! y = randn (35, 1); -%! [~,~,~,finalmodel,stats] = stepwisefit1 (X, y); -%! p = columns (X); -%! k = sum (finalmodel); -%! assert (size (stats.xr, 2) == p - k); -%! assert (all (isfinite (stats.xr(:)))); - -%!test -%! X = randn (35, 4); -%! y = randn (35, 1); -%! [~,~,~,finalmodel,stats] = stepwisefit1 (X, y); -%! -%! Xc = X(~stats.wasnan, :); -%! Xfinal = [ones(rows (Xc),1), Xc(:, finalmodel)]; -%! -%! for j = 1:columns (stats.xr) -%! ortho = Xfinal' * stats.xr(:,j); -%! assert (max (abs (ortho(:))) < 1e-6); -%! endfor - -%!test -%! X = randn (40, 5); -%! y = randn (40, 1); -%! [~,~,~,finalmodel,stats,nextstep,history] = stepwisefit1 (X, y); -%! -%! assert (nextstep == 0); -%! assert (isstruct (history)); -%! assert (isfield (history, "in")); -%! assert (isfield (history, "df0")); -%! assert (isfield (history, "rmse")); -%! assert (isfield (history, "B")); -%! -%! assert (isequal (history.in, finalmodel)); -%! assert (history.df0 == stats.df0); -%! assert (history.rmse == stats.rmse); -%! assert (rows (history.B) == columns (X)); - -%!test -%! X = randn (20,4); -%! y = randn (20,1); -%! stepwisefit1 (X,y,'Keep',[true false true false]); - -%!test -%! X = randn (20,4); -%! y = randn (20,1); -%! try -%! stepwisefit1 (X,y,'Keep',[true false]); -%! error ("Expected error not thrown"); -%! catch -%! assert (true); -%! end_try_catch - -%!test -%! X = randn (30, 4); -%! y = randn (30, 1); -%! keep = [true false false false]; -%! [~,~,~,finalmodel] = stepwisefit1 (X, y, "Keep", keep); -%! assert (finalmodel(1) == true); - -%!test -%! X = randn (40, 6); -%! y = randn (40, 1); -%! [~,~,~,finalmodel] = stepwisefit1 (X, y, "MaxIter", 1); -%! assert (islogical (finalmodel)); - -%!test -%! X = randn (50, 5); -%! y = randn (50, 1); -%! [b1] = stepwisefit1 (X, y); -%! [b2] = stepwisefit1 (X, y, "Scale", "on"); -%! assert (rows (b1) == rows (b2)); From 4e72dc6c125f2ebe2b0ab46fb1e254054cc450e7 Mon Sep 17 00:00:00 2001 From: jayant chauhan <0001jayant@gmail.com> Date: Mon, 22 Dec 2025 02:07:10 +0530 Subject: [PATCH 17/23] stepwisefit : migration_1 --- inst/stepwisefit.m | 614 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 497 insertions(+), 117 deletions(-) diff --git a/inst/stepwisefit.m b/inst/stepwisefit.m index 2912d8f78..9a373dafd 100644 --- a/inst/stepwisefit.m +++ b/inst/stepwisefit.m @@ -18,160 +18,540 @@ ## this program; if not, see . ## -*- texinfo -*- -## @deftypefn {statistics} {[@var{X_use}, @var{b}, @var{bint}, @var{r}, @var{rint}, @var{stats}] =} stepwisefit (@var{y}, @var{X}, @var{penter} = 0.05, @var{premove} = 0.1, @var{method} = "corr") +## @deftypefn {statistics} {@var{b}, @var{se}, @var{pval}, @var{finalmodel}, @var{stats}, @var{nextstep}, @var{history}} = stepwisefit1 (@var{X}, @var{y}, @var{varargin}) +## +## Perform stepwise linear regression with extended diagnostic outputs. +## +## @code{stepwisefit1} is a wrapper around the legacy @code{stepwisefit} +## function that provides additional regression diagnostics, structured +## outputs, and partial MATLAB-compatible Name–Value argument handling. +## +## Predictor selection is performed using the existing @code{stepwisefit} +## algorithm. After variable selection, the final regression model is +## explicitly refit using @code{regress} to compute coefficient estimates +## and inferential statistics for both included and excluded predictors. ## -## Linear regression with stepwise variable selection. ## ## @subheading Arguments ## ## @itemize @bullet ## @item -## @var{y} is an @var{n} by 1 vector of data to fit. -## @item -## @var{X} is an @var{n} by @var{k} matrix containing the values of @var{k} potential predictors. No constant term should be included (one will always be added to the regression automatically). -## @item -## @var{penter} is the maximum p-value to enter a new variable into the regression (default: 0.05). +## @var{X} is an @var{n}-by-@var{p} numeric matrix of predictor variables. +## ## @item -## @var{premove} is the minimum p-value to remove a variable from the regression (default: 0.1). +## @var{y} is an @var{n}-by-1 numeric response vector. +## ## @item -## @var{method} sets how predictors are selected at each step, either based on their correlation with the residuals ("corr", default) -## or on the p values of their regression coefficients when they are successively added ("p"). +## Optional Name–Value pairs may be provided to control model selection. ## @end itemize ## -## @subheading Return values +## @subheading Name–Value Arguments +## +## @table @asis +## @item @qcode{"InModel"} +## Logical row vector of length @var{p} specifying an initial model. +## Predictors marked @code{true} are treated as initially included. +## +## @item @qcode{"Keep"} +## Logical row vector of length @var{p} specifying predictors that must +## always be included in the final model. +## +## @item @qcode{"PEnter"} +## Scalar significance level in the open interval (0,1) specifying the +## entry threshold for stepwise selection. Default is @code{0.05}. +## +## @item @qcode{"PRemove"} +## Scalar significance level in the open interval (0,1) specifying the +## removal threshold for stepwise selection. If not specified, a default +## value greater than or equal to @qcode{"PEnter"} is used. +## +## @item @qcode{"MaxIter"} +## Positive integer specifying the maximum number of stepwise iterations. +## Default is @code{Inf}. +## +## @item @qcode{"Scale"} +## Either @qcode{"on"} or @qcode{"off"}. When enabled, predictors are +## standardized prior to stepwise selection only. Final coefficients +## are always reported on the original data scale. +## +## @item @qcode{"Display"} +## Either @qcode{"on"} or @qcode{"off"}. Currently accepted for interface +## compatibility but does not affect output. +## @end table +## +## @subheading Return Values ## ## @itemize @bullet ## @item -## @var{X_use} contains the indices of the predictors included in the final regression model. The predictors are listed in the order they were added, so typically the first ones listed are the most significant. +## @var{b} is a @var{p}-by-1 vector of regression coefficients. Coefficients +## corresponding to excluded predictors are estimated conditionally. +## ## @item -## @var{b}, @var{bint}, @var{r}, @var{rint}, @var{stats} are the results of @code{[b, bint, r, rint, stats] = regress(y, [ones(size(y)) X(:, X_use)], penter);} -## @end itemize -## @subheading References +## @var{se} is a @var{p}-by-1 vector of standard errors. +## +## @item +## @var{pval} is a @var{p}-by-1 vector of two-sided p-values. +## +## @item +## @var{finalmodel} is a logical row vector indicating predictors selected +## in the final model. ## -## @enumerate ## @item -## N. R. Draper and H. Smith (1966). @cite{Applied Regression Analysis}. Wiley. Chapter 6. +## @var{stats} is a structure containing regression diagnostics, including +## sums of squares, degrees of freedom, residuals, covariance estimates, +## F-statistic, and related quantities. +## +## @item +## @var{nextstep} is a scalar placeholder indicating whether an additional +## stepwise iteration is recommended. Currently always zero. +## +## @item +## @var{history} is a structure summarizing the final model state, including +## selected predictors and coefficient history. +## @end itemize ## -## @end enumerate ## @seealso{regress} ## @end deftypefn -function [X_use, b, bint, r, rint, stats] = stepwisefit(y, X, penter = 0.05, premove = 0.1, method = "corr") - %initialize all documented outputs to avoid undefined returns - X_use = []; - b = []; - bint = []; - r = []; - rint = []; - stats = struct (); +function [b, se, pval, finalmodel, stats, nextstep, history] = ... + stepwisefit1 (X, y, varargin) -if nargin >= 3 && isempty(penter) - penter = 0.05; -endif + ## Input validation (positional) -if nargin >= 4 && isempty(premove) - premove = 0.1; -endif + if (nargin < 2) + error ("stepwisefit1: at least two input arguments required"); + endif -if (! isempty (penter) && ! isscalar (penter)) - error ("stepwisefit: penter must be a scalar value"); -endif + if (! ismatrix (X) || ! isvector (y)) + error ("stepwisefit1: X must be a matrix and y a vector"); + endif + + y = y(:); + + ## Parse Name–Value pairs + InModel = []; + Display = "on"; + % MATLAB-compatible defaults + PEnter = 0.05; + PRemove = []; + Scale = "off"; + MaxIter = Inf; + Keep = []; + + + if (mod (numel (varargin), 2) != 0) + error ("stepwisefit1: Name–Value arguments must come in pairs"); + endif + + for k = 1:2:numel (varargin) + name = varargin{k}; + value = varargin{k+1}; + + if (! (ischar (name) || isstring (name))) + error ("stepwisefit1: Name–Value keys must be strings"); + endif + name = char (name); + + switch lower (name) + case "inmodel" + if (! islogical (value)) + error ("stepwisefit1: InModel must be a logical vector"); + endif + InModel = value(:).'; + case "display" + if (! any (strcmpi (value, {"on", "off"}))) + error ("stepwisefit1: Display must be 'on' or 'off'"); + endif + Display = lower (value); + + case "penter" + if (! isscalar (value) || ! isnumeric (value) || value <= 0 || value >= 1) + error ("stepwisefit1: PEnter must be a scalar strictly between 0 and 1"); + endif + PEnter = value; + + case "premove" + if (! isscalar (value) || ! isnumeric (value) || value <= 0 || value >= 1) + error ("stepwisefit1: PRemove must be a scalar strictly between 0 and 1"); + endif + PRemove = value; + + case "scale" + if (! any (strcmpi (value, {"on", "off"}))) + error ("stepwisefit1: Scale must be 'on' or 'off'"); + endif + Scale = lower (value); + + case "maxiter" + if (! isscalar (value) || value <= 0 || fix (value) != value) + error ("stepwisefit1: MaxIter must be a positive integer"); + endif + MaxIter = value; + + case "keep" + if (! islogical (value)) + error ("stepwisefit1: Keep must be a logical vector"); + endif + Keep = value(:).'; -#remove any rows with missing entries -notnans = !any (isnan ([y X]) , 2); -y = y(notnans); -X = X(notnans,:); - -n = numel(y); #number of data points -k = size(X, 2); #number of predictors - -X_use = []; -v = 0; #number of predictor variables in regression model - -iter = 0; -max_iters = 100; #maximum number of iterations to do - -r = y; -while 1 - - iter++; - #decide which variable to add to regression, if any - added = false; - if numel(X_use) < k - X_inds = zeros(k, 1, "logical"); X_inds(X_use) = 1; - - switch lower (method) - case {"corr"} - [~, i_to_add] = max(abs(corr(X(:, ~X_inds), r))); #try adding the variable with the highest correlation to the residual from current regression - i_to_add = (1:k)(~X_inds)(i_to_add); #index within the original predictor set - [b_new, bint_new, r_new, rint_new, stats_new] = regress(y, [ones(n, 1) X(:, [X_use i_to_add])], penter); - case {"p"} - z_vals=zeros(k,1); - for j=1:k - if ~X_inds(j) - [b_j, bint_j, ~,~ ,~] = regress(y, [ones(n, 1) X(:, [X_use j])], penter); - z_vals(j) = abs(b_j(end)) / (bint_j(end, 2) - b_j(end)); - endif - endfor - [~, i_to_add] = max(z_vals); #try adding the variable with the largest z-value (smallest partial p-value) - [b_new, bint_new, r_new, rint_new, stats_new] = regress(y, [ones(n, 1) X(:, [X_use i_to_add])], penter); otherwise - error("stepwisefit: invalid value for method") + error ("stepwisefit1: Name–Value option '%s' not supported", name); + endswitch + endfor - z_new = abs(b_new(end)) / (bint_new(end, 2) - b_new(end)); - if z_new > 1 #accept new variable - added = true; - X_use = [X_use i_to_add]; - b = b_new; - bint = bint_new; - r = r_new; - rint = rint_new; - stats = stats_new; - v = v + 1; - endif + ## Handle missing values + wasnan = any (isnan ([X y]), 2); + Xc = X(!wasnan, :); + yc = y(!wasnan); + + n = rows (Xc); + p = columns (Xc); + + if (! isempty (Keep)) + X_forced = find (Keep); + else + X_forced = []; + endif + + free_idx = setdiff (1:p, X_forced); + + if (strcmp (Scale, "on")) + muX = mean (Xc, 1); + sigX = std (Xc, 0, 1); + sigX(sigX == 0) = 1; % prevent division by zero + Xs = (Xc - muX) ./ sigX; + else + Xs = Xc; endif - #decide which variable to drop from regression, if any - dropped = false; - if v > 0 - t_ratio = tinv(1 - premove/2, n - v - 1) / tinv(1 - penter/2, n - v - 1); #estimate the ratio between the z score corresponding to premove to that corresponding to penter - [z_min, i_min] = min(abs(b(2:end)) ./ (bint(2:end, 2) - b(2:end))); - if z_min < t_ratio #drop a variable - dropped = true; - X_use(i_min) = []; - [b, bint, r, rint, stats] = regress(y, [ones(n, 1) X(:, X_use)], penter); - v = v - 1; + if (isempty (free_idx)) + X_use = X_forced; + else + X_step_prev = []; + X_step = []; + + iter = 0; + while (iter < MaxIter) + iter++; + X_step = stepwisefit (yc, Xs(:, free_idx), PEnter, PRemove); + + if (isequal (X_step, X_step_prev)) + break; + endif + + X_step_prev = X_step; + endwhile + + X_use = sort ([X_forced, free_idx(X_step)]); + endif + + ## Validate InModel + + if (! isempty (InModel)) + if (numel (InModel) != p) + error ("stepwisefit1: InModel length must match number of predictors"); endif endif - #terminate if no change in the list of regression variables - if ~added && ~dropped - break + if (isempty (PRemove)) + PRemove = max (PEnter, 0.1); endif - if iter >= max_iters - warning('stepwisefit: maximum iteration count exceeded before convergence') - break + if (PRemove < PEnter) + error ("stepwisefit1: PRemove must be greater than or equal to PEnter"); endif -endwhile + + ## Final regression on selected predictors + Xfinal = [ones(n,1), Xc(:, X_use)]; + [B, BINT, R, RINT, regstats] = regress (yc, Xfinal); + + Rresid = R(:); % freeze residual vector + + ## Allocate outputs + b = zeros (p,1); + se = zeros (p,1); + pval = zeros (p,1); + + df = n - columns (Xfinal); + + ## Included predictors + b(X_use) = B(2:end); + se(X_use) = (BINT(2:end,2) - B(2:end)) ./ tinv (0.975, df); + pval(X_use) = 2 * (1 - tcdf (abs (B(2:end) ./ se(X_use)), df)); + + ## Excluded predictors: conditional refit + excluded = setdiff (1:p, X_use); + + for j = excluded + Xj = [ones(n,1), Xc(:, [X_use j])]; + [Bj, BjINT] = regress (yc, Xj); + + bj = Bj(end); + sej = (BjINT(end,2) - bj) ./ tinv (0.975, n - columns (Xj)); + + b(j) = bj; + se(j) = sej; + pval(j) = 2 * (1 - tcdf (abs (bj / sej), n - columns (Xj))); + endfor + + ## Final model indicator + finalmodel = false (1,p); + finalmodel(X_use) = true; + + ## Stats structure + stats = struct (); + stats.source = "stepwisefit"; + stats.df0 = numel (X_use); + stats.dfe = n - stats.df0 - 1; + stats.SStotal = sum ((yc - mean (yc)).^2); + stats.SSresid = sum (Rresid.^2); + stats.rmse = sqrt (stats.SSresid / stats.dfe); + stats.intercept = B(1); + stats.wasnan = wasnan; + + stats.yr = Rresid; + stats.B = b; + stats.SE = se; + stats.TSTAT = b ./ se; + stats.PVAL = pval; + stats.TSTAT (!isfinite (stats.TSTAT)) = NaN; + + excluded = setdiff (1:p, X_use); +xr = zeros (n, numel (excluded)); + +if (! isempty (X_use)) + Z = [ones(n,1), Xc(:, X_use)]; + P = Z / (Z' * Z) * Z'; % projection matrix + for k = 1:numel (excluded) + j = excluded(k); + xr(:,k) = Xc(:,j) - P * Xc(:,j); + endfor +else + % intercept-only case + for k = 1:numel (excluded) + j = excluded(k); + xr(:,k) = Xc(:,j) - mean (Xc(:,j)); + endfor +endif + +stats.xr = xr; + + covb = NaN (p+1, p+1); + covB = (stats.rmse^2) * inv (Xfinal' * Xfinal); + + idx = [1, X_use + 1]; + covb(idx, idx) = covB; + + stats.covb = covb; + + stats.fstat = ((stats.SStotal - stats.SSresid) / stats.df0) ... + / (stats.SSresid / stats.dfe); + + stats.pval = 1 - fcdf (stats.fstat, stats.df0, stats.dfe); + + history = struct (); + history.in = finalmodel; + history.df0 = stats.df0; + history.rmse = stats.rmse; + + % Coefficient history (excluding intercept) + % MATLAB stores this as p-by-k; here k = 1 + Bhist = zeros (p, 1); + Bhist(finalmodel) = b(finalmodel); + history.B = Bhist; + + ## Placeholders for future phases + nextstep = 0; endfunction + +%!test +%! % S1.2: default full outputs (numeric contract) +%! X = [7 26 6 60; +%! 1 29 15 52; +%! 11 56 8 20; +%! 11 31 8 47; +%! 7 52 6 33; +%! 11 55 9 22; +%! 3 71 17 6; +%! 1 31 22 44; +%! 2 54 18 22; +%! 21 47 4 26; +%! 1 40 23 34; +%! 11 66 9 12; +%! 10 68 8 12]; +%! y = [78.5; 74.3; 104.3; 87.6; 95.9; 109.2; +%! 102.7; 72.5; 93.1; 115.9; 83.8; 113.3; 109.4]; +%! [b,se,pval,finalmodel,stats] = stepwisefit1 (X,y); +%! assert (finalmodel, [true false false true]); +%! assert (b, [1.4400; 0.4161; -0.4100; -0.6140], 1e-4); +%! assert (se, [0.1384; 0.1856; 0.1992; 0.0486], 1e-4); +%! assert (pval, [0; 0.0517; 0.0697; 0], 1e-4); +%! assert (stats.rmse, 2.7343, 1e-4); +%! assert (stats.SStotal, 2715.7631, 1e-3); +%! assert (stats.SSresid, 74.7621, 1e-4); +%! assert (stats.df0, 2); +%! assert (stats.dfe, 10); +%! assert (stats.intercept, 103.0974, 1e-4); + +%!test +%! % S3.1 (numeric kernel only): forced baseline selection +%! X = [ +%! 12.0 4 120 95 2600; +%! 11.5 6 200 110 3000; +%! 10.5 8 300 150 3600; +%! 13.0 4 140 100 2800; +%! 12.5 6 180 120 3200; +%! 11.0 8 250 140 3500; +%! 14.0 4 130 98 2700; +%! 13.5 6 210 115 3100; +%! 12.2 8 320 160 3800; +%! 11.8 4 150 105 2900 +%! ]; +%! y = [28; 22; 18; 27; 23; 19; 29; 21; 17; 26]; +%! +%! [b,se,pval,finalmodel,stats] = stepwisefit1 (X,y); +%! +%! assert (islogical (finalmodel)); +%! assert (numel (finalmodel) == 5); +%! assert (sum (finalmodel) >= 1); +%! assert (isnumeric (b)); +%! assert (isnumeric (se)); +%! assert (isnumeric (pval)); +%! assert (stats.rmse > 0); +%! assert (isfinite (stats.intercept)); + +%!test +%! X = randn (30, 4); +%! y = randn (30, 1); +%! [~,~,~,~,stats] = stepwisefit1 (X, y); +%! +%! required_fields = { +%! "source", "df0", "dfe", "SStotal", "SSresid", "fstat", "pval", ... +%! "rmse", "xr", "yr", "B", "SE", "TSTAT", "PVAL", "covb", ... +%! "intercept", "wasnan" +%! }; +%! +%! for k = 1:numel (required_fields) +%! assert (isfield (stats, required_fields{k})); +%! endfor + +%!test +%! X = randn (40, 5); +%! y = randn (40, 1); +%! [b,se,pval,finalmodel,stats] = stepwisefit1 (X, y); +%! +%! p = columns (X); +%! n = rows (X(~stats.wasnan, :)); +%! +%! assert (size (stats.yr), [n, 1]); +%! assert (rows (stats.B) == p); +%! assert (rows (stats.SE) == p); +%! assert (rows (stats.TSTAT) == p); +%! assert (rows (stats.PVAL) == p); +%! assert (size (stats.covb), [p+1, p+1]); + +%!test +%! X = randn (25, 3); +%! y = randn (25, 1); +%! [~,~,~,~,stats] = stepwisefit1 (X, y); +%! +%! SSresid_calc = sum (stats.yr .^ 2); +%! assert (SSresid_calc, stats.SSresid, 1e-10); +%! +%! rmse_calc = sqrt (stats.SSresid / stats.dfe); +%! assert (rmse_calc, stats.rmse, 1e-10); + +%!test +%! X = randn (50, 6); +%! y = randn (50, 1); +%! [~,~,~,~,stats] = stepwisefit1 (X, y); +%! +%! if (stats.df0 > 0) +%! F_calc = ((stats.SStotal - stats.SSresid) / stats.df0) ... +%! / (stats.SSresid / stats.dfe); +%! +%! assert (F_calc, stats.fstat, 1e-10); +%! assert (stats.pval >= 0 && stats.pval <= 1); +%! else +%! assert (isnan (stats.fstat)); +%! assert (isnan (stats.pval)); +%! endif + + +%!test +%! X = randn (35, 4); +%! y = randn (35, 1); +%! [~,~,~,finalmodel,stats] = stepwisefit1 (X, y); +%! p = columns (X); +%! k = sum (finalmodel); +%! assert (size (stats.xr, 2) == p - k); +%! assert (all (isfinite (stats.xr(:)))); + +%!test +%! X = randn (35, 4); +%! y = randn (35, 1); +%! [~,~,~,finalmodel,stats] = stepwisefit1 (X, y); +%! +%! Xc = X(~stats.wasnan, :); +%! Xfinal = [ones(rows (Xc),1), Xc(:, finalmodel)]; +%! +%! for j = 1:columns (stats.xr) +%! ortho = Xfinal' * stats.xr(:,j); +%! assert (max (abs (ortho(:))) < 1e-6); +%! endfor + +%!test +%! X = randn (40, 5); +%! y = randn (40, 1); +%! [~,~,~,finalmodel,stats,nextstep,history] = stepwisefit1 (X, y); +%! +%! assert (nextstep == 0); +%! assert (isstruct (history)); +%! assert (isfield (history, "in")); +%! assert (isfield (history, "df0")); +%! assert (isfield (history, "rmse")); +%! assert (isfield (history, "B")); +%! +%! assert (isequal (history.in, finalmodel)); +%! assert (history.df0 == stats.df0); +%! assert (history.rmse == stats.rmse); +%! assert (rows (history.B) == columns (X)); + +%!test +%! X = randn (20,4); +%! y = randn (20,1); +%! stepwisefit1 (X,y,'Keep',[true false true false]); + +%!test +%! X = randn (20,4); +%! y = randn (20,1); +%! try +%! stepwisefit1 (X,y,'Keep',[true false]); +%! error ("Expected error not thrown"); +%! catch +%! assert (true); +%! end_try_catch + +%!test +%! X = randn (30, 4); +%! y = randn (30, 1); +%! keep = [true false false false]; +%! [~,~,~,finalmodel] = stepwisefit1 (X, y, "Keep", keep); +%! assert (finalmodel(1) == true); + +%!test +%! X = randn (40, 6); +%! y = randn (40, 1); +%! [~,~,~,finalmodel] = stepwisefit1 (X, y, "MaxIter", 1); +%! assert (islogical (finalmodel)); + %!test -%! % Sample data from Draper and Smith (n = 13, k = 4) -%! X = [7 1 11 11 7 11 3 1 2 21 1 11 10; ... -%! 26 29 56 31 52 55 71 31 54 47 40 66 68; ... -%! 6 15 8 8 6 9 17 22 18 4 23 9 8; ... -%! 60 52 20 47 33 22 6 44 22 26 34 12 12]'; -%! y = [78.5 74.3 104.3 87.6 95.9 109.2 102.7 72.5 93.1 115.9 83.8 113.3 109.4]'; -%! [X_use, b, bint, r, rint, stats] = stepwisefit(y, X); -%! assert(X_use, [4 1]) -%! assert(b, regress(y, [ones(size(y)) X(:, X_use)], 0.05)) -%! [X_use, b, bint, r, rint, stats] = stepwisefit(y, X, 0.05, 0.1, "corr"); -%! assert(X_use, [4 1]) -%! assert(b, regress(y, [ones(size(y)) X(:, X_use)], 0.05)) -%! [X_use, b, bint, r, rint, stats] = stepwisefit(y, X, [], [], "p"); -%! assert(X_use, [4 1]) -%! assert(b, regress(y, [ones(size(y)) X(:, X_use)], 0.05)) +%! X = randn (50, 5); +%! y = randn (50, 1); +%! [b1] = stepwisefit1 (X, y); +%! [b2] = stepwisefit1 (X, y, "Scale", "on"); +%! assert (rows (b1) == rows (b2)); From b394b3c1236d74c585c7779696725fc9d48e77b2 Mon Sep 17 00:00:00 2001 From: jayant chauhan <0001jayant@gmail.com> Date: Mon, 22 Dec 2025 02:15:51 +0530 Subject: [PATCH 18/23] stepwisefit : migration_2 --- inst/stepwisefit.m | 65 +++++++++++++++++++++++----------------------- 1 file changed, 32 insertions(+), 33 deletions(-) diff --git a/inst/stepwisefit.m b/inst/stepwisefit.m index 9a373dafd..518479d17 100644 --- a/inst/stepwisefit.m +++ b/inst/stepwisefit.m @@ -18,12 +18,11 @@ ## this program; if not, see . ## -*- texinfo -*- -## @deftypefn {statistics} {@var{b}, @var{se}, @var{pval}, @var{finalmodel}, @var{stats}, @var{nextstep}, @var{history}} = stepwisefit1 (@var{X}, @var{y}, @var{varargin}) +## @deftypefn {statistics} {@var{b}, @var{se}, @var{pval}, @var{finalmodel}, @var{stats}, @var{nextstep}, @var{history}} = stepwisefit (@var{X}, @var{y}, @var{varargin}) ## ## Perform stepwise linear regression with extended diagnostic outputs. ## -## @code{stepwisefit1} is a wrapper around the legacy @code{stepwisefit} -## function that provides additional regression diagnostics, structured +## @code{stepwisefit} provides additional regression diagnostics, structured ## outputs, and partial MATLAB-compatible Name–Value argument handling. ## ## Predictor selection is performed using the existing @code{stepwisefit} @@ -114,16 +113,16 @@ ## @end deftypefn function [b, se, pval, finalmodel, stats, nextstep, history] = ... - stepwisefit1 (X, y, varargin) + stepwisefit (X, y, varargin) ## Input validation (positional) if (nargin < 2) - error ("stepwisefit1: at least two input arguments required"); + error ("stepwisefit: at least two input arguments required"); endif if (! ismatrix (X) || ! isvector (y)) - error ("stepwisefit1: X must be a matrix and y a vector"); + error ("stepwisefit: X must be a matrix and y a vector"); endif y = y(:); @@ -140,7 +139,7 @@ if (mod (numel (varargin), 2) != 0) - error ("stepwisefit1: Name–Value arguments must come in pairs"); + error ("stepwisefit: Name–Value arguments must come in pairs"); endif for k = 1:2:numel (varargin) @@ -148,54 +147,54 @@ value = varargin{k+1}; if (! (ischar (name) || isstring (name))) - error ("stepwisefit1: Name–Value keys must be strings"); + error ("stepwisefit: Name–Value keys must be strings"); endif name = char (name); switch lower (name) case "inmodel" if (! islogical (value)) - error ("stepwisefit1: InModel must be a logical vector"); + error ("stepwisefit: InModel must be a logical vector"); endif InModel = value(:).'; case "display" if (! any (strcmpi (value, {"on", "off"}))) - error ("stepwisefit1: Display must be 'on' or 'off'"); + error ("stepwisefit: Display must be 'on' or 'off'"); endif Display = lower (value); case "penter" if (! isscalar (value) || ! isnumeric (value) || value <= 0 || value >= 1) - error ("stepwisefit1: PEnter must be a scalar strictly between 0 and 1"); + error ("stepwisefit: PEnter must be a scalar strictly between 0 and 1"); endif PEnter = value; case "premove" if (! isscalar (value) || ! isnumeric (value) || value <= 0 || value >= 1) - error ("stepwisefit1: PRemove must be a scalar strictly between 0 and 1"); + error ("stepwisefit: PRemove must be a scalar strictly between 0 and 1"); endif PRemove = value; case "scale" if (! any (strcmpi (value, {"on", "off"}))) - error ("stepwisefit1: Scale must be 'on' or 'off'"); + error ("stepwisefit: Scale must be 'on' or 'off'"); endif Scale = lower (value); case "maxiter" if (! isscalar (value) || value <= 0 || fix (value) != value) - error ("stepwisefit1: MaxIter must be a positive integer"); + error ("stepwisefit: MaxIter must be a positive integer"); endif MaxIter = value; case "keep" if (! islogical (value)) - error ("stepwisefit1: Keep must be a logical vector"); + error ("stepwisefit: Keep must be a logical vector"); endif Keep = value(:).'; otherwise - error ("stepwisefit1: Name–Value option '%s' not supported", name); + error ("stepwisefit: Name–Value option '%s' not supported", name); endswitch endfor @@ -250,7 +249,7 @@ if (! isempty (InModel)) if (numel (InModel) != p) - error ("stepwisefit1: InModel length must match number of predictors"); + error ("stepwisefit: InModel length must match number of predictors"); endif endif @@ -259,7 +258,7 @@ endif if (PRemove < PEnter) - error ("stepwisefit1: PRemove must be greater than or equal to PEnter"); + error ("stepwisefit: PRemove must be greater than or equal to PEnter"); endif @@ -385,7 +384,7 @@ %! 10 68 8 12]; %! y = [78.5; 74.3; 104.3; 87.6; 95.9; 109.2; %! 102.7; 72.5; 93.1; 115.9; 83.8; 113.3; 109.4]; -%! [b,se,pval,finalmodel,stats] = stepwisefit1 (X,y); +%! [b,se,pval,finalmodel,stats] = stepwisefit (X,y); %! assert (finalmodel, [true false false true]); %! assert (b, [1.4400; 0.4161; -0.4100; -0.6140], 1e-4); %! assert (se, [0.1384; 0.1856; 0.1992; 0.0486], 1e-4); @@ -413,7 +412,7 @@ %! ]; %! y = [28; 22; 18; 27; 23; 19; 29; 21; 17; 26]; %! -%! [b,se,pval,finalmodel,stats] = stepwisefit1 (X,y); +%! [b,se,pval,finalmodel,stats] = stepwisefit (X,y); %! %! assert (islogical (finalmodel)); %! assert (numel (finalmodel) == 5); @@ -427,7 +426,7 @@ %!test %! X = randn (30, 4); %! y = randn (30, 1); -%! [~,~,~,~,stats] = stepwisefit1 (X, y); +%! [~,~,~,~,stats] = stepwisefit (X, y); %! %! required_fields = { %! "source", "df0", "dfe", "SStotal", "SSresid", "fstat", "pval", ... @@ -442,7 +441,7 @@ %!test %! X = randn (40, 5); %! y = randn (40, 1); -%! [b,se,pval,finalmodel,stats] = stepwisefit1 (X, y); +%! [b,se,pval,finalmodel,stats] = stepwisefit (X, y); %! %! p = columns (X); %! n = rows (X(~stats.wasnan, :)); @@ -457,7 +456,7 @@ %!test %! X = randn (25, 3); %! y = randn (25, 1); -%! [~,~,~,~,stats] = stepwisefit1 (X, y); +%! [~,~,~,~,stats] = stepwisefit (X, y); %! %! SSresid_calc = sum (stats.yr .^ 2); %! assert (SSresid_calc, stats.SSresid, 1e-10); @@ -468,7 +467,7 @@ %!test %! X = randn (50, 6); %! y = randn (50, 1); -%! [~,~,~,~,stats] = stepwisefit1 (X, y); +%! [~,~,~,~,stats] = stepwisefit (X, y); %! %! if (stats.df0 > 0) %! F_calc = ((stats.SStotal - stats.SSresid) / stats.df0) ... @@ -485,7 +484,7 @@ %!test %! X = randn (35, 4); %! y = randn (35, 1); -%! [~,~,~,finalmodel,stats] = stepwisefit1 (X, y); +%! [~,~,~,finalmodel,stats] = stepwisefit (X, y); %! p = columns (X); %! k = sum (finalmodel); %! assert (size (stats.xr, 2) == p - k); @@ -494,7 +493,7 @@ %!test %! X = randn (35, 4); %! y = randn (35, 1); -%! [~,~,~,finalmodel,stats] = stepwisefit1 (X, y); +%! [~,~,~,finalmodel,stats] = stepwisefit (X, y); %! %! Xc = X(~stats.wasnan, :); %! Xfinal = [ones(rows (Xc),1), Xc(:, finalmodel)]; @@ -507,7 +506,7 @@ %!test %! X = randn (40, 5); %! y = randn (40, 1); -%! [~,~,~,finalmodel,stats,nextstep,history] = stepwisefit1 (X, y); +%! [~,~,~,finalmodel,stats,nextstep,history] = stepwisefit (X, y); %! %! assert (nextstep == 0); %! assert (isstruct (history)); @@ -524,13 +523,13 @@ %!test %! X = randn (20,4); %! y = randn (20,1); -%! stepwisefit1 (X,y,'Keep',[true false true false]); +%! stepwisefit (X,y,'Keep',[true false true false]); %!test %! X = randn (20,4); %! y = randn (20,1); %! try -%! stepwisefit1 (X,y,'Keep',[true false]); +%! stepwisefit (X,y,'Keep',[true false]); %! error ("Expected error not thrown"); %! catch %! assert (true); @@ -540,18 +539,18 @@ %! X = randn (30, 4); %! y = randn (30, 1); %! keep = [true false false false]; -%! [~,~,~,finalmodel] = stepwisefit1 (X, y, "Keep", keep); +%! [~,~,~,finalmodel] = stepwisefit (X, y, "Keep", keep); %! assert (finalmodel(1) == true); %!test %! X = randn (40, 6); %! y = randn (40, 1); -%! [~,~,~,finalmodel] = stepwisefit1 (X, y, "MaxIter", 1); +%! [~,~,~,finalmodel] = stepwisefit (X, y, "MaxIter", 1); %! assert (islogical (finalmodel)); %!test %! X = randn (50, 5); %! y = randn (50, 1); -%! [b1] = stepwisefit1 (X, y); -%! [b2] = stepwisefit1 (X, y, "Scale", "on"); +%! [b1] = stepwisefit (X, y); +%! [b2] = stepwisefit (X, y, "Scale", "on"); %! assert (rows (b1) == rows (b2)); From 4fee699dab6dce7fd5a01ce8f5707531aaef73e8 Mon Sep 17 00:00:00 2001 From: jayant chauhan <0001jayant@gmail.com> Date: Mon, 22 Dec 2025 03:30:18 +0530 Subject: [PATCH 19/23] stepwisefit : migration_3 --- inst/stepwisefit.m | 199 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 160 insertions(+), 39 deletions(-) diff --git a/inst/stepwisefit.m b/inst/stepwisefit.m index 518479d17..72ec172c9 100644 --- a/inst/stepwisefit.m +++ b/inst/stepwisefit.m @@ -25,8 +25,9 @@ ## @code{stepwisefit} provides additional regression diagnostics, structured ## outputs, and partial MATLAB-compatible Name–Value argument handling. ## -## Predictor selection is performed using the existing @code{stepwisefit} -## algorithm. After variable selection, the final regression model is +## Predictor selection is performed internally using a MATLAB-compatible stepwise +## procedure based on conditional p-values. +## After variable selection, the final regression model is ## explicitly refit using @code{regress} to compute coefficient estimates ## and inferential statistics for both included and excluded predictors. ## @@ -115,6 +116,14 @@ function [b, se, pval, finalmodel, stats, nextstep, history] = ... stepwisefit (X, y, varargin) + b = []; + se = []; + pval = []; + finalmodel = []; + stats = struct (); + nextstep = 0; + history = struct (); + ## Input validation (positional) if (nargin < 2) @@ -207,13 +216,26 @@ n = rows (Xc); p = columns (Xc); - if (! isempty (Keep)) - X_forced = find (Keep); - else - X_forced = []; + % Validate Keep and InModel lengths if provided + if (! isempty (Keep) && numel (Keep) != p) + error ("stepwisefit: Keep length must match number of predictors"); + endif + if (! isempty (InModel) && numel (InModel) != p) + error ("stepwisefit: InModel length must match number of predictors"); endif - free_idx = setdiff (1:p, X_forced); + if (isempty (Keep)) + Keep = false(1, p); + endif + + % Default PRemove if unset + if (isempty (PRemove)) + PRemove = max (PEnter, 0.1); + endif + + if (PRemove < PEnter) + error ("stepwisefit: PRemove must be greater than or equal to PEnter"); + endif if (strcmp (Scale, "on")) muX = mean (Xc, 1); @@ -224,43 +246,132 @@ Xs = Xc; endif - if (isempty (free_idx)) - X_use = X_forced; + ## Validate InModel + + if (! isempty (InModel)) + cur = logical (InModel(:).'); else - X_step_prev = []; - X_step = []; + cur = false (1, p); + endif + + % Ensure Keep predictors are always in model (already set) + cur(Keep) = true; + prev = cur; + iter = 0; + + % Iterative selection: each iteration attempts ADD then REMOVE. + while (iter < MaxIter) + iter = iter + 1; + + % ----------------------- + % ADD phase: evaluate candidates by conditional p-value + % ----------------------- + candidates = find (~cur); + if (! isempty (candidates)) + best_p = Inf; + best_j = -1; + cols = find (cur); % current included predictors (may be empty) + + for idx = 1:numel (candidates) + j = candidates(idx); + + % Build trial design: intercept, current included (if any), candidate j + if (isempty (cols)) + Xtry = [ones(n,1), Xs(:, j)]; + else + Xtry = [ones(n,1), Xs(:, cols), Xs(:, j)]; + endif + + % Regress and compute candidate p-value; skip singular/failed fits + try + [btry, binttry] = regress (yc, Xtry); + catch + continue; + end_try_catch + + df_try = n - columns (Xtry); + if (df_try <= 0) + continue; + endif + + se_try = (binttry(end,2) - btry(end)) / tinv (0.975, df_try); + if (se_try <= 0 || ! isfinite (se_try)) + continue; + endif - iter = 0; - while (iter < MaxIter) - iter++; - X_step = stepwisefit (yc, Xs(:, free_idx), PEnter, PRemove); + tstat = btry(end) / se_try; + p_candidate = 2 * (1 - tcdf (abs (tstat), df_try)); - if (isequal (X_step, X_step_prev)) - break; + % Deterministic tie-break: first encountered when nearly equal + if (p_candidate < best_p - eps) + best_p = p_candidate; + best_j = j; + endif + endfor + + % Add best candidate only if it meets the PEnter threshold + if (best_j > 0 && best_p < PEnter) + cur(best_j) = true; endif + endif - X_step_prev = X_step; - endwhile + % ----------------------- + % REMOVE phase: compute conditional p-values for included predictors, + % remove worst (largest p) among removable predictors + % ----------------------- + included = find (cur); + removable = setdiff (included, find (Keep)); % never remove Keep + + if (! isempty (removable)) + Xfull = [ones(n,1), Xs(:, included)]; + + % Regress current full model; guard against singular / failed fits + try + [bfull, bintfull] = regress (yc, Xfull); + catch + bfull = NaN (columns (Xfull), 1); + bintfull = NaN (columns (Xfull), 2); + end_try_catch + + df_full = n - columns (Xfull); + pvals_included = Inf (1, numel (included)); + + for ii = 1:numel (included) + if (df_full <= 0) + pvals_included(ii) = Inf; + else + se_i = (bintfull(ii+1,2) - bfull(ii+1)) / tinv (0.975, df_full); + if (se_i > 0 && isfinite (se_i)) + t_i = bfull(ii+1) / se_i; + pvals_included(ii) = 2 * (1 - tcdf (abs (t_i), df_full)); + else + pvals_included(ii) = Inf; + endif + endif + endfor - X_use = sort ([X_forced, free_idx(X_step)]); - endif + % Map removable predictors to positions within 'included' + removable_positions = arrayfun (@(x) find (included == x, 1), removable); + [maxp, pos] = max (pvals_included(removable_positions)); - ## Validate InModel - - if (! isempty (InModel)) - if (numel (InModel) != p) - error ("stepwisefit: InModel length must match number of predictors"); + if (maxp > PRemove) + cur(removable(pos)) = false; + endif endif - endif - if (isempty (PRemove)) - PRemove = max (PEnter, 0.1); - endif + % ----------------------- + % Convergence: structural (model unchanged) + % ----------------------- + if (isequal (cur, prev)) + break; + endif + + prev = cur; + endwhile - if (PRemove < PEnter) - error ("stepwisefit: PRemove must be greater than or equal to PEnter"); - endif + % Final set of selected predictors + X_use = find (cur); ## Final regression on selected predictors Xfinal = [ones(n,1), Xc(:, X_use)]; @@ -275,6 +386,12 @@ df = n - columns (Xfinal); + if (df <= 0) + % Not enough residual degrees of freedom to estimate SE reliably. + se(:) = NaN; + pval(:) = NaN; + endif + ## Included predictors b(X_use) = B(2:end); se(X_use) = (BINT(2:end,2) - B(2:end)) ./ tinv (0.975, df); @@ -338,17 +455,21 @@ stats.xr = xr; covb = NaN (p+1, p+1); - covB = (stats.rmse^2) * inv (Xfinal' * Xfinal); + covB = (stats.rmse^2) * pinv (Xfinal' * Xfinal); idx = [1, X_use + 1]; covb(idx, idx) = covB; stats.covb = covb; - - stats.fstat = ((stats.SStotal - stats.SSresid) / stats.df0) ... - / (stats.SSresid / stats.dfe); - - stats.pval = 1 - fcdf (stats.fstat, stats.df0, stats.dfe); + + if (stats.df0 > 0) + stats.fstat = ((stats.SStotal - stats.SSresid) / stats.df0) ... + / (stats.SSresid / stats.dfe); + stats.pval = 1 - fcdf (stats.fstat, stats.df0, stats.dfe); + else + stats.fstat = NaN; + stats.pval = NaN; + endif history = struct (); history.in = finalmodel; From 5602dd80fd9ab9103374a29698fca8568afa9203 Mon Sep 17 00:00:00 2001 From: jayant chauhan <0001jayant@gmail.com> Date: Mon, 22 Dec 2025 03:44:38 +0530 Subject: [PATCH 20/23] stepwisefit : textinfo update --- inst/stepwisefit.m | 67 ++++++++++++++++++++++++++-------------------- 1 file changed, 38 insertions(+), 29 deletions(-) diff --git a/inst/stepwisefit.m b/inst/stepwisefit.m index 72ec172c9..9bcdc6f6f 100644 --- a/inst/stepwisefit.m +++ b/inst/stepwisefit.m @@ -16,21 +16,28 @@ ## ## You should have received a copy of the GNU General Public License along with ## this program; if not, see . - ## -*- texinfo -*- -## @deftypefn {statistics} {@var{b}, @var{se}, @var{pval}, @var{finalmodel}, @var{stats}, @var{nextstep}, @var{history}} = stepwisefit (@var{X}, @var{y}, @var{varargin}) +## @deftypefn {statistics} {} stepwisefit (@var{X}, @var{y}) +## @deftypefnx {statistics} {@var{b} =} stepwisefit (@var{X}, @var{y}) +## @deftypefnx {statistics} {@var{b}, @var{se}, @var{pval}, @var{finalmodel}, @var{stats}, @var{nextstep}, @var{history} =} stepwisefit (@var{X}, @var{y}, @var{varargin}) ## -## Perform stepwise linear regression with extended diagnostic outputs. +## Perform stepwise linear regression using conditional p-value criteria. ## -## @code{stepwisefit} provides additional regression diagnostics, structured -## outputs, and partial MATLAB-compatible Name–Value argument handling. +## @code{stepwisefit} fits a linear regression model to response vector +## @var{y} using predictor matrix @var{X} and performs stepwise variable +## selection based on hypothesis tests for individual regression coefficients. ## -## Predictor selection is performed internally using a MATLAB-compatible stepwise -## procedure based on conditional p-values. -## After variable selection, the final regression model is -## explicitly refit using @code{regress} to compute coefficient estimates -## and inferential statistics for both included and excluded predictors. +## At each iteration, predictors not currently in the model are tested for +## inclusion using partial F- or t-tests. The predictor with the smallest +## p-value below the entry threshold is added. Predictors currently in the +## model (excluding forced predictors) are then tested for removal, and the +## predictor with the largest p-value exceeding the removal threshold is +## removed. The procedure repeats until the model stabilizes or the maximum +## number of iterations is reached. ## +## After variable selection, the final regression model is refit using +## @code{regress} to compute coefficient estimates and inferential statistics +## for both included and excluded predictors. ## ## @subheading Arguments ## @@ -42,28 +49,29 @@ ## @var{y} is an @var{n}-by-1 numeric response vector. ## ## @item -## Optional Name–Value pairs may be provided to control model selection. +## Optional Name–Value pairs may be supplied to control the stepwise +## selection procedure. ## @end itemize ## ## @subheading Name–Value Arguments ## ## @table @asis ## @item @qcode{"InModel"} -## Logical row vector of length @var{p} specifying an initial model. -## Predictors marked @code{true} are treated as initially included. +## Logical row vector of length @var{p} specifying predictors that are initially +## included in the model. ## ## @item @qcode{"Keep"} -## Logical row vector of length @var{p} specifying predictors that must -## always be included in the final model. +## Logical row vector of length @var{p} specifying predictors that must remain +## in the model and are never removed during stepwise selection. ## ## @item @qcode{"PEnter"} -## Scalar significance level in the open interval (0,1) specifying the -## entry threshold for stepwise selection. Default is @code{0.05}. +## Scalar significance level in the open interval (0,1) specifying the maximum +## p-value required for a predictor to enter the model. Default is @code{0.05}. ## ## @item @qcode{"PRemove"} -## Scalar significance level in the open interval (0,1) specifying the -## removal threshold for stepwise selection. If not specified, a default -## value greater than or equal to @qcode{"PEnter"} is used. +## Scalar significance level in the open interval (0,1) specifying the minimum +## p-value required for a predictor to be removed from the model. If not +## specified, a default value greater than or equal to @qcode{"PEnter"} is used. ## ## @item @qcode{"MaxIter"} ## Positive integer specifying the maximum number of stepwise iterations. @@ -71,12 +79,12 @@ ## ## @item @qcode{"Scale"} ## Either @qcode{"on"} or @qcode{"off"}. When enabled, predictors are -## standardized prior to stepwise selection only. Final coefficients -## are always reported on the original data scale. +## standardized prior to stepwise selection only. Final regression +## coefficients are always reported on the original data scale. ## ## @item @qcode{"Display"} -## Either @qcode{"on"} or @qcode{"off"}. Currently accepted for interface -## compatibility but does not affect output. +## Either @qcode{"on"} or @qcode{"off"}. Accepted for compatibility but +## currently does not affect output. ## @end table ## ## @subheading Return Values @@ -84,7 +92,7 @@ ## @itemize @bullet ## @item ## @var{b} is a @var{p}-by-1 vector of regression coefficients. Coefficients -## corresponding to excluded predictors are estimated conditionally. +## for excluded predictors are computed conditionally. ## ## @item ## @var{se} is a @var{p}-by-1 vector of standard errors. @@ -93,8 +101,8 @@ ## @var{pval} is a @var{p}-by-1 vector of two-sided p-values. ## ## @item -## @var{finalmodel} is a logical row vector indicating predictors selected -## in the final model. +## @var{finalmodel} is a logical row vector indicating which predictors are +## included in the final model. ## ## @item ## @var{stats} is a structure containing regression diagnostics, including @@ -102,8 +110,8 @@ ## F-statistic, and related quantities. ## ## @item -## @var{nextstep} is a scalar placeholder indicating whether an additional -## stepwise iteration is recommended. Currently always zero. +## @var{nextstep} is a scalar indicating whether an additional stepwise +## iteration is recommended. Currently always zero. ## ## @item ## @var{history} is a structure summarizing the final model state, including @@ -113,6 +121,7 @@ ## @seealso{regress} ## @end deftypefn + function [b, se, pval, finalmodel, stats, nextstep, history] = ... stepwisefit (X, y, varargin) From 14a641dd8854c5405f6a315c173bb5985723f85f Mon Sep 17 00:00:00 2001 From: jayant chauhan <0001jayant@gmail.com> Date: Tue, 23 Dec 2025 20:59:06 +0530 Subject: [PATCH 21/23] stepwisefit: use pairedArgs, fix comment style, clean up tests --- inst/stepwisefit.m | 168 ++++++++++++++------------------------------- 1 file changed, 51 insertions(+), 117 deletions(-) diff --git a/inst/stepwisefit.m b/inst/stepwisefit.m index 9bcdc6f6f..35a901af1 100644 --- a/inst/stepwisefit.m +++ b/inst/stepwisefit.m @@ -148,74 +148,30 @@ ## Parse Name–Value pairs InModel = []; Display = "on"; - % MATLAB-compatible defaults + ## MATLAB-compatible defaults PEnter = 0.05; PRemove = []; Scale = "off"; MaxIter = Inf; Keep = []; + ## Parse Name-Value paired arguments using pairedArgs + optNames = {"InModel", "Display", "PEnter", "PRemove", ... + "Scale", "MaxIter", "Keep"}; + dfValues = {[], "on", 0.05, [], "off", Inf, []}; - if (mod (numel (varargin), 2) != 0) - error ("stepwisefit: Name–Value arguments must come in pairs"); - endif - - for k = 1:2:numel (varargin) - name = varargin{k}; - value = varargin{k+1}; - - if (! (ischar (name) || isstring (name))) - error ("stepwisefit: Name–Value keys must be strings"); - endif - name = char (name); - - switch lower (name) - case "inmodel" - if (! islogical (value)) - error ("stepwisefit: InModel must be a logical vector"); - endif - InModel = value(:).'; - case "display" - if (! any (strcmpi (value, {"on", "off"}))) - error ("stepwisefit: Display must be 'on' or 'off'"); - endif - Display = lower (value); - - case "penter" - if (! isscalar (value) || ! isnumeric (value) || value <= 0 || value >= 1) - error ("stepwisefit: PEnter must be a scalar strictly between 0 and 1"); - endif - PEnter = value; - - case "premove" - if (! isscalar (value) || ! isnumeric (value) || value <= 0 || value >= 1) - error ("stepwisefit: PRemove must be a scalar strictly between 0 and 1"); - endif - PRemove = value; + [InModel, Display, PEnter, PRemove, Scale, MaxIter, Keep, args] = ... + pairedArgs (optNames, dfValues, varargin(:)); - case "scale" - if (! any (strcmpi (value, {"on", "off"}))) - error ("stepwisefit: Scale must be 'on' or 'off'"); - endif - Scale = lower (value); - - case "maxiter" - if (! isscalar (value) || value <= 0 || fix (value) != value) - error ("stepwisefit: MaxIter must be a positive integer"); - endif - MaxIter = value; - - case "keep" - if (! islogical (value)) - error ("stepwisefit: Keep must be a logical vector"); - endif - Keep = value(:).'; - - otherwise - error ("stepwisefit: Name–Value option '%s' not supported", name); + ## Validate Display + if (! any (strcmpi (Display, {"on", "off"}))) + error ("stepwisefit: Display must be 'on' or 'off'"); + endif + Display = lower (Display); - endswitch - endfor + if (! isempty (args)) + error ("stepwisefit: unrecognized input arguments"); + endif ## Handle missing values wasnan = any (isnan ([X y]), 2); @@ -225,7 +181,7 @@ n = rows (Xc); p = columns (Xc); - % Validate Keep and InModel lengths if provided + ## Validate Keep and InModel lengths if provided if (! isempty (Keep) && numel (Keep) != p) error ("stepwisefit: Keep length must match number of predictors"); endif @@ -237,7 +193,7 @@ Keep = false(1, p); endif - % Default PRemove if unset + ## Default PRemove if unset if (isempty (PRemove)) PRemove = max (PEnter, 0.1); endif @@ -246,10 +202,16 @@ error ("stepwisefit: PRemove must be greater than or equal to PEnter"); endif + ## Validate Scale + if (! any (strcmpi (Scale, {"on", "off"}))) + error ("stepwisefit: Scale must be 'on' or 'off'"); + endif + Scale = lower (Scale); + if (strcmp (Scale, "on")) muX = mean (Xc, 1); sigX = std (Xc, 0, 1); - sigX(sigX == 0) = 1; % prevent division by zero + sigX(sigX == 0) = 1; ## prevent division by zero Xs = (Xc - muX) ./ sigX; else Xs = Xc; @@ -263,35 +225,33 @@ cur = false (1, p); endif - % Ensure Keep predictors are always in model (already set) + ## Ensure Keep predictors are always in model (already set) cur(Keep) = true; prev = cur; iter = 0; - % Iterative selection: each iteration attempts ADD then REMOVE. + ## Iterative selection: each iteration attempts ADD then REMOVE. while (iter < MaxIter) iter = iter + 1; - % ----------------------- - % ADD phase: evaluate candidates by conditional p-value - % ----------------------- + ## ADD phase: evaluate candidates by conditional p-value candidates = find (~cur); if (! isempty (candidates)) best_p = Inf; best_j = -1; - cols = find (cur); % current included predictors (may be empty) + cols = find (cur); ## current included predictors (may be empty) for idx = 1:numel (candidates) j = candidates(idx); - % Build trial design: intercept, current included (if any), candidate j + ## Build trial design: intercept, current included (if any), candidate j if (isempty (cols)) Xtry = [ones(n,1), Xs(:, j)]; else Xtry = [ones(n,1), Xs(:, cols), Xs(:, j)]; endif - % Regress and compute candidate p-value; skip singular/failed fits + ## Regress and compute candidate p-value; skip singular/failed fits try [btry, binttry] = regress (yc, Xtry); catch @@ -311,30 +271,28 @@ tstat = btry(end) / se_try; p_candidate = 2 * (1 - tcdf (abs (tstat), df_try)); - % Deterministic tie-break: first encountered when nearly equal + ## Deterministic tie-break: first encountered when nearly equal if (p_candidate < best_p - eps) best_p = p_candidate; best_j = j; endif endfor - % Add best candidate only if it meets the PEnter threshold + ## Add best candidate only if it meets the PEnter threshold if (best_j > 0 && best_p < PEnter) cur(best_j) = true; endif endif - % ----------------------- - % REMOVE phase: compute conditional p-values for included predictors, - % remove worst (largest p) among removable predictors - % ----------------------- + ## REMOVE phase: compute conditional p-values for included predictors, + ## remove worst (largest p) among removable predictors included = find (cur); - removable = setdiff (included, find (Keep)); % never remove Keep + removable = setdiff (included, find (Keep)); ## never remove Keep if (! isempty (removable)) Xfull = [ones(n,1), Xs(:, included)]; - % Regress current full model; guard against singular / failed fits + ## Regress current full model; guard against singular / failed fits try [bfull, bintfull] = regress (yc, Xfull); catch @@ -359,7 +317,7 @@ endif endfor - % Map removable predictors to positions within 'included' + ## Map removable predictors to positions within 'included' removable_positions = arrayfun (@(x) find (included == x, 1), removable); [maxp, pos] = max (pvals_included(removable_positions)); @@ -368,9 +326,7 @@ endif endif - % ----------------------- - % Convergence: structural (model unchanged) - % ----------------------- + ## Convergence: structural (model unchanged) if (isequal (cur, prev)) break; endif @@ -379,14 +335,15 @@ endwhile - % Final set of selected predictors + ## Final set of selected predictors X_use = find (cur); ## Final regression on selected predictors Xfinal = [ones(n,1), Xc(:, X_use)]; - [B, BINT, R, RINT, regstats] = regress (yc, Xfinal); + ## regstats intentionally unused; retained for MATLAB parity + [B, BINT, R, RINT, regstats] = regress (yc, Xfinal); - Rresid = R(:); % freeze residual vector + Rresid = R(:); ## freeze residual vector ## Allocate outputs b = zeros (p,1); @@ -396,7 +353,7 @@ df = n - columns (Xfinal); if (df <= 0) - % Not enough residual degrees of freedom to estimate SE reliably. + ## Not enough residual degrees of freedom to estimate SE reliably. se(:) = NaN; pval(:) = NaN; endif @@ -448,13 +405,13 @@ if (! isempty (X_use)) Z = [ones(n,1), Xc(:, X_use)]; - P = Z / (Z' * Z) * Z'; % projection matrix + P = Z / (Z' * Z) * Z'; ## projection matrix for k = 1:numel (excluded) j = excluded(k); xr(:,k) = Xc(:,j) - P * Xc(:,j); endfor else - % intercept-only case + ## intercept-only case for k = 1:numel (excluded) j = excluded(k); xr(:,k) = Xc(:,j) - mean (Xc(:,j)); @@ -485,8 +442,8 @@ history.df0 = stats.df0; history.rmse = stats.rmse; - % Coefficient history (excluding intercept) - % MATLAB stores this as p-by-k; here k = 1 + ## Coefficient history (excluding intercept) + ## MATLAB stores this as p-by-k; here k = 1 Bhist = zeros (p, 1); Bhist(finalmodel) = b(finalmodel); history.B = Bhist; @@ -496,9 +453,7 @@ endfunction - %!test -%! % S1.2: default full outputs (numeric contract) %! X = [7 26 6 60; %! 1 29 15 52; %! 11 56 8 20; @@ -525,9 +480,7 @@ %! assert (stats.df0, 2); %! assert (stats.dfe, 10); %! assert (stats.intercept, 103.0974, 1e-4); - %!test -%! % S3.1 (numeric kernel only): forced baseline selection %! X = [ %! 12.0 4 120 95 2600; %! 11.5 6 200 110 3000; @@ -552,7 +505,6 @@ %! assert (isnumeric (pval)); %! assert (stats.rmse > 0); %! assert (isfinite (stats.intercept)); - %!test %! X = randn (30, 4); %! y = randn (30, 1); @@ -567,7 +519,6 @@ %! for k = 1:numel (required_fields) %! assert (isfield (stats, required_fields{k})); %! endfor - %!test %! X = randn (40, 5); %! y = randn (40, 1); @@ -582,7 +533,6 @@ %! assert (rows (stats.TSTAT) == p); %! assert (rows (stats.PVAL) == p); %! assert (size (stats.covb), [p+1, p+1]); - %!test %! X = randn (25, 3); %! y = randn (25, 1); @@ -593,7 +543,6 @@ %! %! rmse_calc = sqrt (stats.SSresid / stats.dfe); %! assert (rmse_calc, stats.rmse, 1e-10); - %!test %! X = randn (50, 6); %! y = randn (50, 1); @@ -609,8 +558,6 @@ %! assert (isnan (stats.fstat)); %! assert (isnan (stats.pval)); %! endif - - %!test %! X = randn (35, 4); %! y = randn (35, 1); @@ -619,7 +566,6 @@ %! k = sum (finalmodel); %! assert (size (stats.xr, 2) == p - k); %! assert (all (isfinite (stats.xr(:)))); - %!test %! X = randn (35, 4); %! y = randn (35, 1); @@ -632,7 +578,6 @@ %! ortho = Xfinal' * stats.xr(:,j); %! assert (max (abs (ortho(:))) < 1e-6); %! endfor - %!test %! X = randn (40, 5); %! y = randn (40, 1); @@ -644,43 +589,32 @@ %! assert (isfield (history, "df0")); %! assert (isfield (history, "rmse")); %! assert (isfield (history, "B")); -%! %! assert (isequal (history.in, finalmodel)); %! assert (history.df0 == stats.df0); %! assert (history.rmse == stats.rmse); %! assert (rows (history.B) == columns (X)); - %!test %! X = randn (20,4); %! y = randn (20,1); %! stepwisefit (X,y,'Keep',[true false true false]); - -%!test -%! X = randn (20,4); -%! y = randn (20,1); -%! try -%! stepwisefit (X,y,'Keep',[true false]); -%! error ("Expected error not thrown"); -%! catch -%! assert (true); -%! end_try_catch - %!test %! X = randn (30, 4); %! y = randn (30, 1); %! keep = [true false false false]; %! [~,~,~,finalmodel] = stepwisefit (X, y, "Keep", keep); %! assert (finalmodel(1) == true); - %!test %! X = randn (40, 6); %! y = randn (40, 1); %! [~,~,~,finalmodel] = stepwisefit (X, y, "MaxIter", 1); %! assert (islogical (finalmodel)); - %!test %! X = randn (50, 5); %! y = randn (50, 1); %! [b1] = stepwisefit (X, y); %! [b2] = stepwisefit (X, y, "Scale", "on"); %! assert (rows (b1) == rows (b2)); +%!test +%! X = randn (20,4); +%! y = randn (20,1); +%! fail ("stepwisefit (X,y,'Keep',[true false])"); From a4d9bca8e194440e7b87c3fc996ee49ff89d2916 Mon Sep 17 00:00:00 2001 From: jayant chauhan <0001jayant@gmail.com> Date: Tue, 23 Dec 2025 23:39:16 +0530 Subject: [PATCH 22/23] stepwisefit: input validation and tests --- inst/stepwisefit.m | 91 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 77 insertions(+), 14 deletions(-) diff --git a/inst/stepwisefit.m b/inst/stepwisefit.m index 35a901af1..6fe97f661 100644 --- a/inst/stepwisefit.m +++ b/inst/stepwisefit.m @@ -163,12 +163,56 @@ [InModel, Display, PEnter, PRemove, Scale, MaxIter, Keep, args] = ... pairedArgs (optNames, dfValues, varargin(:)); + ## Semantic validation for Name-Value options + ## Validate Display if (! any (strcmpi (Display, {"on", "off"}))) error ("stepwisefit: Display must be 'on' or 'off'"); endif Display = lower (Display); + ## Validate Scale + if (! any (strcmpi (Scale, {"on", "off"}))) + error ("stepwisefit: Scale must be 'on' or 'off'"); + endif + Scale = lower (Scale); + + ## Validate PEnter + if (! (isscalar (PEnter) && isnumeric (PEnter) && PEnter > 0 && PEnter < 1)) + error ("stepwisefit: PEnter must be a scalar strictly between 0 and 1"); + endif + + ## Validate PRemove (if provided) + if (! isempty (PRemove)) + if (! (isscalar (PRemove) && isnumeric (PRemove) && PRemove > 0 && PRemove < 1)) + error ("stepwisefit: PRemove must be a scalar strictly between 0 and 1"); + endif + if (PRemove < PEnter) + error ("stepwisefit: PRemove must be greater than or equal to PEnter"); + endif + endif + + ## Validate MaxIter + if (! (isscalar (MaxIter) && isnumeric (MaxIter) && MaxIter > 0 && fix (MaxIter) == MaxIter)) + error ("stepwisefit: MaxIter must be a positive integer"); + endif + + ## Validate Keep and InModel type (if provided) + if (! isempty (Keep) && ! islogical (Keep)) + error ("stepwisefit: Keep must be a logical vector"); + endif + if (! isempty (InModel) && ! islogical (InModel)) + error ("stepwisefit: InModel must be a logical vector"); + endif + + ## Validate lengths (these already exist in your file, but keep them here for order) + if (! isempty (Keep) && numel (Keep) != p) + error ("stepwisefit: Keep length must match number of predictors"); + endif + if (! isempty (InModel) && numel (InModel) != p) + error ("stepwisefit: InModel length must match number of predictors"); + endif + if (! isempty (args)) error ("stepwisefit: unrecognized input arguments"); endif @@ -181,14 +225,6 @@ n = rows (Xc); p = columns (Xc); - ## Validate Keep and InModel lengths if provided - if (! isempty (Keep) && numel (Keep) != p) - error ("stepwisefit: Keep length must match number of predictors"); - endif - if (! isempty (InModel) && numel (InModel) != p) - error ("stepwisefit: InModel length must match number of predictors"); - endif - if (isempty (Keep)) Keep = false(1, p); endif @@ -202,12 +238,6 @@ error ("stepwisefit: PRemove must be greater than or equal to PEnter"); endif - ## Validate Scale - if (! any (strcmpi (Scale, {"on", "off"}))) - error ("stepwisefit: Scale must be 'on' or 'off'"); - endif - Scale = lower (Scale); - if (strcmp (Scale, "on")) muX = mean (Xc, 1); sigX = std (Xc, 0, 1); @@ -618,3 +648,36 @@ %! X = randn (20,4); %! y = randn (20,1); %! fail ("stepwisefit (X,y,'Keep',[true false])"); + +## Test input validation +%!error ... +%! stepwisefit () +%!error ... +%! stepwisefit (ones (2,2,2), [1;2]) +%!error ... +%! stepwisefit (ones (3,2), ones (2,1)) +%!error ... +%! stepwisefit (randn (10,2), randn (10,1), "UnknownOpt", 5) +%!error ... +%! stepwisefit (randn (10,2), randn (10,1), "Display", "maybe") +%!error ... +%! stepwisefit (randn (10,2), randn (10,1), "Scale", 123) +%!error ... +%! stepwisefit (randn (10,2), randn (10,1), "PEnter", -0.1) +%!error ... +%! stepwisefit (randn (10,2), randn (10,1), "PRemove", 1.5) +%!error ... +%! stepwisefit (randn (10,2), randn (10,1), ... +%! "PEnter", 0.05, "PRemove", 0.01) +%!error ... +%! stepwisefit (randn (10,2), randn (10,1), "MaxIter", -2) +%!error ... +%! stepwisefit (randn (10,2), randn (10,1), "MaxIter", 2.5) +%!error ... +%! stepwisefit (randn (10,2), randn (10,1), "Keep", [1 0]) +%!error ... +%! stepwisefit (randn (10,2), randn (10,1), "InModel", [1 0]) +%!error ... +%! stepwisefit (randn (10,4), randn (10,1), "Keep", [true false]) +%!error ... +%! stepwisefit (randn (10,4), randn (10,1), "InModel", true) From 2ba21c63c985f814451a81bfd4aa24f2b4287022 Mon Sep 17 00:00:00 2001 From: jayant chauhan <0001jayant@gmail.com> Date: Tue, 30 Dec 2025 13:19:02 +0530 Subject: [PATCH 23/23] small fixes --- inst/stepwisefit.m | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/inst/stepwisefit.m b/inst/stepwisefit.m index 6fe97f661..1e84b71ed 100644 --- a/inst/stepwisefit.m +++ b/inst/stepwisefit.m @@ -145,6 +145,11 @@ y = y(:); + ## Validate row compatibility BEFORE any concatenation +if (rows (X) != rows (y)) + error ("stepwisefit: X must be a matrix and y a vector"); +endif + ## Parse Name–Value pairs InModel = []; Display = "on"; @@ -197,6 +202,14 @@ error ("stepwisefit: MaxIter must be a positive integer"); endif + ## Handle missing values + wasnan = any (isnan ([X y]), 2); + Xc = X(!wasnan, :); + yc = y(!wasnan); + + n = rows (Xc); + p = columns (Xc); + ## Validate Keep and InModel type (if provided) if (! isempty (Keep) && ! islogical (Keep)) error ("stepwisefit: Keep must be a logical vector"); @@ -217,14 +230,6 @@ error ("stepwisefit: unrecognized input arguments"); endif - ## Handle missing values - wasnan = any (isnan ([X y]), 2); - Xc = X(!wasnan, :); - yc = y(!wasnan); - - n = rows (Xc); - p = columns (Xc); - if (isempty (Keep)) Keep = false(1, p); endif