From 7693a17fb882fdee6cbf23920b4c99a15d2ae9ef Mon Sep 17 00:00:00 2001 From: Don Chacko Date: Wed, 24 Dec 2025 20:30:13 +0300 Subject: [PATCH 1/2] randsample.m: tiny doc fix as strcat trims trailing whitespace --- inst/randsample.m | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/randsample.m b/inst/randsample.m index d96258aa..a4e9f544 100644 --- a/inst/randsample.m +++ b/inst/randsample.m @@ -51,8 +51,8 @@ endif if k < 0 || ( k > n && !replacement ) - error (strcat ("randsample: The input k must be a non-negative ", ... - "integer. Sampling without replacement needs k <= n.")); + error (strcat ("randsample: The input k must be a non-negative", ... + " integer. Sampling without replacement needs k <= n.")); endif if (all (length (w) != [0, n])) From 8efe7a58c8fe0479e45902d453e1bb1bd0af9173 Mon Sep 17 00:00:00 2001 From: Don Chacko Date: Wed, 24 Dec 2025 22:36:16 +0300 Subject: [PATCH 2/2] randsample.m: fix support for categorical vectors, add vector test cases, and style fixes --- inst/randsample.m | 88 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 80 insertions(+), 8 deletions(-) diff --git a/inst/randsample.m b/inst/randsample.m index a4e9f544..755df4fa 100644 --- a/inst/randsample.m +++ b/inst/randsample.m @@ -1,5 +1,5 @@ ## Copyright (C) 2014 - Nir Krakauer -## Copyright (C) 2022 Andreas Bertsatos +## Copyright (C) 2025 Andreas Bertsatos ## ## This file is part of the statistics package for GNU Octave. ## @@ -24,7 +24,7 @@ ## Sample elements from a vector. ## ## Returns @var{k} random elements from a vector @var{v} with @var{n} elements, -## sampled without or with @var{replacement}. +## sampled without or with @var{replacement}, with an optional weight vector. ## ## If @var{v} is a scalar, samples from 1:@var{v}. ## @@ -38,13 +38,13 @@ ## @seealso{datasample, randperm} ## @end deftypefn -function y = randsample (v, k, replacement=false ,w=[]) +function y = randsample (v, k, replacement=false, w=[]) if (isscalar (v) && isreal (v)) n = v; vector_v = false; elseif (isvector (v)) - n = numel (v); + n = length (v); vector_v = true; else error ("randsample: The input v must be a vector or positive integer."); @@ -57,7 +57,7 @@ if (all (length (w) != [0, n])) error ("randsample: the size w (%d) must match the first argument (%d)", ... - length(w), n); + length (w), n); endif @@ -78,8 +78,8 @@ if (! any (Idup)) break else - Idup(idx) = Idup; # find duplicates in original vector - w(y) = 0; # don't permit resampling + Idup (idx) = Idup; # find duplicates in original vector + w (y) = 0; # don't permit resampling ## remove duplicates, then sample again y = [y(! Idup), (weighted_replacement (sum (Idup), w))]; endif @@ -95,7 +95,7 @@ function y = weighted_replacement (k, w) w = w / sum (w); - w = [0, cumsum(w(:))']; + w = [0, (cumsum (w(:))')]; ## distribute k uniform random deviates based on the given weighting y = arrayfun (@(x) find (w <= x, 1, "last"), rand (1, k)); endfunction @@ -135,3 +135,75 @@ %! assert (size(x), [k 1]); %! x = randsample(k, k, false, 1:k); %! assert (size(x), [1 k]); + +%!test +%! n = 20; +%! k = 5; +%! p = 1:n; +%! x = randsample(p, k); +%! assert (isnumeric(x)); +%! assert (size(x), [1 k]); +%! x = randsample(p, k, true); +%! assert (isnumeric(x)); +%! assert (size(x), [1 k]); +%! x = randsample(p, k, false); +%! assert (isnumeric(x)); +%! assert (size(x), [1 k]); +%! k = 30; +%! x = randsample(p, k, true); +%! assert (isnumeric(x)); +%! assert (size(x), [1 k]); + +%!test +%! p = categorical({'a', 'b', 'c', 'd', 'a'}); +%! k = 3; +%! x = randsample(p, k, true); +%! assert (iscategorical(x)); +%! assert (size(x), [1 k]); +%! x = randsample(p, k, false); +%! assert (iscategorical(x)); +%! assert (size(x), [1 k]); +%! k = 30; +%! x = randsample(p, k, true, ones(length(p),1)); +%! assert (iscategorical(x)); +%! assert (size(x), [1 k]); + +%!test +%! p = {'a', 'b', 'c', 'd', 'a'}; +%! k = 2; +%! x = randsample(p, k, true); +%! assert (iscell(x)); +%! assert (size(x), [1 k]); +%! x = randsample(p, k, false); +%! assert (iscell(x)); +%! assert (size(x), [1 k]); +%! k = 30; +%! x = randsample(p, k, true, ones(length(p),1)); +%! assert (iscell(x)); +%! assert (size(x), [1 k]); + +%!test +%! p = string({'a', 'b', 'c', 'd', 'a'}); +%! k = 2; +%! x = randsample(p, k, true); +%! assert (isstring(x)); +%! assert (size(x), [1 k]); +%! x = randsample(p, k, false); +%! assert (isstring(x)); +%! assert (size(x), [1 k]); +%! k = 30; +%! x = randsample(p, k, true, ones(length(p),1)); +%! assert (isstring(x)); +%! assert (size(x), [1 k]); + +%!error ... +%! randsample ([1 2 3; 1 2 3], 5) + +%!error ... +%! randsample (10, -1) + +%!error ... +%! randsample (10, 100) + +%!error ... +%! randsample (10, 5, false, ones(5,1))