Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 9 additions & 8 deletions R/data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -495,8 +495,9 @@ replace_dot_alias = function(e) {
allow.cartesian = TRUE
}
# TODO: collect all '==' ops first to speeden up Cnestedid
rightcols = colnamesInt(x, names(on), check_dups=FALSE)
leftcols = colnamesInt(i, unname(on), check_dups=FALSE)
rightcols = colnamesInt(x, names(on), check_dups=FALSE, skip_absent=FALSE, context="x table's columns in on= join")
leftcols = colnamesInt(i, unname(on), check_dups=FALSE,
skip_absent=FALSE, context="i table's columns in on= join")
} else {
## missing on
rightcols = chmatch(key(x), names_x) # NAs here (i.e. invalid data.table) checked in bmerge()
Expand Down Expand Up @@ -2490,7 +2491,7 @@ na.omit.data.table = function(object, cols = seq_along(object), invert = FALSE,
if (!cedta()) return(NextMethod()) # nocov
if ( !missing(invert) && is.na(as.logical(invert)) )
stopf("Argument 'invert' must be logical TRUE/FALSE")
cols = colnamesInt(object, cols, check_dups=FALSE)
cols = colnamesInt(object, cols, check_dups=FALSE, skip_absent=FALSE, context="setnames")
ix = .Call(Cdt_na, object, cols)
# forgot about invert with no NA case, #2660
if (invert) {
Expand Down Expand Up @@ -2653,7 +2654,7 @@ copy = function(x) {

.shallow = function(x, cols = NULL, retain.key = FALSE, unlock = FALSE) {
wasnull = is.null(cols)
cols = colnamesInt(x, cols, check_dups=FALSE)
cols = colnamesInt(x, cols, check_dups=FALSE, skip_absent=FALSE, context="column selection")
ans = .Call(Cshallowwrapper, x, cols) # copies VECSXP only

if(retain.key){
Expand Down Expand Up @@ -2840,12 +2841,12 @@ setcolorder = function(x, neworder=key(x), before=NULL, after=NULL, skip_absent=
stopf("Provide either before= or after= but not both")
if (length(before)>1L || length(after)>1L)
stopf("before=/after= accept a single column name or number, not more than one")
neworder = colnamesInt(x, neworder, check_dups=FALSE, skip_absent=skip_absent) # dups are now checked inside Csetcolorder below
neworder = colnamesInt(x, neworder, check_dups=FALSE, skip_absent=skip_absent, context="setcolorder") # dups are now checked inside Csetcolorder below
neworder = neworder[neworder != 0L] # tests 498.11, 498.13 fail w/o this
if (length(before))
neworder = c(setdiff(seq_len(colnamesInt(x, before) - 1L), neworder), neworder)
neworder = c(setdiff(seq_len(colnamesInt(x, before, check_dups=FALSE, skip_absent=FALSE, context="setcolorder 'before'") - 1L), neworder), neworder)
if (length(after))
neworder = c(setdiff(seq_len(colnamesInt(x, after)), neworder), neworder)
neworder = c(setdiff(seq_len(colnamesInt(x, after, check_dups=FALSE, skip_absent=FALSE, context="setcolorder 'after'")), neworder), neworder)
if (length(neworder) != length(x)) {
# pad by the missing elements (checks inside Csetcolorder catch other mistakes)
neworder = c(neworder, setdiff(seq_along(x), neworder))
Expand Down Expand Up @@ -3144,7 +3145,7 @@ rleidv = function(x, cols=seq_along(x), prefix=NULL) {
} else if (!length(cols)) {
stopf("x is a list, 'cols' cannot be 0-length.")
}
cols = colnamesInt(x, cols, check_dups=FALSE)
cols = colnamesInt(x, cols, check_dups=FALSE, skip_absent=FALSE, context="column deletion")
ids = .Call(Crleid, x, cols)
if (!is.null(prefix)) ids = paste0(prefix, ids)
ids
Expand Down
2 changes: 1 addition & 1 deletion R/duplicated.R
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ unique.data.table = function(x, incomparables=FALSE, fromLast=FALSE, by=seq_alon
## unique.data.table has been refactored to simply call duplicated.data.table
## making the refactor unnecessary, but let's leave it here just in case
.duplicated.helper = function(x, by) {
cols = colnamesInt(x, by, check_dups=FALSE)
cols = colnamesInt(x, by, check_dups=FALSE, skip_absent=FALSE, context="duplicated/unique")
use.keyprefix = if (is.null(by)) FALSE else {
haskey(x) &&
length(by) <= length(key(x)) &&
Expand Down
2 changes: 1 addition & 1 deletion R/frank.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ frankv = function(x, cols=seq_along(x), order=1L, na.last=TRUE, ties.method=c("a
cols = 1L
x = as_list(x)
} else {
cols = colnamesInt(x, cols, check_dups=TRUE)
cols = colnamesInt(x, cols, check_dups=TRUE, skip_absent=FALSE, context="frank")
if (!length(cols))
stopf("x is a list, 'cols' can not be 0-length")
}
Expand Down
12 changes: 6 additions & 6 deletions R/mergelist.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ onkeys = function(x, y) {

# column index selection helper
someCols = function(x, cols, drop=character(), keep=character(), retain.order=FALSE) {
keep = colnamesInt(x, keep)
drop = colnamesInt(x, drop)
cols = colnamesInt(x, cols)
keep = colnamesInt(x, keep, check_dups=FALSE, skip_absent=FALSE, context="merge(keep=)")
drop = colnamesInt(x, drop, check_dups=FALSE, skip_absent=FALSE, context="merge(drop=)")
cols = colnamesInt(x, cols, check_dups=FALSE, skip_absent=FALSE, context="merge(cols=)")
ans = union(keep, setdiff(cols, drop))
if (!retain.order) return(ans)
sort(ans)
Expand Down Expand Up @@ -104,8 +104,8 @@ dtmerge = function(x, i, on, how, mult, join.many, void=FALSE, verbose) {
stopf("'on' must be non-zero length character vector")
if (mult == "all" && (how == "semi" || how == "anti"))
stopf("semi and anti joins must be used with mult!='all'")
icols = colnamesInt(i, on, check_dups=TRUE)
xcols = colnamesInt(x, on, check_dups=TRUE)
icols = colnamesInt(i, on, check_dups=TRUE, skip_absent=FALSE, context="merge 'i' table columns")
xcols = colnamesInt(x, on, check_dups=TRUE, skip_absent=FALSE, context="merge 'x' table columns")
ans = bmerge(i, x, icols, xcols, roll=0, rollends=c(FALSE, TRUE), nomatch=nomatch, mult=mult, ops=rep.int(1L, length(on)), verbose=verbose)
if (void) { ## void=T is only for the case when we want raise error for mult='error', and that would happen in above line
return(invisible(NULL))
Expand Down Expand Up @@ -338,7 +338,7 @@ mergelist_impl_ = function(l, on, cols, how, mult, join.many, copy) {
}
out.mem = vapply_1c(out, address)
if (copy)
.Call(CcopyCols, out, colnamesInt(out, names(out.mem)[out.mem %chin% unique(unlist(l.mem, recursive=FALSE))]))
.Call(CcopyCols, out, colnamesInt(out, names(out.mem)[out.mem %chin% unique(unlist(l.mem, recursive=FALSE))], check_dups=FALSE, skip_absent=FALSE, context="merge column copy"))
if (verbose)
catf("mergelist: merging %d tables, took %.3fs\n", n, proc.time()[[3L]] - p)
out
Expand Down
2 changes: 1 addition & 1 deletion R/setkey.R
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ forderv = function(x, by=seq_along(x), retGrp=FALSE, retStats=retGrp, sort=TRUE,
by = NULL
} else {
if (!length(x)) return(integer(0L)) # e.g. forderv(data.table(NULL)) and forderv(list()) return integer(0L))
by = colnamesInt(x, by, check_dups=FALSE)
by = colnamesInt(x, by, check_dups=FALSE, skip_absent=FALSE, context="setkey")
}
order = as.integer(order) # length and contents of order being +1/-1 is checked at C level
.Call(CforderReuseSorting, x, by, retGrp, retStats, sort, order, na.last, reuseSorting) # returns integer() if already sorted, regardless of sort=TRUE|FALSE
Expand Down
4 changes: 2 additions & 2 deletions R/setops.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ setdiff_ = function(x, y, by.x=seq_along(x), by.y=seq_along(y), use.names=FALSE)
if (!is.data.table(x) || !is.data.table(y)) stopf("x and y must both be data.tables")
# !ncol redundant since all 0-column data.tables have 0 rows
if (!nrow(x)) return(x)
by.x = colnamesInt(x, by.x, check_dups=TRUE)
by.x = colnamesInt(x, by.x, check_dups=TRUE, skip_absent=FALSE, context="set operation 'x' columns")
if (!nrow(y)) return(unique(x, by=by.x))
by.y = colnamesInt(y, by.y, check_dups=TRUE)
by.y = colnamesInt(y, by.y, check_dups=TRUE, skip_absent=FALSE, context="set operation 'y' columns")
if (length(by.x) != length(by.y)) stopf("length(by.x) != length(by.y)", domain=NA)
# factor in x should've factor/character in y, and vice-versa
for (a in seq_along(by.x)) {
Expand Down
2 changes: 1 addition & 1 deletion R/wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ fcase = function(..., default=NA) {
.Call(CfcaseR, parent.frame(), arg_list)
}

colnamesInt = function(x, cols, check_dups=FALSE, skip_absent=FALSE) .Call(CcolnamesInt, x, cols, check_dups, skip_absent)
colnamesInt = function(x, cols, check_dups=FALSE, skip_absent=FALSE, context=NULL) .Call(CcolnamesInt, x, cols, check_dups, skip_absent, context)

testMsg = function(status=0L, nx=2L, nk=2L) .Call(CtestMsgR, as.integer(status)[1L], as.integer(nx)[1L], as.integer(nk)[1L])

Expand Down
10 changes: 7 additions & 3 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -2977,7 +2977,7 @@ test(989, unique(dt, by='C'), dt[!duplicated(df[, 'C'])])
test(990, unique(dt, by=c('B', 'C')), dt[!duplicated(df[, c('B', 'C')])])
test(991, unique(dt, by=NULL), dt[!duplicated(df)])
test(991.1, unique(dt, by=4), error="received non-existing column*.*4")
test(991.2, unique(dt, by=c(1,3.1)), error="is type 'double' and one or more items in it are not whole integers")
test(991.2, unique(dt, by=c(1,3.1)), error="In setkey, argument specifying columns is type 'double' and one or more items in it are not whole integers")
test(991.3, unique(dt, by=2:3), dt[!duplicated(df[,c('B','C')])])
test(991.4, unique(dt, by=c('C','D','E')), error="received non-existing column*.*D")

Expand Down Expand Up @@ -21962,9 +21962,13 @@ test(2355.4, fread(txt, skip=0, fill=TRUE), data.table(V1 = c("a1", "b1", "c1"),

# re-overallocate in set if quota is reached #496 #1831 #4100
DT = data.table()
test(2356.1, options=c(datatable.alloccol=1L), {for (i in seq(10L)) set(DT, j = paste0("V",i), value = i); ncol(DT)}, 10L)
test(2356.1, options=c(datatable.alloccol=1L), {for (i in seq(10L)) set(DT, j=paste0("V",i), value=i); DT}, data.table(V1=1L,V2=2L,V3=3L,V4=4L,V5=5L,V6=6L,V7=7L,V8=8L,V9=9L,V10=10L))
DT = structure(list(a = 1, b = 2), class = c("data.table", "data.frame"))
test(2356.2, options=c(datatable.alloccol=1L), set(DT, j="c", value=3), data.table(a=1, b=2, c=3))
# ensure := and set are consistent if they need to overallocate
DT = data.table(); DT2 = data.table()
test(2356.3, options=c(datatable.alloccol=1L), {for (i in seq(10L)) set(DT, j = sprintf("V%d",i), value = i); DT}, {for (i in seq(10)) DT2[, sprintf("V%d",i) := i]; DT2})
test(2356.3, options=c(datatable.alloccol=1L), {for (i in seq(10L)) set(DT, j=paste0("V",i), value=i); for (i in seq(10L)) DT2[, (paste0("V",i)):=i]; fsetdiff(DT, DT2)}, data.table(V1=integer(),V2=integer(),V3=integer(),V4=integer(),V5=integer(),V6=integer(),V7=integer(),V8=integer(),V9=integer(),V10=integer()))

# Test for context-aware error messages (Issue #5039)
test(2357, data.table:::colnamesInt(data.table(a=1), "missing", context="test_context"),
error="In test_context, argument specifying columns received non-existing column(s): cols[1]='missing'")
2 changes: 1 addition & 1 deletion src/data.table.h
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ SEXP fitsInInt32R(SEXP x);
bool fitsInInt64(SEXP x);
SEXP fitsInInt64R(SEXP x);
bool allNA(SEXP x, bool errorForBadType);
SEXP colnamesInt(SEXP x, SEXP cols, SEXP check_dups, SEXP skip_absent);
SEXP colnamesInt(SEXP x, SEXP cols, SEXP check_dups, SEXP skip_absent, SEXP context);
bool INHERITS(SEXP x, SEXP char_);
SEXP copyAsPlain(SEXP x);
void copySharedColumns(SEXP x);
Expand Down
2 changes: 1 addition & 1 deletion src/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ static const R_CallMethodDef callMethods[] = {
{"CfrollfunR", (DL_FUNC)&frollfunR, -1},
{"CdllVersion", (DL_FUNC)&dllVersion, -1},
{"CnafillR", (DL_FUNC)&nafillR, -1},
{"CcolnamesInt", (DL_FUNC)&colnamesInt, -1},
{"CcolnamesInt", (DL_FUNC)&colnamesInt, 5},
{"CinitLastUpdated", (DL_FUNC)&initLastUpdated, -1},
{"Ccj", (DL_FUNC)&cj, -1},
{"Ccoalesce", (DL_FUNC)&coalesce, -1},
Expand Down
7 changes: 6 additions & 1 deletion src/nafill.c
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,12 @@ SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP nan_is_na_arg, SEXP inplace, S
obj = PROTECT(allocVector(VECSXP, 1)); protecti++; // wrap into list
SET_VECTOR_ELT(obj, 0, obj1);
}
SEXP ricols = PROTECT(colnamesInt(obj, cols, /* check_dups= */ ScalarLogical(TRUE), /* skip_absent= */ ScalarLogical(FALSE))); protecti++; // nafill cols=NULL which turns into seq_along(obj)
// Step 1: Create and protect the source string
SEXP context_str = PROTECT(mkString("nafill")); protecti++;

// Step 2: Call colnamesInt using that string and protect the result
SEXP ricols = PROTECT(colnamesInt(obj, cols, ScalarLogical(TRUE), ScalarLogical(FALSE), context_str));
protecti++;
x = PROTECT(allocVector(VECSXP, length(ricols))); protecti++;
int *icols = INTEGER(ricols);
for (int i=0; i<length(ricols); i++) {
Expand Down
19 changes: 12 additions & 7 deletions src/utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -116,13 +116,18 @@ SEXP allNAR(SEXP x) {
* optionally (check_dups) check for no duplicates
* optionally (skip_absent) skip (return 0) for numbers outside the range or not naming extant columns
*/
SEXP colnamesInt(SEXP x, SEXP cols, SEXP check_dups, SEXP skip_absent) {
SEXP colnamesInt(SEXP x, SEXP cols, SEXP check_dups, SEXP skip_absent, SEXP context) {
if (!isNewList(x))
error(_("'x' argument must be data.table compatible"));
if (!IS_TRUE_OR_FALSE(check_dups))
error(_("%s must be TRUE or FALSE"), "check_dups");
if (!IS_TRUE_OR_FALSE(skip_absent))
error(_("%s must be TRUE or FALSE"), "skip_absent");
const char *ctx = "unknown";
if (context != R_NilValue && isString(context) && length(context) > 0) {
ctx = CHAR(STRING_ELT(context, 0));
}

int protecti = 0;
R_len_t nx = length(x);
R_len_t nc = length(cols);
Expand All @@ -142,13 +147,13 @@ SEXP colnamesInt(SEXP x, SEXP cols, SEXP check_dups, SEXP skip_absent) {
ricols = cols;
} else if (isReal(cols)) {
if (!fitsInInt32(cols))
error(_("argument specifying columns is type 'double' and one or more items in it are not whole integers"));
error(_("In %s, argument specifying columns is type 'double' and one or more items in it are not whole integers"), ctx);
ricols = PROTECT(coerceVector(cols, INTSXP)); protecti++;
}
int *icols = INTEGER(ricols);
for (int i=0; i<nc; ++i) {
if ((!bskip_absent && icols[i]>nx) || (icols[i]<1))
error(_("argument specifying columns received non-existing column(s): cols[%d]=%d"), i+1, icols[i]); // handles NAs also
error(_("In %s, argument specifying columns received non-existing column(s): cols[%d]=%d"), ctx, i+1, icols[i]);// handles NAs also
else if(bskip_absent && icols[i]>nx)
icols[i] = 0L;
}
Expand All @@ -161,14 +166,14 @@ SEXP colnamesInt(SEXP x, SEXP cols, SEXP check_dups, SEXP skip_absent) {
if (!bskip_absent) {
for (int i=0; i<nc; ++i) {
if (icols[i]==0)
error(_("argument specifying columns received non-existing column(s): cols[%d]='%s'"), i+1, CHAR(STRING_ELT(cols, i))); // handles NAs also
error(_("In %s, argument specifying columns received non-existing column(s): cols[%d]='%s'"), ctx, i+1, CHAR(STRING_ELT(cols, i)));// handles NAs also
}
}
} else {
error(_("argument specifying columns must be character or numeric"));
}
error(_("In %s, argument specifying columns must be character or numeric"), ctx);
}
if (LOGICAL(check_dups)[0] && any_duplicated(ricols, FALSE))
error(_("argument specifying columns received duplicate column(s)"));
error(_("In %s, argument specifying columns received duplicate column(s)"), ctx);
UNPROTECT(protecti);
return ricols;
}
Expand Down
Loading