Skip to content

Commit

Permalink
allow slow pmatch()ing to be interrupted
Browse files Browse the repository at this point in the history
git-svn-id: https://svn.r-project.org/R/trunk@85816 00db46b3-68df-0310-9c12-caf00c1e9a41
  • Loading branch information
maechler committed Jan 22, 2024
1 parent e1e7acb commit 2c7a646
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 26 deletions.
4 changes: 4 additions & 0 deletions doc/NEWS.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -559,6 +559,10 @@

\item \pkg{tools}\code{:: startDynamicHelp()} now ensures \code{port}
is in proper range, fixing \PR{18645}.

\item \code{pmatch(x, table)} for large \code{table}, also called for
data frame row selection, \code{dfrm[nm, ]}, is now interruptable,
fixing \PR{18656}.
}
}
}
Expand Down
50 changes: 24 additions & 26 deletions src/main/unique.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
* R : A Computer Language for Statistical Data Analysis
* Copyright (C) 1997--2023 The R Core Team
* Copyright (C) 1997--2024 The R Core Team
* Copyright (C) 1995, 1996 Robert Gentleman and Ross Ihaka
*
* This program is free software; you can redistribute it and/or modify
Expand Down Expand Up @@ -323,16 +323,14 @@ static hlen vhash(SEXP x, R_xlen_t indx, HashData *d)

static hlen vhash_one(SEXP _this, HashData *d)
{
int i;
unsigned int key;

/* Handle environments by pointer hashing. Previously,
environments were hashed based only on length, which is not
very effective and could be expensive to compute. */
if (TYPEOF(_this) == ENVSXP)
return scatter(PTRHASH(_this), d);

key = OBJECT(_this) + 2*TYPEOF(_this) + 100U*(unsigned int) length(_this);
int i;
unsigned int key = OBJECT(_this) + 2*TYPEOF(_this) + 100U*(unsigned int) length(_this);
/* maybe we should also look at attributes, but that slows us down */
switch (TYPEOF(_this)) {
case LGLSXP:
Expand Down Expand Up @@ -1497,8 +1495,8 @@ attribute_hidden SEXP do_match(SEXP call, SEXP op, SEXP args, SEXP env)
{
checkArity(op, args);

if ((!isVector(CAR(args)) && !isNull(CAR(args)))
|| (!isVector(CADR(args)) && !isNull(CADR(args))))
if ((!isVector(CAR (args)) && !isNull(CAR (args))) ||
(!isVector(CADR(args)) && !isNull(CADR(args))))
error(_("'match' requires vector arguments"));

int nomatch = asInteger(CADDR(args));
Expand Down Expand Up @@ -1526,35 +1524,32 @@ attribute_hidden SEXP do_match(SEXP call, SEXP op, SEXP args, SEXP env)
* Empty strings are unmatched BDR 2000/2/16
*/

// .Internal(pmatch(x, table, nomatch, duplicates.ok))
attribute_hidden SEXP do_pmatch(SEXP call, SEXP op, SEXP args, SEXP env)
{
SEXP ans, input, target;
int mtch, n_target, mtch_count, dups_ok, no_match;
size_t temp;
int *used = NULL, *ians;
const char **in, **tar;
Rboolean no_dups;
Rboolean useBytes = FALSE, useUTF8 = FALSE;

checkArity(op, args);
input = CAR(args);
SEXP input = CAR(args), // = x in R
target = CADR(args), // = table "
ans;
R_xlen_t n_input = XLENGTH(input);
target = CADR(args);
n_target = LENGTH(target); // not allowed to be long
no_match = asInteger(CADDR(args));
dups_ok = asLogical(CADDDR(args));

int n_target = LENGTH(target), // not allowed to be long
no_match = asInteger(CADDR(args)),
dups_ok = asLogical(CADDDR(args));
if (dups_ok == NA_LOGICAL)
error(_("invalid '%s' argument"), "duplicates.ok");
no_dups = !dups_ok;
Rboolean no_dups = !dups_ok;

if (!isString(input) || !isString(target))
error(_("argument is not of mode character"));

int *used = NULL;
if(no_dups) {
used = (int *) R_alloc((size_t) n_target, sizeof(int));
for (int j = 0; j < n_target; j++) used[j] = 0;
}

Rboolean useBytes = FALSE, useUTF8 = FALSE;
for(R_xlen_t i = 0; i < n_input; i++) {
if(IS_BYTES(STRING_ELT(input, i))) {
useBytes = TRUE;
Expand All @@ -1576,10 +1571,11 @@ attribute_hidden SEXP do_pmatch(SEXP call, SEXP op, SEXP args, SEXP env)
}
}

in = (const char **) R_alloc((size_t) n_input, sizeof(char *));
const char **in, **tar;
in = (const char **) R_alloc((size_t) n_input, sizeof(char *));
tar = (const char **) R_alloc((size_t) n_target, sizeof(char *));
PROTECT(ans = allocVector(INTSXP, n_input));
ians = INTEGER0(ans);
int *ians = INTEGER0(ans);
if(useBytes) {
for(R_xlen_t i = 0; i < n_input; i++) {
in[i] = CHAR(STRING_ELT(input, i));
Expand Down Expand Up @@ -1643,11 +1639,13 @@ attribute_hidden SEXP do_pmatch(SEXP call, SEXP op, SEXP args, SEXP env)
const char *ss;
if (ians[i]) continue;
ss = in[i];
temp = strlen(ss);
size_t temp = strlen(ss);
if (temp == 0) continue;
mtch = 0;
mtch_count = 0;
int mtch = 0,
mtch_count = 0;
for (int j = 0; j < n_target; j++) {
if (!(((size_t)i * n_target + j) & 0x1fff))
R_CheckUserInterrupt();
if (no_dups && used[j]) continue;
if (strncmp(ss, tar[j], temp) == 0) {
mtch = j + 1;
Expand Down

0 comments on commit 2c7a646

Please sign in to comment.