From 2d17b41e0f9cb599069e3238d9e46b10a4062358 Mon Sep 17 00:00:00 2001 From: sorhawell Date: Thu, 28 Dec 2023 01:06:20 +0100 Subject: [PATCH 1/3] rm trigger from expr string, only eval last expr --- R/session/vsc.R | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/R/session/vsc.R b/R/session/vsc.R index 5ab46139..a5fbf318 100644 --- a/R/session/vsc.R +++ b/R/session/vsc.R @@ -79,9 +79,19 @@ if (use_webserver) { }, complete = function(expr, trigger, ...) { + + # remove trigger + any rhs from expr_string to evaluate + expr_string <- expr + pattern <- paste0("^(.*\\", trigger, ")") + finding <- regexpr(pattern, expr_string) + if (finding > 0) { + expr_string <- substr(expr, 1L, attr(finding, "match.length") - 1L) + } + obj <- tryCatch({ - expr <- parse(text = expr, keep.source = FALSE)[[1]] - eval(expr, .GlobalEnv) + expr <- parse(text = expr_string, keep.source = FALSE) + last_expr <- tail(expr, 1L) # only autocomplete the last expression + eval(last_expr, .GlobalEnv) }, error = function(e) NULL) if (is.null(obj)) { @@ -105,6 +115,8 @@ if (use_webserver) { str = try_capture_str(item) ) }) + + print(result) return(result) } From 7861dd4a0762c6bd2e6c69c3540929ab4cc49ec8 Mon Sep 17 00:00:00 2001 From: sorhawell Date: Sun, 31 Dec 2023 00:03:13 +0100 Subject: [PATCH 2/3] use ast to isolate left side --- R/session/vsc.R | 149 ++++++++++++++++++++++++++++++++++++++++----- src/completions.ts | 13 +++- 2 files changed, 146 insertions(+), 16 deletions(-) diff --git a/R/session/vsc.R b/R/session/vsc.R index a5fbf318..24fcf62f 100644 --- a/R/session/vsc.R +++ b/R/session/vsc.R @@ -16,6 +16,124 @@ logger <- if (getOption("vsc.debug", FALSE)) { function(...) invisible() } +# use R parse tree to deduce, what code is needed to be evaluated +isolate_lhs_expression = function( + string, # all or some R code until trigger + trigger, # the trigger $ or @ for now + tag = ".vscode_r_internal_tag", # used to recover necessary parts of parse tree + verbose = TRUE + ) { + tagged_string = add_trigger_vscode_tag(string, trigger = trigger, tag = tag) + valid_expr = parse_local_valid_exprs(tagged_string, ignore_first = TRUE, verbose = verbose) + prune_ast_by_vscode_tag(valid_expr, trigger = trigger, tag = tag, verbose = verbose) +} + + +# assumes string is some R code ending where trigger operator was to be auto-completed +# add back the trigger-operator and a tag to search for in abstract sytanx tree +add_trigger_vscode_tag = function(string, trigger, tag = ".vscode_r_internal_tag") { + if (trigger %in% c("$", "@")) { + return(paste0(string, trigger, tag)) + } + stop("internal error: This trigger is not yet supported: ", trigger) +} + +# find tagged call-node in the expr ast, keep the lhs and drop the rest +prune_ast_by_vscode_tag = function(expr, trigger, tag = ".vscode_r_internal_tag", verbose = FALSE) { + + trigger = as.symbol(trigger) + tag = as.symbol(tag) + tagged_trigger_found = NULL + + recursive_search_tag = function(node) { + + if(!is.null(tagged_trigger_found)) { + if (verbose) cat("stop /n") + return() + } + if(verbose) cat("inspecting: ", as.character(node)) + if(is.atomic(node) || is.symbol(node)) { + # this is a leaf node, nothing to see here + if(verbose) cat("/n") + return() + } + if(is.call(node)) { + if( + length(node) == 3L && # $, @ are binary operators, (caller, lhs, rhs) + identical( node[[1L]], trigger) && # caller symbol is trigger e.g. $ + identical(node[[3L]], tag) # rhs is the tag + ) { + tagged_trigger_found <<- node[[2L]] # store found lhs + if (verbose) cat("found it!!!") + return() + } + node = rev(node) # reverse call to deep-search rhs first, where the trigger is + } + if (verbose) cat("\n") + lapply(node, recursive_search_tag) + } + recursive_search_tag(expr) + tagged_trigger_found +} + + +# this function will incrementally parse an input_string to find the smallest locally +# syntactically valid expression context. As code completion may happen in the middle +# of function argument or some incomplete code, the code may not be syntactically valid. +# It seems rstudio method to solve this is to fill missing brackets/parentheses/braces until +# a syntactic valid code is achieved with some C++ code. +# To avoid pushing C++ code, but still do this fast, and also achieve a wider applicable parsing, +# this approach was chosen instead. In rstudio, all code above in script must be syntactically valid. +# Here only the local snippet (single / multiline) must be valid. +# ignore_first = TRUE ignore first valid expression, which trivially could be an inserted tag +parse_local_valid_exprs = function(input_string, ignore_first = TRUE, verbose = FALSE) { + + # find full keywords/variables to iterate over (faster than single chars) + var_tokens <- gregexpr("([a-zA-Z0-9._])+", input_string, perl = TRUE) + + # show match data + if(verbose) { + print(regmatches(input_string, var_tokens)) + } + + # iterate backwards/right-to-left from smallest possible expression to last valid expr + # this is useful to stop at unfinished parenthesis/brackets/braces ... + expr = NULL + for(i in rev(var_tokens[[1]])) { + + # try parse larger and larger code segments + candidate_str = substr(input_string, i, nchar(input_string)) + candidate_expr = tryCatch( + parse(text = candidate_str, keep.source = FALSE), + error = function(e) NULL + ) + + # invalid parsed syntax, because code segment span out of expression context + if(is.null(candidate_expr) && !is.null(expr)) { + if(verbose) cat("\n",candidate_str, " >>>> invalid expr, use any last valid expr") + break + } + + # parsed multiple expressions, just need the latter one + if(length(candidate_expr)>1L) { + if(verbose) cat(candidate_str, " >>>> two vaild expr found, use the latter valid") + candidate_expr = tail(candidate_expr, 1) + } + + if(ignore_first) { + if(verbose) cat("\n",candidate_str, " >>>> ignore first valid expr") + ignore_first = FALSE + next + } + if(verbose) cat("\n",candidate_str, " >>>> valid expr") + expr = candidate_expr # store latest valid expr + } + if(verbose) cat("\n\n") + expr +} + + + load_settings <- function() { if (!file.exists(settings_file)) { return(FALSE) @@ -66,6 +184,7 @@ if (is.null(getOption("help_type"))) { options(help_type = "html") } + use_webserver <- isTRUE(getOption("vsc.use_webserver", FALSE)) if (use_webserver) { if (requireNamespace("httpuv", quietly = TRUE)) { @@ -79,20 +198,21 @@ if (use_webserver) { }, complete = function(expr, trigger, ...) { - - # remove trigger + any rhs from expr_string to evaluate - expr_string <- expr - pattern <- paste0("^(.*\\", trigger, ")") - finding <- regexpr(pattern, expr_string) - if (finding > 0) { - expr_string <- substr(expr, 1L, attr(finding, "match.length") - 1L) - } - - obj <- tryCatch({ - expr <- parse(text = expr_string, keep.source = FALSE) - last_expr <- tail(expr, 1L) # only autocomplete the last expression - eval(last_expr, .GlobalEnv) - }, error = function(e) NULL) + #browser() + expr_string <- expr # expr is actually a string + + # parse and isolate from ast lhs of trigger + expr <- isolate_lhs_expression(expr_string, trigger, verbose = FALSE) + + obj <- (function(){tryCatch({ + eval(expr, .GlobalEnv) + }, error = function(e) { + if (TRUE) { + # show completion error as msg in terminal for debugging + message(as.character(e)) + } + NULL + })})() if (is.null(obj)) { return(NULL) @@ -116,7 +236,6 @@ if (use_webserver) { ) }) - print(result) return(result) } diff --git a/src/completions.ts b/src/completions.ts index 17546f42..b1145cfe 100644 --- a/src/completions.ts +++ b/src/completions.ts @@ -166,10 +166,21 @@ export class LiveCompletionItemProvider implements vscode.CompletionItemProvider }); } else if(trigger === '$' || trigger === '@') { const symbolPosition = new vscode.Position(position.line, position.character - 1); + console.log("New completion \n sP",symbolPosition, position.line, position.character); if (session.server) { + const re = /([a-zA-Z0-9._$@ ])+(? Date: Wed, 3 Jan 2024 14:02:59 +0100 Subject: [PATCH 3/3] forgot finish_expression --- R/session/vsc.R | 285 ++++++++++++++++++++++++++++++--------------- src/completions.ts | 16 +-- 2 files changed, 195 insertions(+), 106 deletions(-) diff --git a/R/session/vsc.R b/R/session/vsc.R index 24fcf62f..38fbff96 100644 --- a/R/session/vsc.R +++ b/R/session/vsc.R @@ -16,124 +16,224 @@ logger <- if (getOption("vsc.debug", FALSE)) { function(...) invisible() } -# use R parse tree to deduce, what code is needed to be evaluated -isolate_lhs_expression = function( +# use R parser to deduce, what code is needed to be evaluated +isolate_lhs_expression <- function( string, # all or some R code until trigger trigger, # the trigger $ or @ for now tag = ".vscode_r_internal_tag", # used to recover necessary parts of parse tree - verbose = TRUE - ) { - tagged_string = add_trigger_vscode_tag(string, trigger = trigger, tag = tag) - valid_expr = parse_local_valid_exprs(tagged_string, ignore_first = TRUE, verbose = verbose) - prune_ast_by_vscode_tag(valid_expr, trigger = trigger, tag = tag, verbose = verbose) + verbose = TRUE) { + + # 1 add_trigger_vscode_tag to locate later in abstract syntax tree + tagged_string <- add_trigger_vscode_tag(string, trigger = trigger, tag = tag) + + # 2 add any missing trailing parenthesis to code, to make a full valid parse + finished_string <- finish_expression(tagged_string) + + #3a full parse code + expr <- tryCatch(parse(text = finished_string), error = function(err) NULL) + + #3b full parsing may fail if user syntax errors earlier in script. + # Fall back to minimal local parsing + if (is.null(expr)) { + if (verbose) print("fallback to local parsing") + expr <- parse_local_valid_exprs(tagged_string, ignore_first = TRUE, verbose = verbose) + } + + # only keep lhs of trigger operator in syntax tree, drop the rest. + prune_ast_by_vscode_tag(expr, trigger = trigger, tag = tag, verbose = verbose) } # assumes string is some R code ending where trigger operator was to be auto-completed # add back the trigger-operator and a tag to search for in abstract sytanx tree -add_trigger_vscode_tag = function(string, trigger, tag = ".vscode_r_internal_tag") { - if (trigger %in% c("$", "@")) { - return(paste0(string, trigger, tag)) - } - stop("internal error: This trigger is not yet supported: ", trigger) +add_trigger_vscode_tag <- function(string, trigger, tag = ".vscode_r_internal_tag") { + if (trigger %in% c("$", "@")) { + return(paste0(string, trigger, tag)) + } + stop("internal error: This trigger is not yet supported: ", trigger) } -# find tagged call-node in the expr ast, keep the lhs and drop the rest -prune_ast_by_vscode_tag = function(expr, trigger, tag = ".vscode_r_internal_tag", verbose = FALSE) { +# This is a baseR no C++, but fairly fast replacement for +# https://github.com/rstudio/rstudio/blob/6f7af1eab02568be3de327190a96fc5dba2dc247/src/cpp/session/modules/SessionRCompletions.cpp#L92 +finish_expression <- function(s) { + # try parse code + # browser() + sf <- srcfile("123.txt") + has_parse_error <- tryCatch( + { + parse(text = s, srcfile = sf, keep.source = FALSE) + FALSE + }, + error = function(err) TRUE + ) - trigger = as.symbol(trigger) - tag = as.symbol(tag) - tagged_trigger_found = NULL + # no parsing error, code does not need parentheses modification + if (!has_parse_error) { + return(s) + } - recursive_search_tag = function(node) { + # extract tokens from partial parsing result + parse_data_df <- getParseData(sf) + token_stream <- parse_data_df$token - if(!is.null(tagged_trigger_found)) { - if (verbose) cat("stop /n") - return() - } - if(verbose) cat("inspecting: ", as.character(node)) - if(is.atomic(node) || is.symbol(node)) { - # this is a leaf node, nothing to see here - if(verbose) cat("/n") - return() - } - if(is.call(node)) { - if( - length(node) == 3L && # $, @ are binary operators, (caller, lhs, rhs) - identical( node[[1L]], trigger) && # caller symbol is trigger e.g. $ - identical(node[[3L]], tag) # rhs is the tag - ) { - tagged_trigger_found <<- node[[2L]] # store found lhs - if (verbose) cat("found it!!!") - return() - } - node = rev(node) # reverse call to deep-search rhs first, where the trigger is - } - if (verbose) cat("\n") - lapply(node, recursive_search_tag) - } - recursive_search_tag(expr) - tagged_trigger_found -} + # find parenthesis imbalances, temporary and/or unsettled + l <- list( + ")" = cumsum((token_stream == "'('") - (token_stream == "')'")), + "}" = cumsum((token_stream == "'{'") - (token_stream == "'}'")), + "]" = cumsum((token_stream == "'['") - (token_stream == "']'") + (token_stream == "LBB") * 2L) + ) + # search backward in token stream for where imbalance started + is_imbalanced <- rev(Reduce("|", lapply(l, function(x) cumprod(rev(x) != 0L)))) -# this function will incrementally parse an input_string to find the smallest locally -# syntactically valid expression context. As code completion may happen in the middle -# of function argument or some incomplete code, the code may not be syntactically valid. -# It seems rstudio method to solve this is to fill missing brackets/parentheses/braces until -# a syntactic valid code is achieved with some C++ code. -# To avoid pushing C++ code, but still do this fast, and also achieve a wider applicable parsing, -# this approach was chosen instead. In rstudio, all code above in script must be syntactically valid. -# Here only the local snippet (single / multiline) must be valid. -# ignore_first = TRUE ignore first valid expression, which trivially could be an inserted tag -parse_local_valid_exprs = function(input_string, ignore_first = TRUE, verbose = FALSE) { - - # find full keywords/variables to iterate over (faster than single chars) - var_tokens <- gregexpr("([a-zA-Z0-9._])+", input_string, perl = TRUE) - - # show match data - if(verbose) { - print(regmatches(input_string, var_tokens)) - } - - # iterate backwards/right-to-left from smallest possible expression to last valid expr - # this is useful to stop at unfinished parenthesis/brackets/braces ... - expr = NULL - for(i in rev(var_tokens[[1]])) { - - # try parse larger and larger code segments - candidate_str = substr(input_string, i, nchar(input_string)) - candidate_expr = tryCatch( - parse(text = candidate_str, keep.source = FALSE), - error = function(e) NULL + # check if missing parenthesis is the issue + if ( + !any(is_imbalanced) || # other syntactic error, cannot be fixed + any(sapply(l, function(x) x < 0L))) { # negative parenthesis balance, cannot be fixed + return(s) + } + + # keep only analysis code part which is imbalanced + token_stream <- token_stream[is_imbalanced] + l <- lapply(l, function(x) x[is_imbalanced]) + + # redefine LBB token_stream as two [ [ for simplifying latter analysis + token_stream <- unlist(lapply(token_stream, function(x) if (x == "LBB") c("'['", "'['") else x)) + + # make pseudo mutable push-pop vectors, with reasonable performance + # TODO could still be a tiny bottleneck consider replace perhaps with some mutable object, env? + v_1 <- rep(NA_integer_, max(l$`)`)) + v_2 <- rep(NA_integer_, max(l$`}`)) + v_3 <- rep(NA_integer_, max(l$`]`)) + v_size1 <- 0L + v_size2 <- 0L + v_size3 <- 0L + + + # iterate over all token_stream and find indices for unmatch parethesis + not_used <- mapply( + token_stream, + seq_along(token_stream), + SIMPLIFY = FALSE, + FUN = function(token, i) { + switch(token, + # styler: off + # push opening parenthesis token idx on vector + "'('" = {v_size1 <<- v_size1 + 1L; v_1[v_size1] <<- i}, + "'{'" = {v_size2 <<- v_size2 + 1L; v_2[v_size2] <<- i}, + "'['" = {v_size3 <<- v_size3 + 1L; v_3[v_size3] <<- i}, + + # pop opening parenthesis token idx from vector + "')'" = {v_1[v_size1] <<- NA_integer_; v_size1 <<- v_size1 - 1L}, + "'}'" = {v_2[v_size2] <<- NA_integer_; v_size2 <<- v_size2 - 1L}, + "']'" = {v_3[v_size3] <<- NA_integer_; v_size3 <<- v_size3 - 1L}, + # styler: on + + # another token do nothing + NULL + ) + # styler: on + NULL + } ) - # invalid parsed syntax, because code segment span out of expression context - if(is.null(candidate_expr) && !is.null(expr)) { - if(verbose) cat("\n",candidate_str, " >>>> invalid expr, use any last valid expr") - break + # combine correction parentheses in the correct order + names(v_1) <- rep(")", length(v_1)) + names(v_2) <- rep("}", length(v_2)) + names(v_3) <- rep("]", length(v_3)) + corrections <- paste(names(sort(c(v_1, v_2, v_3), decreasing = TRUE)), collapse = "") + + # apply correction parentheses at the end of code + paste0(s, corrections) +} + +# instead parsing the entire script to determine the context, do a reverse parse until first valid context +parse_local_valid_exprs <- function(input_string, ignore_first = TRUE, verbose = FALSE) { + # find full keywords/variables to iterate over (faster than single chars) + var_tokens <- gregexpr("([a-zA-Z0-9._])+", input_string, perl = TRUE) + + # show match data + if (verbose) { + print(regmatches(input_string, var_tokens)) } - # parsed multiple expressions, just need the latter one - if(length(candidate_expr)>1L) { - if(verbose) cat(candidate_str, " >>>> two vaild expr found, use the latter valid") - candidate_expr = tail(candidate_expr, 1) + # iterate backwards/right-to-left from smallest possible expression to last valid expr + # this is useful to stop at unfinished parenthesis/brackets/braces ... + expr <- NULL + for (i in rev(var_tokens[[1]])) { + # try parse larger and larger code segments + candidate_str <- substr(input_string, i, nchar(input_string)) + candidate_expr <- tryCatch( + parse(text = candidate_str, keep.source = FALSE), + error = function(e) NULL + ) + + # invalid parsed syntax, because code segment span out of expression context + if (is.null(candidate_expr) && !is.null(expr)) { + if (verbose) cat("\n", candidate_str, " >>>> invalid expr, use any last valid expr") + break + } + + # parsed multiple expressions, just need the latter one + if (length(candidate_expr) > 1L) { + if (verbose) cat(candidate_str, " >>>> two vaild expr found, use the latter valid") + candidate_expr <- tail(candidate_expr, 1) + } + + if (ignore_first) { + if(verbose) cat("\n", candidate_str, " >>>> ignore first valid expr") + ignore_first <- FALSE + next + } + if (verbose) cat("\n", candidate_str, " >>>> valid expr") + expr <- candidate_expr # store latest valid expr } + if (verbose) cat("\n\n") + expr +} - if(ignore_first) { - if(verbose) cat("\n",candidate_str, " >>>> ignore first valid expr") - ignore_first = FALSE - next +# find tagged call-node in the expr ast, keep the lhs and drop the rest +prune_ast_by_vscode_tag <- function(expr, trigger, tag = ".vscode_r_internal_tag", verbose = FALSE) { + trigger <- as.symbol(trigger) + tag <- as.symbol(tag) + tagged_trigger_found <- NULL + + recursive_search_tag <- function(node) { + if (!is.null(tagged_trigger_found)) { + if (verbose) cat("stop /n") + return() + } + if (verbose) cat("inspecting: ", as.character(node)) + if (is.atomic(node) || is.symbol(node)) { + # this is a leaf node, nothing to see here + if (verbose) cat("/n") + return() + } + if (is.call(node)) { + if ( + length(node) == 3L && # $, @ are binary operators, (caller, lhs, rhs) + identical(node[[1L]], trigger) && # caller symbol is trigger e.g. $ + identical(node[[3L]], tag) # rhs is the tag + ) { + tagged_trigger_found <<- node[[2L]] # store found lhs + if (verbose) cat("found it!!!") + return() + } + node <- rev(node) # reverse call to deep-search rhs first, where the trigger is + } + if (verbose) cat("\n") + lapply(node, recursive_search_tag) } - if(verbose) cat("\n",candidate_str, " >>>> valid expr") - expr = candidate_expr # store latest valid expr - } - if(verbose) cat("\n\n") - expr + recursive_search_tag(expr) + tagged_trigger_found } + + + + load_settings <- function() { if (!file.exists(settings_file)) { return(FALSE) @@ -202,6 +302,7 @@ if (use_webserver) { expr_string <- expr # expr is actually a string # parse and isolate from ast lhs of trigger + # browser() expr <- isolate_lhs_expression(expr_string, trigger, verbose = FALSE) obj <- (function(){tryCatch({ diff --git a/src/completions.ts b/src/completions.ts index b1145cfe..0c8dc07a 100644 --- a/src/completions.ts +++ b/src/completions.ts @@ -166,27 +166,15 @@ export class LiveCompletionItemProvider implements vscode.CompletionItemProvider }); } else if(trigger === '$' || trigger === '@') { const symbolPosition = new vscode.Position(position.line, position.character - 1); - console.log("New completion \n sP",symbolPosition, position.line, position.character); if (session.server) { - - const re = /([a-zA-Z0-9._$@ ])+(?