diff --git a/R/sql_server.R b/R/sql_server.R index ed8c1ec..5ba0594 100644 --- a/R/sql_server.R +++ b/R/sql_server.R @@ -1,4 +1,3 @@ - #' @title R6 Class representing a SQL Server #' #' @description @@ -28,7 +27,7 @@ #' #' @export -sql_server <- R6Class("sql_server", public = list( +sql_server <- R6::R6Class("sql_server", public = list( #' @field driver driver to be used, e.g. "SQL Server". Quoted string, @@ -80,7 +79,6 @@ sql_server <- R6Class("sql_server", public = list( port = NULL, uid = NULL, pwd = NULL) { - # set up params self$driver <- driver self$server <- server @@ -89,15 +87,16 @@ sql_server <- R6Class("sql_server", public = list( self$uid <- uid self$pwd <- pwd self$conn - self$server_type <- case_when(tolower(self$driver) == "sql server" ~ "mssql", - grepl("mysql", tolower(self$driver)) ~ "mysql", - TRUE ~ "other") + self$server_type <- dplyr::case_when( + tolower(self$driver) == "sql server" ~ "mssql", + grepl("mysql", tolower(self$driver)) ~ "mysql", + TRUE ~ "other" + ) if (self$server_type == "other") { stop("class only works with MS SQL and MySQL servers, check driver input") } - }, #' @description @@ -110,49 +109,47 @@ sql_server <- R6Class("sql_server", public = list( #' database used to create class. connect = function(database = self$database) { - # if self$conn is null or invalid, connect/re-connect conn_null <- is.null(self$conn) if (conn_null == "TRUE") { conn_valid <- FALSE } else { - conn_valid <- DBI::dbIsValid(self$conn) + conn_valid <- DBI::dbIsValid(self$conn) } - + # if database is different to self$database, re-connect if (database != self$database) { conn_valid <- FALSE } if (isFALSE(conn_valid)) { - # set connection by server type if (self$server_type == "mssql") { - # set connection string conn_string <- paste("driver={", self$driver, "};", - "server=", self$server, ";", - "database=", database, ";", - "Encrypt=true;", - "trusted_connection=true", sep = "") + "server=", self$server, ";", + "database=", database, ";", + "Encrypt=true;", + "trusted_connection=true", + sep = "" + ) # set connection self$conn <- odbc::dbConnect(odbc::odbc(), - .connection_string = conn_string, - timeout = 60) - - + .connection_string = conn_string, + timeout = 60 + ) } else if (self$server_type == "mysql") { - # set connection with credentials self$conn <- DBI::dbConnect(odbc::odbc(), - Driver = self$driver, - Server = self$server, - UID = self$uid, - PWD = self$pwd, - Port = self$port, - database = database) + Driver = self$driver, + Server = self$server, + UID = self$uid, + PWD = self$pwd, + Port = self$port, + database = database + ) } } }, @@ -163,10 +160,9 @@ sql_server <- R6Class("sql_server", public = list( #' @param close logical, TRUE or FALSE whether to close the connection. close_connection = function(close = TRUE) { - if (isTRUE(close)) { if (DBI::dbIsValid(self$conn)) { - dbDisconnect(self$conn) + DBI::dbDisconnect(self$conn) } } }, @@ -182,12 +178,10 @@ sql_server <- R6Class("sql_server", public = list( #' such as using temporary tables. Logical, default TRUE. get = function(query, close_conn = TRUE) { - self$connect() output <- DBI::dbGetQuery(self$conn, query) self$close_connection(close_conn) return(output) - }, #' @description @@ -203,12 +197,10 @@ sql_server <- R6Class("sql_server", public = list( #' such as using temporary tables. Logical, default TRUE. 
run = function(query, close_conn = TRUE) { - self$connect() - output <- DBI::dbSendStatement(self$conn, query, immediate = TRUE) + output <- DBI::dbSendStatement(self$conn, query, immediate = TRUE) DBI::dbClearResult(output) self$close_connection(close_conn) - }, @@ -225,14 +217,13 @@ sql_server <- R6Class("sql_server", public = list( #' such as using temporary tables. Logical, default TRUE. table_exists = function(table_name, close_conn = TRUE) { - # if database is tempdb, check the table name if (self$database == "tempdb") { table_name <- self$temp_table_name(table_name) } self$connect() - if(DBI::dbExistsTable(self$conn, table_name)) { + if (DBI::dbExistsTable(self$conn, table_name)) { self$close_connection(close_conn) return("yes") } else { @@ -274,21 +265,17 @@ sql_server <- R6Class("sql_server", public = list( append_data = FALSE, batch_upload = NULL, close_conn = TRUE) { - # If variable_types is not NULL, make sure same length as number cols in data if (!is.null(variable_types)) { - # If not as many as there are columns stop, otherwise name them if (length(variable_types) != length(colnames(data))) { - - stop(glue("Number of Variable Types specified needs to be the same as ", - "the number of columns in the data")) - + stop(glue::glue( + "Number of Variable Types specified needs to be the same as ", + "the number of columns in the data" + )) } else { - # Make variable_types a "named character vector" - variable_types <- setNames(variable_types, c(colnames(data))) - + variable_types <- stats::setNames(variable_types, c(colnames(data))) } } @@ -300,29 +287,31 @@ sql_server <- R6Class("sql_server", public = list( } # Check if table exists and append_data = TRUE - if (self$table_exists(table_name, close_conn = close_conn) == "yes" - & append_data == FALSE) { - - stop(glue("Table {table_name} already exists. To add data to this table ", - "set append_data to TRUE")) - - } else if (self$table_exists(table_name, close_conn = close_conn) == "no" - & append_data == TRUE) { - + if (self$table_exists(table_name, close_conn = close_conn) == "yes" & + append_data == FALSE) { + stop(glue::glue( + "Table {table_name} already exists. To add data to this table ", + "set append_data to TRUE" + )) + } else if (self$table_exists(table_name, close_conn = close_conn) == "no" & + append_data == TRUE) { append_data <- FALSE - warning(glue("Append set to TRUE but table doesn't exist. Still run ", - "but check outputs")) - + warning(glue::glue( + "Append set to TRUE but table doesn't exist. 
Still run ", + "but check outputs" + )) } # get n rows in table to start with if (self$table_exists(table_name, close_conn = close_conn) == "no") { start_n_rows <- 0 } else { - start_n_rows <- self$get(query = glue("SELECT count(*) AS n + start_n_rows <- self$get( + query = glue::glue("SELECT count(*) AS n FROM {table_name}"), - close_conn = close_conn) %>% - pull(n) + close_conn = close_conn + ) %>% + dplyr::pull(n) } # set connection @@ -337,42 +326,44 @@ sql_server <- R6Class("sql_server", public = list( # set table name formatting depending on server type if (self$server_type == "mssql") { - tbl_name <- DBI::Id(schema = schema_name, - table = self$temp_table_name(table_name)) #check tt + tbl_name <- DBI::Id( + schema = schema_name, + table = self$temp_table_name(table_name) + ) # check tt } else if (self$server_type == "mysql") { tbl_name <- table_name } # if not doing in batch, just upload if (is.null(batch_upload)) { - # Upload to SQL server DBI::dbWriteTable(self$conn, - name = tbl_name, - value = data, - append = append_data, - field.types = variable_types) + name = tbl_name, + value = data, + append = append_data, + field.types = variable_types + ) # otherwise run as batches } else { - # group data data <- data %>% - mutate(group = floor(row_number()/batch_upload)) + dplyr::mutate(group = floor(dplyr::row_number() / batch_upload)) # set up progress bar progress <- 0 - pb <- txtProgressBar(min = progress, - max = max(unique(data$group)), - initial = 0, - style = 3) + pb <- utils::txtProgressBar( + min = progress, + max = max(unique(data$group)), + initial = 0, + style = 3 + ) # loop through groups for (i in unique(data$group)) { - data_to_upload <- data %>% - filter(group == i) %>% - select(-group) + stats::filter(group == i) %>% + dplyr::select(-group) # unless it's the first loop, set append to true and variable types to NULL if (i != min(unique(data$group))) { @@ -382,14 +373,15 @@ sql_server <- R6Class("sql_server", public = list( # Upload to SQL server DBI::dbWriteTable(self$conn, - name = tbl_name, - value = data_to_upload, - append = append_data, - field.types = variable_types) + name = tbl_name, + value = data_to_upload, + append = append_data, + field.types = variable_types + ) # update progress bar progress <- progress + 1 - setTxtProgressBar(pb, progress) + utils::setTxtProgressBar(pb, progress) } # close the progress bar @@ -397,10 +389,11 @@ sql_server <- R6Class("sql_server", public = list( } # check number of rows in table - table_n_rows <- self$get(glue("SELECT count(*) AS n + table_n_rows <- self$get(glue::glue("SELECT count(*) AS n FROM {table_name}"), - close_conn = close_conn) %>% - pull(n) + close_conn = close_conn + ) %>% + dplyr::pull(n) # Close ODBC connection self$close_connection(close_conn) @@ -409,9 +402,11 @@ sql_server <- R6Class("sql_server", public = list( if (start_n_rows + nrow(data) == table_n_rows) { return("success") } else { - return(glue("N rows don't match, number in table before upload ", - "({start_n_rows}) plus number rows in data ({nrow(data)}) ", - "not equal to n rows now in table in server ({table_n_rows})")) + return(glue::glue( + "N rows don't match, number in table before upload ", + "({start_n_rows}) plus number rows in data ({nrow(data)}) ", + "not equal to n rows now in table in server ({table_n_rows})" + )) } }, @@ -426,17 +421,15 @@ sql_server <- R6Class("sql_server", public = list( #' such as using temporary tables. Logical, default TRUE. 
drop_table = function(table_name, close_conn = TRUE) { - if (self$table_exists(table_name, - close_conn = close_conn) == "yes") { - + close_conn = close_conn + ) == "yes") { self$connect() DBI::dbRemoveTable(self$conn, table_name) self$close_connection(close_conn) - - } else { - message(glue("table {table_name} doesn't exist")) - } + } else { + message(glue::glue("table {table_name} doesn't exist")) + } }, @@ -446,20 +439,14 @@ sql_server <- R6Class("sql_server", public = list( #' `r lifecycle::badge("stable")` databases = function() { - if (self$server_type == "mssql") { - query <- "SELECT name FROM sys. databases" - } else if (self$server_type == "mysql") { - query <- "show databases" - } # return return(self$get(query)) - }, @@ -480,33 +467,33 @@ sql_server <- R6Class("sql_server", public = list( #' such as using temporary tables. Logical, default TRUE. db_tables = function(database = self$database, close_conn = TRUE) { - # connect to database set for meta data self$connect(database = database) if (self$server_type == "mssql") { - # data tables <- self$get("SELECT table_catalog as [database] , table_schema as [schema] , table_name FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_TYPE = 'BASE TABLE'", - close_conn = close_conn) %>% - arrange(table_name) - + close_conn = close_conn + ) %>% + dplyr::arrange(table_name) } else if (self$server_type == "mysql") { - # data - tables <- self$get(glue("SHOW FULL TABLES IN {database} + tables <- self$get(glue::glue("SHOW FULL TABLES IN {database} WHERE TABLE_TYPE LIKE 'BASE TABLE'"), - close_conn = close_conn) %>% - mutate(database = database, - schema = NA_character_) %>% - select(-Table_type) + close_conn = close_conn + ) %>% + dplyr::mutate( + database = database, + schema = NA_character_ + ) %>% + dplyr::select(-Table_type) colnames(tables)[1] <- "table_name" - arrange(tables, table_name) + dplyr::arrange(tables, table_name) } return(tables) }, @@ -526,31 +513,31 @@ sql_server <- R6Class("sql_server", public = list( db_views = function(database = self$database, close_conn = TRUE) { - self$connect(database = database) if (self$server_type == "mssql") { - # data views <- self$get("SELECT table_catalog as [database] , table_schema as [schema] , table_name as [view_name] FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_TYPE = 'VIEW'", - close_conn = close_conn) %>% - arrange(view_name) - + close_conn = close_conn + ) %>% + dplyr::arrange(view_name) } else if (self$server_type == "mysql") { - # data - views <- self$get(glue("SHOW FULL TABLES IN {database} + views <- self$get(glue::glue("SHOW FULL TABLES IN {database} WHERE TABLE_TYPE LIKE 'VIEW'"), - close_conn = close_conn) %>% - mutate(database = database, - schema = NA_character_) %>% - select(-Table_type) + close_conn = close_conn + ) %>% + dplyr::mutate( + database = database, + schema = NA_character_ + ) %>% + dplyr::select(-Table_type) colnames(views)[1] <- "view_name" - views <- arrange(views, view_name) + views <- dplyr::arrange(views, view_name) } # return @@ -565,20 +552,18 @@ sql_server <- R6Class("sql_server", public = list( #' @param x name of the table. Quoted string, no default. 
temp_table_name = function(x) { - if (substr(x, 1, 1) == "#") { - - temp_table <- self$db_tables(database = "tempdb", - close_conn = FALSE) %>% - filter(grepl(paste0(x, "_"), table_name) | - x == table_name) %>% - pull(table_name) + temp_table <- self$db_tables( + database = "tempdb", + close_conn = FALSE + ) %>% + dplyr::filter(grepl(paste0(x, "_"), table_name) | + x == table_name) %>% + dplyr::pull(table_name) if (length(temp_table) == 1) { return(temp_table) - } else { - # if not found any matches, assume doesn't exist or is not a temp table # and therefore return original name return(x) @@ -609,7 +594,6 @@ sql_server <- R6Class("sql_server", public = list( order_object_fields = function(database = self$database, object, close_conn = TRUE) { - self$connect(database = database) # if object is temp table, get the full name @@ -618,27 +602,27 @@ sql_server <- R6Class("sql_server", public = list( } if (self$server_type == "mssql") { - # get fields & data types - data_types <- self$get(glue("SELECT column_name + data_types <- self$get(glue::glue("SELECT column_name , data_type , character_maximum_length FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '{object}'"), - close_conn = close_conn) %>% - mutate(row_id = row_number(), - order = case_when(character_maximum_length == -1L ~ 99999L, - TRUE ~ row_id)) %>% - arrange(order) + close_conn = close_conn + ) %>% + dplyr::mutate( + row_id = dplyr::row_number(), + order = dplyr::case_when( + character_maximum_length == -1L ~ 99999L, + TRUE ~ row_id + ) + ) %>% + dplyr::arrange(order) # return return(paste0(data_types$column_name, collapse = ", ")) - - } else if (self$server_type == "mysql") { - stop("function not required for My SQL databases") - } }, @@ -659,13 +643,14 @@ sql_server <- R6Class("sql_server", public = list( object_fields = function(database = self$database, objects = NULL, close_conn = TRUE) { - self$connect(database = database) # if not given any objects, use them all if (is.null(objects)) { - objects <- c(self$db_views(database = database, close_conn)$view_name, - self$db_tables(database = database, close_conn)$table_name) + objects <- c( + self$db_views(database = database, close_conn)$view_name, + self$db_tables(database = database, close_conn)$table_name + ) } # create list @@ -673,24 +658,26 @@ sql_server <- R6Class("sql_server", public = list( # get fields for each object for (obj in objects) { - obj_name <- self$temp_table_name(obj) - obj_field_query <- glue("SELECT column_name as col_name + obj_field_query <- glue::glue("SELECT column_name as col_name FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '{obj_name}' AND TABLE_SCHEMA = '{database}'") # if MS SQL server, remove reference to TABLE_SCHEMA if (self$server_type %in% "mssql") { - obj_field_query <- gsub("AND TABLE_SCHEMA", - " -- AND TABLE_SCHEMA", - obj_field_query) + obj_field_query <- gsub( + "AND TABLE_SCHEMA", + " -- AND TABLE_SCHEMA", + obj_field_query + ) } field_list[[obj]] <- self$get(obj_field_query, - close_conn = close_conn) %>% - pull(col_name) + close_conn = close_conn + ) %>% + dplyr::pull(col_name) } # return @@ -746,7 +733,6 @@ sql_server <- R6Class("sql_server", public = list( date_filter = NULL, date_field = NULL, close_conn = TRUE) { - # set database in connection self$connect(database = database) @@ -758,9 +744,9 @@ sql_server <- R6Class("sql_server", public = list( # set data filter if (!is.null(date_filter) & !is.null(date_field)) { if (self$server_type == "mysql") { - date_filter <- glue("WHERE `{date_field}` >= '{date_filter}'") +
date_filter <- glue::glue("WHERE `{date_field}` >= '{date_filter}'") } else { - date_filter <- glue("WHERE [{date_field}] >= '{date_filter}'") + date_filter <- glue::glue("WHERE [{date_field}] >= '{date_filter}'") } } else if (!is.null(date_filter) | !is.null(date_field)) { message("date_filter & date_field need setting for a date filter to be applied") @@ -774,73 +760,83 @@ sql_server <- R6Class("sql_server", public = list( top_n_rows <- "" } else if (!is.null(row_limit) & !is.null(date_field)) { if (self$server_type == "mysql") { - order_by <- glue("ORDER BY `{date_field}` DESC LIMIT {row_limit}") + order_by <- glue::glue("ORDER BY `{date_field}` DESC LIMIT {row_limit}") top_n_rows <- "" } else if (self$server_type == "mssql") { - order_by <- glue("ORDER BY [{date_field}] DESC") - top_n_rows <- glue("TOP {row_limit}") + order_by <- glue::glue("ORDER BY [{date_field}] DESC") + top_n_rows <- glue::glue("TOP {row_limit}") } } else if (!is.null(row_limit) & is.null(date_field)) { message("row_limit not applied when date_field is null") } # get list of all objects - db_objects <- rbind(self$db_views(database = database, close_conn) %>% - rename(obj_name = view_name) %>% - mutate(type = "view"), - self$db_tables(database = database, close_conn) %>% - rename(obj_name = table_name) %>% - mutate(type = "user_table")) + db_objects <- rbind( + self$db_views(database = database, close_conn) %>% + dplyr::rename(obj_name = view_name) %>% + dplyr::mutate(type = "view"), + self$db_tables(database = database, close_conn) %>% + dplyr::rename(obj_name = table_name) %>% + dplyr::mutate(type = "user_table") + ) # if object supplied, filter db_objects for them if (!is.null(objects)) { - # for each object, check if temp table and get full name if so objects <- sapply(objects, function(x) { if (substr(x, 1, 1) == "#") { return(self$temp_table_name(x)) } else { - return(x) - } - }) %>% + return(x) + } + }) %>% unname() - objects <- filter(db_objects, obj_name %in% objects) + objects <- stats::filter(db_objects, obj_name %in% objects) } else { objects <- db_objects } # add in fields & syntax for SQL (important if table names have spaces) objects <- objects %>% - mutate(obj_name_ = case_when(self$server_type == "mysql" ~ glue("`{obj_name}`"), - self$server_type == "mssql" ~ glue("[{obj_name}]")), - schema_ = case_when(!is.na(schema) ~ paste0(schema, "."), - TRUE ~ ""), - full_obj_name = paste0(database, ".", - schema_, - obj_name_), - field_id = row_number()) %>% - select(-schema_, -obj_name_) + dplyr::mutate( + obj_name_ = dplyr::case_when( + self$server_type == "mysql" ~ glue::glue("`{obj_name}`"), + self$server_type == "mssql" ~ glue::glue("[{obj_name}]") + ), + schema_ = dplyr::case_when( + !is.na(schema) ~ paste0(schema, "."), + TRUE ~ "" + ), + full_obj_name = paste0( + database, ".", + schema_, + obj_name_ + ), + field_id = dplyr::row_number() + ) %>% + dplyr::select(-schema_, -obj_name_) # create list field_list <- list() # for each object for (id in objects$field_id) { - - #id <- objects$field_id[1] - obj_ref <- filter(objects, field_id == id) + # id <- objects$field_id[1] + obj_ref <- stats::filter(objects, field_id == id) obj <- obj_ref$obj_name obj_full_name <- obj_ref$full_obj_name print(obj) # get fields - fields <- self$object_fields(database = database, - objects = obj, - close_conn = close_conn)[[1]] + fields <- self$object_fields( + database = database, + objects = obj, + close_conn = close_conn + )[[1]] # write query - obj_md_query <- glue("SELECT COLUMN_NAME as col_name + obj_md_query <- 
glue::glue("SELECT COLUMN_NAME as col_name , IS_NULLABLE as nullable , DATA_TYPE as data_type , NUMERIC_PRECISION as num_precision @@ -852,64 +848,69 @@ sql_server <- R6Class("sql_server", public = list( # if MS SQL server, remove reference to TABLE_SCHEMA if (self$server_type %in% "mssql") { - obj_md_query <- gsub("AND TABLE_SCHEMA", - " -- AND TABLE_SCHEMA", - obj_md_query) + obj_md_query <- gsub( + "AND TABLE_SCHEMA", + " -- AND TABLE_SCHEMA", + obj_md_query + ) } - - + + # set database in connection self$connect(database = database) # get basic meta from SQL obj_MD <- self$get(obj_md_query, - close_conn = close_conn) %>% - mutate(max_len = case_when(!is.na(max_len) ~ paste0("(", max_len, ")"), - TRUE ~ ""), - col_type = glue("{data_type}{max_len}"), - nullable = tolower(nullable)) %>% - select(col_name, data_type, col_type, everything(), -max_len) + close_conn = close_conn + ) %>% + dplyr::mutate( + max_len = dplyr::case_when( + !is.na(max_len) ~ paste0("(", max_len, ")"), + TRUE ~ "" + ), + col_type = glue::glue("{data_type}{max_len}"), + nullable = tolower(nullable) + ) %>% + dplyr::select(col_name, data_type, col_type, dplyr::everything(), -max_len) # get indexes if (self$server_type == "mssql") { - - indexes <- self$get(glue("select * + indexes <- self$get(glue::glue("select * from sys.indexes where object_id = (select top 1 object_id from sys.objects where [name] = '{obj}' AND [type_desc] = '{obj_ref$type}') AND [name] IS NOT NULL"), - close_conn = close_conn) %>% + close_conn = close_conn + ) %>% janitor::clean_names() %>% - select(name) %>% - rename(col_name = name) %>% - mutate(index = "yes") - + dplyr::select(name) %>% + dplyr::rename(col_name = name) %>% + dplyr::mutate(index = "yes") } else if (self$server_type == "mysql") { - - indexes <- self$get(glue("SHOW INDEX FROM {obj_full_name}"), - close_conn = close_conn) %>% + indexes <- self$get(glue::glue("SHOW INDEX FROM {obj_full_name}"), + close_conn = close_conn + ) %>% janitor::clean_names() %>% - select(column_name) %>% - rename(col_name = column_name) %>% - mutate(index = "yes") + dplyr::select(column_name) %>% + dplyr::rename(col_name = column_name) %>% + dplyr::mutate(index = "yes") } # add index details - obj_MD <- left_join(obj_MD, indexes, by = "col_name") + obj_MD <- dplyr::left_join(obj_MD, indexes, by = "col_name") # add further details if (details == "TRUE") { - # record further info on fields obj_details <- data.frame() # get first date time field if date field not provided if (is.null(date_field)) { date_field_null_calc <- obj_MD %>% - filter(data_type %in% c("date", "datetime")) %>% - pull(col_name) + stats::filter(data_type %in% c("date", "datetime")) %>% + dplyr::pull(col_name) if (length(date_field_null_calc) > 0) { date_field_null_calc <- date_field_null_calc[1] } else { @@ -920,52 +921,53 @@ sql_server <- R6Class("sql_server", public = list( } # create temp table from main dataset - temp_table <- glue("meta_temp_{gsub('[[:punct:] ]+','', now())}") + temp_table <- glue::glue("meta_temp_{gsub('[[:punct:] ]+','', now())}") if (self$server_type == "mysql") { + temp_table <- glue::glue("{database}.{temp_table}") - temp_table <- glue("{database}.{temp_table}") - - self$run(glue("CREATE TEMPORARY TABLE {temp_table} + self$run(glue::glue("CREATE TEMPORARY TABLE {temp_table} SELECT {top_n_rows} * FROM {obj_full_name} {date_filter} {order_by}"), - close_conn = FALSE) - + close_conn = FALSE + ) } else if (self$server_type == "mssql") { - - temp_table <- glue("#{temp_table}") + temp_table <- 
glue::glue("#{temp_table}") self$connect() DBI::dbExecute(self$conn, - glue("SELECT {top_n_rows} * + glue::glue("SELECT {top_n_rows} * INTO {temp_table} FROM {obj_full_name} {date_filter} {order_by}"), - immediate = TRUE) + immediate = TRUE + ) } # number of rows - n_rows <- self$get(glue("SELECT count(*) as n + n_rows <- self$get(glue::glue("SELECT count(*) as n FROM {temp_table}"), - close_conn = FALSE) %>% - pull(n) + close_conn = FALSE + ) %>% + dplyr::pull(n) # set progress bar i <- 0 - pb <- txtProgressBar(min = i, max = length(fields), initial = 0) + pb <- utils::txtProgressBar(min = i, max = length(fields), initial = 0) # loop through fields and get relevant details for (field in fields) { + # field <- fields[1] - #field <- fields[1] - - tidy_field <- case_when(self$server_type == "mssql" ~ glue("[{field}]"), - self$server_type == "mysql" ~ glue("`{field}`")) + tidy_field <- dplyr::case_when( + self$server_type == "mssql" ~ glue::glue("[{field}]"), + self$server_type == "mysql" ~ glue::glue("`{field}`") + ) # percent complete - proportion_complete <- self$get(glue("SELECT count(*) as n, completed + proportion_complete <- self$get(glue::glue("SELECT count(*) as n, completed from ( SELECT case when {tidy_field} is null then 'missing' ELSE 'complete' end as completed @@ -973,28 +975,34 @@ sql_server <- R6Class("sql_server", public = list( ) as b GROUP BY completed "), - close_conn = FALSE) %>% + close_conn = FALSE + ) %>% tidyr::complete(completed = c("missing", "complete")) %>% - mutate(n = as.integer(n), - n = case_when(is.na(n) ~ 0, - TRUE ~ n)) %>% + dplyr::mutate( + n = as.integer(n), + n = dplyr::case_when( + is.na(n) ~ 0, + TRUE ~ n + ) + ) %>% tidyr::pivot_wider(values_from = "n", names_from = "completed") %>% - mutate(proportion_complete = complete/(missing+complete)) %>% - pull(proportion_complete) + dplyr::mutate(proportion_complete = complete / (missing + complete)) %>% + dplyr::pull(proportion_complete) # number of unique values - n_unique_vals <- self$get(glue("SELECT count(*) as n + n_unique_vals <- self$get(glue::glue("SELECT count(*) as n from ( SELECT distinct {tidy_field} FROM {temp_table} WHERE {tidy_field} IS NOT NULL ) as b "), - close_conn = FALSE) %>% - pull(n) + close_conn = FALSE + ) %>% + dplyr::pull(n) # calculate proportion of completed values are unique - prop_completed_vals_unique <- n_unique_vals/(n_rows*proportion_complete) + prop_completed_vals_unique <- n_unique_vals / (n_rows * proportion_complete) prop_completed_vals_unique <- round(prop_completed_vals_unique, 3) # tidy perc_complete @@ -1002,33 +1010,38 @@ sql_server <- R6Class("sql_server", public = list( # if have a date field, get the date of the last non-null record if (!is.null(date_field_null_calc)) { - date_of_last_non_null <- self$get(glue("SELECT max({date_field_null_calc}) as max_date + date_of_last_non_null <- self$get(glue::glue("SELECT max({date_field_null_calc}) as max_date FROM {temp_table} WHERE {tidy_field} IS NOT NULL"), - close_conn = FALSE) %>% - pull(max_date) %>% + close_conn = FALSE + ) %>% + dplyr::pull(max_date) %>% paste0(., " (from field [", date_field_null_calc, "])") } else { date_of_last_non_null <- NA_character_ } # add to data frame - obj_details <- rbind(obj_details, - data.frame(col_name = field, - prop_complete = prop_complete, - n_unique_vals = n_unique_vals, - prop_completed_vals_unique = prop_completed_vals_unique, - date_last_non_null_value = date_of_last_non_null, - n_rows = n_rows)) + obj_details <- rbind( + obj_details, + data.frame( + col_name = field, + 
prop_complete = prop_complete, + n_unique_vals = n_unique_vals, + prop_completed_vals_unique = prop_completed_vals_unique, + date_last_non_null_value = date_of_last_non_null, + n_rows = n_rows + ) + ) # update progress bar i <- i + 1 - setTxtProgressBar(pb, i) + utils::setTxtProgressBar(pb, i) } # join to main meta data and add fields obj_MD <- obj_MD %>% - left_join(obj_details, by = "col_name") + dplyr::left_join(obj_details, by = "col_name") # drop temporary table if exists if (self$server_type == "mysql") { @@ -1045,10 +1058,4 @@ sql_server <- R6Class("sql_server", public = list( # return data return(field_list) } - )) - - - - - diff --git a/R/utils.R b/R/utils.R index 22e6130..2a5206c 100644 --- a/R/utils.R +++ b/R/utils.R @@ -19,20 +19,20 @@ #' @export get_env_var <- function(var_name) { - # if running on server, use Sys.getenv() if (Sys.getenv()["USERNAME"][[1]] == "rstudio-connect") { - return(Sys.getenv(var_name)) # if running locally, try keyring first } else { - # try keyring - kr <- try({ - var <- keyring::key_get(var_name) - TRUE }, - silent = TRUE) + kr <- try( + { + var <- keyring::key_get(var_name) + TRUE + }, + silent = TRUE + ) # if keyring worked, return if (kr[1] == TRUE) { @@ -40,30 +40,30 @@ get_env_var <- function(var_name) { # otherwise, try Sys.getenv } else { - - sys_env <- try({ - var <- Sys.getenv(var_name) - TRUE}, - silent = TRUE) + sys_env <- try( + { + var <- Sys.getenv(var_name) + TRUE + }, + silent = TRUE + ) # if Sys.gentenv worked, give warning and return if (sys_env[1] == TRUE & var != "") { - - warning(glue( + warning(glue::glue( "variable {var_name} found in Renviron file but not windows credentials. ", "Update your process to use keyring and not .Renviron file by doing the ", "following: \n", " 1. Run: keyring::key_set(service = '{var_name}') \n", " 2. Enter the variable value in the text box \n", " 3. Delete the variable {var_name} from your .Renviron file \n" - ) - ) + )) return(var) # otherwise, variable not found } else { - stop(glue("variable {var_name} not found using keyring or Sys.getenv()")) + stop(glue::glue("variable {var_name} not found using keyring or Sys.getenv()")) } } } diff --git a/R/zzz.R b/R/zzz.R index 50c8894..730713b 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,3 +1,3 @@ -#.onAttach <- function(libname, pkgname) { +# .onAttach <- function(libname, pkgname) { # packageStartupMessage("WARNING: This package is still in development, use at own risk.") -#} +# } diff --git a/README.Rmd b/README.Rmd index a74cfd5..f5882ac 100644 --- a/README.Rmd +++ b/README.Rmd @@ -8,7 +8,6 @@ knitr::opts_chunk$set( comment = "#>", fig.path = "tools/README-" ) - ``` # SQL R Tools @@ -69,7 +68,6 @@ again* To set up GitHub PAT in the Git Credential Store & install package: ```{r, install, eval=FALSE} - # Install required packages install.packages("usethis") install.packages("gitcreds") @@ -77,29 +75,28 @@ install.packages("remotes") install.packages("pak") # set up GitHub PAT. Note: if a GitHub PAT has already been saved, the following -# will give an option to replace the credentials. Run the function, select the -# relevant option and update the PAT with the one just copied from GitHub. +# will give an option to replace the credentials. Run the function, select the +# relevant option and update the PAT with the one just copied from GitHub. gitcreds::gitcreds_set() # Check has been set properly, the below should return a list with details of: -# protocol, host, username and password. 
None of these should be NA, the -# password should have "<-- hidden -->". +# protocol, host, username and password. None of these should be NA, the +# password should have "<-- hidden -->". gitcreds::gitcreds_get() # If the above doesn't look correct: -# 1. Make sure you don't have GITHUB_PAT set in your Renviron file. If you do, +# 1. Make sure you don't have GITHUB_PAT set in your Renviron file. If you do, # delete it, restart R, and check it's no longer in use by running -# Sys.getenv("GITHUB_PAT"), this should return "". -# 2. Delete your GITHUB_PAT on GitHub at https://github.com/settings/tokens and -# re-create the PAT using the instructions above (this may have failed if -# you already had a GitHub PAT set up). +# Sys.getenv("GITHUB_PAT"), this should return "". +# 2. Delete your GITHUB_PAT on GitHub at https://github.com/settings/tokens and +# re-create the PAT using the instructions above (this may have failed if +# you already had a GitHub PAT set up). -# If the output of gitcreds::gitcreds_get() still looks incorrect, contact the -# author of this package. +# If the output of gitcreds::gitcreds_get() still looks incorrect, contact the +# author of this package. # install the package (see notes below if this fails) pak::pkg_install("Notts-HC/SQLRtools") - ``` If you have a `GITHUB_PAT` set up in the Renviron file **delete it**. The @@ -128,8 +125,6 @@ Sys.setenv(GITHUB_PAT = gitcreds::gitcreds_get(use_cache = FALSE)$password) # install using remotes remotes::install_github("Notts-HC/SQLRtools") - - ``` ## About @@ -154,7 +149,6 @@ unloadNamespace("SQLRtools") ```{r, error = TRUE} covr::package_coverage() - ``` ## Using the package @@ -167,23 +161,25 @@ server. The below gives examples of connecting to a MSSQL server and MYSQL server: ```{r sql connection, eval=FALSE} - library(SQLRtools) # set connection to MS SQL server -ms_sql_server <- sql_server$new(driver = "SQL Server", - server = get_env_var("MSSQL_SERVER"), - database = get_env_var("MSSQL_DATABASE")) +ms_sql_server <- sql_server$new( + driver = "SQL Server", + server = get_env_var("MSSQL_SERVER"), + database = get_env_var("MSSQL_DATABASE") +) # set connect to MySQL server -my_sql_server <- sql_server$new(driver = "MySQL ODBC 8.0 Unicode Driver", - server = get_env_var("HOST_NAME"), - database = get_env_var("MYSQL_DB"), - port = get_env_var("MYSQL_PORT"), - uid = get_env_var("MYSQL_USER"), - pwd = get_env_var("MYSQL_PASSWORD")) - +my_sql_server <- sql_server$new( + driver = "MySQL ODBC 8.0 Unicode Driver", + server = get_env_var("HOST_NAME"), + database = get_env_var("MYSQL_DB"), + port = get_env_var("MYSQL_PORT"), + uid = get_env_var("MYSQL_USER"), + pwd = get_env_var("MYSQL_PASSWORD") +) ``` The methods listed above can now be used with these connections to: @@ -191,31 +187,31 @@ The methods listed above can now be used with these connections to: ##### Upload & query data ```{r basic sql examples, eval=FALSE} - # create a basic temp table -my_data <- data.frame(a = c("a", "b", "c"), - b = 1:3) +my_data <- data.frame( + a = c("a", "b", "c"), + b = 1:3 +) # upload as a temporary table - note that generally close_conn should be TRUE -# (which is the default setting), but it needs to be FALSE here so the +# (which is the default setting), but it needs to be FALSE here so the # connection isn't shut after uploading the temporary table, as this would drop -# the temporary table straight away. -ms_sql_server$upload(data = my_data, - table_name = "#SQLRtools_example", - close_conn = FALSE) +# the temporary table straight away. 
+ms_sql_server$upload( + data = my_data, + table_name = "#SQLRtools_example", + close_conn = FALSE +) # get the data -sql_data <- ms_sql_server$get("SELECT * +sql_data <- ms_sql_server$get("SELECT * FROM #SQLRtools_example") - - ``` ##### Explore databases & their objects ```{r details of objects on server, eval=FALSE} - # get databases in server ms_sql_dbs <- ms_sql_server$databases() @@ -226,50 +222,57 @@ my_sql_db_tables <- ms_sql_server$db_tables(database = ms_sql_dbs$name[20]) my_sql_db_views <- ms_sql_server$db_views(database = ms_sql_dbs$name[20]) # get meta data of table in given tables -my_sql_meta_data <- ms_sql_server$meta_data(database = ms_sql_dbs$name[20], - objects = my_sql_db_views$view_name[1:5], - details = FALSE) +my_sql_meta_data <- ms_sql_server$meta_data( + database = ms_sql_dbs$name[20], + objects = my_sql_db_views$view_name[1:5], + details = FALSE +) names(my_sql_meta_data)[1] View(my_sql_meta_data[1][[1]]) - ``` ##### Avoid the "Invalid Descriptor Index" issue ```{r invalid descriptor index example, eval=FALSE} - # create a basic temp table -my_data <- data.frame(a = c("a", "b", "c"), - b = 1:3) +my_data <- data.frame( + a = c("a", "b", "c"), + b = 1:3 +) -# upload as a temporary table -ms_sql_server$upload(data = my_data, - table_name = "#SQLRtools_example", - close_conn = FALSE) +# upload as a temporary table +ms_sql_server$upload( + data = my_data, + table_name = "#SQLRtools_example", + close_conn = FALSE +) # change the data type of column a to varchar(max) ms_sql_server$run("ALTER TABLE #SQLRtools_example ALTER COLUMN a varchar(max);", - close_conn = FALSE) + close_conn = FALSE +) # try extracting the data ms_sql_server$get("SELECT * FROM #SQLRtools_example", - close_conn = FALSE) + close_conn = FALSE +) # use method order_object_fields to avoid issue # get all the fields ordered by data type -table_fields <- ms_sql_server$order_object_fields(object = "#SQLRtools_example", - close_conn = FALSE) +table_fields <- ms_sql_server$order_object_fields( + object = "#SQLRtools_example", + close_conn = FALSE +) # now extract all the data -ms_sql_server$get(glue("SELECT {table_fields} +ms_sql_server$get(glue("SELECT {table_fields} FROM #SQLRtools_example")) -# note that the above table only has 2 fields. Where your trying to run -# select * on a table with a lot of fields and multiple varchar(max) or -# geometry feilds, the above makes life a lot easier (notwithstanding the fact -# it's good practice to avoid 'select *' in SQL where possible). - +# note that the above table only has 2 fields. Where you're trying to run +# select * on a table with a lot of fields and multiple varchar(max) or +# geometry fields, the above makes life a lot easier (notwithstanding the fact +# it's good practice to avoid 'select *' in SQL where possible).
``` diff --git a/SQLRtools.Rproj b/SQLRtools.Rproj index 21a4da0..09a096e 100644 --- a/SQLRtools.Rproj +++ b/SQLRtools.Rproj @@ -1,4 +1,5 @@ Version: 1.0 +ProjectId: db9d44e7-f57d-4157-a5ba-a48925b20d99 RestoreWorkspace: Default SaveWorkspace: Default diff --git a/tests/testthat/test-README.R b/tests/testthat/test-README.R index 70b38e4..8e80d98 100644 --- a/tests/testthat/test-README.R +++ b/tests/testthat/test-README.R @@ -1,4 +1,3 @@ - # Test README example code #------------------------------------------------------------------------------- @@ -8,32 +7,39 @@ # Description: check example code in README works testthat::test_that("README", { - # set connection to MS SQL server - ms_sql_server <- sql_server$new(driver = "SQL Server", - server = get_env_var("MSSQL_SERVER"), - database = get_env_var("MSSQL_DATABASE")) + ms_sql_server <- sql_server$new( + driver = "SQL Server", + server = get_env_var("MSSQL_SERVER"), + database = get_env_var("MSSQL_DATABASE") + ) # set connect to MySQL server - my_sql_server <- sql_server$new(driver = "MySQL ODBC 8.0 Unicode Driver", - server = get_env_var("HOST_NAME"), - database = get_env_var("MYSQL_DB"), - port = get_env_var("MYSQL_PORT"), - uid = get_env_var("MYSQL_USER"), - pwd = get_env_var("MYSQL_PASSWORD")) + my_sql_server <- sql_server$new( + driver = "MySQL ODBC 8.0 Unicode Driver", + server = get_env_var("HOST_NAME"), + database = get_env_var("MYSQL_DB"), + port = get_env_var("MYSQL_PORT"), + uid = get_env_var("MYSQL_USER"), + pwd = get_env_var("MYSQL_PASSWORD") + ) # create a basic temp table - my_data <- data.frame(a = c("a", "b", "c"), - b = 1:3) + my_data <- data.frame( + a = c("a", "b", "c"), + b = 1:3 + ) # upload as a temporary table - note that generally close_conn should be TRUE # (which is the default setting), but it needs to be FALSE here so the # connection isn't shut after uploading the temporary table, as this would drop # the temporary table straight away. 
- ms_sql_server$upload(data = my_data, - table_name = "#SQLRtools_example", - close_conn = FALSE) + ms_sql_server$upload( + data = my_data, + table_name = "#SQLRtools_example", + close_conn = FALSE + ) # get the data sql_data <- ms_sql_server$get("SELECT * @@ -54,51 +60,66 @@ testthat::test_that("README", { my_sql_db_views <- ms_sql_server$db_views(database = ms_sql_dbs$name[20]) # get meta data of table in given tables - my_sql_meta_data <- ms_sql_server$meta_data(database = ms_sql_dbs$name[20], - objects = my_sql_db_views$view_name[1:5], - details = FALSE) + my_sql_meta_data <- ms_sql_server$meta_data( + database = ms_sql_dbs$name[20], + objects = my_sql_db_views$view_name[1:5], + details = FALSE + ) - testthat::expect_equal(names(my_sql_meta_data), - my_sql_db_views$view_name[1:5]) + testthat::expect_equal( + names(my_sql_meta_data), + my_sql_db_views$view_name[1:5] + ) - testthat::expect_equal(ms_sql_server$database, - get_env_var("MSSQL_DATABASE")) + testthat::expect_equal( + ms_sql_server$database, + get_env_var("MSSQL_DATABASE") + ) # Avoid the "Invalid Descriptor Index" issue # create a basic temp table - my_data <- data.frame(a = c("a", "b", "c"), - b = 1:3) + my_data <- data.frame( + a = c("a", "b", "c"), + b = 1:3 + ) # upload as a temporary table - ms_sql_server$upload(data = my_data, - table_name = "#SQLRtools_example", - close_conn = FALSE) + ms_sql_server$upload( + data = my_data, + table_name = "#SQLRtools_example", + close_conn = FALSE + ) # change the data type of column a to varchar(max) ms_sql_server$run("ALTER TABLE #SQLRtools_example ALTER COLUMN a varchar(max);", - close_conn = FALSE) + close_conn = FALSE + ) # try extracting the data testthat::expect_error( suppressWarnings( ms_sql_server$get("SELECT * FROM #SQLRtools_example", - close_conn = FALSE) + close_conn = FALSE + ) ) ) # use method order_object_fields to avoid issue # get all the fields ordered by data type - table_fields <- ms_sql_server$order_object_fields(object = "#SQLRtools_example", - close_conn = FALSE) + table_fields <- ms_sql_server$order_object_fields( + object = "#SQLRtools_example", + close_conn = FALSE + ) # now extract all the data - tempdata <- ms_sql_server$get(glue("SELECT {table_fields} + tempdata <- ms_sql_server$get(glue::glue("SELECT {table_fields} FROM #SQLRtools_example", - close_conn = TRUE)) + close_conn = TRUE + )) testthat::expect_equal(my_data[c("b", "a")], tempdata) diff --git a/tests/testthat/test-mssql_server.R b/tests/testthat/test-mssql_server.R index d5a0c02..b54d25e 100644 --- a/tests/testthat/test-mssql_server.R +++ b/tests/testthat/test-mssql_server.R @@ -1,4 +1,3 @@ - # Test sql_server class with MSSQL server #------------------------------------------------------------------------------- @@ -9,9 +8,11 @@ # with a MS SQL server. 
# create mysql server object -mssql_serv <- sql_server$new(driver = "SQL Server", - server = get_env_var("MSSQL_SERVER"), - database = get_env_var("MSSQL_DATABASE")) +mssql_serv <- sql_server$new( + driver = "SQL Server", + server = get_env_var("MSSQL_SERVER"), + database = get_env_var("MSSQL_DATABASE") +) # set name of test table test_table_name <- "#SQLRtools_test_table" @@ -19,39 +20,46 @@ test_table_name <- "#SQLRtools_test_table" suppressMessages(mssql_serv$drop_table(test_table_name)) # create dummy data to be uploaded -test_data <- data.frame(Int_field = 1:200, - char_field_1 = stri_rand_strings(200, sample(5:11, 5, replace = TRUE), '[a-zA-Z]'), - char_field_2 = stri_rand_strings(200, sample(5:11, 5, replace = TRUE), '[a-zA-Z]'), - date_field = sample(seq(as.Date('2018/01/01'), as.Date('2024/01/01'), by = "day"), 200), - date_time_field = sample(seq(as_datetime('2018-01-01 00:00:00'), - as_datetime('2024-01-01 00:00:00'), - by = "min"), 200)) +test_data <- data.frame( + Int_field = 1:200, + char_field_1 = stringi::stri_rand_strings(200, sample(5:11, 5, replace = TRUE), "[a-zA-Z]"), + char_field_2 = stringi::stri_rand_strings(200, sample(5:11, 5, replace = TRUE), "[a-zA-Z]"), + date_field = sample(seq(as.Date("2018/01/01"), as.Date("2024/01/01"), by = "day"), 200), + date_time_field = sample(seq(lubridate::as_datetime("2018-01-01 00:00:00"), + lubridate::as_datetime("2024-01-01 00:00:00"), + by = "min" + ), 200) +) # 1. uploading data ------------------------------------------------------------ testthat::test_that("mssql_server - upload method", { - # upload table in one go - upload_outcome <- mssql_serv$upload(data = test_data, - table_name = test_table_name, - close_conn = FALSE) + upload_outcome <- mssql_serv$upload( + data = test_data, + table_name = test_table_name, + close_conn = FALSE + ) # table exists table_exists <- mssql_serv$table_exists(test_table_name, - close_conn = FALSE) + close_conn = FALSE + ) # n rows - table_rows <- mssql_serv$get(glue("SELECT count(*) as n + table_rows <- mssql_serv$get(glue::glue("SELECT count(*) as n from {test_table_name}"), - close_conn = FALSE) %>% - pull(n) %>% + close_conn = FALSE + ) %>% + dplyr::pull(n) %>% as.integer() # field names - table_fields <- mssql_serv$get(glue("SELECT TOP 0 * + table_fields <- mssql_serv$get(glue::glue("SELECT TOP 0 * from {test_table_name}"), - close_conn = FALSE) + close_conn = FALSE + ) # tests @@ -62,25 +70,30 @@ testthat::test_that("mssql_server - upload method", { # test errors if try further batch upload without specifying append testthat::expect_error( - mssql_serv$upload(data = test_data, - table_name = test_table_name, - schema_name = Sys.getenv("MYSQL_DB"), - batch_upload = 100, - close_conn = FALSE) + mssql_serv$upload( + data = test_data, + table_name = test_table_name, + schema_name = Sys.getenv("MYSQL_DB"), + batch_upload = 100, + close_conn = FALSE + ) ) # append to the data using batch upload of 100 rows at a time - batch_upload_outcome <- mssql_serv$upload(data = test_data, - table_name = test_table_name, - batch_upload = 50, - append_data = TRUE, - close_conn = FALSE) + batch_upload_outcome <- mssql_serv$upload( + data = test_data, + table_name = test_table_name, + batch_upload = 50, + append_data = TRUE, + close_conn = FALSE + ) # n rows - table_rows <- mssql_serv$get(glue("SELECT count(*) as n + table_rows <- mssql_serv$get(glue::glue("SELECT count(*) as n from {test_table_name}"), - close_conn = FALSE) %>% - pull(n) %>% + close_conn = FALSE + ) %>% + dplyr::pull(n) %>% as.integer() # 
tests @@ -91,120 +104,147 @@ testthat::test_that("mssql_server - upload method", { # 2. Get data ------------------------------------------------------------------ testthat::test_that("mssql_server - get data", { - # extract the data just upload # note that date_field is getting turned into char as read into R (meta data # check below will confirm if being held as a date in SQL) - db_data <- mssql_serv$get(glue("SELECT * FROM {test_table_name}"), - close_conn = FALSE) %>% - mutate(date_field = as.Date(date_field)) + db_data <- mssql_serv$get(glue::glue("SELECT * FROM {test_table_name}"), + close_conn = FALSE + ) %>% + dplyr::mutate(date_field = as.Date(date_field)) # test matches the data uploaded testthat::expect_equal(rbind(test_data, test_data), db_data) - }) # 3. Run & order by fields methods --------------------------------------------- testthat::test_that("mssql_server - run & order by fields", { - # change the data type of column a to varchar(max) - mssql_serv$run(glue("ALTER TABLE {test_table_name} + mssql_serv$run(glue::glue("ALTER TABLE {test_table_name} ALTER COLUMN char_field_1 varchar(max);"), - close_conn = FALSE) + close_conn = FALSE + ) # now expect error extracting the data testthat::expect_error( - suppressWarnings(mssql_serv$get(glue("SELECT * FROM {test_table_name}"), - close_conn = FALSE)) + suppressWarnings(mssql_serv$get(glue::glue("SELECT * FROM {test_table_name}"), + close_conn = FALSE + )) ) # use method order_object_fields to avoid issue # get all the fields ordered by data type - table_fields <- mssql_serv$order_object_fields(object = test_table_name, - close_conn = FALSE) + table_fields <- mssql_serv$order_object_fields( + object = test_table_name, + close_conn = FALSE + ) # now extract all the data - extract_ordered_feilds <- mssql_serv$get(glue("SELECT {table_fields} + extract_ordered_feilds <- mssql_serv$get(glue::glue("SELECT {table_fields} FROM {test_table_name}"), - close_conn = FALSE) %>% - mutate(date_field = as.Date(date_field)) + close_conn = FALSE + ) %>% + dplyr::mutate(date_field = as.Date(date_field)) # tests - testthat::expect_equal(colnames(extract_ordered_feilds), - c("Int_field", "char_field_2", "date_field", - "date_time_field", "char_field_1")) - testthat::expect_equal(rbind(test_data, test_data), - select(extract_ordered_feilds, - Int_field, char_field_1, char_field_2, - date_field, date_time_field)) - + testthat::expect_equal( + colnames(extract_ordered_feilds), + c( + "Int_field", "char_field_2", "date_field", + "date_time_field", "char_field_1" + ) + ) + testthat::expect_equal( + rbind(test_data, test_data), + dplyr::select( + extract_ordered_feilds, + Int_field, char_field_1, char_field_2, + date_field, date_time_field + ) + ) }) # 3. 
Meta data ----------------------------------------------------------------- testthat::test_that("mssql_server - meta data", { - # extract the data just upload db_tables <- mssql_serv$db_tables(close_conn = FALSE) # check has return the test table testthat::expect_true(mssql_serv$temp_table_name(test_table_name) - %in% db_tables$table_name) + %in% db_tables$table_name) # get the fields of the table - table_fields <- mssql_serv$object_fields(objects = test_table_name, - close_conn = FALSE) + table_fields <- mssql_serv$object_fields( + objects = test_table_name, + close_conn = FALSE + ) # check returned the same field names - testthat::expect_equal(sort(table_fields[[1]]), - sort(colnames(test_data))) + testthat::expect_equal( + sort(table_fields[[1]]), + sort(colnames(test_data)) + ) # get meta data - meta_data <- mssql_serv$meta_data(objects = test_table_name, - detail = TRUE, - close_conn = FALSE)[[1]] + meta_data <- mssql_serv$meta_data( + objects = test_table_name, + detail = TRUE, + close_conn = FALSE + )[[1]] # checks testthat::expect_equal(sort(table_fields[[1]]), sort(meta_data$col_name)) - testthat::expect_equal(c("date", "datetime", "int", "varchar", "varchar" ), - sort(meta_data$data_type)) + testthat::expect_equal( + c("date", "datetime", "int", "varchar", "varchar"), + sort(meta_data$data_type) + ) testthat::expect_equal("yes", unique(meta_data$nullable)) - testthat::expect_equal(colnames(meta_data), - c("col_name", "data_type", "col_type", "nullable", - "num_precision", "datetime_precision", "index", - "prop_complete", "n_unique_vals", "prop_completed_vals_unique", - "date_last_non_null_value", "n_rows")) + testthat::expect_equal( + colnames(meta_data), + c( + "col_name", "data_type", "col_type", "nullable", + "num_precision", "datetime_precision", "index", + "prop_complete", "n_unique_vals", "prop_completed_vals_unique", + "date_last_non_null_value", "n_rows" + ) + ) testthat::expect_true(grepl("(from field \\[date_field\\])", meta_data$date_last_non_null_value[1])) # filter the data by date in meta data date_to_filter <- max(test_data$date_field) - 150 - meta_data2 <- mssql_serv$meta_data(objects = test_table_name, - detail = TRUE, - row_limit = 1000, - date_filter = date_to_filter, - date_field = "date_field", - close_conn = FALSE)[[1]] - testthat::expect_equal(meta_data2$n_rows[1], nrow(filter(test_data, date_field >= date_to_filter))*2) + meta_data2 <- mssql_serv$meta_data( + objects = test_table_name, + detail = TRUE, + row_limit = 1000, + date_filter = date_to_filter, + date_field = "date_field", + close_conn = FALSE + )[[1]] + testthat::expect_equal(meta_data2$n_rows[1], nrow(dplyr::filter(test_data, date_field >= date_to_filter)) * 2) testthat::expect_true(grepl("(from field \\[date_field\\])", meta_data2$date_last_non_null_value[1])) # drop the date field and run meta again - mssql_serv$run(glue("ALTER TABLE {test_table_name} + mssql_serv$run(glue::glue("ALTER TABLE {test_table_name} DROP COLUMN date_field;"), - close_conn = FALSE) + close_conn = FALSE + ) # drop the date field and run meta again - mssql_serv$run(glue("ALTER TABLE {test_table_name} + mssql_serv$run(glue::glue("ALTER TABLE {test_table_name} DROP COLUMN date_time_field;"), - close_conn = FALSE) + close_conn = FALSE + ) # get meta data - meta_data3 <- mssql_serv$meta_data(objects = test_table_name, - detail = TRUE, - close_conn = FALSE)[[1]] + meta_data3 <- mssql_serv$meta_data( + objects = test_table_name, + detail = TRUE, + close_conn = FALSE + )[[1]]
testthat::expect_true(is.na(meta_data3$date_last_non_null_value[1])) }) @@ -212,19 +252,16 @@ testthat::test_that("mssql_server - meta data", { # 4. Drop table ---------------------------------------------------------------- testthat::test_that("mssql_server - drop table", { - # drop table mssql_serv$drop_table(test_table_name, close_conn = FALSE) # check if it exists testthat::expect_equal("no", mssql_serv$table_exists(test_table_name)) - }) # 5. drop connection ----------------------------------------------------------- testthat::test_that("mssql_server - close connection", { - # drop table mssql_serv$close_connection() @@ -233,24 +270,9 @@ testthat::test_that("mssql_server - close connection", { # reconnect & check temp table no longer exists testthat::expect_error( - mssql_serv$get(glue("SELECT * FROM {test_table_name}")) + mssql_serv$get(glue::glue("SELECT * FROM {test_table_name}")) ) # close connection mssql_serv$close_connection() - }) - - - - - - - - - - - - - - diff --git a/tests/testthat/test-mysql_server.R b/tests/testthat/test-mysql_server.R index 2aa330a..eb7ed2a 100644 --- a/tests/testthat/test-mysql_server.R +++ b/tests/testthat/test-mysql_server.R @@ -1,4 +1,3 @@ - # Test sql_server class with MySQL server #------------------------------------------------------------------------------- @@ -9,12 +8,14 @@ # with a MYSQL server. # create mysql server object -mysql_serv <- sql_server$new(driver = "MySQL ODBC 8.0 Unicode Driver", - server = get_env_var("HOST_NAME"), - database = get_env_var("MYSQL_DB"), - port = get_env_var("MYSQL_PORT"), - uid = get_env_var("MYSQL_USER"), - pwd = get_env_var("MYSQL_PASSWORD")) +mysql_serv <- sql_server$new( + driver = "MySQL ODBC 8.0 Unicode Driver", + server = get_env_var("HOST_NAME"), + database = get_env_var("MYSQL_DB"), + port = get_env_var("MYSQL_PORT"), + uid = get_env_var("MYSQL_USER"), + pwd = get_env_var("MYSQL_PASSWORD") +) # set name of test table test_table_name <- "SQLRtools_test_table" @@ -23,35 +24,39 @@ test_table_name <- "SQLRtools_test_table" suppressMessages(mysql_serv$drop_table(test_table_name)) # create dummy data to be uploaded -test_data <- data.frame(Int_field = 1:200, - char_field_1 = stri_rand_strings(200, sample(5:11, 5, replace = TRUE), '[a-zA-Z]'), - char_field_2 = stri_rand_strings(200, sample(5:11, 5, replace = TRUE), '[a-zA-Z]'), - date_field = sample(seq(as.Date('2018/01/01'), as.Date('2024/01/01'), by = "day"), 200), - date_time_field = sample(seq(as_datetime('2018-01-01 00:00:00'), - as_datetime('2024-01-01 00:00:00'), - by = "min"), 200)) +test_data <- data.frame( + Int_field = 1:200, + char_field_1 = stringi::stri_rand_strings(200, sample(5:11, 5, replace = TRUE), "[a-zA-Z]"), + char_field_2 = stringi::stri_rand_strings(200, sample(5:11, 5, replace = TRUE), "[a-zA-Z]"), + date_field = sample(seq(as.Date("2018/01/01"), as.Date("2024/01/01"), by = "day"), 200), + date_time_field = sample(seq(lubridate::as_datetime("2018-01-01 00:00:00"), + lubridate::as_datetime("2024-01-01 00:00:00"), + by = "min" + ), 200) +) # 1. 
uploading data ------------------------------------------------------------ testthat::test_that("mysql_server - upload method", { - # upload table in one go - upload_outcome <- mysql_serv$upload(data = test_data, - table_name = test_table_name, - schema_name = get_env_var("MYSQL_DB")) + upload_outcome <- mysql_serv$upload( + data = test_data, + table_name = test_table_name, + schema_name = get_env_var("MYSQL_DB") + ) # table exists table_exists <- mysql_serv$table_exists(test_table_name) # n rows - table_rows <- mysql_serv$get(glue("SELECT count(*) as n + table_rows <- mysql_serv$get(glue::glue("SELECT count(*) as n from {test_table_name}")) %>% - pull(n) %>% + dplyr::pull(n) %>% as.integer() # field names - table_fields <- mysql_serv$get(glue("SELECT * + table_fields <- mysql_serv$get(glue::glue("SELECT * from {test_table_name} LIMIT 0")) @@ -64,23 +69,27 @@ testthat::test_that("mysql_server - upload method", { # test errors if try further batch upload without specifying append testthat::expect_error( - mysql_serv$upload(data = test_data, - table_name = test_table_name, - schema_name = get_env_var("MYSQL_DB"), - batch_upload = 100) + mysql_serv$upload( + data = test_data, + table_name = test_table_name, + schema_name = get_env_var("MYSQL_DB"), + batch_upload = 100 ) + ) # append to the data using batch upload of 100 rows at a time - batch_upload_outcome <- mysql_serv$upload(data = test_data, - table_name = test_table_name, - schema_name = get_env_var("MYSQL_DB"), - batch_upload = 10, - append_data = TRUE) + batch_upload_outcome <- mysql_serv$upload( + data = test_data, + table_name = test_table_name, + schema_name = get_env_var("MYSQL_DB"), + batch_upload = 10, + append_data = TRUE + ) # n rows - table_rows <- mysql_serv$get(glue("SELECT count(*) as n + table_rows <- mysql_serv$get(glue::glue("SELECT count(*) as n from {test_table_name}")) %>% - pull(n) %>% + dplyr::pull(n) %>% as.integer() # tests @@ -91,19 +100,16 @@ testthat::test_that("mysql_server - upload method", { # 2. Getting data -------------------------------------------------------------- testthat::test_that("mysql_server - get method", { - # extract the data just upload - db_data <- mysql_serv$get(glue("SELECT * FROM {test_table_name}")) + db_data <- mysql_serv$get(glue::glue("SELECT * FROM {test_table_name}")) # test matches the data uploaded testthat::expect_equal(rbind(test_data, test_data), db_data) - }) # 3. 
Meta data ----------------------------------------------------------------- testthat::test_that("mysql_server - meta data", { - # extract the data just upload db_tables <- mysql_serv$db_tables() @@ -114,28 +120,38 @@ testthat::test_that("mysql_server - meta data", { table_fields <- mysql_serv$object_fields(objects = test_table_name) # check returned the same field names - testthat::expect_equal(sort(table_fields[[1]]), - sort(colnames(test_data))) + testthat::expect_equal( + sort(table_fields[[1]]), + sort(colnames(test_data)) + ) # get meta data original_meta <- mysql_serv$meta_data(objects = test_table_name)[[1]] # checks testthat::expect_equal(sort(table_fields[[1]]), sort(original_meta$col_name)) - testthat::expect_equal(c("date", "datetime", "int", "text", "text" ), - sort(original_meta$data_type)) + testthat::expect_equal( + c("date", "datetime", "int", "text", "text"), + sort(original_meta$data_type) + ) testthat::expect_equal("yes", unique(original_meta$nullable)) # get detailed meta data - detailed_meta <- mysql_serv$meta_data(objects = test_table_name, - details = TRUE)[[1]] + detailed_meta <- mysql_serv$meta_data( + objects = test_table_name, + details = TRUE + )[[1]] # checks on detailed meta - testthat::expect_equal(colnames(detailed_meta), - c("col_name", "data_type", "col_type", "nullable", - "num_precision", "datetime_precision", "index", - "prop_complete", "n_unique_vals", "prop_completed_vals_unique", - "date_last_non_null_value", "n_rows")) + testthat::expect_equal( + colnames(detailed_meta), + c( + "col_name", "data_type", "col_type", "nullable", + "num_precision", "datetime_precision", "index", + "prop_complete", "n_unique_vals", "prop_completed_vals_unique", + "date_last_non_null_value", "n_rows" + ) + ) testthat::expect_true(grepl("(from field \\[date_field\\])", detailed_meta$date_last_non_null_value[1])) # get the fields of all tables @@ -146,8 +162,10 @@ testthat::test_that("mysql_server - meta data", { all_views <- mysql_serv$db_views() # check have the same number of tables - testthat::expect_equal(length(all_table_fields), - nrow(all_tables) + nrow(all_views)) + testthat::expect_equal( + length(all_table_fields), + nrow(all_tables) + nrow(all_views) + ) # get all databases all_dbs <- mysql_serv$databases() @@ -160,51 +178,38 @@ testthat::test_that("mysql_server - meta data", { ) # drop the date field and run meta again - mysql_serv$run(glue("ALTER TABLE {test_table_name} + mysql_serv$run(glue::glue("ALTER TABLE {test_table_name} DROP COLUMN date_field;"), - close_conn = FALSE) + close_conn = FALSE + ) # get meta data - meta_data2 <- mysql_serv$meta_data(objects = test_table_name, - detail = TRUE, - close_conn = FALSE)[[1]] + meta_data2 <- mysql_serv$meta_data( + objects = test_table_name, + detail = TRUE, + close_conn = FALSE + )[[1]] testthat::expect_true(grepl("(from field \\[date_time_field\\])", meta_data2$date_last_non_null_value[1])) - }) # 4. Dropping table ------------------------------------------------------------ testthat::test_that("mysql_server - drop table", { - # drop table mysql_serv$drop_table(test_table_name) # check if it exists testthat::expect_equal("no", mysql_serv$table_exists(test_table_name)) - }) # 5. 
drop connection ----------------------------------------------------------- testthat::test_that("mysql_serv - close connection", { - # drop table mysql_serv$close_connection() # check if it exists testthat::expect_false(DBI::dbIsValid(mysql_serv$conn)) - }) - - - - - - - - - - - diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index c0fd02a..15f646c 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -1,4 +1,3 @@ - # Test util functions #------------------------------------------------------------------------------# @@ -8,7 +7,6 @@ # 1. get_env_var() ------------------------------------------------------------- testthat::test_that("utils - get_env_var", { - test_var_val <- "SQLRtools_get_env_var_TEST" # create test var in renviron @@ -21,8 +19,10 @@ testthat::test_that("utils - get_env_var", { ) # create var using keyring - keyring::key_set_with_value(service = "SQLRtools_TEST_VAR", - password = test_var_val) + keyring::key_set_with_value( + service = "SQLRtools_TEST_VAR", + password = test_var_val + ) # get the variable again, not expecting warning as should come from keyring kr_var <- get_env_var("SQLRtools_TEST_VAR") @@ -35,7 +35,4 @@ testthat::test_that("utils - get_env_var", { testthat::expect_error(get_env_var("SQLRtools_TEST_VAR")) testthat::expect_match(renv_var, test_var_val) testthat::expect_match(kr_var, test_var_val) - }) - -