Skip to content

Commit

Permalink
rename args fail_on_ to stop_on_; add stop_on_fail; add tests; closes #8
Browse files Browse the repository at this point in the history
  • Loading branch information
DavZim committed Oct 13, 2023
1 parent f0e718f commit 283eaf5
Show file tree
Hide file tree
Showing 9 changed files with 133 additions and 31 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: dataverifyr
Type: Package
Title: A Lightweight, Flexible, and Fast Data Validation Package that Can Handle All Sizes of Data
Version: 0.1.6.9002
Version: 0.1.6.9003
Authors@R: c(
person(given = "David",
family = "Zimmermann-Kollenda",
Expand Down
8 changes: 8 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@

# dataverifyr 0.1.7

* rename args `fail_on_X` to `stop_on_X`
* adds `stop_on_fail` to [`check_data()`], so that the examples using `read_custom()` make sense (eg in the Readme); thanks [FedericoComoglio](https://github.com/FedericoComoglio) for pointing it out!
* fix bug when dplyr backend is used + add tests; thanks [FedericoComoglio](https://github.com/FedericoComoglio) for reporting the bug!
* expose [`detect_backend()`] to allow user the check which backend is used

# dataverifyr 0.1.6

* allow multiline rules in yaml file
Expand Down
30 changes: 16 additions & 14 deletions R/check_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
#' [`arrow::arrow_table`], [`arrow::open_dataset`], or [`dplyr::tbl`] (SQL connection)
#' @param rules a list of [`rule`]s
#' @param xname optional, a name for the x variable (only used for errors)
#' @param fail_on_warn if the function should throw an error on a warning
#' @param fail_on_error if the function should throw an error on a failed rule
#' @param stop_on_fail when any of the rules fail, throw an error with stop
#' @param stop_on_warn when a warning is found in the code execution, throw an error with stop
#' @param stop_on_error when an error is found in the code execution, throw an error with stop
#'
#' @return a data.frame-like object with one row for each rule and its results
#' @export
Expand All @@ -22,7 +23,8 @@
#'
#' check_data(mtcars, rs)
check_data <- function(x, rules, xname = deparse(substitute(x)),
fail_on_warn = FALSE, fail_on_error = FALSE) {
stop_on_fail = FALSE, stop_on_warn = FALSE,
stop_on_error = FALSE) {

# if rules is a yaml file, read it in
if (length(rules) == 1 && is.character(rules)) rules <- read_rules(rules)
Expand All @@ -42,19 +44,19 @@ check_data <- function(x, rules, xname = deparse(substitute(x)),

res <- check_(x, rules, backend = backend)

# fails on warning and/or error
if (fail_on_warn && any(res$warn != "") ||
fail_on_error && any(res$error != "")) {
warn <- fail_on_warn && any(res$warn != "")
err <- fail_on_error && any(res$error != "")
# stops on fail, warning and/or error
fail <- stop_on_fail && any(res$fail != 0)
warn <- stop_on_warn && any(res$warn != "")
err <- stop_on_error && any(res$error != "")

txt <- sprintf("In dataset '%s' found %s%s%s",
xname,
if (warn) sprintf("%s warnings", sum(res$warn != "")),
if (warn && err) " and ",
if (err) sprintf("%s errors", sum(res$error != "")))
stop(txt)
if (fail || warn || err) {
tt <- paste(c(
if (fail) sprintf("%s rule fails", sum(res$fail != 0)),
if (warn) sprintf("%s warnings", sum(res$warn != "")),
if (err) sprintf("%s errors", sum(res$error != ""))
), collapse = ", ")

stop(sprintf("In dataset '%s' found %s", xname, tt))
}

res
Expand Down
7 changes: 6 additions & 1 deletion R/rule.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,12 @@
#' [arrow documentation](https://arrow.apache.org/docs/r/reference/acero.html#function-mappings)).
#' The expression can be given as a string as well.
#' @param name an optional name for the rule for reference
#' @param allow_na does the rule allow for NA values? default value is FALSE
#' @param allow_na does the rule allow for NA values in the data? default value is FALSE.
#' Note that when NAs are introduced in the expression, `allow_na` has no effect.
#' Eg when the rule `as.numeric(vs) %in% c(0, 1)` finds the values of `vs` as
#' `c("1", "A")`, the rule will throw a fail regardless of the value of `allow_na`
#' as the NA is introduced in the expression and is not found in the original data.
#' However, when the values of `vs` are `c("1", NA)`, `allow_na` will have an effect.
#' @param negate is the rule negated, only applies to the expression not allow_na,
#' that is, if `expr = mpg > 10`, `allow_na = TRUE`, and `negate = TRUE`, it would
#' match all `mpg <= 10` as well as NAs.
Expand Down
6 changes: 3 additions & 3 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ read_custom <- function(file, rules) {
data <- read.csv(file) # or however you read in your data
# if the check_data detects a fail: the read_custom function will stop
check_data(data, rules, xname = file,
fail_on_error = TRUE, fail_on_warn = TRUE)
stop_on_fail = TRUE, stop_on_warn = TRUE, stop_on_error = TRUE)
# ...
data
}
Expand All @@ -119,8 +119,8 @@ data <- read_custom("correct_data.csv", rules)
# an error is thrown when warnings or errors are found
data <- read_custom("wrong_data.csv", rules)
#> Error in check_data(data, rules, fail_on_error = TRUE, fail_on_warn = TRUE) :
#> In dataset 'wrong_data.csv' found 1 warnings and 1 errors
#> Error in check_data(data, rules, stop_on_fail = TRUE, stop_on_error = TRUE, stop_on_warn = TRUE) :
#> In dataset 'wrong_data.csv' found 2 rule fails, 1 warnings, 1 errors
```


Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ read_custom <- function(file, rules) {
data <- read.csv(file) # or however you read in your data
# if the check_data detects a fail: the read_custom function will stop
check_data(data, rules, xname = file,
fail_on_error = TRUE, fail_on_warn = TRUE)
stop_on_fail = TRUE, stop_on_warn = TRUE, stop_on_error = TRUE)
# ...
data
}
Expand All @@ -171,8 +171,8 @@ data <- read_custom("correct_data.csv", rules)

# an error is thrown when warnings or errors are found
data <- read_custom("wrong_data.csv", rules)
#> Error in check_data(data, rules, fail_on_error = TRUE, fail_on_warn = TRUE) :
#> In dataset 'wrong_data.csv' found 1 warnings and 1 errors
#> Error in check_data(data, rules, stop_on_fail = TRUE, stop_on_error = TRUE, stop_on_warn = TRUE) :
#> In dataset 'wrong_data.csv' found 2 rule fails, 1 warnings, 1 errors
```

## Backends
Expand Down
11 changes: 7 additions & 4 deletions man/check_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 6 additions & 1 deletion man/rule.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

87 changes: 83 additions & 4 deletions tests/testthat/test-check_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ rules <- ruleset(
rule(cyl %in% c(4, 6, 8), "r1"),
rule(mpg < 10 & mpg > 34, "r2", negate = TRUE),
rule(disp > 100, "r3", allow_na = TRUE), # data validation "fails"
rule(as.numeric(hp) > 0 & as.numeric(hp) < 400, "r4"), # creates warning + 1 NA
rule(as.numeric(hp) > 0 & as.numeric(hp) < 400, "r4"), # creates warning + 1 NA -> result in fail
rule(does_not_exist %in% c("a", "b", "c"), "r5") # creates a stop
)

Expand Down Expand Up @@ -210,7 +210,86 @@ test_that("Test extra functionality", {
expect_equal(res[, !"time"], res2[, !"time"])
unlink(rule_file)

# fails on warn and fail on error work =====
expect_error(check_data(data, rules, fail_on_warn = TRUE))
expect_error(check_data(data, rules, fail_on_error = TRUE))
# stop on warn and fail on error work =====
expect_error(check_data(data, rules, stop_on_fail = TRUE))
expect_error(check_data(data, rules, stop_on_warn = TRUE))
expect_error(check_data(data, rules, stop_on_error = TRUE))
})

test_that("Special case where a warning with allowed missing values returned a fail", {
rules <- ruleset(
rule(as.numeric(vs) %in% c(0, 1), allow_na = TRUE)
# conversion will introduce warning but allow_na should pass it
)
data <- mtcars
data$vs <- as.character(data$vs)
data$vs[1] <- "asd"

res <- check_data(data, rules)
expect_equal(res$fail, 1)
expect_equal(res$warn, "NAs introduced by coercion")
})


test_that("Extra tests for stop_on_fail with custom reader", {
rules <- ruleset(
rule(mpg > 10 & mpg < 30), # mpg goes up to 34
rule(cyl %in% c(4, 8)), # missing 6 cyl
rule(as.numeric(vs) %in% c(0, 1), allow_na = TRUE) # conversion can introduce warning
)

read_custom <- function(file, rules) {
data <- read.csv(file)
# expected: if the check_data detects a fail: the read_custom function will stop
check_data(data, rules, xname = file,
stop_on_fail = TRUE, stop_on_warn = TRUE, stop_on_error = TRUE)
data
}

d <- mtcars
d$name <- rownames(d)
rownames(d) <- NULL

# normal use case, no fails, warnings, errors
data_ok <- d[d$mpg <= 30 & d$cyl != 6, ]
rownames(data_ok) <- NULL

file_ok <- tempfile(fileext = ".csv")
write.csv(data_ok, file_ok, row.names = FALSE)

data_ok_got <- read_custom(file_ok, rules)
expect_equal(data_ok_got, data_ok)

# fail use case, no warnings, errors
data_fail <- d
file_fail <- tempfile(fileext = ".csv")
write.csv(data_fail, file_fail, row.names = FALSE)

expect_error(
read_custom(file_fail, rules),
"In dataset '.*' found 2 rule fails"
)

# warn use case, no fails, no errors
data_warn <- data_ok
data_warn$vs <- as.character(data_warn$vs)
data_warn$vs[3] <- "asd" # will throw warning

file_warn <- tempfile(fileext = ".csv")
write.csv(data_warn, file_warn, row.names = FALSE)

# see `allow_na` in `?rule` for an explanation why this fails
expect_error(
read_custom(file_warn, rules),
"In dataset '.*' found 1 rule fails, 1 warnings"
)

# error use case results in rule fails as well, no warnings
rules_error <- ruleset(
rule(stop("Not going to work..."))
)
expect_error(
read_custom(file_ok, rules_error),
"In dataset '.*' found 1 rule fails, 1 errors"
)
})

0 comments on commit 283eaf5

Please sign in to comment.