Skip to content

Commit 213b9d5

Browse files
authored
Add row_sums() (#552)
* Draft `row_sums()` as complement to `row_means()` * version
1 parent 5ce207b commit 213b9d5

File tree

6 files changed

+151
-63
lines changed

6 files changed

+151
-63
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Type: Package
22
Package: datawizard
33
Title: Easy Data Wrangling and Statistical Transformations
4-
Version: 0.13.0.6
4+
Version: 0.13.0.7
55
Authors@R: c(
66
person("Indrajeet", "Patil", , "[email protected]", role = "aut",
77
comment = c(ORCID = "0000-0003-1995-6531")),

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,7 @@ export(reverse)
298298
export(reverse_scale)
299299
export(row_count)
300300
export(row_means)
301+
export(row_sums)
301302
export(row_to_colnames)
302303
export(rowid_as_column)
303304
export(rownames_as_column)

NEWS.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ CHANGES
1111
variables, can now also be a character vector with quoted variable names,
1212
including a colon to indicate a range of several variables (e.g. `"cyl:gear"`).
1313

14+
* New function `row_sums()`, to calculate row sums (optionally with minimum
15+
amount of valid values), as complement to `row_means()`.
16+
1417
* New function `row_count()`, to count specific values row-wise.
1518

1619
BUG FIXES

R/row_means.R

Lines changed: 100 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,41 @@
1-
#' @title Row means (optionally with minimum amount of valid values)
1+
#' @title Row means or sums (optionally with minimum amount of valid values)
22
#' @name row_means
3-
#' @description This function is similar to the SPSS `MEAN.n` function and computes
4-
#' row means from a data frame or matrix if at least `min_valid` values of a row are
5-
#' valid (and not `NA`).
3+
#' @description This function is similar to the SPSS `MEAN.n` or `SUM.n`
4+
#' function and computes row means or row sums from a data frame or matrix if at
5+
#' least `min_valid` values of a row are valid (and not `NA`).
66
#'
7-
#' @param data A data frame with at least two columns, where row means are applied.
7+
#' @param data A data frame with at least two columns, where row means or row
8+
#' sums are applied.
89
#' @param min_valid Optional, a numeric value of length 1. May either be
910
#' - a numeric value that indicates the amount of valid values per row to
10-
#' calculate the row mean;
11+
#' calculate the row mean or row sum;
1112
#' - or a value between `0` and `1`, indicating a proportion of valid values per
12-
#' row to calculate the row mean (see 'Details').
13+
#' row to calculate the row mean or row sum (see 'Details').
1314
#' - `NULL` (default), in which case all cases are considered.
1415
#'
1516
#' If a row's number of valid values is less than `min_valid`, `NA` will be returned.
1617
#' @param digits Numeric value indicating the number of decimal places to be
1718
#' used for rounding mean or sum values. Negative values are allowed (see 'Details').
1819
#' By default, `digits = NULL` and no rounding is used.
1920
#' @param remove_na Logical (default `FALSE`). If `TRUE`, removes missing (`NA`) values
20-
#' before calculating row means. Only applies if `min_valuid` is not specified.
21+
#' before calculating row means or row sums. Only applies if `min_valid` is not
22+
#' specified.
2123
#' @param verbose Toggle warnings.
2224
#' @inheritParams extract_column_names
2325
#'
24-
#' @return A vector with row means for those rows with at least `n` valid values.
26+
#' @return A vector with row means (for `row_means()`) or row sums (for
27+
#' `row_sums()`) for those rows with at least `min_valid` valid values.
2528
#'
26-
#' @details Rounding to a negative number of `digits` means rounding to a power of
27-
#' ten, for example `row_means(df, 3, digits = -2)` rounds to the nearest hundred.
28-
#' For `min_valid`, if not `NULL`, `min_valid` must be a numeric value from `0`
29-
#' to `ncol(data)`. If a row in the data frame has at least `min_valid`
30-
#' non-missing values, the row mean is returned. If `min_valid` is a non-integer
31-
#' value from 0 to 1, `min_valid` is considered to indicate the proportion of
32-
#' required non-missing values per row. E.g., if `min_valid = 0.75`, a row must
33-
#' have at least `ncol(data) * min_valid` non-missing values for the row mean
34-
#' to be calculated. See 'Examples'.
29+
#' @details Rounding to a negative number of `digits` means rounding to a power
30+
#' of ten, for example `row_means(df, digits = -2)` rounds to the nearest
31+
#' hundred. For `min_valid`, if not `NULL`, `min_valid` must be a numeric value
32+
#' from `0` to `ncol(data)`. If a row in the data frame has at least `min_valid`
33+
#' non-missing values, the row mean or row sum is returned. If `min_valid` is a
34+
#' non-integer value from 0 to 1, `min_valid` is considered to indicate the
35+
#' proportion of required non-missing values per row. E.g., if
36+
#' `min_valid = 0.75`, a row must have at least `ncol(data) * min_valid`
37+
#' non-missing values for the row mean or row sum to be calculated. See
38+
#' 'Examples'.
3539
#'
3640
#' @examples
3741
#' dat <- data.frame(
@@ -49,6 +53,7 @@
4953
#'
5054
#' # needs at least 4 non-missing values per row
5155
#' row_means(dat, min_valid = 4) # 1 valid return value
56+
#' row_sums(dat, min_valid = 4) # 1 valid return value
5257
#'
5358
#' # needs at least 3 non-missing values per row
5459
#' row_means(dat, min_valid = 3) # 2 valid return values
@@ -61,6 +66,7 @@
6166
#'
6267
#' # needs at least 50% of non-missing values per row
6368
#' row_means(dat, min_valid = 0.5) # 3 valid return values
69+
#' row_sums(dat, min_valid = 0.5)
6470
#'
6571
#' # needs at least 75% of non-missing values per row
6672
#' row_means(dat, min_valid = 0.75) # 2 valid return values
@@ -84,34 +90,52 @@ row_means <- function(data,
8490
verbose = verbose
8591
)
8692

87-
if (is.null(select) || length(select) == 0) {
88-
insight::format_error("No columns selected.")
89-
}
93+
# prepare data, sanity checks
94+
data <- .prepare_row_data(data, select, min_valid, verbose)
9095

91-
data <- .coerce_to_dataframe(data[select])
96+
# calculate row means
97+
.row_sums_or_means(data, min_valid, digits, remove_na, fun = "mean")
98+
}
9299

93-
# n must be a numeric, non-missing value
94-
if (!is.null(min_valid) && (all(is.na(min_valid)) || !is.numeric(min_valid) || length(min_valid) > 1)) {
95-
insight::format_error("`min_valid` must be a numeric value of length 1.")
96-
}
97100

98-
# make sure we only have numeric values
99-
numeric_columns <- vapply(data, is.numeric, TRUE)
100-
if (!all(numeric_columns)) {
101-
if (verbose) {
102-
insight::format_alert("Only numeric columns are considered for calculation.")
103-
}
104-
data <- data[numeric_columns]
105-
}
101+
#' @rdname row_means
102+
#' @export
103+
row_sums <- function(data,
104+
select = NULL,
105+
exclude = NULL,
106+
min_valid = NULL,
107+
digits = NULL,
108+
ignore_case = FALSE,
109+
regex = FALSE,
110+
remove_na = FALSE,
111+
verbose = TRUE) {
112+
# evaluate arguments: resolve `select` / `exclude` through `.select_nse()`;
# the result is later used to subset `data` (via `data[select]` in
# `.prepare_row_data()`)
113+
select <- .select_nse(select,
114+
data,
115+
exclude,
116+
ignore_case = ignore_case,
117+
regex = regex,
118+
verbose = verbose
119+
)
120+
121+
# prepare data, sanity checks: errors if no columns are selected, if
# `min_valid` is not a single numeric value, or if fewer than two numeric
# columns remain; non-numeric columns are dropped (with an alert if `verbose`)
122+
data <- .prepare_row_data(data, select, min_valid, verbose)
123+
124+
# calculate row sums: `fun = "sum"` makes the shared helper use `rowSums()`
# instead of `rowMeans()`; the helper's result is the return value
125+
.row_sums_or_means(data, min_valid, digits, remove_na, fun = "sum")
126+
}
106127

107-
# check if we have a data framme with at least two columns
108-
if (ncol(data) < 2) {
109-
insight::format_error("`data` must be a data frame with at least two numeric columns.")
110-
}
111128

112-
# proceed here if min_valid is not NULL
129+
# helper ------------------------
130+
131+
# calculate row means or sums
132+
.row_sums_or_means <- function(data, min_valid, digits, remove_na, fun) {
113133
if (is.null(min_valid)) {
114-
out <- rowMeans(data, na.rm = remove_na)
134+
# calculate row means or sums for complete data
135+
out <- switch(fun,
136+
mean = rowMeans(data, na.rm = remove_na),
137+
rowSums(data, na.rm = remove_na)
138+
)
115139
} else {
116140
# is 'min_valid' indicating a proportion?
117141
decimals <- min_valid %% 1
@@ -124,9 +148,12 @@ row_means <- function(data,
124148
insight::format_error("`min_valid` must be smaller or equal to number of columns in data frame.")
125149
}
126150

127-
# row means
151+
# row means or sums
128152
to_na <- rowSums(is.na(data)) > ncol(data) - min_valid
129-
out <- rowMeans(data, na.rm = TRUE)
153+
out <- switch(fun,
154+
mean = rowMeans(data, na.rm = TRUE),
155+
rowSums(data, na.rm = TRUE)
156+
)
130157
out[to_na] <- NA
131158
}
132159

@@ -137,3 +164,34 @@ row_means <- function(data,
137164

138165
out
139166
}
167+
168+
169+
# check that data is in shape for row means or row sums:
# subsets `data` to the columns in `select`, validates `min_valid`, drops
# non-numeric columns (with an alert if `verbose` is TRUE) and returns the
# prepared data. Errors if no columns are selected, if `min_valid` is not a
# single numeric value, or if fewer than two numeric columns remain.
170+
.prepare_row_data <- function(data, select, min_valid, verbose) {
171+
if (is.null(select) || length(select) == 0) {
172+
insight::format_error("No columns selected.")
173+
}
174+
175+
data <- .coerce_to_dataframe(data[select])
176+
177+
# `min_valid` (if not NULL) must be a single, numeric, non-missing value
178+
if (!is.null(min_valid) && (all(is.na(min_valid)) || !is.numeric(min_valid) || length(min_valid) > 1)) {
179+
insight::format_error("`min_valid` must be a numeric value of length 1.")
180+
}
181+
182+
# make sure we only have numeric values; non-numeric columns are dropped
183+
numeric_columns <- vapply(data, is.numeric, TRUE)
184+
if (!all(numeric_columns)) {
185+
if (verbose) {
186+
insight::format_alert("Only numeric columns are considered for calculation.")
187+
}
188+
data <- data[numeric_columns]
189+
}
190+
191+
# check if we have a data frame with at least two (numeric) columns left
192+
if (ncol(data) < 2) {
193+
insight::format_error("`data` must be a data frame with at least two numeric columns.")
194+
}
195+
196+
data
197+
}

man/row_means.Rd

Lines changed: 37 additions & 18 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-row_means.R

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
test_that("row_means", {
1+
test_that("row_means/sums", {
22
d_mn <- data.frame(
33
c1 = c(1, 2, NA, 4),
44
c2 = c(NA, 2, NA, 5),
@@ -14,14 +14,21 @@ test_that("row_means", {
1414
expect_equal(row_means(d_mn, min_valid = 2, digits = 1), c(1.5, 2.8, NA, 5.7), tolerance = 1e-1)
1515
expect_message(row_means(iris), regex = "Only numeric")
1616
expect_equal(row_means(iris, verbose = FALSE), rowMeans(iris[, 1:4]), tolerance = 1e-3, ignore_attr = TRUE)
17+
expect_equal(row_sums(d_mn, min_valid = 4), c(NA, 11, NA, NA), tolerance = 1e-3)
18+
expect_equal(row_sums(d_mn, min_valid = 3), c(NA, 11, NA, 17), tolerance = 1e-3)
19+
expect_message(row_sums(iris), regex = "Only numeric")
1720
})
1821

19-
test_that("row_means, errors or messages", {
22+
test_that("row_means/sums, errors or messages", {
2023
data(iris)
2124
# selecting a non-existing column must end in the "No columns selected." error;
# the inner expect_warning() presumably captures a warning raised during
# column selection -- TODO confirm where that warning originates
expect_error(expect_warning(row_means(iris, select = "abc")), regex = "No columns")
25+
expect_error(expect_warning(row_sums(iris, select = "abc")), regex = "No columns")
2226
# a single column fails the "at least two numeric columns" check
expect_error(row_means(iris[1], min_valid = 1), regex = "two numeric")
2327
# `min_valid` must be a single numeric value: length > 1 and character both fail
expect_error(row_means(iris, min_valid = 1:4), regex = "numeric value")
2428
expect_error(row_means(iris, min_valid = "a"), regex = "numeric value")
2529
# iris contains a non-numeric column, so an alert is shown unless verbose = FALSE
expect_message(row_means(iris[1:3, ], min_valid = 3), regex = "Only numeric")
2630
expect_silent(row_means(iris[1:3, ], min_valid = 3, verbose = FALSE))
31+
# same checks for row_sums()
expect_error(row_sums(iris[1], min_valid = 1), regex = "two numeric")
32+
expect_message(row_sums(iris[1:3, ], min_valid = 3), regex = "Only numeric")
33+
expect_silent(row_sums(iris[1:3, ], min_valid = 3, verbose = FALSE))
2734
})

0 commit comments

Comments
 (0)