1- # ' @title Row means (optionally with minimum amount of valid values)
1+ # ' @title Row means or sums (optionally with minimum amount of valid values)
22# ' @name row_means
3- # ' @description This function is similar to the SPSS `MEAN.n` function and computes
4- # ' row means from a data frame or matrix if at least `min_valid` values of a row are
5- # ' valid (and not `NA`).
3+ # ' @description This function is similar to the SPSS `MEAN.n` or `SUM.n`
4+ # ' function and computes row means or row sums from a data frame or matrix if at
5+ # ' least `min_valid` values of a row are valid (and not `NA`).
66# '
7- # ' @param data A data frame with at least two columns, where row means are applied.
7+ # ' @param data A data frame with at least two columns, where row means or row
8+ # ' sums are applied.
89# ' @param min_valid Optional, a numeric value of length 1. May either be
910# ' - a numeric value that indicates the amount of valid values per row to
10- # ' calculate the row mean;
11+ # ' calculate the row mean or row sum ;
1112# ' - or a value between `0` and `1`, indicating a proportion of valid values per
12- # ' row to calculate the row mean (see 'Details').
13+ # ' row to calculate the row mean or row sum (see 'Details').
1314# ' - `NULL` (default), in which all cases are considered.
1415# '
1516# ' If a row's number of valid values is less than `min_valid`, `NA` will be returned.
1617# ' @param digits Numeric value indicating the number of decimal places to be
1718# ' used for rounding mean values. Negative values are allowed (see 'Details').
1819# ' By default, `digits = NULL` and no rounding is used.
1920# ' @param remove_na Logical, if `TRUE`, removes missing (`NA`) values
20- # ' before calculating row means. Only applies if `min_valuid` is not specified.
21+ # ' before calculating row means or row sums. Only applies if `min_valid` is not
22+ # ' specified.
2123# ' @param verbose Toggle warnings.
2224# ' @inheritParams extract_column_names
2325# '
24- # ' @return A vector with row means for those rows with at least `n` valid values.
26+ # ' @return A vector with row means (for `row_means()`) or row sums (for
27+ # ' `row_sums()`) for those rows with at least `min_valid` valid values.
2528# '
26- # ' @details Rounding to a negative number of `digits` means rounding to a power of
27- # ' ten, for example `row_means(df, 3, digits = -2)` rounds to the nearest hundred.
28- # ' For `min_valid`, if not `NULL`, `min_valid` must be a numeric value from `0`
29- # ' to `ncol(data)`. If a row in the data frame has at least `min_valid`
30- # ' non-missing values, the row mean is returned. If `min_valid` is a non-integer
31- # ' value from 0 to 1, `min_valid` is considered to indicate the proportion of
32- # ' required non-missing values per row. E.g., if `min_valid = 0.75`, a row must
33- # ' have at least `ncol(data) * min_valid` non-missing values for the row mean
34- # ' to be calculated. See 'Examples'.
29+ # ' @details Rounding to a negative number of `digits` means rounding to a power
30+ # ' of ten, for example `row_means(df, 3, digits = -2)` rounds to the nearest
31+ # ' hundred. For `min_valid`, if not `NULL`, `min_valid` must be a numeric value
32+ # ' from `0` to `ncol(data)`. If a row in the data frame has at least `min_valid`
33+ # ' non-missing values, the row mean or row sum is returned. If `min_valid` is a
34+ # ' non-integer value from 0 to 1, `min_valid` is considered to indicate the
35+ # ' proportion of required non-missing values per row. E.g., if
36+ # ' `min_valid = 0.75`, a row must have at least `ncol(data) * min_valid`
37+ # ' non-missing values for the row mean or row sum to be calculated. See
38+ # ' 'Examples'.
3539# '
3640# ' @examples
3741# ' dat <- data.frame(
4953# '
5054# ' # needs at least 4 non-missing values per row
5155# ' row_means(dat, min_valid = 4) # 1 valid return value
56+ # ' row_sums(dat, min_valid = 4) # 1 valid return value
5257# '
5358# ' # needs at least 3 non-missing values per row
5459# ' row_means(dat, min_valid = 3) # 2 valid return values
6166# '
6267# ' # needs at least 50% of non-missing values per row
6368# ' row_means(dat, min_valid = 0.5) # 3 valid return values
69+ # ' row_sums(dat, min_valid = 0.5)
6470# '
6571# ' # needs at least 75% of non-missing values per row
6672# ' row_means(dat, min_valid = 0.75) # 2 valid return values
@@ -84,34 +90,52 @@ row_means <- function(data,
8490 verbose = verbose
8591 )
8692
87- if (is.null(select ) || length(select ) == 0 ) {
88- insight :: format_error(" No columns selected." )
89- }
93+ # prepare data, sanity checks
94+ data <- .prepare_row_data(data , select , min_valid , verbose )
9095
91- data <- .coerce_to_dataframe(data [select ])
96+ # calculate row means
97+ .row_sums_or_means(data , min_valid , digits , remove_na , fun = " mean" )
98+ }
9299
93- # n must be a numeric, non-missing value
94- if (! is.null(min_valid ) && (all(is.na(min_valid )) || ! is.numeric(min_valid ) || length(min_valid ) > 1 )) {
95- insight :: format_error(" `min_valid` must be a numeric value of length 1." )
96- }
97100
98- # make sure we only have numeric values
99- numeric_columns <- vapply(data , is.numeric , TRUE )
100- if (! all(numeric_columns )) {
101- if (verbose ) {
102- insight :: format_alert(" Only numeric columns are considered for calculation." )
103- }
104- data <- data [numeric_columns ]
105- }
# Row-wise sums over the selected columns; counterpart of `row_means()` and
# documented on the same help page (see `@rdname` below). Takes the same
# arguments as `row_means()` and returns whatever `.row_sums_or_means()`
# produces for `fun = "sum"`.
#' @rdname row_means
#' @export
row_sums <- function(data,
                     select = NULL,
                     exclude = NULL,
                     min_valid = NULL,
                     digits = NULL,
                     ignore_case = FALSE,
                     regex = FALSE,
                     remove_na = FALSE,
                     verbose = TRUE) {
  # evaluate the column selection arguments
  select <- .select_nse(
    select,
    data,
    exclude,
    ignore_case = ignore_case,
    regex = regex,
    verbose = verbose
  )

  # shared input checks; returns the data restricted to numeric columns
  data <- .prepare_row_data(data, select, min_valid, verbose)

  # Pass the bare key "sum" without padding: a padded key only works because
  # the helper's `switch()` falls back to its unnamed default branch, and it
  # would stop working if the helper ever matched the key explicitly.
  .row_sums_or_means(data, min_valid, digits, remove_na, fun = "sum")
}
106127
107- # check if we have a data framme with at least two columns
108- if (ncol(data ) < 2 ) {
109- insight :: format_error(" `data` must be a data frame with at least two numeric columns." )
110- }
111128
112- # proceed here if min_valid is not NULL
129+ # helper ------------------------
130+
131+ # calculate row means or sums
132+ .row_sums_or_means <- function (data , min_valid , digits , remove_na , fun ) {
113133 if (is.null(min_valid )) {
114- out <- rowMeans(data , na.rm = remove_na )
134+ # calculate row means or sums for complete data
135+ out <- switch (fun ,
136+ mean = rowMeans(data , na.rm = remove_na ),
137+ rowSums(data , na.rm = remove_na )
138+ )
115139 } else {
116140 # is 'min_valid' indicating a proportion?
117141 decimals <- min_valid %% 1
@@ -124,9 +148,12 @@ row_means <- function(data,
124148 insight :: format_error(" `min_valid` must be smaller or equal to number of columns in data frame." )
125149 }
126150
127- # row means
151+ # row means or sums
128152 to_na <- rowSums(is.na(data )) > ncol(data ) - min_valid
129- out <- rowMeans(data , na.rm = TRUE )
153+ out <- switch (fun ,
154+ mean = rowMeans(data , na.rm = TRUE ),
155+ rowSums(data , na.rm = TRUE )
156+ )
130157 out [to_na ] <- NA
131158 }
132159
@@ -137,3 +164,34 @@ row_means <- function(data,
137164
138165 out
139166}
167+
168+
# Input checks shared by `row_means()` and `row_sums()`. Returns `data`
# reduced to the selected, numeric columns. Calls `insight::format_error()`
# when no column is selected, when `min_valid` is given but is not a single
# non-missing numeric value, or when fewer than two numeric columns remain.
# NOTE(review): the message strings below start with a space - confirm
# whether that padding is intended.
.prepare_row_data <- function(data, select, min_valid, verbose) {
  # nothing to compute without a column selection
  if (is.null(select) || length(select) == 0) {
    insight::format_error(" No columns selected.")
  }

  data <- .coerce_to_dataframe(data[select])

  # when supplied, `min_valid` has to be one non-missing numeric value
  if (!is.null(min_valid)) {
    min_valid_ok <- !all(is.na(min_valid)) && is.numeric(min_valid) && length(min_valid) <= 1
    if (!min_valid_ok) {
      insight::format_error(" `min_valid` must be a numeric value of length 1.")
    }
  }

  # drop non-numeric columns, telling the user about it unless silenced
  is_num <- vapply(data, is.numeric, logical(1))
  if (!all(is_num)) {
    if (verbose) {
      insight::format_alert(" Only numeric columns are considered for calculation.")
    }
    data <- data[is_num]
  }

  # row-wise statistics require two or more numeric columns
  if (ncol(data) < 2) {
    insight::format_error(" `data` must be a data frame with at least two numeric columns.")
  }

  data
}
0 commit comments