44[ ![ DOI] ( https://joss.theoj.org/papers/10.21105/joss.04684/status.svg )] ( https://doi.org/10.21105/joss.04684 )
55[ ![ downloads] ( http://cranlogs.r-pkg.org/badges/datawizard )] ( https://cran.r-project.org/package=datawizard )
66[ ![ total] ( https://cranlogs.r-pkg.org/badges/grand-total/datawizard )] ( https://cranlogs.r-pkg.org/ )
7- [ ![ lifecycle] ( https://img.shields.io/badge/lifecycle-maturing-blue.svg )] ( https://lifecycle.r-lib.org/articles/stages.html )
87
98<!-- ***:sparkles: Hockety pockety wockety wack, prepare this data forth and back*** -->
109<!-- ***Hockety pockety wockety wock, messy data is in shock*** -->
@@ -50,11 +49,11 @@ It covers two aspects of data preparation:
5049badge] ( https://easystats.r-universe.dev/badges/datawizard )] ( https://easystats.r-universe.dev )
5150[ ![ R-CMD-check] ( https://github.com/easystats/datawizard/workflows/R-CMD-check/badge.svg?branch=main )] ( https://github.com/easystats/datawizard/actions )
5251
53- | Type | Source | Command |
54- | ----| ----| ----|
55- | Release | CRAN | ` install.packages("datawizard") ` |
52+ | Type | Source | Command |
53+ | ------------- | ------------ | ------------------------------------------------------------------------------|
54+ | Release | CRAN | ` install.packages("datawizard") ` |
5655| Development | r-universe | ` install.packages("datawizard", repos = "https://easystats.r-universe.dev") ` |
57- | Development | GitHub | ` remotes::install_github("easystats/datawizard") ` |
56+ | Development | GitHub | ` remotes::install_github("easystats/datawizard") ` |
5857
5958> ** Tip**
6059>
@@ -71,9 +70,10 @@ To cite the package, run the following command:
7170citation(" datawizard" )
7271To cite package ' datawizard' in publications use :
7372
74- Patil et al. , (2022 ). datawizard : An R Package for Easy Data
75- Preparation and Statistical Transformations. Journal of Open Source
76- Software , 7 (78 ), 4684 , https : // doi.org / 10.21105 / joss.04684
73+ Patil et al. , (2022 ). datawizard : An R Package for Easy
74+ Data Preparation and Statistical Transformations. Journal
75+ of Open Source Software , 7 (78 ), 4684 ,
76+ https : // doi.org / 10.21105 / joss.04684
7777
7878A BibTeX entry for LaTeX users is
7979
@@ -136,9 +136,6 @@ columns, can be achieved using `extract_column_names()` or
136136# find column names matching a pattern
137137extract_column_names(iris , starts_with(" Sepal" ))
138138# > [1] "Sepal.Length" "Sepal.Width"
139- ```
140-
141- ``` r
142139
143140# return data columns matching a pattern
144141data_select(iris , starts_with(" Sepal" )) | > head()
@@ -156,10 +153,8 @@ It is also possible to extract one or more variables:
156153``` r
157154# single variable
158155data_extract(mtcars , " gear" )
159- # > [1] 4 4 4 3 3 3 3 4 4 4 4 3 3 3 3 3 3 4 4 4 3 3 3 3 3 4 5 5 5 5 5 4
160- ```
161-
162- ``` r
156+ # > [1] 4 4 4 3 3 3 3 4 4 4 4 3 3 3 3 3 3 4 4 4 3 3 3 3 3 4 5 5 5 5 5
157+ # > [32] 4
163158
164159# more variables
165160head(data_extract(iris , ends_with(" Width" )))
220215# > 1 1 a 5 1
221216# > 2 2 b 6 2
222217# > 3 3 c 7 3
223- ```
224-
225- ``` r
226218y
227219# > c d e id
228220# > 1 6 f 100 2
229221# > 2 7 g 101 3
230222# > 3 8 h 102 4
231- ```
232-
233- ``` r
234223
235224data_merge(x , y , join = " full" )
236225# > a b c id d e
237226# > 3 1 a 5 1 <NA> NA
238227# > 1 2 b 6 2 f 100
239228# > 2 3 c 7 3 g 101
240229# > 4 NA <NA> 8 4 h 102
241- ```
242-
243- ``` r
244230
245231data_merge(x , y , join = " left" )
246232# > a b c id d e
247233# > 3 1 a 5 1 <NA> NA
248234# > 1 2 b 6 2 f 100
249235# > 2 3 c 7 3 g 101
250- ```
251-
252- ``` r
253236
254237data_merge(x , y , join = " right" )
255238# > a b c id d e
256239# > 1 2 b 6 2 f 100
257240# > 2 3 c 7 3 g 101
258241# > 3 NA <NA> 8 4 h 102
259- ```
260-
261- ``` r
262242
263243data_merge(x , y , join = " semi" , by = " c" )
264244# > a b c id
265245# > 2 2 b 6 2
266246# > 3 3 c 7 3
267- ```
268-
269- ``` r
270247
271248data_merge(x , y , join = " anti" , by = " c" )
272249# > a b c id
273250# > 1 1 a 5 1
274- ```
275-
276- ``` r
277251
278252data_merge(x , y , join = " inner" )
279253# > a b c id d e
280254# > 1 2 b 6 2 f 100
281255# > 2 3 c 7 3 g 101
282- ```
283-
284- ``` r
285256
286257data_merge(x , y , join = " bind" )
287258# > a b c id d e
@@ -322,17 +293,28 @@ data_to_wide(long_data,
322293 values_from = " value" ,
323294 id_cols = " Row_ID"
324295)
325- # > Row_ID X1 X2 X3 X4 X5
326- # > 1 1 -0.08281164 -1.12490028 -0.70632036 -0.7027895 0.07633326
327- # > 2 2 1.93468099 -0.87430362 0.96687656 0.2998642 -0.23035595
328- # > 3 3 -2.05128979 0.04386162 -0.71016648 1.1494697 0.31746484
329- # > 4 4 0.27773897 -0.58397514 -0.05917365 -0.3016415 -1.59268440
330- # > 5 5 -1.52596060 -0.82329858 -0.23094342 -0.5473394 -0.18194062
331- # > 6 6 -0.26916362 0.11059280 0.69200045 -0.3854041 1.75614174
332- # > 7 7 1.23305388 0.36472778 1.35682290 0.2763720 0.11394932
333- # > 8 8 0.63360774 0.05370100 1.78872284 0.1518608 -0.29216508
334- # > 9 9 0.35271746 1.36867235 0.41071582 -0.4313808 1.75409316
335- # > 10 10 -0.56048248 -0.38045724 -2.18785470 -1.8705001 1.80958455
296+ # > Row_ID X1 X2 X3 X4
297+ # > 1 1 -0.08281164 -1.12490028 -0.70632036 -0.7027895
298+ # > 2 2 1.93468099 -0.87430362 0.96687656 0.2998642
299+ # > 3 3 -2.05128979 0.04386162 -0.71016648 1.1494697
300+ # > 4 4 0.27773897 -0.58397514 -0.05917365 -0.3016415
301+ # > 5 5 -1.52596060 -0.82329858 -0.23094342 -0.5473394
302+ # > 6 6 -0.26916362 0.11059280 0.69200045 -0.3854041
303+ # > 7 7 1.23305388 0.36472778 1.35682290 0.2763720
304+ # > 8 8 0.63360774 0.05370100 1.78872284 0.1518608
305+ # > 9 9 0.35271746 1.36867235 0.41071582 -0.4313808
306+ # > 10 10 -0.56048248 -0.38045724 -2.18785470 -1.8705001
307+ # > X5
308+ # > 1 0.07633326
309+ # > 2 -0.23035595
310+ # > 3 0.31746484
311+ # > 4 -1.59268440
312+ # > 5 -0.18194062
313+ # > 6 1.75614174
314+ # > 7 0.11394932
315+ # > 8 -0.29216508
316+ # > 9 1.75409316
317+ # > 10 1.80958455
336318```
337319
338320### Empty rows and columns
@@ -352,22 +334,13 @@ tmp
352334# > 3 3 3 NA 3
353335# > 4 NA NA NA NA
354336# > 5 5 5 NA 5
355- ```
356-
357- ``` r
358337
359338# indices of empty columns or rows
360339empty_columns(tmp )
361340# > c
362341# > 3
363- ```
364-
365- ``` r
366342empty_rows(tmp )
367343# > [1] 4
368- ```
369-
370- ``` r
371344
372345# remove empty columns or rows
373346remove_empty_columns(tmp )
@@ -377,18 +350,12 @@ remove_empty_columns(tmp)
377350# > 3 3 3 3
378351# > 4 NA NA NA
379352# > 5 5 5 5
380- ```
381-
382- ``` r
383353remove_empty_rows(tmp )
384354# > a b c d
385355# > 1 1 1 NA 1
386356# > 2 2 NA NA NA
387357# > 3 3 3 NA 3
388358# > 5 5 5 NA 5
389- ```
390-
391- ``` r
392359
393360# remove empty columns and rows
394361remove_empty(tmp )
@@ -409,9 +376,6 @@ table(x)
409376# > x
410377# > 1 2 3 4 5 6 7 8 9 10
411378# > 2 3 5 3 7 5 5 2 11 7
412- ```
413-
414- ``` r
415379
416380# cut into 3 groups, based on distribution (quantiles)
417381table(categorize(x , split = " quantile" , n_groups = 3 ))
@@ -445,26 +409,23 @@ summary(swiss)
445409# > Mean : 41.144 Mean :19.94
446410# > 3rd Qu.: 93.125 3rd Qu.:21.70
447411# > Max. :100.000 Max. :26.60
448- ```
449-
450- ``` r
451412
452413# after
453414summary(standardize(swiss ))
454- # > Fertility Agriculture Examination Education
455- # > Min. :-2.81327 Min. :-2.1778 Min. :-1.69084 Min. :-1.0378
456- # > 1st Qu.:-0.43569 1st Qu.:-0.6499 1st Qu.:-0.56273 1st Qu.:-0.5178
457- # > Median : 0.02061 Median : 0.1515 Median :-0.06134 Median :-0.3098
458- # > Mean : 0.00000 Mean : 0.0000 Mean : 0.00000 Mean : 0.0000
459- # > 3rd Qu.: 0.66504 3rd Qu.: 0.7481 3rd Qu.: 0.69074 3rd Qu.: 0.1062
460- # > Max. : 1.78978 Max. : 1.7190 Max. : 2.57094 Max. : 4.3702
461- # > Catholic Infant.Mortality
462- # > Min. :-0.9350 Min. :-3.13886
463- # > 1st Qu.:-0.8620 1st Qu.:-0.61543
464- # > Median :-0.6235 Median : 0.01972
465- # > Mean : 0.0000 Mean : 0.00000
466- # > 3rd Qu.: 1.2464 3rd Qu.: 0.60337
467- # > Max. : 1.4113 Max. : 2.28566
415+ # > Fertility Agriculture Examination
416+ # > Min. :-2.81327 Min. :-2.1778 Min. :-1.69084
417+ # > 1st Qu.:-0.43569 1st Qu.:-0.6499 1st Qu.:-0.56273
418+ # > Median : 0.02061 Median : 0.1515 Median :-0.06134
419+ # > Mean : 0.00000 Mean : 0.0000 Mean : 0.00000
420+ # > 3rd Qu.: 0.66504 3rd Qu.: 0.7481 3rd Qu.: 0.69074
421+ # > Max. : 1.78978 Max. : 1.7190 Max. : 2.57094
422+ # > Education Catholic Infant.Mortality
423+ # > Min. :-1.0378 Min. :-0.9350 Min. :-3.13886
424+ # > 1st Qu.:-0.5178 1st Qu.:-0.8620 1st Qu.:-0.61543
425+ # > Median :-0.3098 Median :-0.6235 Median : 0.01972
426+ # > Mean : 0.0000 Mean : 0.0000 Mean : 0.00000
427+ # > 3rd Qu.: 0.1062 3rd Qu.: 1.2464 3rd Qu.: 0.60337
428+ # > Max. : 4.3702 Max. : 1.4113 Max. : 2.28566
468429```
469430
470431### Winsorize
@@ -486,9 +447,6 @@ anscombe
486447# > 9 12 12 12 8 10.84 9.13 8.15 5.56
487448# > 10 7 7 7 8 4.82 7.26 6.42 7.91
488449# > 11 5 5 5 8 5.68 4.74 5.73 6.89
489- ```
490-
491- ``` r
492450
493451# after
494452winsorize(anscombe )
@@ -540,9 +498,6 @@ head(trees)
540498# > 4 10.5 72 16.4
541499# > 5 10.7 81 18.8
542500# > 6 10.8 83 19.7
543- ```
544-
545- ``` r
546501
547502# after
548503head(ranktransform(trees ))
575530# > Mazda RX4 21.0 6 160 110
576531# > Mazda RX4 Wag 21.0 6 160 110
577532# > Datsun 710 22.8 4 108 93
578- ```
579-
580- ``` r
581533
582534data_rotate(x )
583535# > Mazda RX4 Mazda RX4 Wag Datsun 710
0 commit comments