image source xkcd.com
from xkcd.com
tidyverse
data munging operations: filter
select
mutate
image source commons.wikimedia.org by W.carter
%>%
ds
op_a
, op_b
and op_c
# apply op_a
ds1 <- op_a(ds)
# then apply op_b
ds2 <- op_b(ds1)
# then apply op_c
ds3 <- op_c(ds2)
# do it all in one line
ds3 <- op_c(op_b(op_a(ds)))
ds3 <- op_c(op_b(op_a(ds, op_a_param=...), op_b_param=...), op_c_param=...)
ds3 <- ds %>%
op_a() %>%
op_b() %>%
op_c()
“ds
, then apply op_a
, then apply op_b
, then apply op_c”
tidyverse
data wrangling operations: filter
select
mutate
filter
result <- input %>%
filter(*condition*)
For example
welly_land <- welly %>%
filter(LandAreaSQ > 0)
# equal is '=='
welly_water <- welly %>%
filter(LandAreaSQ == 0)
# not equal is '!='
welly_land <- welly %>%
filter(LandAreaSQ != 0)
# here's another example
welly_populated <- welly %>%
filter(pop > 0)
&
(and) and |
(or) operations
# using &
welly_aquanauts <- welly %>%
filter(LandAreaSQ == 0 & pop > 0)
# filter() combines comma-separated conditions with AND
welly_aquanauts <- welly %>%
filter(LandAreaSQ == 0, pop > 0)
# or you can pipe things
welly_aquanauts <- welly %>%
filter(LandAreaSQ == 0) %>%
filter(pop > 0)
select
welly_reduced <- welly %>%
select(MeshblockN)
welly_reduced <- welly %>%
select(MeshblockN:UrbanAreaN)
# Keep columns by position (1:3, 7:11) and by name (UrbanAreaN) in one call.
welly_reduced <- welly %>%
  select(1:3, UrbanAreaN, 7:11)
starts_with()
,
ends_with()
, contains()
welly_reduced <- welly %>%
select(-starts_with("urban"))
# also ends_with(), contains(), matches()
-
sign
welly_reduced <- welly %>%
select(-MeshblockN)
welly_reduced <- welly %>%
select(-(MeshblockN:UrbanAreaN))
# Drop columns 1:3 using a negative selection.
# NOTE(review): terms listed after a leading negative selection are added back,
# not dropped. If the intent is to drop every listed column, use
# select(-c(1:3, UrbanAreaN, 7:11)) instead -- confirm against the slide's intent.
welly_reduced <- welly %>%
  select(-(1:3), UrbanAreaN, 7:11)
welly_reduced <- welly %>%
select(-starts_with("urban"))
# also ends_with(), contains(), matches()
mutate
result <- input %>%
mutate(sum_xy = x + y,
diff_xy = x - y,
pc_diff = diff_xy / sum_xy * 100)
across()
function to apply
a calculation only to selected columns
# to change the type of variables
result <- input %>%
mutate(across(where(is.integer), as.character))
result <- input %>%
mutate(across(matches("MeshblockN"), as.factor))
result <- input %>%
mutate(across(where(is.numeric), ~ . / total * 100))
from xkcd.com