image source xkcd.com
from xkcd.com
tidyverse data munging operations: filter, select, mutate
image source: commons.wikimedia.org by W.carter
The pipe operator %>%: applying the operations op_a, op_b and op_c to a dataset ds
# Step by step, keeping every intermediate result:
# first op_a ...
ds1 <- op_a(ds)
# ... then op_b on the output of op_a ...
ds2 <- op_b(ds1)
# ... and finally op_c on the output of op_b
ds3 <- op_c(ds2)
# The same three steps as one nested call (read from the inside out)
ds3 <- op_c(op_b(op_a(ds)))
# Nested calls again, now with an extra parameter for each operation
ds3 <- op_c(op_b(op_a(ds, op_a_param = ...), op_b_param = ...), op_c_param = ...)
# The same chain written with the pipe, one operation per line
ds3 <- ds %>%
  op_a() %>%
  op_b() %>%
  op_c()
Read the pipe as: “take ds, then apply op_a, then apply op_b, then apply op_c”
tidyverse data wrangling operations: filter, select, mutate
filter
# General pattern: filter() keeps the rows of `input` for which the condition
# is TRUE. `condition` is a placeholder for a logical expression on the
# columns, e.g. LandAreaSQ > 0.
result <- input %>%
  filter(condition)
For example
# greater than is '>': keep rows whose LandAreaSQ is above zero
welly_land <- welly %>%
  filter(LandAreaSQ > 0)
# equality is tested with '==': keep rows whose LandAreaSQ is exactly zero
welly_water <- welly %>%
  filter(LandAreaSQ == 0)
# inequality is tested with '!=': keep rows whose LandAreaSQ is not zero
welly_land <- welly %>%
  filter(LandAreaSQ != 0)
# one more example: keep rows whose pop is above zero
welly_populated <- welly %>%
  filter(pop > 0)
Combine conditions with the & (and) and | (or) operations
# both conditions joined explicitly with &
welly_aquanauts <- welly %>%
  filter(LandAreaSQ == 0 & pop > 0)
# conditions separated by commas must all hold for a row to be kept
welly_aquanauts <- welly %>%
  filter(LandAreaSQ == 0, pop > 0)
# or chain one filter() per condition with the pipe
welly_aquanauts <- welly %>%
  filter(LandAreaSQ == 0) %>%
  filter(pop > 0)
select
# keep a single column by name
welly_reduced <- welly %>%
  select(MeshblockN)
# keep a range of adjacent columns, from MeshblockN through UrbanAreaN
welly_reduced <- welly %>%
  select(MeshblockN:UrbanAreaN)
# column positions and column names can be mixed in one call
welly_reduced <- welly %>%
  select(1:3, UrbanAreaN, 7:11)
Columns can also be chosen by name pattern with the helpers starts_with(),
ends_with(), contains()
# starts_with() matches columns by the beginning of their name; the leading
# '-' drops the matched columns instead of keeping them
welly_reduced <- welly %>%
  select(-starts_with("urban"))
# other helpers work the same way: ends_with(), contains(), matches()
Drop columns by putting a - sign in front of the selection
# drop a single column by name
welly_reduced <- welly %>%
  select(-MeshblockN)
# drop a range of adjacent columns; the parentheses make '-' apply to the
# whole range rather than only to MeshblockN
welly_reduced <- welly %>%
  select(-(MeshblockN:UrbanAreaN))
# drop a mix of positions and names: wrap them in c() so the '-' negates
# every term (a bare '-(1:3)' followed by positive terms would only drop
# columns 1 to 3)
welly_reduced <- welly %>%
  select(-c(1:3, UrbanAreaN, 7:11))
# drop every column matched by a selection helper
welly_reduced <- welly %>%
  select(-starts_with("urban"))
# other helpers work the same way: ends_with(), contains(), matches()
mutate
# Add new columns computed from existing ones. pc_diff reuses sum_xy and
# diff_xy, which are created earlier in the same mutate() call.
result <- input %>%
  mutate(
    sum_xy = x + y,
    diff_xy = x - y,
    pc_diff = diff_xy / sum_xy * 100
  )
Use the across() function inside mutate() to apply
a calculation only to selected columns
# change the type of columns: every integer column becomes character
result <- input %>%
  mutate(across(where(is.integer), as.character))
# convert the columns whose name matches "MeshblockN" to factors
result <- input %>%
  mutate(across(matches("MeshblockN"), as.factor))
# express every numeric column as a percentage of `total`
# NOTE(review): if `total` is itself a numeric column it is also matched by
# where(is.numeric) -- confirm that this is intended
result <- input %>%
  mutate(across(where(is.numeric), ~ . / total * 100))
from xkcd.com
