November 13, 2022

dplyr package

R dplyr Package

library(dplyr)

glimpse(df)
glimpse(mutate(mammals, adult_body_mass_kg = adult_body_mass_g / 1000))

mutate() adds new variables that are functions of existing variables.
select() picks variables based on their names.
filter() picks cases based on their values.
summarise() reduces multiple values down to a single summary.
arrange() changes the ordering of the rows.

filter function in dplyr package in R:
filter(schtype == "public")
temp <- filter(temp, instrument == "Guitar")
email50_big <- email50 %>% filter(number == "big")
votes %>% filter(vote <=3)
filter(newdata_long, day == "weekday_is_monday", channel == "data_channel_is_lifestyle")
filter(days, weekday == "weekday_is_saturday" | weekday == "weekday_is_sunday" )
filter(starwars, hair_color == "none" & eye_color == "black")

dplyr package mutate function in R:
newdata_long <- mutate(newdata_long, rate_positivity = rate_positive_words/rate_negative_words, gain = n_tokens_title - n_tokens_content)
email50 <- email50 %>%
      mutate(num_char_cat = ifelse(num_char < med_num_char, "below median", "at or above median"))
votes %>% mutate(year=session+1945)
votes_processed <- votes %>%
  filter(vote <= 3) %>%
  mutate(year = session + 1945,
         country = countrycode(ccode, "cown", "country.name"))
msleep %>%
    mutate(rem_proportion = sleep_rem / sleep_total, bodywt_grams = bodywt * 1000) %>%
    head

join functions in dplyr package in R:
left_join(names, plays, by="name")
left_join(names2, plays2, by=c("name","surname"))
right_join(names, plays, by="name")
inner_join(names, plays, by="name")
full_join(names, plays, by="name")

dplyr package select function in R:
select(temp, first, last, band)
select(mammals, adult_head_body_len_mm, litter_size)
select(mammals, -adult_head_body_len_mm)
select(mammals, contains("body"))
select(mammals, starts_with("adult"))
select(mammals, ends_with("g"))
select(mammals, 1:3)
car_price <- car_price %>% dplyr::select(car_ID, car_Company, everything())
df %>% select(-b, -c, everything())
select_if(car_price[,-1], is.numeric)
select_all(mtcars, toupper)

select_at(.tbl, .vars, .funs = list(), ...)

select(newdata_long, day, channel, shares)
head(select(msleep, -name))
head(select(msleep, starts_with("sl")))
artists %>%
  full_join(bands, by = c("first", "last"))
bands %>%
  left_join(artists, by = c("first", "last")) %>%
  filter(instrument == "Guitar") %>%
  select(first, last, band)
starwars %>%
  select(name, ends_with("color"))
distinct(newdata_long, channel)
day_groups<- group_by(data, day)

summarise function in dplyr package:
summarise(data,  share.avg = mean(shares))
summarise(newdata_long, shares_avg = mean(shares, na.rm = TRUE))
goal2 <-
  artists %>%
    full_join(bands,by=c("first","last")) %>%
      inner_join(songs,by=c("first","last"))
votes_processed %>%
  group_by(year) %>%
  summarise(total=n(), percent_yes=mean(vote==1))
summarise(mammals, mean_mass = mean(adult_body_mass_g, na.rm = TRUE))

starwars %>%
  group_by(species) %>%
  summarise(
    n = n(),
    mass = mean(mass, na.rm = TRUE)
  ) %>%
  filter(n > 1)

mframe %>%
  select(funding_type, raised_amount) %>%
  filter(funding_type %in% c("vente","angel","prv_equi")) %>%
  group_by(funding_type) %>%
  summarise(
    avg = mean(raised_amount, na.rm = TRUE)
  )


by_country %>% arrange(percent_yes)
newdata_arranged <- arrange(newdata_long, day, channel)
by_country %>% arrange(desc(percent_yes))
newdata_arranged <- arrange(newdata_long, desc(shares), desc(n_tokens_title))
sample_n(airquality, size = 10)
sample_frac(airquality, size = 0.1)
count(airquality, Month)

means <- atmos %>%
  filter(year == year) %>%
  group_by(long, lat) %>%
  summarize(temp = mean(temp, na.rm = TRUE),
         pressure = mean(pressure, na.rm = TRUE),
         ozone = mean(ozone, na.rm = TRUE),
         cloudlow = mean(cloudlow, na.rm = TRUE),
         cloudmid = mean(cloudmid, na.rm = TRUE),
         cloudhigh = mean(cloudhigh, na.rm = TRUE)) %>%
  ungroup()


tbl_df (commonly called a tibble)
library(tibble)
hfl <- as_tibble(hflights)
as_tibble(data)
as_tibble(mtcars)

Related aRticles:   Data.Table in R    Using Databases in R