# Read the active For-Hire Vehicles CSV into `fhv_clean`, pass it through
# clean_names() (presumably janitor::clean_names, which standardises column
# names -- confirm against the library() calls at the top of the document),
# then rename the `veh` column to `hybrid`.
fhv_clean <- read_csv(file = "For_Hire_Vehicles__FHV__-_Active.csv") %>%
  clean_names() %>%
  rename(hybrid = veh)
7. Mutate
Video Tutorial
Mutate
Mutate is an incredibly powerful tool to create new columns and new variables.
Let’s grab our code to read in the clean dataframe
We create a new column with `mutate()` by giving the name of the new column and the value to assign to it.
# Add a constant `city` column: the single string is recycled to every row.
fhv_clean %>%
  mutate(city = "New York City")
You can create multiple new columns at once.
# Create (or overwrite) several columns in a single mutate() call.
fhv_clean %>%
  mutate(city = "New York City",
         active = TRUE) # I can overwrite column names too. I've made this active column boolean (true or false)
Mutate with logical expressions
Where mutate gets powerful is when you use it with logical expressions. Here we use if_else()
# Build `fhv_rideshare` by adding a two-level `rideshare` column:
# "rideshare" when the base is Uber, "limo" for every other base.
fhv_rideshare <- fhv_clean %>%
  mutate(rideshare = if_else(
    condition = base_name == "UBER USA, LLC",
    true = "rideshare",
    false = "limo"
    # if it's an Uber, call it rideshare; anything else is labelled limo
  )) # notice I named the arguments here! A good practice when the argument is not ...
Tabulate the variable we made with the `count()` function.
# Count the number of rows in each `rideshare` category.
fhv_rideshare %>%
  count(rideshare)
What if we have more than one logical expression we care about? Check out `case_when()`.
# Build `fhv_blackcar` by adding a `ride_type` column. case_when() evaluates
# the conditions in order and uses the first one that is TRUE for each row:
#   * Uber black cars     -> "BLACK CAR RIDESHARE"
#   * non-Uber black cars -> "BLACK CAR NON-RIDESHARE"
#   * everything else     -> the row's existing base_type
fhv_blackcar <- fhv_clean %>%
  mutate(
    ride_type = case_when(
      base_name == "UBER USA, LLC" & base_type == "BLACK-CAR" ~ "BLACK CAR RIDESHARE",
      base_name != "UBER USA, LLC" & base_type == "BLACK-CAR" ~ "BLACK CAR NON-RIDESHARE",
      TRUE ~ base_type # if it doesn't meet either condition, return the base_type
    ))
Use `&` ("and") and `|` ("or") to combine multiple conditions in a single logical expression.
# Count the number of rows in each `ride_type` category.
fhv_blackcar %>%
  count(ride_type) # now we have four categories!
Normalizing with Mutate
You can use statistical functions like `mean()` to normalize data with `mutate()`. `mean()` returns the average of all the vehicle years. You can then use `mutate()` to generate a new variable that expresses each observation relative to that mean; in the code below, each vehicle year is divided by the mean vehicle year.
# Normalise vehicle_year two ways, then keep only the columns of interest:
#   year_norm - each vehicle_year divided by the mean vehicle_year
#               (missing years are dropped when computing the mean)
#   year_pct  - percent_rank() of vehicle_year
fhv_clean %>%
  mutate(year_norm = vehicle_year / mean(vehicle_year, na.rm = TRUE),
         year_pct = percent_rank(vehicle_year)) %>%
  select(vehicle_license_number, vehicle_year, year_norm, year_pct)