# 3 - Functional programming
#1-Why functional programming
# Video: "Why functional programming" (a long video about functional programming).
#2-Using a for loop to remove duplication
# Simulated data: four columns (a-d) of ten standard-normal draws each.
df <- data.frame(
  a = rnorm(n = 10),
  b = rnorm(n = 10),
  c = rnorm(n = 10),
  d = rnorm(n = 10)
)

# Exercise: fill in the body of the for loop so that the median of every
# column of df is stored in output.

# Preallocate one double slot per column so the loop never grows the vector.
output <- double(length(df))

# Visit each column position and record that column's median.
for (i in seq_along(df)) {
  output[[i]] <- median(x = df[[i]])
}

# Display the column medians.
output
#3-Turning the for loop into a function
#Turn the for loop snippet into a function called col_median() that
#takes one argument df and returns the vector of column medians.
# Simulated data: four columns (a-d) of ten standard-normal draws each.
df <- data.frame(
  a = rnorm(10),
  b = rnorm(10),
  c = rnorm(10),
  d = rnorm(10)
)

#' Median of every column
#'
#' @param df A data frame, or a list whose elements `median()` accepts.
#' @return An unnamed numeric vector holding one median per element of `df`,
#'   in the order the elements appear.
col_median <- function(df) {
  # Size the result with length(), the same count seq_along() iterates over.
  # ncol() is NULL for a plain list, which made the allocation fail.
  output <- vector("double", length(df))
  for (i in seq_along(df)) {
    output[[i]] <- median(df[[i]])
  }
  output
}

# Column medians of the simulated data.
col_median(df)
#4-What about column means?
#We have copied and pasted the col_median() function into the editor.
#Create a col_mean() function by editing col_median() to find
#the column means instead.
# Simulated data: four columns (a-d) of ten standard-normal draws each.
df <- data.frame(a = rnorm(10), b = rnorm(10), c = rnorm(10), d = rnorm(10))
df

#' Mean of every column
#'
#' Same loop as col_median(), with mean() as the per-column summary.
#'
#' @param df A data frame (or list) of columns.
#' @return An unnamed numeric vector with one mean per column.
col_mean <- function(df) {
  column_means <- double(length(df))
  for (col_idx in seq_along(df)) {
    column_means[[col_idx]] <- mean(df[[col_idx]])
  }
  column_means
}

# Column means of the simulated data.
col_mean(df)
#5-What about column standard deviations?
# Copy and paste the col_median function into the editor.
# Edit col_median function to find the column standard deviations instead.
# Use the name col_sd for your new function.
# Simulated data: four columns (a-d) of ten standard-normal draws each.
df <- data.frame(a = rnorm(10), b = rnorm(10), c = rnorm(10), d = rnorm(10))
df

#' Standard deviation of every column
#'
#' Same loop as col_median(), with sd() as the per-column summary.
#'
#' @param df A data frame (or list) of columns.
#' @return An unnamed numeric vector with one standard deviation per column.
col_sd <- function(df) {
  column_sds <- double(length(df))
  for (col_idx in seq_along(df)) {
    column_sds[[col_idx]] <- sd(df[[col_idx]])
  }
  column_sds
}

# Column standard deviations of the simulated data.
col_sd(df)
#6-Uh oh...time to write a function again
#Edit the function f() to take a second argument power.
#Edit the body of f() so that the absolute deviations raised to power are returned.
# Sample inputs for trying out f().
x <- c(1, 2, 3)
power <- 3

#' Absolute deviations from the mean, raised to a power
#'
#' @param x A numeric vector.
#' @param power Exponent applied to each absolute deviation.
#' @return `abs(x - mean(x))^power`.
f <- function(x, power) {
  deviations <- x - mean(x)
  abs(deviations)^power
}

# Cubed absolute deviations of the sample vector.
f(x, power = 3)
#7-Functions can be arguments too
# Video: "Functions can be arguments too"
#8-Using a function as an argument
# Find the column medians of df by specifying median as the fun argument to
#col_summary().
# Find the column means of df by specifying mean as the fun argument to
#col_summary().
# Find the column interquartile ranges of df using col_summary().
# Simulated data: four columns (a-d) of ten standard-normal draws each.
df <- data.frame(
  a = rnorm(n = 10),
  b = rnorm(n = 10),
  c = rnorm(n = 10),
  d = rnorm(n = 10)
)
# Show the generated values.
df
#' Median of every column
#'
#' @param df A data frame (or list) of columns.
#' @return An unnamed numeric vector with one median per column.
col_median <- function(df) {
  column_medians <- double(length(df))
  for (col_idx in seq_along(df)) {
    column_medians[[col_idx]] <- median(df[[col_idx]])
  }
  column_medians
}
#' Summarise every column with a caller-supplied function
#'
#' @param df A data frame (or list) of columns.
#' @param fun Function called once per column; each result is stored in one
#'   slot of the result vector.
#' @return An unnamed vector with one summary value per column.
col_summary <- function(df, fun) {
  summaries <- double(length(df))
  for (col_idx in seq_along(df)) {
    summaries[[col_idx]] <- fun(df[[col_idx]])
  }
  summaries
}
#' Mean of every column
#'
#' @param df A data frame (or list) of columns.
#' @return An unnamed numeric vector with one mean per column.
col_mean <- function(df) {
  column_means <- double(length(df))
  for (col_idx in seq_along(df)) {
    column_means[[col_idx]] <- mean(df[[col_idx]])
  }
  column_means
}
# Column medians, first with the dedicated helper and then by handing
# median to the generic col_summary().
col_median(df)
col_summary(df, fun = median)
# Column means, again with the dedicated helper and with col_summary().
col_mean(df)
col_summary(df, fun = mean)
# Column interquartile ranges; no dedicated helper is needed for IQR.
col_summary(df, fun = IQR)
#9-Introducing purrr
# Video: "Introducing purrr" (a long video about the purrr package).
# Further reading:
# https://www.rdocumentation.org/packages/purrr/versions/0.2.2.2
# http://data.library.virginia.edu/getting-started-with-the-purrr-package-in-r/
# https://www.youtube.com/watch?v=A8UaL47UXYE
#10-The map functions
#Use map_dbl() to find the...
# Column means of the data frame df.
# Column medians of the data frame df.
# Column standard deviations of the data frame df.
# Install purrr only when it is missing, so re-running the script does not
# download and reinstall the package every time.
if (!requireNamespace("purrr", quietly = TRUE)) {
  install.packages("purrr")
}
library(purrr)
# Simulated data: four columns (a-d) of ten standard-normal draws each.
df <- data.frame(
  a = rnorm(n = 10),
  b = rnorm(n = 10),
  c = rnorm(n = 10),
  d = rnorm(n = 10)
)
# Show the generated values.
df
#' Median of every column (loop version, kept for comparison with map_dbl())
#'
#' @param df A data frame (or list) of columns.
#' @return An unnamed numeric vector with one median per column.
col_median <- function(df) {
  column_medians <- double(length(df))
  for (col_idx in seq_along(df)) {
    column_medians[[col_idx]] <- median(df[[col_idx]])
  }
  column_medians
}
#' Summarise every column with a caller-supplied function
#'
#' @param df A data frame (or list) of columns.
#' @param fun Function called once per column; each result is stored in one
#'   slot of the result vector.
#' @return An unnamed vector with one summary value per column.
col_summary <- function(df, fun) {
  summaries <- double(length(df))
  for (col_idx in seq_along(df)) {
    summaries[[col_idx]] <- fun(df[[col_idx]])
  }
  summaries
}
#' Mean of every column (loop version, kept for comparison with map_dbl())
#'
#' @param df A data frame (or list) of columns.
#' @return An unnamed numeric vector with one mean per column.
col_mean <- function(df) {
  column_means <- double(length(df))
  for (col_idx in seq_along(df)) {
    column_means[[col_idx]] <- mean(df[[col_idx]])
  }
  column_means
}
# Attach purrr so the map_*() functions are available.
library(purrr)
# Use map_dbl() to find the column means.
map_dbl(.x = df, .f = mean)
# Use map_dbl() to find the column medians.
map_dbl(.x = df, .f = median)
# Use map_dbl() to find the column standard deviations.
map_dbl(.x = df, .f = sd)
#11-The ... argument to the map functions
# Find the column means of planes by combining map_dbl() with mean().
# Find the column means of planes again, but this time exclude missing values
# from the calculation.
# Find the 5th percentile of each column in planes by combining map_dbl with
# quantile(). Don't forget to exclude missing values!
# Small sample of plane data; NA marks a missing value.
speed <- c(232, 108, 432, NA, NA, NA)
seats <- c(102, 4, 139, 142, 20, 2)
engines <- c(4, 1, 2, 2, 2, 1)
year <- c(1956, 1975, 1977, 1996, 2010, NA)
planes <- data.frame(
  year = year,
  engines = engines,
  seats = seats,
  speed = speed
)
planes
# Mean of each column; no NA handling is requested here.
map_dbl(.x = planes, .f = mean)
# Mean of each column with missing values excluded: na.rm is passed on to
# mean() through map_dbl()'s ... argument.
map_dbl(.x = planes, .f = mean, na.rm = TRUE)
# 5th percentile of each column, again excluding missing values.
map_dbl(.x = planes, .f = quantile, probs = 0.05, na.rm = TRUE)
#12-Picking the right map function
# Remember to choose the appropriate map function based on the output you expect
# for each of the following:
# Find which columns are numeric in df by combining a map function with
# is.numeric().
# Find the type of each column in df by combining a map function with typeof().
# Find a summary of each column in df by combining a map function with summary().
# Simulated data: four columns (a-d) of ten standard-normal draws each.
df <- data.frame(
  a = rnorm(n = 10),
  b = rnorm(n = 10),
  c = rnorm(n = 10),
  d = rnorm(n = 10)
)
df
# is.numeric() gives TRUE/FALSE per column, so map_lgl() is the match.
map_lgl(.x = df, .f = is.numeric)
# typeof() gives one string per column, so map_chr() is the match.
map_chr(.x = df, .f = typeof)
# summary() gives a multi-value object per column, so plain map() keeps
# the results in a list.
map(.x = df, .f = summary)
#13-Shortcuts
#14-Solve a simple problem first
# Examine the structure of cyl with str().
# Extract the first data frame in cyl and assign it to the variable four_cyls.
# Fit a linear regression of miles per gallon on weight using four_cyls as the data
#argument to lm().
# Print the built-in mtcars data, via the namespaced and the bare name.
datasets::mtcars
mtcars
# Split mtcars into a list with one data frame per value of the cyl column.
cyl <- split(x = mtcars, f = mtcars$cyl)
cyl
# Examine cyl: the full printout first, then its compact structure.
cyl
str(object = cyl)
# Take the first data frame in the list.
four_cyls <- cyl[[1L]]
# Linear regression of miles per gallon on weight for that subset.
lm(formula = mpg ~ wt, data = four_cyls)
#15-Using an anonymous function
#Rewrite the map() call to use the anonymous function
#function(df) lm(mpg ~ wt, data = df)
# Print the built-in mtcars data, via the namespaced and the bare name.
datasets::mtcars
mtcars
# Split mtcars into a list with one data frame per value of the cyl column.
cyl <- split(x = mtcars, f = mtcars$cyl)
cyl
# Fit mpg ~ wt to every element of cyl using an anonymous function.
map(
  .x = cyl,
  .f = function(df) lm(mpg ~ wt, data = df)
)
#16-Using a formula
#Rewrite our call to map() to use the formula notation instead of an anonymous
#function.
# Print the built-in mtcars data, via the namespaced and the bare name.
datasets::mtcars
mtcars
# Split mtcars into a list with one data frame per value of the cyl column.
cyl <- split(x = mtcars, f = mtcars$cyl)
cyl
# Same per-group fit, written with purrr's formula shortcut; "." stands for
# the current element of cyl.
map(
  .x = cyl,
  .f = ~ lm(mpg ~ wt, data = .)
)
#17-Using a string
# Assign the result from the previous exercise to the variable models
# Use map and the coef() function to extract the coefficients from each model,
#and save it in the variable coefs
# Use map and the string shortcut to extract the slope wt element from
#coefficients vectors
# Print the built-in mtcars data, via the namespaced and the bare name.
datasets::mtcars
mtcars
# Split mtcars into a list with one data frame per value of the cyl column.
cyl <- split(x = mtcars, f = mtcars$cyl)
cyl
# One mpg ~ wt model per cylinder group, kept for the steps below.
models <- map(
  .x = cyl,
  .f = ~ lm(mpg ~ wt, data = .)
)
# Coefficient vector of every model.
coefs <- map(.x = models, .f = coef)
# A string passed as .f extracts the element with that name: the wt slope.
map(.x = coefs, .f = "wt")
#18-Using a numeric vector
#Extract the second element from each vector in coefs using the numeric shortcut and
#map_dbl().
# Print the built-in mtcars data, via the namespaced and the bare name.
datasets::mtcars
mtcars
# Split mtcars into a list with one data frame per value of the cyl column.
cyl <- split(x = mtcars, f = mtcars$cyl)
cyl
# One mpg ~ wt model per cylinder group, then each model's coefficients.
models <- map(
  .x = cyl,
  .f = ~ lm(mpg ~ wt, data = .)
)
coefs <- map(.x = models, .f = coef)
coefs
# A number passed as .f extracts the element at that position; map_dbl()
# collects the second coefficient of every model into a double vector.
map_dbl(.x = coefs, .f = 2)
#19-Putting it together with pipes
#Rewrite the last two lines to use a pipe instead.
# Define models (don't change)
models <- mtcars %>%
  split(f = mtcars$cyl) %>%
  map(.f = ~ lm(mpg ~ wt, data = .))
# One pipeline: summarise every model, then pull the "r.squared" element of
# each summary into a double vector.
models %>%
  map(.f = summary) %>%
  map_dbl(.f = "r.squared")
# Last updated