# 3-Functional programming

#1-Why functional programming

# Why functional programming (video): a long video about functional programming.

#2-Using a for loop to remove duplication

# Simulated data: four columns (a-d), each holding ten standard-normal draws.
df <- data.frame(a = rnorm(10), b = rnorm(10), c = rnorm(10), d = rnorm(10))

# Exercise: compute the median of every column with a for loop and keep the
# results in `output`.

# Pre-allocate one double slot per column so the loop never grows the vector.
output <- numeric(length(df))

# Visit the columns by position and store each column's median.
for (i in seq_len(length(df))) {
  output[[i]] <- median(df[[i]])
}

# Autoprint the four medians.
output

#3-Turning the for loop into a function

#Turn the for loop snippet into a function called col_median() that 
#takes one argument df and returns the vector of column medians.

# Simulated data for this exercise: columns a-d, ten standard-normal draws
# each. The columns are generated in the order a, b, c, d.
df <- as.data.frame(
  lapply(c(a = 10, b = 10, c = 10, d = 10), rnorm)
)

# Turn this code into col_median()
# col_median(): median of every column of a data frame.
#
# Arguments:
#   df  A data frame; each column is passed to median() on its own.
# Returns:
#   An unnamed vector with one median per column, in column order.
col_median <- function(df) {
  # One pre-allocated double slot per column.
  medians <- double(ncol(df))
  for (col_idx in seq_along(df)) {
    column <- df[[col_idx]]
    medians[[col_idx]] <- median(column)
  }
  medians
}

# Try the function on the simulated data; the result autoprints.
col_median(df = df)

#4-What about column means?

#We have copied and pasted the col_median() function into the editor.
#Create a col_mean() function by editing col_median() to find 
#the column means instead.

# Fresh simulated data: columns a-d, ten standard-normal draws each.
df <- data.frame(
  a = rnorm(n = 10, mean = 0, sd = 1),
  b = rnorm(n = 10, mean = 0, sd = 1),
  c = rnorm(n = 10, mean = 0, sd = 1),
  d = rnorm(n = 10, mean = 0, sd = 1)
)
# Autoprint the data for reference.
df

# Create col_mean() function to find column means
# col_mean(): arithmetic mean of every column of `df`.
#
# Arguments:
#   df  A data frame (or list); each element is passed to mean() on its own.
# Returns:
#   An unnamed vector with one mean per column, in column order.
col_mean <- function(df) {
  means <- vector("double", length(df))
  for (k in seq_len(length(df))) {
    means[[k]] <- mean(df[[k]])
  }
  means
}

# Column means of the simulated data; the result autoprints.
col_mean(df = df)

#5-What about column standard deviations?

# Copy and paste the col_median function into the editor.
# Edit col_median function to find the column standard deviations instead.
# Use the name col_sd for your new function.

# Simulated data: columns a-d, ten standard-normal draws each (generated in
# the order a, b, c, d).
df <- as.data.frame(
  lapply(c(a = 10, b = 10, c = 10, d = 10), rnorm)
)
# Autoprint the data for reference.
df

# Define col_sd() function
# col_sd(): sample standard deviation of every column of `df`.
#
# Arguments:
#   df  A data frame (or list); each element is passed to sd() on its own.
# Returns:
#   An unnamed vector with one standard deviation per column, in column order.
col_sd <- function(df) {
  n_cols <- length(df)
  sds <- double(n_cols)
  pos <- 1L
  while (pos <= n_cols) {
    sds[[pos]] <- sd(df[[pos]])
    pos <- pos + 1L
  }
  sds
}

# Column standard deviations of the simulated data; the result autoprints.
col_sd(df = df)

#6-Uh oh...time to write a function again

#Edit the function f() to take a second argument power.
#Edit the body of f() so that the absolute deviations raised to power are returned.

# Sample input for f(): the values 1, 2, 3 (as doubles) and the exponent 3.
x <- as.numeric(1:3)
power <- 3

# Add a second argument called power
# f(): absolute deviations from the mean, raised to a power.
#
# Arguments:
#   x      Numeric vector.
#   power  Exponent applied to each absolute deviation.
# Returns:
#   A vector the same length as `x` holding |x - mean(x)|^power.
f <- function(x, power) {
  centered <- x - mean(x)
  abs(centered)^power
}

# Cubed absolute deviations of x from its mean; the result autoprints.
f(x = x, power = 3)

#7-Functions can be arguments too

# Functions can be arguments too (video)

#8-Using a function as an argument

# Find the column medians of df by specifying median as the fun argument to 
#col_summary().
# Find the column means of df by specifying mean as the fun argument to 
#col_summary().
# Find the column interquartile ranges of df using col_summary().

# Simulated data: columns a-d, ten standard-normal draws each.
df <- data.frame(a = rnorm(10), b = rnorm(10), c = rnorm(10), d = rnorm(10))
# Autoprint the data for reference.
df

# col_median(): median of every column of `df`.
#
# Arguments:
#   df  A data frame (or list); each element is passed to median() on its own.
# Returns:
#   An unnamed vector with one median per column, in column order.
col_median <- function(df) {
  n_cols <- length(df)
  medians <- double(n_cols)
  pos <- 1L
  while (pos <= n_cols) {
    medians[[pos]] <- median(df[[pos]])
    pos <- pos + 1L
  }
  medians
}

# col_summary(): apply a summary function to every column of `df`.
#
# Arguments:
#   df   A data frame (or list); each element is summarised on its own.
#   fun  The summary function, or its name as a string (resolved with
#        match.fun()). It must return a single value per column.
#   ...  Extra arguments forwarded unchanged to `fun` on every call,
#        e.g. na.rm = TRUE.
# Returns:
#   An unnamed vector with one summary value per column, in column order.
col_summary <- function(df, fun, ...) {
  # Resolve `fun` once so both functions and function names are accepted.
  fun <- match.fun(fun)
  output <- vector("numeric", length(df))
  for (i in seq_along(df)) {
    # `[[<-` rejects a result that is not length one, so a non-scalar summary
    # fails loudly instead of being truncated.
    output[[i]] <- fun(df[[i]], ...)
  }
  output
}

# col_mean(): arithmetic mean of every column of `df`.
#
# Arguments:
#   df  A data frame (or list); each element is passed to mean() on its own.
# Returns:
#   An unnamed vector with one mean per column, in column order.
col_mean <- function(df) {
  n_cols <- length(df)
  means <- double(n_cols)
  pos <- 1L
  while (pos <= n_cols) {
    means[[pos]] <- mean(df[[pos]])
    pos <- pos + 1L
  }
  means
}

# Find the column medians using col_median() and col_summary()
# Column medians: the dedicated helper, then the generic col_summary().
col_median(df = df)
col_summary(df = df, fun = median)

# Column means, computed both ways.
col_mean(df = df)
col_summary(df = df, fun = mean)

# Column interquartile ranges; only col_summary() is needed for this one.
col_summary(df = df, fun = IQR)

#9-Introducing purrr

# Introducing purrr (video): a long video about the purrr package.

# Further reading on purrr:
# https://www.rdocumentation.org/packages/purrr/versions/0.2.2.2
#
# http://data.library.virginia.edu/getting-started-with-the-purrr-package-in-r/
#
# https://www.youtube.com/watch?v=A8UaL47UXYE

#10-The map functions

#Use map_dbl() to find the...
# Column means of the data frame df.
# Column medians of the data frame df.
# Column standard deviations of the data frame df.

# Install purrr only when it is missing. An unconditional install.packages()
# would hit the network and reinstall the package on every run of the script.
if (!requireNamespace("purrr", quietly = TRUE)) {
  install.packages("purrr")
}
library(purrr)

# Simulated data: columns a-d, ten standard-normal draws each.
df <- data.frame(
  a = rnorm(10),
  b = rnorm(10),
  c = rnorm(10),
  d = rnorm(10)
)
# Autoprint the data for reference.
df

# col_median(): median of every column of `df`.
#
# Arguments:
#   df  A data frame (or list); each element is passed to median() on its own.
# Returns:
#   An unnamed vector with one median per column, in column order.
col_median <- function(df) {
  medians <- vector("double", length(df))
  for (col_idx in seq_len(length(df))) {
    column <- df[[col_idx]]
    medians[[col_idx]] <- median(column)
  }
  medians
}

# col_summary(): apply a summary function to every column of `df`.
#
# Arguments:
#   df   A data frame (or list); each element is summarised on its own.
#   fun  The summary function, or its name as a string (resolved with
#        match.fun()). It must return a single value per column.
#   ...  Extra arguments forwarded unchanged to `fun` on every call,
#        e.g. na.rm = TRUE.
# Returns:
#   An unnamed vector with one summary value per column, in column order.
col_summary <- function(df, fun, ...) {
  # Resolve `fun` once so both functions and function names are accepted.
  fun <- match.fun(fun)
  output <- vector("numeric", length(df))
  for (i in seq_along(df)) {
    # `[[<-` rejects a result that is not length one, so a non-scalar summary
    # fails loudly instead of being truncated.
    output[[i]] <- fun(df[[i]], ...)
  }
  output
}

# col_mean(): arithmetic mean of every column of `df`.
#
# Arguments:
#   df  A data frame (or list); each element is passed to mean() on its own.
# Returns:
#   An unnamed vector with one mean per column, in column order.
col_mean <- function(df) {
  means <- vector("double", length(df))
  for (col_idx in seq_along(df)) {
    column <- df[[col_idx]]
    means[[col_idx]] <- mean(column)
  }
  means
}

# Load the purrr package
# Attach purrr so map_dbl() and the %>% pipe are available.
library(purrr)

# Column means via map_dbl().
df %>% map_dbl(mean)

# Column medians via map_dbl().
df %>% map_dbl(median)

# Column standard deviations via map_dbl().
df %>% map_dbl(sd)

#11-The ... argument to the map functions

# Find the column means of planes by combining map_dbl() with mean().
# Find the column means of planes again, but this time exclude missing values 
# from the calculation.
# Find the 5th percentile of each column in planes by combining map_dbl with 
# quantile(). Don't forget to exclude missing values!

# Four attributes recorded for six planes; NA marks a missing value.
year <- c(1956, 1975, 1977, 1996, 2010, NA)
engines <- c(4, 1, 2, 2, 2, 1)
seats <- c(102, 4, 139, 142, 20, 2)
speed <- c(232, 108, 432, NA, NA, NA)

# Assemble the columns (year, engines, seats, speed) and autoprint the result.
planes <- data.frame(
  year = year,
  engines = engines,
  seats = seats,
  speed = speed
)
planes

# Plain column means: mean() is called without na.rm here.
planes %>% map_dbl(mean)

# Column means again, forwarding na.rm = TRUE to mean() through `...`.
planes %>% map_dbl(mean, na.rm = TRUE)

# 5th percentile of each column, again dropping missing values.
planes %>% map_dbl(quantile, probs = 0.05, na.rm = TRUE)

#12-Picking the right map function

# Remember to choose the appropriate map function based on the output you expect 
# for each of the following:
# Find which columns are numeric in df by combining a map function with 
# is.numeric().
# Find the type of each column in df by combining a map function with typeof().
# Find a summary of each column in df by combining a map function with summary().

# Simulated data: columns a-d, ten standard-normal draws each.
df <- data.frame(
  a = rnorm(n = 10, mean = 0, sd = 1),
  b = rnorm(n = 10, mean = 0, sd = 1),
  c = rnorm(n = 10, mean = 0, sd = 1),
  d = rnorm(n = 10, mean = 0, sd = 1)
)
# Autoprint the data for reference.
df

# Logical result per column, so use map_lgl(): is the column numeric?
df %>% map_lgl(is.numeric)

# Character result per column, so use map_chr(): the column's storage type.
df %>% map_chr(typeof)

# summary() is not a single value, so use map() and keep a list.
df %>% map(summary)

#13-Shortcuts

# RStudio keyboard shortcuts

#14-Solve a simple problem first

# Examine the structure of cyl with str().
# Extract the first data frame in cyl and assign it to the variable four_cyls.
# Fit a linear regression of miles per gallon on weight using four_cyls as the data 
#argument to lm().

# Show the built-in mtcars data (once via its namespace, once by bare name).
datasets::mtcars
mtcars

# Split the cars into one data frame per cylinder count and autoprint the list.
cyl <- split(x = mtcars, f = mtcars[["cyl"]])
cyl

# Inspect the list again, first in full and then as a compact structure.
cyl
str(cyl)

# The first element of the split is the 4-cylinder group.
four_cyls <- cyl[["4"]]

# Regress miles per gallon on weight for the 4-cylinder cars only.
lm(formula = mpg ~ wt, data = four_cyls)

#15-Using an anonymous function

#Rewrite the map() call to use the anonymous function 
#function(df) lm(mpg ~ wt, data = df)

# Show mtcars, then split it into one data frame per cylinder count.
datasets::mtcars
mtcars
cyl <- split(x = mtcars, f = mtcars$cyl)
cyl

# Fit mpg ~ wt within each cylinder group using an explicit anonymous function.
map(
  .x = cyl,
  .f = function(df) lm(formula = mpg ~ wt, data = df)
)

#16-Using a formula

#Rewrite our call to map() to use the formula notation instead of an anonymous 
#function.

# Show mtcars, then split it into one data frame per cylinder count.
datasets::mtcars
mtcars
cyl <- split(x = mtcars, f = mtcars$cyl)
cyl

# Same per-group regression, written with purrr's one-sided formula shortcut;
# `.` stands for the current list element.
map(.x = cyl, .f = ~ lm(formula = mpg ~ wt, data = .))

#17-Using a string

# Assign the result from the previous exercise to the variable models
# Use map and the coef() function to extract the coefficients from each model, 
#and save it in the variable coefs
# Use map and the string shortcut to extract the slope wt element from 
#coefficients vectors

# Show mtcars, then split it into one data frame per cylinder count.
datasets::mtcars
mtcars
cyl <- split(x = mtcars, f = mtcars$cyl)
cyl

# One mpg ~ wt regression per cylinder group.
models <- cyl %>% map(~ lm(mpg ~ wt, data = .))

# Coefficient vector of each fitted model.
coefs <- models %>% map(coef)

# String shortcut: pull the element named "wt" out of each coefficient vector.
coefs %>% map("wt")

#18-Using a numeric vector

#Extract the second element from each vector in coefs using the numeric shortcut and
#map_dbl().

# Show mtcars, then split it into one data frame per cylinder count.
datasets::mtcars
mtcars
cyl <- split(x = mtcars, f = mtcars$cyl)
cyl

# One mpg ~ wt regression per cylinder group.
models <- cyl %>% map(~ lm(mpg ~ wt, data = .))

# Coefficient vector of each fitted model, autoprinted for inspection.
coefs <- models %>% map(coef)
coefs

# Numeric shortcut: take the second element of each coefficient vector.
coefs %>% map_dbl(2)

#19-Putting it together with pipes

#Rewrite the last two lines to use a pipe instead.

# Define models (don't change)
# Split mtcars by cylinder count and fit mpg ~ wt within each group.
# Inside split(), `.` refers to the piped-in data frame.
models <- mtcars %>%
  split(.$cyl) %>%
  map(~ lm(mpg ~ wt, data = .))

# One pipeline: summarise every model, then extract its R-squared as a double.
models %>%
  map(summary) %>%
  map_dbl(~ .x[["r.squared"]])

# Last updated