--5-Robust functions

#1-Robust functions

# Three main problems:
# type unstable functions
# non standard evaluation
# hidden argument

#2-An error is better than a surprise

# Add a call to stopifnot() to both_na() that checks arguments x and 
# y have the same length.
# Run the call to both_na() to verify it returns an error.

# Define troublesome x and y
x <- c(NA, NA, NA)
y <- c( 1, NA, NA, NA)

both_na <- function(x, y) {
  # Add stopifnot() to check length of x and y
  stopifnot(length(x)==length(y))
  
  sum(is.na(x) & is.na(y)) 
}
# Call both_na() on x and y
both_na(x, y)

#3-An informative error is even better

# Replace condition with a logical statement that evaluates to TRUE when x and 
# y have different lengths.
# Change the error message to "x and y must have the same length".
# Run the call to both_na() to verify it returns a more informative error.

# Define troublesome x and y
x <- c(NA, NA, NA)
y <- c( 1, NA, NA, NA)

both_na <- function(x, y) {
  # Replace condition with logical
  if (length(x)!=length(y)) {
    # Replace "Error" with better message
    stop("x and y must have the same length", call. = FALSE)
  }  
  sum(is.na(x) & is.na(y))
}

# Call both_na() 
both_na(x, y)

#4-A different kind of surprise: side effects -X

#Can you identify which of these functions doesn't have side effects?
# Define troublesome x and y
x <- c(10, 10, NA)

show_missings <- function(x) {
  n <- sum(is.na(x))
  cat("Missing values: ", n, "\n", sep = "")
  x
}
replace_missings <- function(x, replacement) {
  x[is.na(x)] <- replacement
  x
}
plot_missings <- function(x) {
  plot(seq_along(x), is.na(x))
  x
}
exclude_missings <- function() {
  options(na.action = "na.exclude")
}
show_missings(x)
replace_missings(x)
plot_missings(x)
exclude_missings(x)

#5-Unstable types

#6-sapply is another common culprit

#What type of objects will be A and B be?

df <- data.frame(
  a = 1L,
  b = 1.5,
  y = Sys.time(),
  z = ordered(1)
)

A <- sapply(df[1:4], class) 
B <- sapply(df[3:4], class)

A
B
#A and B are a list Matrix

#7-Using purrr solves the problem -X

#The first two chunks give some sapply() calls, and 
#demonstrate the type inconsistency by calling str() on each result.
#Define X, Y and Z using map() instead of sapply().
#Call str() on your X, Y and Z to demonstrate type consistency of map().

install.packages("purrr")
library(purrr)

df <- data.frame(
  a = 1L,
  b = 1.5,
  y = Sys.time(),
  z = ordered(1)
)

A <- map_chr(df[1:4], class) 
B <- map_chr(df[3:4], class)

# sapply calls
A <- sapply(df[1:4], class) 
B <- sapply(df[3:4], class)
C <- sapply(df[1:2], class) 

# Demonstrate type inconsistency
str(A)
str(B)
str(C)

# Use map() to define X, Y and Z
X<-map(df[1:4], class)
Y<-map(df[3:4], class)
Z<-map(df[1:2], class)

# Use str() to check type consistency
str(X)
str(Y)
str(Z)

#8-A type consistent solution -X

# Assign the body of our previous function to the variable class_list.
# Use map_chr() along with the numeric subsetting shortcut, 
# to extract the first element from every item in class_list.
# Run the final three lines to verify our new function always returns 
# a character vector.

df <- data.frame(
  a = 1L,
  b = 1.5,
  y = Sys.time(),
  z = ordered(1)
)

col_classes <- function(df) {
  # Assign list output to class_list
  class_list<- map(df, class)
  
  # Use map_chr() to extract first element in class_list
  map_chr(class_list,1)
}

# Check that our new function is type consistent
df %>% col_classes() %>% str()
df[3:4] %>% col_classes() %>% str()
df[1:2] %>% col_classes() %>% str()

#9-Or fail early if something goes wrong

# Replace condition with a logical statement that uses any() and map_dbl() to 
# check if any element in class_list has length greater than 1.
# Run the final three lines to verify an informative error is thrown.

df <- data.frame(
  a = 1L,
  b = 1.5,
  y = Sys.time(),
  z = ordered(1)
)

col_classes <- function(df) {
  class_list <- map(df, class)
  
  # Add a check that no element of class_list has length > 1
  if (any(map_dbl(class_list, length) > 1)) {
    stop("Some columns have more than one class", call. = FALSE)
  }
  
  # Use flatten_chr() to return a character vector
  flatten_chr(class_list)
}

# Check that our new function is type consistent
df %>% col_classes() %>% str()
df[3:4] %>% col_classes() %>% str()
df[1:2] %>% col_classes() %>% str()

#10-Non-standard evaluation

Non-standard evaluation video

#11-Programming with NSE functions

# We've placed diamonds_sub, a 20 row subset of the diamonds data from 
# the ggplot2 package in your workspace. 
# Use big_x() to find all rows in diamonds_sub where the x column is greater than 7.

install.packages("ggplot2")
library("ggplot2")
data("diamonds")
diamonds

diamonds_sub<-head(diamonds)

big_x <- function(df, threshold) {
  dplyr::filter(df, x > threshold)
}

big_x(diamonds_sub, threshold = 7)

#12-When things go wrong

# Create a variable x and give it the value 1.
# Use big_x() to find all rows in diamonds_sub where the x column is greater than 7.
# Create a threshold column in diamonds_sub with the value 100.
# Use big_x() to find all rows in diamonds_sub where the x column is greater than 7.

install.packages("ggplot2")
library("ggplot2")
data("diamonds")
diamonds

big_x <- function(df, threshold) {
  dplyr::filter(df, x > threshold)
}
# Remove the x column from diamonds
diamonds_sub$x <- NULL

# Create variable x with value 1
x<-1

# Use big_x() to find rows in diamonds_sub where x > 7
big_x(diamonds_sub, threshold > 7)

# Create a threshold column with value 100
diamonds_sub$threshold <- 100

# Use big_x() to find rows in diamonds_sub where x > 7
big_x(diamonds_sub, threshold > 7)

#13-What to do?

# Write a check for each of the following:
# If x is not in names(df), stop with the message 
# "df must contain variable called x".
# If threshold is in names(df), stop with the message 
# "df must not contain variable called threshold".
# Remember to use the argument call. = FALSE in each call to stop() so that 
# the call is not a part of the error message.

df <- data.frame(
  a = 1L,
  b = 1.5,
  y = Sys.time(),
  z = ordered(1)
)

big_x <- function(df, threshold) {
  # Write a check for x not being in df
  if (!"x" %in% names(df)) { 
    stop("df must contain variable called x", call. = FALSE)
  }
  
  # Write a check for threshold being in df
  if ("threshold" %in% names(df)) {
    stop("df must not contain variable called threshold", call. = FALSE)
  }
  
  dplyr::filter(df, x > threshold)
}

big_x(df,threshold = 3)

Previous--4-Advanced inputs and outputs NextData Wrangling with R

Last updated 6 years ago