--5-Robust functions
#1-Robust functions
# Three main problems:
# type unstable functions
# non standard evaluation
# hidden argument
#2-An error is better than a surprise
# Add a call to stopifnot() to both_na() that checks arguments x and
# y have the same length.
# Run the call to both_na() to verify it returns an error.
# Define troublesome x and y
x <- c(NA, NA, NA)
y <- c( 1, NA, NA, NA)
both_na <- function(x, y) {
# Add stopifnot() to check length of x and y
stopifnot(length(x)==length(y))
sum(is.na(x) & is.na(y))
}
# Call both_na() on x and y
both_na(x, y)
#3-An informative error is even better
# Replace condition with a logical statement that evaluates to TRUE when x and
# y have different lengths.
# Change the error message to "x and y must have the same length".
# Run the call to both_na() to verify it returns a more informative error.
# Define troublesome x and y
x <- c(NA, NA, NA)
y <- c( 1, NA, NA, NA)
both_na <- function(x, y) {
# Replace condition with logical
if (length(x)!=length(y)) {
# Replace "Error" with better message
stop("x and y must have the same length", call. = FALSE)
}
sum(is.na(x) & is.na(y))
}
# Call both_na()
both_na(x, y)
#4-A different kind of surprise: side effects -X
#Can you identify which of these functions doesn't have side effects?
# Define troublesome x and y
x <- c(10, 10, NA)
show_missings <- function(x) {
n <- sum(is.na(x))
cat("Missing values: ", n, "\n", sep = "")
x
}
replace_missings <- function(x, replacement) {
x[is.na(x)] <- replacement
x
}
plot_missings <- function(x) {
plot(seq_along(x), is.na(x))
x
}
exclude_missings <- function() {
options(na.action = "na.exclude")
}
show_missings(x)
replace_missings(x)
plot_missings(x)
exclude_missings(x)
#5-Unstable types
#6-sapply is another common culprit
#What type of objects will be A and B be?
df <- data.frame(
a = 1L,
b = 1.5,
y = Sys.time(),
z = ordered(1)
)
A <- sapply(df[1:4], class)
B <- sapply(df[3:4], class)
A
B
#A and B are a list Matrix
#7-Using purrr solves the problem -X
#The first two chunks give some sapply() calls, and
#demonstrate the type inconsistency by calling str() on each result.
#Define X, Y and Z using map() instead of sapply().
#Call str() on your X, Y and Z to demonstrate type consistency of map().
install.packages("purrr")
library(purrr)
df <- data.frame(
a = 1L,
b = 1.5,
y = Sys.time(),
z = ordered(1)
)
A <- map_chr(df[1:4], class)
B <- map_chr(df[3:4], class)
# sapply calls
A <- sapply(df[1:4], class)
B <- sapply(df[3:4], class)
C <- sapply(df[1:2], class)
# Demonstrate type inconsistency
str(A)
str(B)
str(C)
# Use map() to define X, Y and Z
X<-map(df[1:4], class)
Y<-map(df[3:4], class)
Z<-map(df[1:2], class)
# Use str() to check type consistency
str(X)
str(Y)
str(Z)
#8-A type consistent solution -X
# Assign the body of our previous function to the variable class_list.
# Use map_chr() along with the numeric subsetting shortcut,
# to extract the first element from every item in class_list.
# Run the final three lines to verify our new function always returns
# a character vector.
df <- data.frame(
a = 1L,
b = 1.5,
y = Sys.time(),
z = ordered(1)
)
col_classes <- function(df) {
# Assign list output to class_list
class_list<- map(df, class)
# Use map_chr() to extract first element in class_list
map_chr(class_list,1)
}
# Check that our new function is type consistent
df %>% col_classes() %>% str()
df[3:4] %>% col_classes() %>% str()
df[1:2] %>% col_classes() %>% str()
#9-Or fail early if something goes wrong
# Replace condition with a logical statement that uses any() and map_dbl() to
# check if any element in class_list has length greater than 1.
# Run the final three lines to verify an informative error is thrown.
df <- data.frame(
a = 1L,
b = 1.5,
y = Sys.time(),
z = ordered(1)
)
col_classes <- function(df) {
class_list <- map(df, class)
# Add a check that no element of class_list has length > 1
if (any(map_dbl(class_list, length) > 1)) {
stop("Some columns have more than one class", call. = FALSE)
}
# Use flatten_chr() to return a character vector
flatten_chr(class_list)
}
# Check that our new function is type consistent
df %>% col_classes() %>% str()
df[3:4] %>% col_classes() %>% str()
df[1:2] %>% col_classes() %>% str()
#10-Non-standard evaluation
#11-Programming with NSE functions
# We've placed diamonds_sub, a 20 row subset of the diamonds data from
# the ggplot2 package in your workspace.
# Use big_x() to find all rows in diamonds_sub where the x column is greater than 7.
install.packages("ggplot2")
library("ggplot2")
data("diamonds")
diamonds
diamonds_sub<-head(diamonds)
big_x <- function(df, threshold) {
dplyr::filter(df, x > threshold)
}
big_x(diamonds_sub, threshold = 7)
#12-When things go wrong
# Create a variable x and give it the value 1.
# Use big_x() to find all rows in diamonds_sub where the x column is greater than 7.
# Create a threshold column in diamonds_sub with the value 100.
# Use big_x() to find all rows in diamonds_sub where the x column is greater than 7.
install.packages("ggplot2")
library("ggplot2")
data("diamonds")
diamonds
big_x <- function(df, threshold) {
dplyr::filter(df, x > threshold)
}
# Remove the x column from diamonds
diamonds_sub$x <- NULL
# Create variable x with value 1
x<-1
# Use big_x() to find rows in diamonds_sub where x > 7
big_x(diamonds_sub, threshold > 7)
# Create a threshold column with value 100
diamonds_sub$threshold <- 100
# Use big_x() to find rows in diamonds_sub where x > 7
big_x(diamonds_sub, threshold > 7)
#13-What to do?
# Write a check for each of the following:
# If x is not in names(df), stop with the message
# "df must contain variable called x".
# If threshold is in names(df), stop with the message
# "df must not contain variable called threshold".
# Remember to use the argument call. = FALSE in each call to stop() so that
# the call is not a part of the error message.
df <- data.frame(
a = 1L,
b = 1.5,
y = Sys.time(),
z = ordered(1)
)
big_x <- function(df, threshold) {
# Write a check for x not being in df
if (!"x" %in% names(df)) {
stop("df must contain variable called x", call. = FALSE)
}
# Write a check for threshold being in df
if ("threshold" %in% names(df)) {
stop("df must not contain variable called threshold", call. = FALSE)
}
dplyr::filter(df, x > threshold)
}
big_x(df,threshold = 3)
Last updated