# 2 - When and how you should write a function
#
# Previous: 1 - A quick refresher | Next: 3 - Functional programming
#
# Last updated 6 years ago
#
# 2 - When and how you should write a function

#1-Why should you write a function?

#2-Start with a snippet of code

#To help you test your code, create a vector x containing the numbers 1 through 10.
#Rewrite the code snippet to use the temporary variable x instead 
#of referring to the data frame column df$a

# Example input: the integers 1 through 10
x <- seq_len(10)

# Rescale x to [0, 1]: subtract the minimum, then divide by the width of
# the range (max - min), ignoring NAs in both computations
(x - min(x, na.rm = TRUE)) / diff(range(x, na.rm = TRUE))

#3-Rewrite for clarity

# Define the intermediate variable rng to contain the range of x
# using the function range().
# Specify the na.rm = TRUE argument to automatically ignore any NAs in the vector.
# Rewrite the snippet to refer to this intermediate variable.

# Define example vector x
x <- 1:10

# Define rng as c(minimum, maximum) of x, ignoring any NAs
rng <- range(x, na.rm = TRUE)

# Rescale x to [0, 1] using only the elements of rng: rng[1] is the minimum
# and rng[2] is the maximum. Calling min()/max() on x again would defeat the
# purpose of the intermediate variable and, without na.rm = TRUE, would
# return NA whenever x contains a missing value.
(x - rng[1]) / (rng[2] - rng[1])

#4-Finally turn it into a function!

# Use the function template to create a function called rescale01():
# The function should take a single argument x.
# The body of the function should be our rewritten snippet
# from the previous exercise:
#   rng <- range(x, na.rm = TRUE)
#   (x - rng[1]) / (rng[2] - rng[1])
# Once you've written the function, test it out by calling it on the x
# we've already defined.

# Example input: the integers 1 through 10
x <- seq_len(10)

# rescale01(): linearly rescale a vector using its observed range
#   x: numeric vector; NAs are ignored when the range is computed
#   returns: (x - min) / (max - min), one value per element of x
rescale01 <- function(x) {
  lo_hi <- range(x, na.rm = TRUE)
  span <- lo_hi[2] - lo_hi[1]
  (x - lo_hi[1]) / span
}

# Try the function on the example vector
rescale01(x)

#5-How should you write a function?

# Example data frame: an index column a and ten standard-normal draws b
df <- data.frame(
  a = seq_len(10),
  b = rnorm(10)
)
df

#6-Start with a simple problem

# Write a line of code that finds the number of positions 
# where both x and y have missing values. 
# You may find the is.na() and sum() functions useful, as well as the & operator.

# Example vectors; position 3 is the only place where both are NA
x <- c(1, 2, NA, 3, NA)
y <- c(NA, 3, NA, 3, 4)
is.na(x)
sum(is.na(x))

# both_na(): count the positions at which x and y are both missing
#   x, y: vectors to compare element by element
#   returns: a single integer count
both_na <- function(x, y) {
  missing_in_both <- is.na(x) & is.na(y)
  sum(missing_in_both)
}

both_na(x, y)

#7-Rewrite snippet as function

#This time, instead of using the function template, 
#we've provided the body of the function. 
#Wrap the body with function assignment and curly braces.
#Assign the function to the name both_na().
#both_na() should take two arguments, x and y.

# Example vectors x and y
x <- c(1, 2, NA, 3, NA)
y <- c(NA, 3, NA, 3, 4)

# both_na(): number of positions where x and y are simultaneously NA
#   x, y: vectors to compare element by element
#   returns: a single integer count
both_na <- function(x, y) {
  x_missing <- is.na(x)
  y_missing <- is.na(y)
  sum(x_missing & y_missing)
}
both_na(x, y)

#8-Put our function to use

# Create the vectors x, y1 and y2 according to the above examples.
# Call both_na() with x and y1.
# Call both_na() with x and y2.

# Example vectors x and y
x <- c(1, 2, NA, 3, NA)
y <- c(NA, 3, NA, 3, 4)

# both_na(): count the positions at which x and y are both missing
#   x, y: vectors to compare element by element
#   returns: a single integer count
both_na <- function(x, y) {
  na_in_x <- is.na(x)
  sum(na_in_x & is.na(y))
}
both_na(x, y)

# Inputs for putting the function to use.
# NOTE(review): y1 and y2 are identical here, so both calls below return the
# same count - confirm against the exercise's examples whether they were
# meant to differ.
x <- c(NA, NA, NA)
y1 <- c(1, NA, NA)
y2 <- c(1, NA, NA)

# Both-missing count for x and y1
both_na(x, y1)

# Both-missing count for x and y2
both_na(x, y2)

#9-How can you write a good function?

#-good names
#-intuitive argument

#10-Good function names

#f2() isn't a very good function name! 
#Which of the following would make a good name for this function?

# f2(): drop the final element of x
#   x: a vector
#   returns: x without its last element, or NULL when x has at most one element
f2 <- function(x) {
  n <- length(x)
  if (n <= 1) {
    return(NULL)
  }
  x[-n]
}
x <- seq_len(10)

f2(x)

#remove_last()-> the right answer

#11-Argument names

#Rewrite the mean_ci() function to take arguments named level and
# x instead of c and nums, in that order for now.

# mean_ci(): normal-approximation confidence interval for the mean of x
#   level: confidence level, e.g. 0.95
#   x:     numeric vector of observations
#   returns: numeric vector c(lower, upper)
mean_ci <- function(level, x) {
  std_err <- sd(x) / sqrt(length(x))
  # Probability mass left in each tail outside the interval
  tail_prob <- (1 - level) / 2
  mean(x) + std_err * qnorm(c(tail_prob, 1 - tail_prob))
}

#12-Argument order

#Arguments are often one of two types:
  
# Data arguments supply the data to compute on.
# Detail arguments control the details of how the computation is done.

#Move the data argument, x, to the front.
#Move the detail argument, level, to the end and give it the default value 0.95.

# mean_ci(): normal-approximation confidence interval for the mean of x
#   x:     numeric vector of observations (data argument, so it comes first)
#   level: confidence level (detail argument), default 0.95
#   returns: numeric vector c(lower, upper)
mean_ci <- function(x, level = 0.95) {
  alpha <- 1 - level
  quantile_probs <- c(alpha / 2, 1 - alpha / 2)
  std_err <- sd(x) / sqrt(length(x))
  mean(x) + std_err * qnorm(quantile_probs)
}

#13-Return statements

# Edit the mean_ci function using an if statement to check
# for the case when x is empty and, if so,
# to produce the same warning as the code above and then
# immediately return() c(-Inf, Inf).
# Alter the mean_ci function

# mean_ci(): normal-approximation confidence interval for the mean of x
#   x:     numeric vector of observations
#   level: confidence level, default 0.95
#   returns: numeric vector c(lower, upper); when x is empty, a warning is
#            raised and c(-Inf, Inf) is returned
mean_ci <- function(x, level = 0.95) {
  # Guard clause: with no observations sd() gives NA and mean() gives NaN,
  # so warn the caller and return the widest possible interval instead.
  if (length(x) == 0) {
    warning("`x` was empty", call. = FALSE)
    return(c(-Inf, Inf))
  }

  se <- sd(x) / sqrt(length(x))
  alpha <- 1 - level
  mean(x) + se * qnorm(c(alpha / 2, 1 - alpha / 2))
}

# Empty input: warns and returns c(-Inf, Inf)
mean_ci(numeric(0))

#14-What does this function do?

#Define a numeric vector x with the values: 1, 2, NA, 4 and 5.
#Call f() with the arguments x = x, and y = 3.
#Call f() with the arguments x = x, and y = 10.

# f(): replace the missing values of x with y
#   x: vector that may contain NAs
#   y: value written into every NA position
#   Prints the number of NAs still present after the replacement, followed
#   by y, then returns the filled-in vector.
f <- function(x, y) {
  x <- replace(x, is.na(x), y)
  cat(sum(is.na(x)), y, "\n")
  x
}
# Numeric vector with one missing value
x <- c(1, 2, NA, 4, 5)

# Fill the missing value with 3
f(x = x, y = 3)

# Fill the missing value with 10
f(x = x, y = 10)

#15-Let's make it clear from its name

# Rename the function f() to replace_missings().
# Change the name of the y argument to replacement.
# Now replace the missing values of df$z with 0's using your new function. 
#Make sure you assign the result back to df$z.

# Example data frame whose z column is entirely missing
df <- data.frame(
  a = seq_len(10),
  b = rnorm(10),
  z = NA
)

df

# Show df once more before its missing values are replaced
df

# replace_missings(): replace the missing values of x with `replacement`
#   x:           vector that may contain NAs
#   replacement: value written into every NA position
#   Prints the number of NAs still present after the replacement, followed
#   by `replacement`, then returns the filled-in vector.
replace_missings <- function(x, replacement) {
  na_positions <- is.na(x)
  x[na_positions] <- replacement
  cat(sum(is.na(x)), replacement, "\n")
  x
}
# Replace the missing values of df$z with 0 and store the result back in df
df$z <- replace_missings(df$z, replacement = 0)

#16-Make the body more understandable

# Define is_miss, a logical that identifies the missing values in x.
# To reduce unnecessary duplication, alter the rest of the function
# to refer to is_miss instead of is.na(x).

#with new dataset
# replace_missings(): replace the missing values of x with `replacement`
#   x:           object that may contain NAs
#   replacement: value written into every NA position
#   Prints the number of missing values found, followed by `replacement`,
#   then returns the filled-in object.
replace_missings <- function(x, replacement) {
  # Locate the missing values once and reuse the mask below
  is_miss <- is.na(x)

  filled <- replace(x, is_miss, replacement)
  cat(sum(is_miss), replacement, "\n")
  filled
}

# Example data frame whose z column is entirely missing
df <- data.frame(
  a = seq_len(10),
  b = rnorm(10),
  z = NA
)

x <- df

# Apply the function to the whole data frame
replace_missings(x, replacement = 0)

#17-Much better! But a few more tweaks...

#Rewrite to use message() and output "sum(is_miss) missings replaced 
#by the value replacement."
#Check your new function by replacing all the missing values of df$z with 0's, 
#as we've done before.



# replace_missings(): replace the missing values of x with `replacement`
#   x:           object that may contain NAs
#   replacement: value written into every NA position
#   Emits a message of the form "<n> missings replaced by the value <replacement>"
#   and returns the filled-in object.
replace_missings <- function(x, replacement) {
  is_miss <- is.na(x)
  x[is_miss] <- replacement

  # message() concatenates its arguments with no separator, so the literal
  # needs its own leading and trailing spaces to keep the words apart.
  message(sum(is_miss), " missings replaced by the value ", replacement)
  x
}

# Try the message-based function on the z column of df
replace_missings(df$z, replacement = 0)