# 2 - When and how you should write a function
#1-Why should you write a function?
# Why should you write a function? (video)
#2-Start with a snippet of code
#To help you test your code, create a vector x containing the numbers 1 through 10.
#Rewrite the code snippet to use the temporary variable x instead
#of referring to the data frame column df$a
# Stand-in for the data frame column: the integers 1 to 10.
x <- 1:10
# Min-max rescaling written against x: shift by the minimum, then divide by
# the spread; NAs are ignored when finding both extremes.
(x - min(x, na.rm = TRUE)) / (max(x, na.rm = TRUE) - min(x, na.rm = TRUE))
#3-Rewrite for clarity
# Define the intermediate variable rng to contain the range of x
#using the function range().
# Specify the na.rm argument to automatically ignore any NAs in the vector.
# Rewrite the snippet to refer to this intermediate variable.
# Define example vector x
x <- 1:10
# rng holds c(minimum, maximum) of x, with NAs ignored.
rng <- range(x, na.rm = TRUE)
# Rescale using the elements of rng: rng[1] is the minimum and rng[2] the
# maximum, so min()/max() are not recomputed on x (where a call without
# na.rm = TRUE would turn the whole result into NA if x contained an NA).
(x - rng[1]) / (rng[2] - rng[1])
#4-Finally turn it into a function!
# Use the function template to create a function called rescale01():
# The function should take a single argument x.
# The body of the function should be our rewritten snippet
#from the previous exercise:
#rng <- range(x, na.rm = TRUE) (x - rng[1]) / (rng[2] - rng[1])
# Once you've written the function, test it out by calling it on the x
#we've already defined.
# Example input for trying out the function.
x <- 1:10
# rescale01(): linearly rescale x so its minimum maps to 0 and its maximum
# maps to 1.
#   x: numeric vector; NAs are ignored when finding the range.
# Returns a vector of the same length as x.
rescale01 <- function(x) {
  lo_hi <- range(x, na.rm = TRUE)
  lo <- lo_hi[1]
  hi <- lo_hi[2]
  (x - lo) / (hi - lo)
}
# Try the function on the example vector.
rescale01(x)
#5-How should you write a function?
# How should you write a function? (video)
# Small example data frame: a is 1..10, b is ten standard-normal draws.
# rnorm() takes the number of draws as its first argument, so ask for 10
# directly rather than passing the vector 1:10 and relying on its length.
df <- data.frame(
  a = 1:10,
  b = rnorm(10)
)
df
#6-Start with a simple problem
# Write a line of code that finds the number of positions
# where both x and y have missing values.
# You may find the is.na() and sum() functions useful, as well as the & operator.
# Example vectors: position 3 is the only one missing in both.
x <- c(1, 2, NA, 3, NA)
y <- c(NA, 3, NA, 3, 4)
# Exploratory steps: flag the NAs in x, then count them.
is.na(x)
sum(is.na(x))
# both_na(): number of positions where x and y are both NA.
#   x, y: vectors compared element by element.
both_na <- function(x, y) {
  missing_in_x <- is.na(x)
  missing_in_y <- is.na(y)
  sum(missing_in_x & missing_in_y)
}
both_na(x, y)
#7-Rewrite snippet as function
#This time, instead of using the function template,
#we've provided the body of the function.
#Wrap the body with function assignment and curly braces.
#Assign the function to the name both_na().
#both_na() should take two arguments, x and y.
# Example vectors x and y
x <- c(1, 2, NA, 3, NA)
y <- c(NA, 3, NA, 3, 4)
# both_na(): count the positions at which x and y are missing together.
#   x, y: vectors compared element by element.
both_na <- function(x, y) {
  na_in_both <- is.na(x) & is.na(y)
  sum(na_in_both)
}
both_na(x, y)
#8-Put our function to use
# Create the vectors x, y1 and y2 according to the above examples.
# Call both_na() with x and y1.
# Call both_na() with x and y2.
# Example vectors x and y
x <- c(1, 2, NA, 3, NA)
y <- c(NA, 3, NA, 3, 4)
# both_na(): how many positions are NA in x and in y at the same time.
#   x, y: vectors compared element by element.
both_na <- function(x, y) {
  x_missing <- is.na(x)
  both_missing <- x_missing & is.na(y)
  sum(both_missing)
}
both_na(x, y)
# Second set of inputs: x is entirely missing.
# NOTE(review): y2 is identical to y1 here - confirm it was not meant to
# differ.
x <- c(NA, NA, NA)
y1 <- c(1, NA, NA)
y2 <- c(1, NA, NA)
# both_na() on x and y1
both_na(x, y1)
# both_na() on x and y2
both_na(x, y2)
#9-How can you write a good function?
#-good names
#-intuitive argument
#10-Good function names
#f2() isn't a very good function name!
#Which of the following would make a good name for this function?
# f2(): drop the last element of x.
#   x: a vector.
# Returns x without its final element, or NULL when x has at most one
# element.
f2 <- function(x) {
  n <- length(x)
  if (n > 1) {
    x[-n]
  } else {
    NULL
  }
}
# Try it on the integers 1 to 10.
x <- 1:10
f2(x)
#remove_last()-> the right answer
#11-Argument names
#Rewrite the mean_ci() function to take arguments named level and
# x instead of c and nums, in that order for now.
# Rewrite mean_ci to take arguments named level and x
# mean_ci(): normal-quantile confidence interval around the mean of x.
#   level: confidence level, e.g. 0.95.
#   x:     numeric vector of observations.
# Returns c(lower, upper).
mean_ci <- function(level, x) {
  std_err <- sd(x) / sqrt(length(x))
  tail_prob <- (1 - level) / 2
  mean(x) + std_err * qnorm(c(tail_prob, 1 - tail_prob))
}
#12-Argument order
#Arguments are often one of two types:
# Data arguments supply the data to compute on.
# Detail arguments control the details of how the computation is done.
#Move the data argument, x, to the front.
#Move the detail argument, level, to the end and give it the default value 0.95.
# Alter the arguments to mean_ci
# mean_ci(): normal-quantile confidence interval around the mean of x.
#   x:     numeric vector of observations (data argument, comes first).
#   level: confidence level (detail argument), 0.95 unless specified.
# Returns c(lower, upper).
mean_ci <- function(x, level = 0.95) {
  std_error <- sd(x) / sqrt(length(x))
  alpha <- 1 - level
  quantiles <- qnorm(c(alpha / 2, 1 - alpha / 2))
  mean(x) + std_error * quantiles
}
#13-Return statements
#Edit the mean_ci function using an if statement to check
#for the case when x is empty and if so,
#to produce the same warning as the code above then
#immediately return() c(-Inf, Inf).
# Alter the mean_ci function
# mean_ci(): normal-quantile confidence interval around the mean of x.
#   x:     numeric vector of observations.
#   level: confidence level, 0.95 unless specified.
# Returns c(lower, upper). For an empty x it issues a warning and returns
# c(-Inf, Inf) straight away, before any statistic is computed.
mean_ci <- function(x, level = 0.95) {
  # Guard clause: an empty input gets a warning and the widest interval.
  if (length(x) == 0) {
    warning("`x` was empty", call. = FALSE)
    return(c(-Inf, Inf))
  }
  se <- sd(x) / sqrt(length(x))
  alpha <- 1 - level
  mean(x) + se * qnorm(c(alpha / 2, 1 - alpha / 2))
}
# Exercise the empty-input path.
mean_ci(numeric(0))
#14-What does this function do?
#Define a numeric vector x with the values: 1, 2, NA, 4 and 5.
#Call f() with the arguments x = x, and y = 3.
#Call f() with the arguments x = x, and y = 10.
# f(): fill the missing entries of x with y and return the filled vector.
# As a side effect it prints the number of NAs left after filling, followed
# by y (so the count is 0 unless y itself contains NA).
f <- function(x, y) {
  x <- replace(x, is.na(x), y)
  cat(sum(is.na(x)), y, "\n")
  x
}
# Numeric vector with one missing value in the third position.
x <- c(1, 2, NA, 4, 5)
# Fill the gap with 3
f(x = x, y = 3)
# Fill the gap with 10
f(x = x, y = 10)
#15-Let's make it clear from its name
# Rename the function f() to replace_missings().
# Change the name of the y argument to replacement.
# Now replace the missing values of df$z with 0's using your new function.
#Make sure you assign the result back to df$z.
# Example data frame whose z column is entirely missing.
# rnorm() takes the number of draws as its first argument, so ask for 10
# directly rather than passing the vector 1:10 and relying on its length.
df <- data.frame(
  a = 1:10,
  b = rnorm(10),
  z = NA
)
df
# replace_missings(): fill the missing entries of x with replacement.
#   x:           vector that may contain NAs.
#   replacement: value written into every NA position.
# Returns the filled vector; also prints a count and the replacement value.
replace_missings <- function(x, replacement) {
  x[is.na(x)] <- replacement
  # Counted after the replacement, so this reports the NAs still remaining.
  cat(sum(is.na(x)), replacement, "\n")
  x
}
# Replace the missing values of df$z with 0 and store the result back.
df$z <- replace_missings(df$z, replacement = 0)
#16-Make the body more understandable
#Define is_miss, a logical that identifies the missing values in x.
#To reduce unnecessary duplication, alter the rest of function
#to refer to is_miss instead of is.na(x).
#with new dataset
# replace_missings(): fill the missing entries of x with replacement.
#   x:           object that may contain NAs.
#   replacement: value written into every NA position.
# Returns the filled object; also prints how many entries were missing,
# followed by the replacement value.
replace_missings <- function(x, replacement) {
  # Locate the missing values once, before x is modified, so the same mask
  # drives both the replacement and the reported count.
  is_miss <- is.na(x)
  x[is_miss] <- replacement
  cat(sum(is_miss), replacement, "\n")
  x
}
# Fresh example data frame whose z column is entirely missing.
# rnorm() takes the number of draws as its first argument, so ask for 10
# directly rather than passing the vector 1:10 and relying on its length.
df <- data.frame(
  a = 1:10,
  b = rnorm(10),
  z = NA
)
# Apply the function to the whole data frame this time.
x <- df
replace_missings(x, replacement = 0)
#17-Much better! But a few more tweaks...
#Rewrite to use message() and output "sum(is_miss) missings replaced
#by the value replacement."
#Check your new function by replacing all the missing values of df$z with 0's,
#as we've done before.
# replace_missings(): fill the missing entries of x with replacement.
#   x:           object that may contain NAs.
#   replacement: value written into every NA position.
# Returns the filled object and reports, via message(), how many entries
# were missing and which value replaced them.
replace_missings <- function(x, replacement) {
  # Locate the missing values once, before x is modified.
  is_miss <- is.na(x)
  x[is_miss] <- replacement
  # message() pastes its arguments with no separator, so the spaces around
  # the text have to be part of the string itself.
  message(sum(is_miss), " missings replaced by the value ", replacement)
  x
}
# Try the new function on df$z: fill its missing values with 0.
replace_missings(df$z, replacement = 0)
# Last updated