# 5-Utilities
#1-Useful Functions
# mean()
# abs()
# round()
# sum()
# seq()
# rep()
# is.*() -> check the class of an object; output: TRUE or FALSE
# as.*() -> convert an object from one class to another
# append()
# rev()
#2-Mathematical utilities
# Have another look at some useful math functions that R features:
# abs(): Calculate the absolute value.
# sum(): Calculate the sum of all the values in a data structure.
# mean(): Calculate the arithmetic mean.
# round(): Round the values to 0 decimal places by default.
#Try out ?round in the console for variations of round() and
# ways to change the number of digits to round to.
#Calculate the sum of the absolute rounded values of the training errors.
#You can work in parts, or with a single one-liner.
#There's no need to store the result in a variable, just have R print it.
# The errors vector has already been defined for you
errors <- c(1.9, -2.6, 4.0, -9.5, -3.4, 7.3)
# Round every error to 0 decimal places, drop the signs, then add them all up.
sum(abs(round(errors, digits = 0)))
#3-Find the error
#Fix the error by including code on the last line.
#Remember: you want to call mean() only once!
# Don't edit these two lines
vec1 <- c(1.5, 2.5, 8.4, 3.7, 6.3)
vec2 <- rev(vec1)
vec1
vec2
# Join both vectors into one so that mean() only has to be called once.
mean(abs(c(vec1, vec2)))
#4-Data Utilities
#R features a bunch of functions to juggle around with data structures:
# seq(): Generate sequences, by specifying the from, to, and by arguments.
# rep(): Replicate elements of vectors and lists.
# sort(): Sort a vector in ascending order. Works on numerics,
#but also on character strings and logicals.
# rev(): Reverse the elements in a data structure for which reversal is defined.
# str(): Display the structure of any R object.
# append(): Merge vectors or lists.
# is.*(): Check for the class of an R object.
# as.*(): Convert an R object from one class to another.
# unlist(): Flatten (possibly embedded) lists to produce a vector.
#Convert both linkedin and facebook lists to a vector, and
#store them as li_vec and fb_vec respectively.
#Next, append fb_vec to the li_vec (Facebook data comes last).
#Save the result as social_vec.
#Finally, sort social_vec from high to low. Print the resulting vector.
# The linkedin and facebook lists have already been created for you
linkedin <- list(16, 9, 13, 5, 2, 17, 14)
facebook <- list(17, 7, 5, 16, 8, 13, 14)

# Convert linkedin and facebook to atomic vectors: li_vec and fb_vec.
# as.vector() returns a list unchanged (a list already counts as a vector),
# so unlist() is what actually flattens the lists into numeric vectors.
li_vec <- unlist(linkedin)
fb_vec <- unlist(facebook)

# Append fb_vec to li_vec (Facebook data comes last): social_vec
social_vec <- append(li_vec, fb_vec)
social_vec

# Sort social_vec from high to low
sort(social_vec, decreasing = TRUE)
#5-Find the error (2)
#Correct the expression. Make sure that your fix still uses the functions rep()
#and seq().
# Fix me
rep(seq(from = 1, to = 7, by = 2), times = 7)
#6-Beat Gauss using R
#Using the function seq(), create a sequence that ranges from 1 to 500
#in increments of 3. Assign the resulting vector to a variable seq1.
#Again with the function seq(), create a sequence that ranges from 1200 to 900
#in increments of -7. Assign it to a variable seq2.
#Calculate the total sum of the sequences, either by using the sum()
#function twice and adding the two results, or
#by first concatenating the sequences and then using the sum() function once.
#Print the result to the console.
# seq1: start at 1 and step upwards by 3 without exceeding 500
seq1 <- seq(from = 1, to = 500, by = 3)
seq1
# seq2: start at 1200 and step downwards by 7 without dropping below 900
seq2 <- seq(from = 1200, to = 900, by = -7)
seq2
# Grand total: sum each sequence separately and add the two results
sum(seq1) + sum(seq2)
#7-Regular Expressions
# -Sequence of meta characters
# -Pattern existence
# -Pattern replacement
# -Pattern extraction
# -grepl() -> output: logical vector (TRUE / FALSE for each element)
# -grep() -> output: positions (indices) of the matching elements in the vector
# sub()
# gsub()
#8-grepl & grep
#Use grepl() to generate a vector of logicals that indicates
#whether these email addresses contain "edu".
#Print the result to the output.
#Do the same thing with grep(), but this time save the resulting indexes
#in a variable hits.
#Use the variable hits to select from the emails vector only the emails that
#contain "edu".
# Addresses to search (provided by the exercise)
emails <- c(
  "john.doe@ivyleague.edu", "education@world.gov", "dalai.lama@peace.org",
  "invalid.edu", "quant@bigdatacollege.edu", "cookie.monster@sesame.tv"
)
# Logical vector: TRUE wherever "edu" occurs anywhere in the address
grepl("edu", emails)
# Positions of the addresses containing "edu", stored in hits
hits <- grep("edu", emails)
hits
# Keep only the addresses found above
emails[hits]
#9-grepl & grep (2)
#Use grepl() with the more advanced regular expression to return a logical vector.
#Simply print the result.
#Do a similar thing with grep() to create a vector of indices.
#Store the result in the variable hits.
#Use emails[hits] again to subset the emails vector.
# Addresses to search (provided by the exercise)
emails <- c(
  "john.doe@ivyleague.edu", "education@world.gov", "dalai.lama@peace.org",
  "invalid.edu", "quant@bigdatacollege.edu", "cookie.monster@sesame.tv"
)
# Stricter pattern: an "@", then anything, then a literal ".edu" at the end
grepl("@.*\\.edu$", emails)
# Positions of the addresses matching the stricter pattern, stored in hits
hits <- grep("@.*\\.edu$", emails)
hits
# Keep only the addresses found above
emails[hits]
#10-sub & gsub
#With the advanced regular expression "@.*\\.edu$", use sub() to replace
#the match with "@datacamp.edu".
#Since there will only be one match per character string,
#gsub() is not necessary here.
#Inspect the resulting output.
# Addresses to rewrite (provided by the exercise)
emails <- c(
  "john.doe@ivyleague.edu", "education@world.gov", "global@peace.org",
  "invalid.edu", "quant@bigdatacollege.edu", "cookie.monster@sesame.tv"
)
# Replace everything from the "@" up to a trailing ".edu" with "@datacamp.edu";
# addresses that do not match the pattern are returned unchanged
sub("@.*\\.edu$", "@datacamp.edu", emails)
#11-sub & gsub (2)
awards <- c(
  "Won 1 Oscar.",
  "Won 1 Oscar. Another 9 wins & 24 nominations.",
  "1 win and 2 nominations.",
  "2 wins & 3 nominations.",
  "Nominated for 2 Golden Globes. 1 more win & 2 nominations.",
  "4 wins & 1 nomination."
)
# Replace each matching string with the captured number that precedes
# "nomination"; strings without a match are returned as they are
sub(pattern = ".*\\s([0-9]+)\\snomination.*$", replacement = "\\1", x = awards)
#What does this code chunk return? awards is already defined in the workspace
#so you can start playing in the console straight away.
#A vector of integers containing: 1, 24, 2, 3, 2, 1.
#The vector awards gets returned as there isn't a single element in
#awards that matches the regular expression.
#A vector of character strings containing "1", "24", "2", "3", "2", "1".
#A vector of character strings containing
#"Won 1 Oscar.", "24", "2", "3", "2", "1".->The right answer
#12-Times and Dates
# as.Date -> time differences are expressed in days
# as.POSIXct -> time differences are in seconds by default,
# but if the result is large, POSIXct differences come in a simpler, more readable time unit
# Dedicated packages to deal with time unit: lubridate,zoo, xts
#13-Right here, right now
# Ask R for the current date, and store the result in a variable today.
# To see what today looks like under the hood, call unclass() on it.
# Ask R for the current time, and store the result in a variable, now.
# To see the numerical value that corresponds to now, call unclass() on it.
# today: the current date
today <- Sys.Date()
# Drop the class attribute to reveal how today is stored internally
unclass(today)
# now: the current time
now <- Sys.time()
# Drop the class attribute to reveal the numerical value behind now
unclass(now)
#14-Create and format dates
#In the editor on the right, three character strings
#representing dates have been created.
#Convert them to dates using as.Date(), and assign them to date1, date2, and
#date3 respectively.
#The code for date1 is already included.
#Extract useful information from the dates as character strings using format().
#From the first date, select the weekday. From the second date,
#select the day of the month.
#From the third date, you should select the abbreviated month and
#the 4-digit year, separated by a space.
# Three dates written as text, each in a different layout
str1 <- "May 23, '96"
str2 <- "2012-03-15"
str3 <- "30/January/2006"
# Parse the strings into Date objects; str2 needs no explicit format
date1 <- as.Date(str1, format = "%b %d, '%y")
date2 <- as.Date(str2)
date3 <- as.Date(str3, format = "%d/%B/%Y")
# Weekday of date1, day of the month of date2,
# abbreviated month plus 4-digit year of date3
format(date1, format = "%A")
format(date2, format = "%d")
format(date3, format = "%b %Y")
#15-Create and format times
#Convert two strings that represent timestamps, str1 and str2,
#to POSIXct objects called time1 and time2.
#Using format(), create a string from time1 containing only the minutes.
#From time2, extract the hours and minutes as "hours:minutes AM/PM".
#Refer to the assignment text above to find the correct conversion symbols!
# Two timestamps written as text
str1 <- "May 23, '96 hours:23 minutes:01 seconds:45"
str2 <- "2012-3-12 14:23:08"
# Parse the strings into POSIXct objects; str2 needs no explicit format
time1 <- as.POSIXct(
  str1,
  format = "%B %d, '%y hours:%H minutes:%M seconds:%S"
)
time2 <- as.POSIXct(str2)
# Minutes of time1, then time2 as "hours:minutes AM/PM"
format(time1, format = "%M")
format(time2, format = "%I:%M %p")
#16-Calculations with Dates
#Calculate the number of days that passed between the last and
#the first day you ate pizza. Print the result.
#Use the function diff() on pizza to calculate the differences between
#consecutive pizza days. Store the result in a new variable day_diff.
#Calculate the average period between two consecutive pizza days. Print the result.
# The five pizza days (pre-defined in the exercise workspace)
day1 <- as.Date("2017-11-09")
day2 <- as.Date("2017-11-11")
day3 <- as.Date("2017-11-16")
day4 <- as.Date("2017-11-22")
day5 <- as.Date("2017-11-27")
# Collect the days into one Date vector: pizza
pizza <- c(day1, day2, day3, day4, day5)
# Days elapsed between the first and the last pizza day
day5 - day1
print(day5 - day1)
# Gaps between consecutive pizza days: day_diff
day_diff <- diff(pizza)
# Average gap between two consecutive pizza days
print(mean(day_diff))
#17-Calculations with Times
#Calculate the difference between the two vectors logout and login,
#i.e. the time the user was online in each independent session.
#Store the result in a variable time_online.
#Inspect the variable time_online by printing it.
#Calculate the total time that the user was online. Print the result.
#Calculate the average time the user was online. Print the result.
# Session start (login) and end (logout) timestamps.
# The strings are labelled "UTC", but the label is not part of the parsed
# date-time format, so the time zone is passed explicitly; otherwise the values
# would be interpreted in the local time zone of the machine running the script.
login <- as.POSIXct(
  c("2017-11-13 10:18:04 UTC", "2017-11-18 09:14:18 UTC",
    "2017-11-18 12:21:51 UTC", "2017-11-18 12:37:24 UTC",
    "2017-11-20 21:37:55 UTC"),
  tz = "UTC"
)
logout <- as.POSIXct(
  c("2017-11-13 10:56:29 UTC", "2017-11-18 09:14:52 UTC",
    "2017-11-18 12:35:48 UTC", "2017-11-18 13:17:22 UTC",
    "2017-11-20 22:08:47 UTC"),
  tz = "UTC"
)
# Inspect both vectors and their classes.
# (The original referenced the undefined symbol `as.vectorlogout` here, which
# stopped the script with an "object not found" error.)
login
logout
class(login)
class(logout)
# Calculate the difference between login and logout: time_online
time_online <- logout - login
# Inspect the variable time_online
time_online
# Calculate the total time online
sum(time_online)
# Calculate the average time online
mean(time_online)
#18-Time is of the essence
# Use as.Date() to convert the astro vector to a vector containing Date objects.
# You will need the %d, %b and %Y symbols to specify the format.
#Store the resulting vector as astro_dates.
# Use as.Date() to convert the meteo vector to a vector with Date objects.
# This time, you will need the %B, %d and %y symbols for the format argument.
# Store the resulting vector as meteo_dates.
# With a combination of max(), abs() and -,
#calculate the maximum absolute difference between the astronomical and
#the meteorological beginnings of a season, i.e. astro_dates and meteo_dates.
#Simply print this maximum difference to the console output.
# Season beginnings as text: astronomical (astro) and meteorological (meteo)
astro <- c("20-Mar-2015", "25-Jun-2015", "23-Sep-2015", "22-Dec-2015")
meteo <- c("March 1, 15", "June 1, 15", "September 1, 15", "December 1, 15")
astro
meteo
# astro_dates: day, abbreviated month name and 4-digit year, dash-separated
astro_dates <- as.Date(astro, format = "%d-%b-%Y")
astro_dates
class(astro_dates)
# meteo_dates: full month name, day, then a 2-digit year
meteo_dates <- as.Date(meteo, format = "%B %d, %y")
meteo_dates
# Largest absolute gap between the two sets of season beginnings
print(max(abs(astro_dates - meteo_dates)))
# Last updated