rm(list =ls()) # start from scratch##############################################################################.## lapply ( SOME_LIST , SOME_FUNCTION ) ####### The lapply function is used to "apply" a function to every entry in a list.## The function is run as many times as there are entries in the list.# # lapply returns a list that contains the return values from each of the# times the function was called. ## The "l" in "lapply" stands for "list", i.e. the return value of the # "lapply" function is a list. There are other similar functions# such as sapply, mapply and apply that have slightly different ways # of working. For now we will just focus on "lapply". Later in the course# we will learn about "sapply" and perhaps "mapply" and "apply".## (see the examples below)###############################################################################.# create a listgradebook =list(grades=c(80,85,72,95,100,89,65,84), students=c("joe","sue","bob","al","frank","mike","anne","clara"),honors=c(FALSE,FALSE,FALSE,TRUE,TRUE,TRUE,FALSE,FALSE))gradebook
#-----------------------------------------------------------------------.# Some built-in functions # # We'll use the following functions to help explain the lapply and sapply functions## length(SOME_VECTOR_OR_LIST) returns the number of items in the vector or the list# range(SOME_VECTOR) - returns c( min(SOME_VECTOR) , max(SOME_VECTOR))# summary(SOME_OBJECT) - returns a summary of what is in SOME_OBJECT#-----------------------------------------------------------------------.# length returns the number of items in a vector (or in a list)length( gradebook$grades ) # 8
[1] 8
length( gradebook$students ) # 8
[1] 8
length( gradebook$honors ) # 8
[1] 8
# range returns min and max of the vectorrange( gradebook$grades) # 65 100
[1] 65 100
range( gradebook$students) # "al" "sue"
[1] "al" "sue"
range( gradebook$honors) # 0 1
[1] 0 1
# summary returns a short summary of data.# different results for different types of data.summary( gradebook$grades ) # see output below
Min. 1st Qu. Median Mean 3rd Qu. Max.
65.00 78.00 84.50 83.75 90.50 100.00
# Min. 1st Qu. Median Mean 3rd Qu. Max. # 65.00 78.00 84.50 83.75 90.50 100.00 summary( gradebook$students ) # see output below
Length Class Mode
8 character character
# Length Class Mode # 8 character character summary( gradebook$honors ) # see output below
Mode FALSE TRUE
logical 5 3
# Mode FALSE TRUE # logical 5 3
28.2 lapply ( SOME_LIST , SOME_FUNCTION )
#-----------------------------------------------------------------------.
# lapply ( SOME_LIST , SOME_FUNCTION )
#
# The lapply function takes two arguments:
#   X   - a list
#   FUN - the name of a function.
#
# lapply returns a new list that contains as many entries as there
# were in the list X. Each entry in the new list contains the results
# of "applying" the function FUN to an entry of the original list, X.
#
# EXAMPLE:
#   > lst = list( c(10,20,30),           # length of this is 3
#                 c("apple", "orange"))  # length of this is 2
#
#   > lapply(X=lst, FUN=length)   # list of 3 and 2
#   [[1]]
#   [1] 3
#
#   [[2]]
#   [1] 2
#-----------------------------------------------------------------------.

#........................................
# EXAMPLE 1 - lapply(gradebook, length)
#........................................

# Use the lapply function to automatically apply the length function to
# each of the items in the gradebook list.
# This returns a new list that contains all of the results.
lapply(gradebook, length)   # see results below
$grades
[1] 8
$students
[1] 8
$honors
[1] 8
# $grades# [1] 8## $students# [1] 8## $honors# [1] 8# lapply returns a list of the answersmode(lapply(gradebook, length)) # "list"
[1] "list"
#........................................# EXAMPLE 2 - lapply(gradebook, range)#........................................# Show the results of the range function on each of the items in the listrange(gradebook$grades) # 65 100
[1] 65 100
range(gradebook$students) # "al" "sue"
[1] "al" "sue"
range(gradebook$honors) # 0 1
[1] 0 1
# apply range function to all items in the gradebook listlapply(gradebook, range) # see results below
# $grades# [1] 65 100# # $students# [1] "al" "sue"# # $honors# [1] 0 1#........................................# EXAMPLE 3 - lapply(gradebook, summary)#........................................# summary returns a short summary of data.# different results for different types of data.summary( gradebook$grades ) # see output below
Min. 1st Qu. Median Mean 3rd Qu. Max.
65.00 78.00 84.50 83.75 90.50 100.00
# Min. 1st Qu. Median Mean 3rd Qu. Max. # 65.00 78.00 84.50 83.75 90.50 100.00 summary( gradebook$students ) # see output below
Length Class Mode
8 character character
# Length Class Mode # 8 character character summary( gradebook$honors ) # see output below
Mode FALSE TRUE
logical 5 3
# Mode FALSE TRUE # logical 5 3 # apply summary function to all items in the gradebook listlapply(gradebook, summary) # all of the above results in a single list
$grades
Min. 1st Qu. Median Mean 3rd Qu. Max.
65.00 78.00 84.50 83.75 90.50 100.00
$students
Length Class Mode
8 character character
$honors
Mode FALSE TRUE
logical 5 3
# apply mode function to all items in the gradebook listlapply(gradebook, mode) # all of the above results in a single list
# remember the str function is very helpful for seeing the structure of complex listsstr(classes)
List of 4
$ year : num 2021
$ semester: chr "fall"
$ section1:List of 3
..$ students: chr [1:3] "abe" "bob" "charlie"
..$ test1 : num [1:3] 70 80 90
..$ test2 : num [1:3] 75 85 95
$ section2:List of 4
..$ students: chr [1:5] "fran" "anne" "sue" "bertha" ...
..$ test1 : num [1:5] 100 90 80 70 60
..$ test2 : num [1:5] 95 85 75 65 55
..$ test3 : num [1:5] 93 83 73 63 53
28.4 NAMED LISTS WITH lapply VS UNNAMED LISTS
#-----------------------------------------------------------------------.# NAMED LISTS WITH lapply VS UNNAMED LISTS## The return value of lapply will be a named list only if the# original list has names.#-----------------------------------------------------------------------.hasNames =list( fruit=c("apple","orange","pear","plum"),evens=seq(2,10,by=2),odds=seq(1,30,by=2),someLogicals=c(TRUE,FALSE,TRUE))hasNames
$fruit
Length Class Mode
4 character character
$evens
Min. 1st Qu. Median Mean 3rd Qu. Max.
2 4 6 6 8 10
$odds
Min. 1st Qu. Median Mean 3rd Qu. Max.
1 8 15 15 22 29
$someLogicals
Mode FALSE TRUE
logical 1 2
[[1]]
Length Class Mode
4 character character
[[2]]
Min. 1st Qu. Median Mean 3rd Qu. Max.
2 4 6 6 8 10
[[3]]
Min. 1st Qu. Median Mean 3rd Qu. Max.
1 8 15 15 22 29
[[4]]
Mode FALSE TRUE
logical 1 2
28.5 using custom functions with lapply
############################################################.
# using custom functions with lapply
#
# You can use any function that contains one argument
# with lapply, even your own custom functions
############################################################.

stuff = list(grades   = c(70,80,95,88,60),
             students = c("larry", "zeke","charlie","yaakov","abe"),
             age      = c(17,23,20,20,21,19))

# define a function to find the 2nd largest value in a vector
#
# ARGUMENTS
#   vec - a vector whose values can be sorted (e.g. numeric or character)
#
# RETURNS
#   The second largest value in vec. NA values are ignored.
#   If vec contains fewer than two non-NA values the result is NA.
secondLargest = function( vec ){
  # sort() removes NA values, so the position must be taken from the
  # sorted result itself and not from length(vec). Sorting from largest
  # to smallest puts the second largest value in position 2.
  sort(vec, decreasing=TRUE)[2]
}

# Test the function
nums = c(10,1000,900,-55,23)
secondLargest(nums)   # 900
[1] 900
# Example - use our function the vectors in the list, stuffsecondLargest( stuff$grades) # 88
[1] 88
secondLargest( stuff$students) # "yaakov"
[1] "yaakov"
secondLargest( stuff$age) # 21
[1] 21
# Use our custom function with lapply to automatically apply the function# to all entries in the list, stufflapply( stuff, secondLargest) # list of the answers
$grades
[1] 88
$students
[1] "yaakov"
$age
[1] 21
28.6 functions defined in one line don't need {curly braces}
###########################################.
# functions defined in one line
#
# If a function only needs one line of code
# the {curly braces} aren't required.
###########################################.

# Another way to define the second largest function - all in one line
# (you don't need the {curly braces}).
#
# vec is a vector whose values can be sorted. NA values are ignored, and
# the result is NA if vec has fewer than two non-NA values. Sorting from
# largest to smallest puts the second largest value in position 2
# (sort() removes NA values, so length(vec) is not a safe index).
secondLargest = function ( vec ) sort(vec, decreasing=TRUE)[2]

# Test the function
nums = c(10,1000,900,-55,23)
secondLargest(nums)
[1] 900
28.7 Using “one line functions” with lapply
###########################################.# Using "one line functions" with lapply###########################################.# create a listgradebook =list(grades=c(80,85,72,95,100,89,65,84), students=c("joe","sue","bob","al","frank","mike","anne","clara"),honors=c(FALSE,FALSE,FALSE,TRUE,TRUE,TRUE,FALSE,FALSE))gradebook
# If you want to define a function just to use with lapply you# can define it directly in the call to lapply# Use the secondLargest function we defined abovelapply(gradebook, secondLargest) # we defined gradebook above
#------------------------------------------------------------------------.# QUESTION## stuff is a list.# Write a single line of code that returns the number of # numeric vectors that are contained in a single list.## HINT: Use the is.numeric function, lapply and unlist## EXAMPLE## > stuff = list(c(100,200,300), # c("apple", "orange", "pear"), # seq(10,100,by=5),# c(TRUE, FALSE))## > YOUR CODE GOES HERE# [1] 2#------------------------------------------------------------------------.
click here for the answer
# Setup the datastuff =list(c(100,200,300), c("apple", "orange", "pear"), seq(10,100,by=5),c(TRUE, FALSE))stuff
#------------------------------------------------------------------------.
# QUESTION
#
# (see previous question)
# Same idea as previous question, but this time create a list
# called someStuff that contains just the logical and numeric vectors
# from stuff.
#
# HINT: one way to do this is to create a custom function that
#       takes a single argument. The custom function should
#       return TRUE if the argument contains a numeric or a logical vector
#       and FALSE otherwise.
#
# EXAMPLE:
#   > stuff = list( seq(10,20,length.out=4) ,    # numeric
#                   NULL,                        # NULL
#                   c("apple", "orange"),        # character
#                   rep(5, 3),                   # numeric
#                   1:4 > pi,                    # logical
#                   NULL,                        # NULL
#                   paste0(letters[1:5], 1:5),   # character
#                   2 ^ (1:5) )                  # numeric
#
#   > someStuff = YOUR CODE GOES HERE
#
#   > someStuff
#   [[1]]
#   [1] 10.00000 13.33333 16.66667 20.00000
#
#   [[2]]
#   [1] 5 5 5
#
#   [[3]]
#   [1] FALSE FALSE FALSE  TRUE
#
#   [[4]]
#   [1]  2  4  8 16 32
#------------------------------------------------------------------------.
# STEP 1
# Create a function that returns TRUE if the argument is either logical or numeric
#
# ARGUMENTS
#   something - any R object (e.g. one entry of a list)
#
# RETURNS
#   a single TRUE/FALSE value
isLogicalOrNumeric = function( something ){
  # is.logical and is.numeric each return a single TRUE/FALSE value,
  # so the scalar "or" operator, ||, is the right one to use here.
  is.logical(something) || is.numeric(something)
}

# STEP 2 - use the function with lapply
tfList = lapply(stuff, isLogicalOrNumeric)   # list of TRUE/FALSE values
tfList

# STEP 3 - unlist the results to get a vector
tfVector = unlist ( tfList )   # vector of TRUE/FALSE values
tfVector

# STEP 4 - use the logical vector to get just the logical and numeric entries in the list
someStuff = stuff[tfVector]

# ALL STEPS TOGETHER IN ONE COMMAND
# You can create an "anonymous" function directly in the call to lapply.
someStuff = stuff [ unlist ( lapply(stuff,
                                    function (something)
                                      is.logical(something) || is.numeric(something) ) ) ]

# or alternatively all in one line
someStuff = stuff[unlist ( lapply(stuff, function (something) is.logical(something) || is.numeric(something) ) ) ]
someStuff