28 26. lapply ( SOME_LIST , SOME_FUNCTION )

rm(list = ls())   # start from scratch

##############################################################################.
#
#      lapply ( SOME_LIST , SOME_FUNCTION )     ####
#
#
# The lapply function is used to "apply" a function to every entry in a list.
#
# The function is run as many times as there are entries in the list.
# 
# lapply returns a list that contains the return values from each of the
# times the function was called. 
#
# The "l" in "lapply" stands for "list", i.e. the return value of the 
# "lapply" function is a list. There are other similar functions
# such as sapply, mapply and apply that have slightly different ways 
# of working. For now we will just focus on "lapply". Later in the course
# we will learn about "sapply" and perhaps "mapply" and "apply".
#
# (see the examples below)
#
##############################################################################.


# create a list
gradebook = list(grades=c(80,85,72,95,100,89,65,84), 
                 students=c("joe","sue","bob","al","frank","mike","anne","clara"),
                 honors=c(FALSE,FALSE,FALSE,TRUE,TRUE,TRUE,FALSE,FALSE))
gradebook

$grades
[1]  80  85  72  95 100  89  65  84

$students
[1] "joe"   "sue"   "bob"   "al"    "frank" "mike"  "anne"  "clara"

$honors
[1] FALSE FALSE FALSE  TRUE  TRUE  TRUE FALSE FALSE

28.1 length, range, summary

#-----------------------------------------------------------------------.
# Some built-in functions 
# 
# We'll use the following functions to help explain the lapply and sapply functions
#
#   length(SOME_VECTOR_OR_LIST) returns the number of items in the vector or the list
#   range(SOME_VECTOR) - returns c( min(SOME_VECTOR) , max(SOME_VECTOR))
#   summary(SOME_OBJECT) - returns a summary of what is in SOME_OBJECT
#-----------------------------------------------------------------------.


# length returns the number of items in a vector (or in a list)
length( gradebook$grades )     # 8

[1] 8

length( gradebook$students )   # 8

[1] 8

length( gradebook$honors )     # 8

[1] 8

# range returns min and max of the vector
range( gradebook$grades)     # 65   100

[1]  65 100

range( gradebook$students)   # "al" "sue"

[1] "al"  "sue"

range( gradebook$honors)     #   0   1

[1] 0 1

# summary returns a short summary of data.
# different results for different types of data.

summary( gradebook$grades )    # see output below

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  65.00   78.00   84.50   83.75   90.50  100.00

#  Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
# 65.00   78.00   84.50   83.75   90.50  100.00 

summary( gradebook$students )   # see output below

   Length     Class      Mode 
        8 character character

# Length     Class      Mode 
#      8 character character 

summary( gradebook$honors )    # see output below

   Mode   FALSE    TRUE 
logical       5       3

#    Mode   FALSE    TRUE 
# logical       5       3

28.2 lapply ( SOME_LIST , SOME_FUNCTION )

#-----------------------------------------------------------------------.
# lapply ( SOME_LIST , SOME_FUNCTION )
# 
# The lapply function takes two arguments:
#   X   - a list
#   FUN - the name of a function.
#
# lapply returns a new list that contains as many entries as there 
# were in the list X. Each entry in the new list contains the results
# of "applying" the function FUN to an entry of the original list, X.
#
#    EXAMPLE:
#      > lst = list( c(10,20,30),           # length of this is 3
#                    c("apple", "orange"))  # length of this is 2
#
#      > lapply(X=lst, FUN=length) # list of 3 and 2
#      [[1]]
#      [1] 3
#      
#      [[2]]
#      [1] 2
#-----------------------------------------------------------------------.


#........................................
# EXAMPLE 1 - lapply(gradebook, length)
#........................................

# Use the lapply function to automatically apply the length function to 
# each of the items in the gradebook list.
# This returns a new list that contains all of the results.

lapply(gradebook, length)   # see results below

$grades
[1] 8

$students
[1] 8

$honors
[1] 8

# $grades
# [1] 8
#
# $students
# [1] 8
#
# $honors
# [1] 8


# lapply returns a list of the answers
mode(lapply(gradebook, length))  # "list"

[1] "list"

#........................................
# EXAMPLE 2 - lapply(gradebook, range)
#........................................

# Show the results of the range function on each of the items in the list
range(gradebook$grades)     #  65   100

[1]  65 100

range(gradebook$students)   # "al" "sue"

[1] "al"  "sue"

range(gradebook$honors)     #   0   1

[1] 0 1

# apply range function to all items in the gradebook list

lapply(gradebook, range)   # see results below

$grades
[1]  65 100

$students
[1] "al"  "sue"

$honors
[1] 0 1

# $grades
# [1]  65 100
# 
# $students
# [1] "al"  "sue"
# 
# $honors
# [1] 0 1



#........................................
# EXAMPLE 3 - lapply(gradebook, summary)
#........................................

# summary returns a short summary of data.
# different results for different types of data.
summary( gradebook$grades )    # see output below

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  65.00   78.00   84.50   83.75   90.50  100.00

#  Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
# 65.00   78.00   84.50   83.75   90.50  100.00 

summary( gradebook$students )   # see output below

   Length     Class      Mode 
        8 character character

# Length     Class      Mode 
#      8 character character 

summary( gradebook$honors )    # see output below

   Mode   FALSE    TRUE 
logical       5       3

#    Mode   FALSE    TRUE 
# logical       5       3 

# apply summary function to all items in the gradebook list
lapply(gradebook, summary)   # all of the above results in a single list

$grades
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  65.00   78.00   84.50   83.75   90.50  100.00 

$students
   Length     Class      Mode 
        8 character character 

$honors
   Mode   FALSE    TRUE 
logical       5       3

# apply mode function to all items in the gradebook list
lapply(gradebook, mode)   # a list with the mode (i.e. type of data) of each entry in gradebook

$grades
[1] "numeric"

$students
[1] "character"

$honors
[1] "logical"

28.3 lapply with nested lists

#---------------------------------------------------------------------------.
# lapply also works with nested lists
# (i.e. lists that contain other lists)
#---------------------------------------------------------------------------.


rm(list = ls())

classes = list( year = 2021,
                semester = "fall",
                section1 = list ( students = c("abe","bob","charlie"),
                                  test1 = c(70,80,90),
                                  test2 = c(75,85,95)),
                section2 = list( students = c("fran", "anne", "sue", "bertha", "maxine"),
                                 test1 = c(100,90,80,70,60),
                                 test2 = c(95,85,75,65,55),
                                 test3 = c(93,83,73,63,53)) )

length(classes)   # 4  (classes contains 4 objects)

[1] 4

lapply(classes, length) # a list that contains 1,1,3,4, i.e. the length of each object in classes

$year
[1] 1

$semester
[1] 1

$section1
[1] 3

$section2
[1] 4

# remember the str function is very helpful for seeing the structure of complex lists
str(classes)

List of 4
 $ year    : num 2021
 $ semester: chr "fall"
 $ section1:List of 3
  ..$ students: chr [1:3] "abe" "bob" "charlie"
  ..$ test1   : num [1:3] 70 80 90
  ..$ test2   : num [1:3] 75 85 95
 $ section2:List of 4
  ..$ students: chr [1:5] "fran" "anne" "sue" "bertha" ...
  ..$ test1   : num [1:5] 100 90 80 70 60
  ..$ test2   : num [1:5] 95 85 75 65 55
  ..$ test3   : num [1:5] 93 83 73 63 53

28.4 NAMED LISTS WITH lapply VS UNNAMED LISTS

#-----------------------------------------------------------------------.
# NAMED LISTS WITH lapply VS UNNAMED LISTS
#
# The return value of lapply will be a named list only if the
# original list has names.
#-----------------------------------------------------------------------.


hasNames = list( fruit=c("apple","orange","pear","plum"),
                 evens=seq(2,10,by=2),
                 odds=seq(1,30,by=2),
                 someLogicals=c(TRUE,FALSE,TRUE))
hasNames

$fruit
[1] "apple"  "orange" "pear"   "plum"  

$evens
[1]  2  4  6  8 10

$odds
 [1]  1  3  5  7  9 11 13 15 17 19 21 23 25 27 29

$someLogicals
[1]  TRUE FALSE  TRUE

str(hasNames)

List of 4
 $ fruit       : chr [1:4] "apple" "orange" "pear" "plum"
 $ evens       : num [1:5] 2 4 6 8 10
 $ odds        : num [1:15] 1 3 5 7 9 11 13 15 17 19 ...
 $ someLogicals: logi [1:3] TRUE FALSE TRUE

length(hasNames)

[1] 4

lapply(hasNames, length)

$fruit
[1] 4

$evens
[1] 5

$odds
[1] 15

$someLogicals
[1] 3

lapply(hasNames, range)

$fruit
[1] "apple" "plum" 

$evens
[1]  2 10

$odds
[1]  1 29

$someLogicals
[1] 0 1

lapply(hasNames, summary)

$fruit
   Length     Class      Mode 
        4 character character 

$evens
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
      2       4       6       6       8      10 

$odds
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
      1       8      15      15      22      29 

$someLogicals
   Mode   FALSE    TRUE 
logical       1       2

noNames = list( c("apple","orange","pear","plum"),
                seq(2,10,by=2),
                seq(1,30,by=2),
                c(TRUE,FALSE,TRUE))
noNames

[[1]]
[1] "apple"  "orange" "pear"   "plum"  

[[2]]
[1]  2  4  6  8 10

[[3]]
 [1]  1  3  5  7  9 11 13 15 17 19 21 23 25 27 29

[[4]]
[1]  TRUE FALSE  TRUE

str(noNames)

List of 4
 $ : chr [1:4] "apple" "orange" "pear" "plum"
 $ : num [1:5] 2 4 6 8 10
 $ : num [1:15] 1 3 5 7 9 11 13 15 17 19 ...
 $ : logi [1:3] TRUE FALSE TRUE

length(noNames)

[1] 4

lapply(noNames, length)

[[1]]
[1] 4

[[2]]
[1] 5

[[3]]
[1] 15

[[4]]
[1] 3

lapply(noNames, range)

[[1]]
[1] "apple" "plum" 

[[2]]
[1]  2 10

[[3]]
[1]  1 29

[[4]]
[1] 0 1

lapply(noNames, summary)

[[1]]
   Length     Class      Mode 
        4 character character 

[[2]]
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
      2       4       6       6       8      10 

[[3]]
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
      1       8      15      15      22      29 

[[4]]
   Mode   FALSE    TRUE 
logical       1       2

28.5 using custom functions with lapply

############################################################.
#   using custom functions with lapply
#
# You can use any function that can be called with one argument 
# with lapply, even your own custom functions
############################################################.


stuff = list(grades = c(70,80,95,88,60),
             students = c("larry", "zeke","charlie","yaakov","abe"),
             age = c(17,23,20,20,21,19))

# define a function to find the 2nd largest value in a vector
#
#   vec - a vector whose values can be sorted (e.g. numeric, character, logical)
#
# Returns the 2nd largest value in vec. NA values are ignored.
# If vec has fewer than two non-NA values, the result is NA.
secondLargest = function( vec ){
 # Sort from largest to smallest. sort() drops NA values by default, so
 # the 2nd largest value is always in position 2 of the sorted vector.
 sort(vec, decreasing=TRUE)[2]
}

# Test the function
nums = c(10,1000,900,-55,23)
secondLargest(nums)   # 900

[1] 900

# Example - use our function on the vectors in the list, stuff
secondLargest( stuff$grades)    # 88

[1] 88

secondLargest( stuff$students)  # "yaakov"

[1] "yaakov"

secondLargest( stuff$age)       # 21

[1] 21

# Use our custom function with lapply to automatically apply the function
# to all entries in the list, stuff

lapply( stuff, secondLargest)    # list of the answers

$grades
[1] 88

$students
[1] "yaakov"

$age
[1] 21

28.6 functions defined in one line dont need {curly braces}

###########################################.
# functions defined in one line
#
# If a function only needs one line of code
# the {curly braces} aren't required. 
###########################################.

# Another way to define the second largest function - all in one line 
# (you don't need the {curly braces})
# Sorting from largest to smallest puts the 2nd largest value in position 2.
# NA values are ignored; the result is NA if vec has fewer than two non-NA values.
secondLargest = function ( vec ) sort(vec, decreasing=TRUE)[2]

# Test the function
nums = c(10,1000,900,-55,23)
secondLargest(nums)

[1] 900

28.7 Using “one line functions” with lapply

###########################################.
# Using "one line functions" with lapply
###########################################.

# create a list
gradebook = list(grades=c(80,85,72,95,100,89,65,84), 
                 students=c("joe","sue","bob","al","frank","mike","anne","clara"),
                 honors=c(FALSE,FALSE,FALSE,TRUE,TRUE,TRUE,FALSE,FALSE))
gradebook

$grades
[1]  80  85  72  95 100  89  65  84

$students
[1] "joe"   "sue"   "bob"   "al"    "frank" "mike"  "anne"  "clara"

$honors
[1] FALSE FALSE FALSE  TRUE  TRUE  TRUE FALSE FALSE

# If you want to define a function just to use with lapply you
# can define it directly in the call to lapply

# Use the secondLargest function we defined above
lapply(gradebook, secondLargest)   # we defined gradebook above

$grades
[1] 95

$students
[1] "mike"

$honors
[1] TRUE

# This also works
# (sorting from largest to smallest puts the 2nd largest value in position 2;
#  sort() drops NA values, so NAs cannot shift the answer)
lapply(gradebook, function ( vec ) {
 sort(vec, decreasing=TRUE)[2]
})

$grades
[1] 95

$students
[1] "mike"

$honors
[1] TRUE

# So does this
# (sorting from largest to smallest puts the 2nd largest value in position 2)
lapply(gradebook, function ( vec ) sort(vec, decreasing=TRUE)[2] )

$grades
[1] 95

$students
[1] "mike"

$honors
[1] TRUE

# You can capture the output of lapply in a variable if you like
# (sorting from largest to smallest puts the 2nd largest value in position 2)
answers = lapply(gradebook, function ( vec ) sort(vec, decreasing=TRUE)[2] )

answers

$grades
[1] 95

$students
[1] "mike"

$honors
[1] TRUE

answers$students

[1] "mike"

answers[[1]]

[1] 95

mode(answers)   # "list"

[1] "list"

28.8 –PRACTICE–

#------------------------------------------------------------------------.
# QUESTION
#
# stuff is a list.
# Write a single line of code that returns the number of 
# numeric vectors that are contained in the list, stuff.
#
# HINT: Use the is.numeric function, lapply and unlist
#
# EXAMPLE
#
#   > stuff = list(c(100,200,300), 
#                 c("apple", "orange", "pear"), 
#                 seq(10,100,by=5),
#                 c(TRUE, FALSE))
#
#   > YOUR CODE GOES HERE
#   [1] 2
#------------------------------------------------------------------------.

click here for the answer

# Setup the data
stuff = list(c(100,200,300), 
             c("apple", "orange", "pear"), 
             seq(10,100,by=5),
             c(TRUE, FALSE))
stuff

[[1]]
[1] 100 200 300

[[2]]
[1] "apple"  "orange" "pear"  

[[3]]
 [1]  10  15  20  25  30  35  40  45  50  55  60  65  70  75  80  85  90  95 100

[[4]]
[1]  TRUE FALSE

# ANSWER
sum( unlist( lapply(stuff, is.numeric) )  )

[1] 2

# Or you can break up into multiple lines to make it easier to read.
sum( 
 unlist( 
  lapply(stuff, is.numeric) 
 )  
)

[1] 2

#------------------------------------------------------------------------.
# QUESTION
# 
# stuff is a list.
# Write a single line of code that creates a new variable,
# named numericStuff, that contains a new list with 
# just the numeric vectors from stuff.
#
# EXAMPLE:
# > stuff = list( seq(10,20,length.out=4) ,   # numeric
#                 NULL,                       # NULL
#                 c("apple", "orange"),       # character
#                 rep(5, 3),                  # numeric
#                 1:4 > pi,                   # logical
#                 NULL,                       # NULL
#                 paste0(letters[1:5], 1:5),  # character
#                 2 ^ (1:5) )                 # numeric
#
# > numericStuff = YOUR CODE GOES HERE
#
# > numericStuff
# [[1]]
# [1] 10.00000 13.33333 16.66667 20.00000
# 
# [[2]]
# [1] 5 5 5
# 
# [[3]]
# [1]  2  4  8 16 32
#------------------------------------------------------------------------.

click here for the answer

# ANSWER
stuff = list( seq(10,20,length.out=4) ,   # numeric
              NULL,                       # NULL
              c("apple", "orange"),       # character
              rep(5, 3),                  # numeric
              1:4 > pi,                   # logical
              NULL,                       # NULL
              paste0(letters[1:5], 1:5),  # character
              2 ^ (1:5))                  # numeric

stuff

[[1]]
[1] 10.00000 13.33333 16.66667 20.00000

[[2]]
NULL

[[3]]
[1] "apple"  "orange"

[[4]]
[1] 5 5 5

[[5]]
[1] FALSE FALSE FALSE  TRUE

[[6]]
NULL

[[7]]
[1] "a1" "b2" "c3" "d4" "e5"

[[8]]
[1]  2  4  8 16 32

numericStuff = stuff [        # ANSWER
 unlist ( 
  lapply(stuff, is.numeric) 
 ) 
] 

numericStuff

[[1]]
[1] 10.00000 13.33333 16.66667 20.00000

[[2]]
[1] 5 5 5

[[3]]
[1]  2  4  8 16 32

#------------------------------------------------------------------------.
# QUESTION
# 
# (see previous question)
# Same idea as previous question, but this time create a list
# called someStuff that contains just the logical and numeric vectors
# from stuff. 
#
# HINT: one way to do this is to create a custom function that
#       takes a single argument. The custom function should
#       return TRUE if the argument contains a numeric or a logical vector
#       and FALSE otherwise.
#
# EXAMPLE:
# > stuff = list( seq(10,20,length.out=4) ,   # numeric
#                 NULL,                       # NULL
#                 c("apple", "orange"),       # character
#                 rep(5, 3),                  # numeric
#                 1:4 > pi,                   # logical
#                 NULL,                       # NULL
#                 paste0(letters[1:5], 1:5),  # character
#                 2 ^ (1:5) )                 # numeric
#
# > someStuff = YOUR CODE GOES HERE
#
# > someStuff
# [[1]]
# [1] 10.00000 13.33333 16.66667 20.00000
# 
# [[2]]
# [1] 5 5 5
# 
# [[3]]
# [1] FALSE FALSE FALSE  TRUE
# 
# [[4]]
# [1]  2  4  8 16 32
#------------------------------------------------------------------------.

click here for the answer

# setup some data
stuff = list( seq(10,20,length.out=4) ,   # numeric
              NULL,                       # NULL
              c("apple", "orange"),       # character
              rep(5, 3),                  # numeric
              1:4 > pi,                   # logical
              NULL,                       # NULL
              paste0(letters[1:5], 1:5),  # character
              2 ^ (1:5))                  # numeric

stuff

[[1]]
[1] 10.00000 13.33333 16.66667 20.00000

[[2]]
NULL

[[3]]
[1] "apple"  "orange"

[[4]]
[1] 5 5 5

[[5]]
[1] FALSE FALSE FALSE  TRUE

[[6]]
NULL

[[7]]
[1] "a1" "b2" "c3" "d4" "e5"

[[8]]
[1]  2  4  8 16 32

# STEP 1
# Create a function that returns TRUE if the argument is either logical or numeric
# (and FALSE for anything else, e.g. character vectors or NULL)
isLogicalOrNumeric = function( something ){
 # is.numeric and is.logical each return a single TRUE/FALSE value,
 # so the scalar "or" operator (||) is all that is needed here
 is.numeric(something) || is.logical(something)
}

# STEP 2 - use the function with lapply
tfList = lapply(stuff, isLogicalOrNumeric)   # list of TRUE/FALSE values
tfList

[[1]]
[1] TRUE

[[2]]
[1] FALSE

[[3]]
[1] FALSE

[[4]]
[1] TRUE

[[5]]
[1] TRUE

[[6]]
[1] FALSE

[[7]]
[1] FALSE

[[8]]
[1] TRUE

# STEP 3 - unlist the results to get a vector
tfVector = unlist ( tfList )  # vector of TRUE/FALSE values
tfVector

[1]  TRUE FALSE FALSE  TRUE  TRUE FALSE FALSE  TRUE

# STEP 4 - use the logical vector to get just the logical and numeric entries in the list
someStuff = stuff[tfVector]

# ALL STEPS TOGETHER IN ONE COMMAND 
# You can create an "anonymous" function directly in the call to lapply.
someStuff = stuff [       
 unlist ( 
      lapply(stuff,  function (something) is.logical(something)|is.numeric(something) 
  )
 ) 
] 

# or alternatively all in one line
someStuff = stuff[unlist ( lapply(stuff,  function (something) is.logical(something)|is.numeric(something) ) ) ] 


someStuff

[[1]]
[1] 10.00000 13.33333 16.66667 20.00000

[[2]]
[1] 5 5 5

[[3]]
[1] FALSE FALSE FALSE  TRUE

[[4]]
[1]  2  4  8 16 32