18  16. %in%

18.1 vector1 %in% vector2

################################################################.
################################################################.
##
## vector1 %in% vector2
##
## Returns a logical vector that is the same length as vector1.
## For every value in vector1 TRUE indicates that the value
## is in vector2 and FALSE indicates that it isn't.
## See examples below.
##
################################################################.
################################################################.

students <- c("joe", "sue", "al", "anne")
students
[1] "joe"  "sue"  "al"   "anne"
"sue" %in% students    # TRUE
[1] TRUE
"amanda" %in% students   # FALSE
[1] FALSE
c("sue","amanda") %in% students     # TRUE FALSE
[1]  TRUE FALSE
c("sue","amanda","felix","al","joaquin","xavier","zeke","anne")  %in% students 
[1]  TRUE FALSE FALSE  TRUE FALSE FALSE FALSE  TRUE
# ANSWER :  TRUE FALSE FALSE TRUE FALSE FALSE FALSE TRUE

18.2 — Practice —

#---------------------------------------------------------------------.
# QUESTION
#
# The variables mathClass and englishClass contain names of students
# who are in each respective class.
#
# The "built-in" intersect function returns the values that are
# common to two different vectors. ( see ?intersect )
#
#   (a) use the intersect function to show those students who are 
#       in both the math and the english class.
#
#   (b) do the same as (a) but do NOT use the intersect function,
#       rather use the %in% operator to accomplish the same result.
#
#   (c) write a function named myintersect that does the same
#       thing as the built-in intersect function - but do not
#       call the built-in intersect function in the 
#       definition of myintersect.
#---------------------------------------------------------------------.
# Let's setup some data to work with
mathClass   = c("alice", "mike", "sue", "bob", "amanda")
englishClass = c("sue", "joe", "mike", "frank")

#   (a) use the intersect function to show those students who are 
#       in both the math and the English class.

# Let's look at the R documentation for the intersect function.
# View the help page by typing: 
#
#  ?intersect

# Use the function
intersect(mathClass, englishClass)
[1] "mike" "sue" 
#   (b) do the same as (a) but do NOT use the intersect function,
#       rather use the %in% operator to accomplish the same result.
#
#       HINT - remember that %in% returns logical values (i.e. TRUE/FALSE values)
#              Use these TRUE/FALSE values to index into the appropriate vector.



# step 1 - figure out how to get a logical vector to show TRUE for each student
#          in the math class that is also in the english class

mathClass %in% englishClass
[1] FALSE  TRUE  TRUE FALSE FALSE
# step 2 - use the result from step1 to get the names of those students from the math class
#          who are also in the english class

mathClass [ mathClass %in% englishClass ]
[1] "mike" "sue" 
# another way ....

englishClass %in% mathClass
[1]  TRUE FALSE  TRUE FALSE
englishClass[ englishClass %in% mathClass  ]
[1] "sue"  "mike"
#############################################################################.
# DON'T DO THE FOLLOWING! IT IS WRONG!
#############################################################################.
# The mathClass variable is used outside the brackets while
# the expression "englishClass %in% mathClass" gives one TRUE/FALSE for 
# each value in the englishClass variable. 
# You must make sure that the variable used outside the [brackets] matches up
# with the TRUE/FALSE values that are generated inside the [brackets].
#############################################################################.

mathClass[ englishClass %in% mathClass  ]  # THIS IS WRONG!!!
[1] "alice"  "sue"    "amanda"
# WHAT'S THE PROBLEM - FIGURE OUT WHAT HAPPENED
#
# original:           mathClass[ englishClass %in% mathClass  ]
#
# replace variables:  mathClass[c("sue","joe","mike","frank") %in% c("alice","mike","sue","bob","amanda")]
#
# %in%:               mathClass[c(TRUE, FALSE, TRUE, FALSE) ]
#
# recycle T/F values: mathClass[c(TRUE,FALSE,TRUE,FALSE,TRUE)]      
#
# final values:       "alice" "sue" "amanda"
#   (c) write a function named myintersect that does the same
#       thing as the built-in intersect function - but do not
#       call the built-in intersect function in the 
#       definition of myintersect.

# myintersect - returns the values of vec1 that are also in vec2
# (the same result as the built-in intersect function).
#
# ARGUMENTS
#   vec1, vec2 - the two vectors to compare
#
# RETURNS
#   A vector of the values from vec1 that also appear in vec2, in the
#   order in which they first appear in vec1. Each value appears only
#   once, even if it is repeated in vec1.
myintersect = function(vec1, vec2) {
  
  # vec1 %in% vec2 gives one TRUE/FALSE for each value in vec1.
  # Those TRUE/FALSE values are used to pick the matching values from vec1.
  #
  # unique() removes repeated values. The built-in intersect function
  # discards duplicates, so without unique() a call such as
  # myintersect(c("sue","sue"), "sue") would return "sue" twice.
  unique( vec1 [ vec1 %in% vec2 ] )
  
}


myintersect( mathClass, englishClass)   # mike sue
[1] "mike" "sue" 
myintersect( englishClass, mathClass)  # "sue" "mike"
[1] "sue"  "mike"
myintersect( 1:10, 7:1000) # 7 8 9 10
[1]  7  8  9 10
# Other ways ... 

# myintersect - one-line version that uses an explicit return().
#
# Returns the values of vec1 that are also in vec2. Each value appears
# only once in the result (the built-in intersect function also discards
# duplicated values), so unique() is applied before returning.
myintersect = function(vec1, vec2) {
  return ( unique( vec1 [ vec1 %in% vec2 ] ) )
}

# myintersect - two-step version. The TRUE/FALSE values are first saved
# in a variable and then used as the index. The value of the last
# expression is what the function returns.
#
# Returns the values of vec1 that are also in vec2. Each value appears
# only once in the result (the built-in intersect function also discards
# duplicated values).
myintersect = function(vec1, vec2) {
  # tf has one TRUE/FALSE for each value in vec1
  tf = vec1 %in% vec2 
  # unique() removes values that are repeated in vec1
  unique( vec1 [ tf ] )
}

# myintersect - two-step version that uses an explicit return().
# The TRUE/FALSE values are first saved in a variable and then used
# as the index.
#
# Returns the values of vec1 that are also in vec2. Each value appears
# only once in the result (the built-in intersect function also discards
# duplicated values).
myintersect = function(vec1, vec2) {
  # tf has one TRUE/FALSE for each value in vec1
  tf = vec1 %in% vec2 
  # unique() removes values that are repeated in vec1
  return ( unique( vec1 [ tf ] ) )
}
#---------------------------------------------------------------------.
# QUESTION  ####
#
# The variables mathClass and englishClass contain names of students
# who are in each respective class.
#
# Write a command to show the students who are in the math class but
# are NOT in the english class
#
# (a) use the built-in setdiff function (read the documentation)
#
#  EXAMPLE
#  (This is just an example. Your code must also work if there are different
#   names in the mathClass and englishClass variables.)
#
#     > # setup some data
#     > mathClass   = c("alice", "mike", "sue", "bob", "amanda")
#     > englishClass = c("sue", "joe", "mike", "frank")
#
#     > YOUR CODE GOES HERE
#     [1] "alice" "bob" "amanda"
#
# (b) Do the same thing as (a) but WITHOUT using the setdiff function
#     HINT: use the ! operator and the %in% operator
#---------------------------------------------------------------------.

::: {.callout-note icon=false collapse="true"}
### part (a) - Click here for answer

# (a) use the built-in setdiff function (read the documentation)

# Setup some data
mathClass   = c("alice", "mike", "sue", "bob", "amanda")
englishClass = c("sue", "joe", "mike", "frank")

# This is the answer
setdiff(mathClass, englishClass)
[1] "alice"  "bob"    "amanda"
# (b) Do the same thing as (a) but WITHOUT using the setdiff function
#
#     HINT: use the ! operator and the %in% operator
#           Remember that ! will flip FALSE to TRUE and TRUE to FALSE.
#
#           > ! c(TRUE, FALSE, FALSE)
#           [1] FALSE TRUE TRUE

#--------------------------------------------------------------.
# Setup some data
#--------------------------------------------------------------.
mathClass   = c("alice", "mike", "sue", "bob", "amanda")
englishClass = c("sue", "joe", "mike", "frank")

#--------------------------------------------------------------.
# Before we get to the answer, think about the following.
#--------------------------------------------------------------.

# The following code shows one result for each person in the math class.
#
# For each person in the math class, the result is ...
# ... TRUE if they are also in English.
# ... FALSE if they are just in math.

mathClass  %in% englishClass    
[1] FALSE  TRUE  TRUE FALSE FALSE
# The following uses the ! to flip the TRUEs and FALSEs.
# The code still gives one result for each person in the math class.
# Now, for each person in the math class, the result is ...
# ... FALSE if they are also in English.
# ... TRUE if they are just in math.

! ( mathClass  %in% englishClass )  
[1]  TRUE FALSE FALSE  TRUE  TRUE
# The answer is below.
#
# We can use the last expression above as the index for the mathClass
# variable to retrieve only those students in the mathClass who are NOT also in 
# the English class.

mathClass [ ! ( mathClass  %in% englishClass )   ]
[1] "alice"  "bob"    "amanda"
# Note that %in% has higher precedence than ! (see ?Syntax).
# Therefore you don't need (parentheses) and the following still works.

mathClass [ ! mathClass %in% englishClass   ]
[1] "alice"  "bob"    "amanda"

18.3 ! SOME_VECTOR %in% ANOTHER_VECTOR

####################################################################################.
# Understand the placement of !  ####
#
# ! must immediately precede a logical vector (i.e. a vector of TRUE/FALSE values)
####################################################################################.

# Setup some data
mathClass   = c("alice", "mike", "sue", "bob", "amanda")
englishClass = c("sue", "joe", "mike", "frank")

# Correct - ! operates on TRUE/FALSE values (AKA logical vectors)
! c(TRUE, FALSE)
[1] FALSE  TRUE
# Wrong - it's an error to use ! with character values.
! c("mike", "sue")   # ERROR!
Error in !c("mike", "sue"): invalid argument type
# Correct - parentheses contain TRUE/FALSE values
#           since %in% results in a logical vector
! ( mathClass  %in% englishClass ) 
[1]  TRUE FALSE FALSE  TRUE  TRUE
# Correct - same as above - %in% has higher precedence 
#           than ! so you don't need (parentheses)
#           (see ?Syntax)
!   mathClass  %in% englishClass     
[1]  TRUE FALSE FALSE  TRUE  TRUE
# Wrong - ! is in wrong place
mathClass ! %in% englishClass  # ERROR
Error: <text>:2:11: unexpected '!'
1: # Wrong - ! is in wrong place
2: mathClass !
             ^

18.4 — Practice —

#---------------------------------------------------------------------.
# QUESTION
#
# The variables mathClass and englishClass contain names of students
# who are in each respective class.
#
# (a) Write a command to show the students who are only in one of the classes.
#     Do not repeat the same name twice. HINT: use the setdiff function
#
#     EXAMPLE
#     (This is just an example. Your code must also work if there are different
#      names in the mathClass and englishClass variables.)
#
#        > # setup some data
#        > mathClass   = c("alice", "mike", "sue", "bob", "amanda")
#        > englishClass = c("sue", "joe", "mike", "frank")
#
#        > YOUR CODE GOES HERE
#        [1] "alice" "bob" "amanda" "joe" "frank"
#
# (b) do the same, but this time, do NOT use the setdiff function.
#---------------------------------------------------------------------.
# setup some data
mathClass   = c("alice", "mike", "sue", "bob", "amanda")
englishClass = c("sue", "joe", "mike", "frank")

# ANSWER - using the setdiff and c functions

onlyMath = setdiff(mathClass, englishClass)
onlyEnglish = setdiff(englishClass, mathClass)

# Show the students who are only in one of the classes
c(onlyMath, onlyEnglish)
[1] "alice"  "bob"    "amanda" "joe"    "frank" 
# You can also do this all in one line 
c ( setdiff(mathClass, englishClass) , setdiff(englishClass, mathClass))
[1] "alice"  "bob"    "amanda" "joe"    "frank" 
# setup some data
mathClass   = c("alice", "mike", "sue", "bob", "amanda")
englishClass = c("sue", "joe", "mike", "frank")

# ANSWER - do NOT use the setdiff function - You can split up the code into
#          different commands to make it easier to read.

onlyMath = mathClass [ ! mathClass %in% englishClass ]
onlyEnglish = englishClass [ ! englishClass %in% mathClass ]
c(onlyMath, onlyEnglish)
[1] "alice"  "bob"    "amanda" "joe"    "frank"