20  18. logical operators    ! (not)    & (and)    | (or)

20.1 Review - sorting

# Comment on sorting
# see previous file

"apple" < "ball"    #TRUE
[1] TRUE
"ball"  < "apple"    #FALSE
[1] FALSE
FALSE < TRUE       #TRUE
[1] TRUE
TRUE < FALSE        #FALSE
[1] FALSE

20.2 Review - the ! (i.e. “not”) operator

#------------------------------------------------
# The ! operator
#
# The ! operator (pronounced as "not")
# changes TRUE to FALSE
# changes FALSE to TRUE
#
# "Truth Table" - a "truth table" shows how a
#                 logical operator works.
#
#      x      !x
#    -----  -----
#    TRUE   FALSE
#    FALSE  TRUE
#------------------------------------------------


!TRUE    # FALSE
[1] FALSE
!FALSE   # TRUE
[1] TRUE
! c(TRUE,FALSE,TRUE,TRUE)   # FALSE TRUE FALSE FALSE
[1] FALSE  TRUE FALSE FALSE
# Example

tests = c(NA, 80, NA, 90, 88)   
is.na(tests)   # TRUE FALSE TRUE FALSE FALSE
[1]  TRUE FALSE  TRUE FALSE FALSE
!is.na(tests)  # FALSE TRUE FALSE TRUE TRUE
[1] FALSE  TRUE FALSE  TRUE  TRUE
#-------------------------------------------------------------------
# QUESTION - tests and students are parallel vectors.
# The test grades are recorded in the same order as the student names.
# i.e. abe did not take the test so he is recorded as NA
#      bob got an 80
# etc.
# Answer the questions below.
#--------------------------------------------------------------------

tests = c(NA, 80, NA, 90, 88)   
students = c("abe", "bob", "carla", "dana", "ed")

# QUESTION - Write a command to
# show the names of the students who did NOT take the test
# (i.e. the grade is NA for those students)
students [ is.na(tests) ]
[1] "abe"   "carla"
# QUESTION - Write a command to
# show the names of the students who did take the test
# (i.e. the grade is NOT NA for those students)
students [ !is.na(tests) ]
[1] "bob"  "dana" "ed"  
#-------------------------------------------------------------------
# QUESTION
#
# mathClass contains the names of the students in a math class.
# committee is the names of the students on the yearbook committee.
#
# Answer the questions below.
#--------------------------------------------------------------------

# Another example
mathClass = c("abe", "bob", "carla", "dana", "ed", "fran", "george", "hillary")
committee = c("bob","zoe","abe","victor")

# QUESTION - write a command to show the names of the students in
# the math class who are on the yearbook committee. Use the %in% operator.

mathClass[ mathClass %in% committee ]   # one answer
[1] "abe" "bob"
committee[ committee %in% mathClass ]   # another (just as valid) answer
[1] "bob" "abe"
# QUESTION - write a command to show the names of the students in
# the math class who are NOT on the yearbook committee. Use the %in% operator.


mathClass[ ! mathClass %in% committee ]     # answer
[1] "carla"   "dana"    "ed"      "fran"    "george"  "hillary"

20.3 The & operator

#--------------------------------------------------------
# The & operator
#
# & operator (pronounced "and") is used to combine two different conditions.
# Result is TRUE only when both x and y are TRUE
# otherwise result is FALSE.
#
#  TRUTH TABLE:
#
#                       (result)
#        x       y       x&y
#      -----   -----    -----
#      TRUE    TRUE     TRUE
#      TRUE    FALSE    FALSE
#      FALSE   TRUE     FALSE
#      FALSE   FALSE    FALSE
#--------------------------------------------------------


TRUE & TRUE   # TRUE
[1] TRUE
TRUE & FALSE  # FALSE
[1] FALSE
FALSE & TRUE  # FALSE
[1] FALSE
FALSE & FALSE  # FALSE
[1] FALSE
#-----------------------------------------------------------
# & works in a "vectorized" way (as you'd expect from an R operator)
# i.e. the 1st values of the left and right hand vectors are operated on, then 
#      the 2nd values from the left and right hand vectors are operated on,
#      etc.
#-----------------------------------------------------------

c(TRUE,FALSE,TRUE,FALSE) & c(FALSE,FALSE,TRUE,TRUE)
[1] FALSE FALSE  TRUE FALSE
# original :   c(TRUE,FALSE,TRUE,FALSE) & c(FALSE,FALSE,TRUE,TRUE)
# vector operation: c(TRUE&FALSE , FALSE&FALSE , TRUE&TRUE , FALSE&TRUE)
# final answer:       FALSE        FALSE         TRUE        FALSE

#-----------------------------------------------------------
# recycling rule
#
# as with all vectorized operations the recycling rule is used 
# when one of the two vectors is shorter than the other.
#-----------------------------------------------------------

c(TRUE, FALSE) & c(FALSE, TRUE, TRUE, TRUE)     # FALSE FALSE TRUE FALSE
[1] FALSE FALSE  TRUE FALSE
# original :        c(TRUE, FALSE) & c(FALSE, TRUE, TRUE, TRUE)
# recycling:        c(TRUE, FALSE, TRUE, FALSE) & c(FALSE, TRUE, TRUE, TRUE)
# vector operation: c(TRUE&FALSE , FALSE&TRUE , TRUE&TRUE , FALSE&TRUE)
# final answer:       FALSE        FALSE         TRUE        FALSE




# You can have many different logical vectors &'ed together 

c(TRUE, TRUE) & c(FALSE, TRUE) & c(TRUE, FALSE)   # FALSE FALSE
[1] FALSE FALSE
# original: c(TRUE, TRUE) & c(FALSE, TRUE) & c(TRUE, FALSE)
# first &:  c(TRUE&FALSE , TRUE&TRUE)      & c(TRUE, FALSE)
#           c(FALSE      , TRUE)           & c(TRUE, FALSE)
# 2nd &:              c(FALSE & TRUE , TRUE & FALSE)
# Final answer:             FALSE         FALSE


#-----------------------------------------------------------
# QUESTION - nums is a vector that contains some numbers. Write a SINGLE COMMAND
#            that displays only those numbers from nums that are divisible by 
#            BOTH 3 and 5.
#-----------------------------------------------------------

# setup some data
nums = c(5, 6, 7, 15, 13, 20)
nums
[1]  5  6  7 15 13 20
# Let's think about how to do each condition separately

nums [nums %% 3 == 0] # 6 15 - numbers that are divisible by 3
[1]  6 15
nums [nums %% 5 == 0] # 5 15 20 - numbers that are divisible by 5
[1]  5 15 20
# When we combine the conditions with & we get the correct result

nums [nums %% 3 == 0  &  nums %% 5 == 0] # 15 - numbers that are divisible by BOTH 3 and 5
[1] 15
#-----------------------------------------------------
# QUESTION - test1, test2 and students
# are parallel vectors.
# Answer the questions below.
#
# EXAMPLE (your answers must work if the data is different too)
#
#   test1   =  c(90,    88,   87,     84,    92)
#   test2   =  c(70,    80,   90,     94,    100)
#   students = c("abe","bob","carla","dana","ed")
#-----------------------------------------------------


#.........................................................................
# QUESTION - Write a command to show the names of the students who got an 
# 85 or above on both tests.
#.........................................................................

# Setup some data
test1   =  c(90,    88,   87,     84,    92)
test2   =  c(70,    80,   90,     94,    100)
students = c("abe","bob","carla","dana","ed")

 
# ANSWER
students [  test1 >= 85  &  test2 >= 85       ]   # CORRECT ANSWER
[1] "carla" "ed"   
# original:    students [  test1 >= 85  &  test2 >= 85       ]
#              students [  c(TRUE,TRUE,TRUE,FALSE,TRUE)  &  c(FALSE,FALSE,TRUE,TRUE,TRUE)       ]
#     &:       students [  c(TRUE&FALSE,TRUE&FALSE,TRUE&TRUE,FALSE&TRUE,TRUE&TRUE) ]
#              students [  c(FALSE,       FALSE,     TRUE,     FALSE     ,TRUE) ]
#               "carla"  "ed"

#,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
# NOTE:   When you use numbers in the context of logical operators (i.e. ! & |)
#
# zero becomes FALSE
# anything else becomes TRUE
#,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,

as.logical(0)  #FALSE
[1] FALSE
as.logical(99)  #TRUE
[1] TRUE
as.logical(-99.27)  #TRUE
[1] TRUE
as.logical(1)  #TRUE
[1] TRUE
0 & 1  # same as   FALSE & TRUE
[1] FALSE
976.123 & 0  # same as TRUE & FALSE
[1] FALSE
-27.98 & 15   # same as TRUE & TRUE
[1] TRUE

20.4 There MUST BE a LOGICAL vector on both sides of the & (and also the | see below)

# Refer to the final answer to the question above.
students [  test1 >= 85  &  test2 >= 85       ]   # CORRECT ANSWER
[1] "carla" "ed"   
#,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
# WARNING
#
# You must have an expression that results in a LOGICAL vector on both
# sides of the &. In the code below, test1 is NOT a logical vector but it
# appears on the left side of the &. This is WRONG.
#
#       # WRONG!
#       # students [ test1 & test2 >= 85 ]    # WRONG WRONG WRONG !!! DONT DO THIS !!!
#
# The right hand side of the & is OK - it is test2>=85 which is a logical vector.
# The correct way to write this code is as was done above:
# 
#       students [  test1 >= 85  &  test2 >= 85       ]   # CORRECT ANSWER
#,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


# CORRECT ANSWER (already shown above)
students[ test1 >= 85 & test2 >= 85 ]
[1] "carla" "ed"   
# WRONG ANSWER _ DONT DO THIS !!!
students [ test1 & test2 >= 85 ]    # WRONG WRONG WRONG !!! DONT DO THIS !!!
[1] "carla" "dana"  "ed"   
#   original:           students [  test1 & test2 >= 85 ]    # WRONG WRONG WRONG !!! DONT DO THIS !!!
#   replace values:     students [   c(90,    88,   87,     84,    92) & c(70,    80,   90,     94,    100) >= c(85,85,85,85,85)]    # WRONG WRONG WRONG !!! DONT DO THIS !!!
#  >=                   students [   c(90,    88,   87,     84,    92) & c(70>=85,80>=85, 90>=85, 94>=85,100>=85)]  
#                       students [   c(90,    88,   87,     84,    92) & c(FALSE,FALSE,TRUE,TRUE,TRUE)]  
# convert #rs to logicals: students [   c(TRUE,TRUE,TRUE,TRUE,TRUE) & c(FALSE,FALSE,TRUE,TRUE,TRUE)]  
#                          students [   c(FALSE,FALSE,TRUE,TRUE,TRUE) ] 
#                          "carla" "dana" "ed"

20.5 –PRACTICE–

#----------------------------------------------------------------------------.
# QUESTIONS
#
# Answer the questions below using the following data.
# test1, test2 and students are parallel vectors.
# Your answers must work even if the actual numbers and names were different.
#----------------------------------------------------------------------------.

test1   =  c(90,    88,   87,     84,    92)
test2   =  c(70,    80,   90,     94,    100)
students = c("abe","bob","carla","dana","ed")
# QUESTION - Write a command to show the names of the students
# who got in the 80's on test1.
students [ test1 >= 80 & test1 < 90 ]
[1] "bob"   "carla" "dana" 
# QUESTION - Write a command to show the names of the students
# who got in the 80's on test2.
students [ test2 >= 80 & test2 < 90 ]
[1] "bob"
# QUESTION - Write a command to show the names of the students
# who got in the 80's on both test1 and on test2.
students [ (test1 >= 80 & test1 < 90) & (test2 >= 80 & test2 < 90)]
[1] "bob"
# The following also works since it doesn't matter in this case 
# which & happens first.
students [ test1 >= 80 & test1 < 90 & test2 >= 80 & test2 < 90]
[1] "bob"
students [ test1 >= 80 & (test1 < 90 & test2 >= 80) & test2 < 90]
[1] "bob"

20.6 The | operator      (NOTE: | is pronounced “or”)

#--------------------------------------------------------   
# The | operator    
#   
# |  operator (pronounced "or") is used to combine two different conditions.    
# The symbol is a vertical line or bar (often drawn with a small break in the line) 
# It is found on every standard computer keyboard - just look for it.   
# On my keyboard it is below the backspace, on the same key as the backslash character. 
#   
# The symbol is often referred to as a "pipe character" by technologists.   
# (The reason it is called a "pipe" by technologists originated with the    
#  Unix operating system but it is beyond the scope of what we are discussing now  
#  and I don't want to confuse you more than necessary :) ...   
#   
# Result is TRUE when either x or y are TRUE or when both are TRUE.     
# Otherwise, the result is FALSE (i.e. the result is only FALSE when both x and y are FALSE)    
#   
#  TRUTH TABLE: 
#   
#                     (result)  
#        x       y       x|y    
#      -----   -----    -----
#      TRUE    TRUE     TRUE    
#      TRUE    FALSE    TRUE    
#      FALSE   TRUE     TRUE    
#      FALSE   FALSE    FALSE   
#--------------------------------------------------------

TRUE | TRUE   # TRUE    
[1] TRUE
TRUE | FALSE  # TRUE    
[1] TRUE
FALSE | TRUE  # TRUE    
[1] TRUE
FALSE | FALSE # FALSE
[1] FALSE
# An example with the recycling rule    

c(TRUE, FALSE) | c(FALSE, FALSE, FALSE, TRUE)   
[1]  TRUE FALSE  TRUE  TRUE
# original:     c(TRUE, FALSE) | c(FALSE, FALSE, FALSE, TRUE)   
# recycling:    c(TRUE, FALSE, TRUE, FALSE) | c(FALSE, FALSE, FALSE, TRUE)  
#               c(TRUE|FALSE, FALSE|FALSE, TRUE|FALSE, FALSE|TRUE)  
# final result:     TRUE         FALSE       TRUE        TRUE   



# This also works with multiple vectors (same as above with addition of a 3rd vector)   

c(TRUE, FALSE) | c(FALSE, FALSE, FALSE, TRUE) | c(FALSE, TRUE, FALSE, FALSE)    
[1] TRUE TRUE TRUE TRUE
# original:     c(TRUE, FALSE) | c(FALSE, FALSE, FALSE, TRUE) | c(FALSE, TRUE, FALSE, FALSE)    
# recycling:    c(TRUE, FALSE, TRUE, FALSE) | c(FALSE, FALSE, FALSE, TRUE) | c(FALSE, TRUE, FALSE, FALSE)   
# 1st |:        c(TRUE|FALSE, FALSE|FALSE, TRUE|FALSE, FALSE|TRUE) | c(FALSE, TRUE, FALSE, FALSE)   
#               c(   TRUE,       FALSE,      TRUE,        TRUE)    | c(FALSE, TRUE, FALSE, FALSE)   
# 2nd |:        c(TRUE|FALSE, FALSE|TRUE,  TRUE|FALSE,  TRUE|FALSE) 
# final result:     TRUE         TRUE        TRUE          TRUE
#----------------------------------------------------------------------------.
# QUESTIONS
#
# Answer the questions below using the following data.
# test1, test2 and students are parallel vectors.
# Your answers must work even if the actual numbers and names were different.
#----------------------------------------------------------------------------.

test1   =  c(90,    88,   87,     84,    92)
test2   =  c(70,    80,   90,     94,    100)
students = c("abe","bob","carla","dana","ed")
# QUESTION - Write a command to show the names of the students
# who did NOT get in the 80's on test1.
# CORRECT ANSWER
students [ test1 < 80    |    test1 >= 90  ]
[1] "abe" "ed" 
# THE FOLLOWING IS WRONG
students [ test1 < 80    &    test1 >= 90  ] # WRONG!!! This should be | not & 
character(0)
# Even when you get the wrong answer, it is important to be able to 
# work through exactly WHY you got the wrong answer. 

#original:           students [ test1 < 80    &    test1 >= 90  ]
#
# replace variables: students [ c(90,88,87,84,92) < c(80,80,80,80,80)    &    c(90,88,87,84,92) >= c(90,90,90,90,90)  ]
#
# vector operation:  students [ c(90<80,88<80,87<80,84<80,92<80)  &    c(90>=90,88>=90,87>=90,84>=90,92>=90) ]
#
# evaluate < and >=: students [ c(FALSE,FALSE,FALSE,FALSE,FALSE)  &    c(TRUE,FALSE,FALSE,FALSE,TRUE) ]
#
# &               :   students [ c(FALSE,FALSE,FALSE,FALSE,FALSE) ]
#
# ANSWER - no students :  character(0)
# QUESTION - Write a command to show the names of the students
# who did NOT get in the 80's on test2.
students [ test2 < 80    |    test2 >= 90  ]
[1] "abe"   "carla" "dana"  "ed"   

20.7 Order of operations: !     &     |

#---------------------------------------------------------------------  
# ORDER OF OPERATIONS   
#   
# As always, expressions in (parentheses) are done first. Otherwise,    
# the order of operations between ! & | operators is    
#   
#    first   !    (NOT)     
#    then    &    (AND) 
#    finally |    (OR)  
#   
# As with all order of operations issues, (parentheses) can change what is done first.
#---------------------------------------------------------------------  


# Examples: 
TRUE | FALSE & FALSE     # TRUE   because & is done before |    
[1] TRUE
(TRUE | FALSE) & FALSE   # FALSE  same code but with parentheses    
[1] FALSE
FALSE | TRUE & FALSE     # FALSE    
[1] FALSE
(FALSE | TRUE) & FALSE   # FALSE - parentheses don't change the result here     
[1] FALSE
TRUE | TRUE & FALSE      # TRUE   because & is done before |    
[1] TRUE
(TRUE | TRUE) & FALSE    # FALSE  same code but with parentheses    
[1] FALSE
TRUE | !FALSE & FALSE    # TRUE   first ! then & then | 
[1] TRUE
(TRUE | !FALSE) & FALSE  # FALSE  same code but with parentheses    
[1] FALSE
# Use the following data to answer the question below. 
# Note that this data is just an example.
# Your answer must work even if the actual numbers and names were to be
# different. 
#
#   test1   =  c(90,    88,   87,     84,    92)
#   test2   =  c(70,    80,   90,     94,    100)
#   students = c("abe","bob","carla","dana","ed")
#
# QUESTION
#
# Write a command to show the names of the students
# who did NOT get in the 80's - neither on test1 nor on test2.
#
# HINT - you can use a combination of & and |. Make sure
# that you take the order of operations (& before |) into account
# when you code your answer.
# setup the data
test1   =  c(90,    88,   87,     84,    92)
test2   =  c(70,    80,   90,     94,    100)
students = c("abe","bob","carla","dana","ed")

# ANSWER
students [ (test1 < 80 | test1 >= 90) &  (test2 < 80 | test2 >= 90) ]
[1] "abe" "ed" 
# EXPLANATION:
# You can think of the above answer as the following: 

TRUE_IF_DIDNT_GET_80s_ON_TEST1 = test1 < 80 | test1 >= 90
TRUE_IF_DIDNT_GET_80s_ON_TEST1
[1]  TRUE FALSE FALSE FALSE  TRUE
TRUE_IF_DIDNT_GET_80s_ON_TEST2 = test2 < 80 | test2 >= 90
TRUE_IF_DIDNT_GET_80s_ON_TEST2
[1]  TRUE FALSE  TRUE  TRUE  TRUE
TRUE_IF_DIDNT_GET_80s_ON_EITHER_TEST = TRUE_IF_DIDNT_GET_80s_ON_TEST1   &   TRUE_IF_DIDNT_GET_80s_ON_TEST2
TRUE_IF_DIDNT_GET_80s_ON_EITHER_TEST
[1]  TRUE FALSE FALSE FALSE  TRUE
students
[1] "abe"   "bob"   "carla" "dana"  "ed"   
students [ TRUE_IF_DIDNT_GET_80s_ON_EITHER_TEST ]
[1] "abe" "ed" 
# The following are WRONG because & happens before | based on the order of operations
students [ test1 < 80 | test1 >= 90 &  test2 < 80 | test2 >= 90 ]
[1] "abe"   "carla" "dana"  "ed"   
students [ test1 < 80 | (test1 >= 90 &  test2 < 80) | test2 >= 90 ]
[1] "abe"   "carla" "dana"  "ed"   
#---------------------------------------------------------------------. 
# Sometimes you have to take into account what is done first in
# a larger context. The following should be easy enough to remember.    
#   
#      1st - parentheses    
#      2nd - all math   
#      3rd - all relational operators (i.e. > < >= <= == !=)    
#      4th - !  
#      5th - &  
#      6th - |  
#      7th - assignment to a variable   
#   
# You can also refer back to the complete official documentation for    
# the complete list of R operator precedence by typing  
#   
#    ?Syntax #  (capital "S")   
#   
# or refer to this url (search for "r official documentation order of operations")  
#   
#    https://stat.ethz.ch/R-manual/R-devel/library/base/html/Syntax.html    
#   
# This list is summarized below for the operators that we have learned so far.  
#   
#      ^                exponentiation (right to left)  
#      - +              unary minus and plus (as in -2 and in +2)   
#      :                  sequence operator 
#      %%    %/%    %in%   (and operators surrounded by %percents%) 
#      * /              multiply, divide    
#      + -              add, subtract   
#      < > <= >= == !=  ordering and comparison 
#      !                negation    
#      &                  and   
#      |                or  
#      ->               rightwards assignment   
#      <-               assignment (right to left)  
#      =                  assignment (right to left)    
#   
#---------------------------------------------------------------------.

20.8 –PRACTICE–

#---------------------------------------------------------------------. 
# Use the following data to answer the questions below.
# These are parallel vectors. They contain information about
# the opening and closing prices of some stocks yesterday.
#
# Answer the following questions by writing a command that calculates
# the specified information.
#
# Note that this is sample data. Your answers must work even if the
# actual values of the data were to be different.
#---------------------------------------------------------------------. 

company= c("JP Chase", "citigrp","pfizer", "moderna","google","microsoft","facebook","j&j",     "goldman sachs", "oracle" )
sector = c("fin",      "fin",    "pharma", "pharma", "tech", "tech",      "tech",    "pharma",  "fin",           "tech")
hq     = c("ny",        "ny",     "ny",    "ma",     "ca",    "wa",       "ca",      "nj",      "ny",            "tx" )
ticker = c("jpm",      "c",      "pfe",    "mrna",   "goog",  "msft",     "fb",      "jnj",     "gs",            "orcl")
ceo =    c("dimon",    "fraser", "bourla",  "bancel","pichai","nadella",  "zuckerberg","gorsky","solomon",      "catz")
open =   c(171.00,     101,       93.04,    345.20,   2300,    308.61,     326,       164.06,    414.41,         98.42)
close =  c(160,        99,       120.00,    365.20,   2405,    350,        300,       140,       465.50,         104)
# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
# QUESTION
# (a) Show the names of all "tech" companies that are NOT headquartered
#     in California (i.e. "ca")
#
# (b) Show their tickers   (another command)
#
# (c) Show their headquarters (hq)    (another command)
# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
# ANSWER - (a) Show the names of all "tech" companies that are NOT headquartered
#              in California (i.e. "ca")

company [   sector == "tech" & hq != "ca"]
[1] "microsoft" "oracle"   
# ANSWER - (b) Show their tickers   (another command)
ticker [   sector == "tech" & hq != "ca"]
[1] "msft" "orcl"
# ANSWER - (c) Show their headquarters (hq)    (another command)
hq [   sector == "tech" & hq != "ca"]
[1] "wa" "tx"
# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
# QUESTION
#
# (a) Show the names of the companies in alphabetical order that are
#     headquartered in either NY or CA
#
# (b) Show their headquarters (hq)    (another command)
# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
# ANSWER - (a) Show the names of the companies in alphabetical order that are
#              headquartered in either NY or CA

# These are the companies

company [  hq == "ny" | hq == "ca" ] 
[1] "JP Chase"      "citigrp"       "pfizer"        "google"       
[5] "facebook"      "goldman sachs"
# Use sort to put them in alphabetical order

sort ( company [  hq == "ny" | hq == "ca" ] )
[1] "citigrp"       "facebook"      "goldman sachs" "google"       
[5] "JP Chase"      "pfizer"       
# ANSWER - (b) Show their headquarters (hq)    (another command)
#
# NOTE - the headquarters are shown in the original order of the companies,
# not in the sorted order of the companies - see the answer to part (a).
# Using what we've learned so far there is no simple way to put
# the headquarters in the sorted order of the companies.
# We will soon learn about dataframes, which do allow for sorting one variable
# based upon the values in a different variable.


company [  hq == "ny" | hq == "ca" ] 
[1] "JP Chase"      "citigrp"       "pfizer"        "google"       
[5] "facebook"      "goldman sachs"
hq [  hq == "ny" | hq == "ca" ] 
[1] "ny" "ny" "ny" "ca" "ca" "ny"
# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
# QUESTION
# (a) Show the names of the tech companies whose stock increased in value
# (b) Show their open values  (another command)
# (c) Show their close values   (another command) 
# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
# ANSWER - (a) Show the names of the tech companies whose stock increased in value
company[  sector == "tech"  &  close > open ]
[1] "google"    "microsoft" "oracle"   
# ANSWER - (b) Show their open values  (another command)
open[  sector == "tech"  &  close > open ]
[1] 2300.00  308.61   98.42
# (c) Show their close values   (another command) 
close[  sector == "tech"  &  close > open ]
[1] 2405  350  104
# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .. . . . . .
# QUESTION
# (a) Show the names of the tech companies whose stock increased in value by 10% or more
# (b) Show their open values  (another command)
# (c) Show their close values   (another command) 
# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
# ANSWER

# (a) Show the names of the tech companies whose stock increased in value by 10% or more
company[  sector == "tech"  &  close >= open + open * .1 ]
[1] "microsoft"
# (b) Show their open values  (another command)
open[  sector == "tech"  &  close >= open + open * .1 ]
[1] 308.61
# (c) Show their close values   (another command) 
close[  sector == "tech"  &  close >= open + open * .1 ]
[1] 350
# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
# QUESTION
# (a) Show the names of the tech and pharma companies whose stock increased in value by 10% or more
#
# (b) Show their sectors   (another command)
#
# (c) Show their open values  (another command)
#
# (d) Show their close values   (another command) 
#
# (e) Show the percent increases of the tech and pharma companies whose stock increased in value by 10% or more
#     HINT: calculate a new vector that calculates the percent increases of all 
#     the stocks. Then use that new vector to get just the percent increases for
#     the companies we want. You can do this all in one command or in different
#     commands.
# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
# (a) Show the names of the tech and pharma companies whose stock increased in value by 10% or more

company[  ( sector == "tech" | sector == "pharma" )  &  close >= open + open * .1 ]
[1] "pfizer"    "microsoft"
# Note - the parentheses above ARE required. 
# 
# This is wrong since the & happens before | due to order of operations, you will get 
# ALL tech companies and not just those who increased in value by 10%
company[  sector == "tech" | sector == "pharma"  &  close >= open + open * .1 ]  # WRONG
[1] "pfizer"    "google"    "microsoft" "facebook"  "oracle"   
# This is wrong since the & happens before | due to order of operations, you will get 
# ALL pharma companies and not just those who increased in value by 10%
company[  close >= open + open * .1  &  sector == "tech" | sector == "pharma"   ]
[1] "pfizer"    "moderna"   "microsoft" "j&j"      
# ANSWER - (b) Show their sectors   (another command)

sector[  ( sector == "tech" | sector == "pharma" )  &  close >= open + open * .1 ]
[1] "pharma" "tech"  
# ANSWER - (c) Show their open values  (another command)
open[  ( sector == "tech" | sector == "pharma" )  &  close >= open + open * .1 ]
[1]  93.04 308.61
# ANSWER - (d) Show their close values   (another command) 
close[  ( sector == "tech" | sector == "pharma" )  &  close >= open + open * .1 ]
[1] 120 350
# ANSWER - (e) Show the percent increases of the tech and pharma companies whose stock increased in value by 10% or more
#     HINT: calculate a new vector that calculates the percent increases of all 
#     the stocks. Then use that new vector to get just the percent increases for
#     the companies we want. You can do this all in one command or in different
#     commands.


# One way - in two commands
pctIncreases = ( close - open) / open * 100
pctIncreases
 [1]  -6.432749  -1.980198  28.976784   5.793743   4.565217  13.411749
 [7]  -7.975460 -14.665366  12.328370   5.669579
pctIncreases[  ( sector == "tech" | sector == "pharma" )  &  close >= open + open * .1 ]
[1] 28.97678 13.41175
# Another way - all in one command

( ( close - open) / open * 100) [  ( sector == "tech" | sector == "pharma" )  &  close >= open + open * .1 ]
[1] 28.97678 13.41175
# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
# QUESTION
# (a) Show the tickers of "fin" and "tech" companies whose stock opened or closed at $100 or more
# (b) Show their sectors   (another command)
# (c) Show their open values  (another command)
# (d) Show their close values   (another command) 
# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
# (a) Show the tickers of "fin" and "tech" companies whose stock opened or closed at $100 or more
ticker [ ( sector == "fin" | sector == "tech") &  ( open >= 100 | close >= 100 )  ]
[1] "jpm"  "c"    "goog" "msft" "fb"   "gs"   "orcl"
# (b) Show their sectors   (another command)
sector [ ( sector == "fin" | sector == "tech") &  ( open >= 100 | close >= 100 )  ]
[1] "fin"  "fin"  "tech" "tech" "tech" "fin"  "tech"
# (c) Show their open values  (another command)
open [ ( sector == "fin" | sector == "tech") &  ( open >= 100 | close >= 100 )  ]
[1]  171.00  101.00 2300.00  308.61  326.00  414.41   98.42
# (d) Show their close values   (another command) 
close [ ( sector == "fin" | sector == "tech") &  ( open >= 100 | close >= 100 )  ]
[1]  160.0   99.0 2405.0  350.0  300.0  465.5  104.0