chr_data <- c("Data", "Daft", "YouTube", "channel",
"learn", "and", "have", "FUN!")
15 15. stringr tutorial
This is an overview of the stringr package that is part of the “tidyverse” family of packages. The info in this section comes from this YouTube playlist: https://www.youtube.com/watch?v=oIu5jK8DeX8&list=PLiC1doDIe9rDwsUhd3FtN1XGCV2ES1xZ2
See these resources for more info about the entire tidyverse family of packages.
See these links for more info about the stringr package
See these links for more info about other related tidyverse packages:
- tibble package: https://tibble.tidyverse.org/
- magrittr: https://magrittr.tidyverse.org/
- lubridate package: https://lubridate.tidyverse.org/
- hms package: https://hms.tidyverse.org/
- dplyr package: https://dplyr.tidyverse.org/
- Also see this playlist for info about dplyr: https://www.youtube.com/watch?v=THGFXV4RW8U&list=PLiC1doDIe9rC8RgWPAWqDETE-VbKOWfWl
15.1 stringr: Basic String Manipulation
# Check the length of a string
str_length("Hi there! How are you?")Error in str_length("Hi there! How are you?"): could not find function "str_length"
str_length(chr_data)Error in str_length(chr_data): could not find function "str_length"
# Convert string letters to uppercase
str_to_upper(chr_data)Error in str_to_upper(chr_data): could not find function "str_to_upper"
# Convert string letters to lowercase
str_to_lower(chr_data)Error in str_to_lower(chr_data): could not find function "str_to_lower"
# Convert string to title case (first letter of each word uppercase)
str_to_title(chr_data)Error in str_to_title(chr_data): could not find function "str_to_title"
# Convert string to sentence (only first letter of first word uppercase)
str_to_sentence("make me into a SENTENCE!")Error in str_to_sentence("make me into a SENTENCE!"): could not find function "str_to_sentence"
# Trim whitespace
str_trim(" Trim Me! ")Error in str_trim(" Trim Me! "): could not find function "str_trim"
# Pad strings with whitespace
str_pad("Pad Me!", width = 15, side="both")Error in str_pad("Pad Me!", width = 15, side = "both"): could not find function "str_pad"
# Truncate strings to a given length
str_trunc("If you have a long string, you might want to truncate it!",
width = 50)Error in str_trunc("If you have a long string, you might want to truncate it!", : could not find function "str_trunc"
15.2 stringr: Split and Join Strings
# Split strings
str_split("Split Me!", pattern = " ")Error in str_split("Split Me!", pattern = " "): could not find function "str_split"
food <- c(
"apples and oranges and pears and bananas",
"pineapples and mangos and guavas"
)
stringr::str_split(food, " and ")
[[1]]
[1] "apples" "oranges" "pears" "bananas"
[[2]]
[1] "pineapples" "mangos" "guavas"
# Join strings (equivalent to base R paste())
str_c("Join", "Me!", sep="_")Error in str_c("Join", "Me!", sep = "_"): could not find function "str_c"
# Join vectors of strings element-wise (equivalent to base R paste())
str_c(c("Join", "vectors"), c("Me!", "too!"), sep="_")Error in str_c(c("Join", "vectors"), c("Me!", "too!"), sep = "_"): could not find function "str_c"
# Collapse a vector of strings into a single string
str_c(c("Turn", "me", "into", "one", "string!"), collapse= " ")Error in str_c(c("Turn", "me", "into", "one", "string!"), collapse = " "): could not find function "str_c"
# Convert NA values in character vector to string "NA"
str_replace_na(c("Make", NA, "strings!"))Error in str_replace_na(c("Make", NA, "strings!")): could not find function "str_replace_na"
15.3 stringr: Sorting Strings
sort_data <- c("sort", "me", "please!")
# Get the vector of indices that would sort the strings alphabetically
str_order(sort_data)Error in str_order(sort_data): could not find function "str_order"
# Use discovered ordering to extract data in sorted order
sort_data[str_order(sort_data)]Error in str_order(sort_data): could not find function "str_order"
# Directly extract sorted strings
str_sort(sort_data)Error in str_sort(sort_data): could not find function "str_sort"
# Extract in reverse sorted order
str_sort(sort_data, decreasing = TRUE)Error in str_sort(sort_data, decreasing = TRUE): could not find function "str_sort"
15.4 stringr: String Interpolation
first <- c("Luke", "Han", "Jean-Luc")
last <- c("Skywalker", "Solo", "Picard")
# Interpolate (insert variable values) into strings with str_glue()
str_glue("My name is {first}. {first} {last}.")Error in str_glue("My name is {first}. {first} {last}."): could not find function "str_glue"
minimum_age <- 18
over_minimum <- c(5, 17, 33)
# Interpolate the result of an expression into a string
str_glue("{first} {last} is {minimum_age + over_minimum} years old.")Error in str_glue("{first} {last} is {minimum_age + over_minimum} years old."): could not find function "str_glue"
num <- c(1:5)
# Interpolate the result of function calls
str_glue("The square root of {num} is {round(sqrt(num), 3)}.")Error in str_glue("The square root of {num} is {round(sqrt(num), 3)}."): could not find function "str_glue"
fuel_efficiency <- 30
# Interpolate strings using data from a data frame
mtcars %>% rownames_to_column("Model") %>%
filter(mpg > fuel_efficiency) %>%
str_glue_data("The {Model} gets {mpg} mpg.")Error in mtcars %>% rownames_to_column("Model") %>% filter(mpg > fuel_efficiency) %>% : could not find function "%>%"
15.5 stringr: String Matching
head(data,8) author score
1 butt_ghost 3
2 buntaro_pup 1
3 iidealized 2
4 [deleted] 1
5 stathibus 6
6 soulslicer0 2
7 swiftsecond 1
body
1 Hdf5. It's structured, it's easy to get data in and out, and it's fast. Plus it will scale if you ever get up there in dataset size.
2 yep, good point.
3 Google must have done (and is doing) serious internal research in ranking. I've heard they're pretty good at that and they've even made some money doing it :P
4 [deleted]
5 Sebastian Thrun's book, Probabilistic Robotics, goes through this in great detail. Get it, read it, make it your bible.
6 This. Such a legendary book. Kalman filters, particle filters, recursive Bayesian filters and a whole bunch of other stuff. I learnt so much. Read these 3 for starts from the book, then come back and ask the questions
7 Do you still need help?
# Detecting the presence of a pattern in strings
str_detect(data$body[1:100], pattern="deep")Error in str_detect(data$body[1:100], pattern = "deep"): could not find function "str_detect"
# Get the indices of matched strings
str_inds <- str_which(data$body[1:100], pattern="deep")Error in str_which(data$body[1:100], pattern = "deep"): could not find function "str_which"
str_indsError: object 'str_inds' not found
# Extract matched strings using the detected indices
data$body[str_inds]Error: object 'str_inds' not found
# Count the number of matches
str_count(data$body[1:100], "deep")Error in str_count(data$body[1:100], "deep"): could not find function "str_count"
# Get the position of matches
str_locate_all(data$body[1], "deep")Error in str_locate_all(data$body[1], "deep"): could not find function "str_locate_all"
# Get the first match found in each string as a character vector
str_extract(data$body[1:3], "deep|the|and")Error in str_extract(data$body[1:3], "deep|the|and"): could not find function "str_extract"
# Get the first match found in each string as a matrix
str_match(data$body[1:3], "deep|the|and")Error in str_match(data$body[1:3], "deep|the|and"): could not find function "str_match"
# Get all matches found in each string as a list of matrices
str_match_all(data$body[1:3], "deep|the|and")Error in str_match_all(data$body[1:3], "deep|the|and"): could not find function "str_match_all"
15.6 stringr: Subset and Replace Strings
head(data,8) author score
1 butt_ghost 3
2 buntaro_pup 1
3 iidealized 2
4 [deleted] 1
5 stathibus 6
6 soulslicer0 2
7 swiftsecond 1
body
1 Hdf5. It's structured, it's easy to get data in and out, and it's fast. Plus it will scale if you ever get up there in dataset size.
2 yep, good point.
3 Google must have done (and is doing) serious internal research in ranking. I've heard they're pretty good at that and they've even made some money doing it :P
4 [deleted]
5 Sebastian Thrun's book, Probabilistic Robotics, goes through this in great detail. Get it, read it, make it your bible.
6 This. Such a legendary book. Kalman filters, particle filters, recursive Bayesian filters and a whole bunch of other stuff. I learnt so much. Read these 3 for starts from the book, then come back and ask the questions
7 Do you still need help?
# Get a string subset based on character position
str_sub(data$body[1], start=1, end=100)Error in str_sub(data$body[1], start = 1, end = 100): could not find function "str_sub"
# Get a string subset based on words
word(data$body[1], start=1, end=10)Error in word(data$body[1], start = 1, end = 10): could not find function "word"
# Get the strings that contain a certain pattern
str_subset(data$body[1:100], pattern="deep")Error in str_subset(data$body[1:100], pattern = "deep"): could not find function "str_subset"
# Replace a substring with a new string by substring position
str_sub(data$body[1], start=1, end=100) <- str_to_upper(str_sub(data$body[1],
start=1,
end=100))Error in str_to_upper(str_sub(data$body[1], start = 1, end = 100)): could not find function "str_to_upper"
str_sub(data$body[1], start=1, end=100)Error in str_sub(data$body[1], start = 1, end = 100): could not find function "str_sub"
# Replace first occurrence of a substring with a new string by matching
str_replace(data$body[1], pattern="deep|DEEP", replacement="multi-layer")Error in str_replace(data$body[1], pattern = "deep|DEEP", replacement = "multi-layer"): could not find function "str_replace"
# Replace all occurrences of a substring with a new string by matching
str_replace_all(data$body[1], pattern="deep|DEEP", replacement="multi-layer")Error in str_replace_all(data$body[1], pattern = "deep|DEEP", replacement = "multi-layer"): could not find function "str_replace_all"
15.7 stringr: Viewing Strings
# Basic printing
print(data$body[1:10]) [1] "Hdf5. It's structured, it's easy to get data in and out, and it's fast. Plus it will scale if you ever get up there in dataset size."
[2] "yep, good point."
[3] "Google must have done (and is doing) serious internal research in ranking. I've heard they're pretty good at that and they've even made some money doing it :P"
[4] "[deleted]"
[5] "Sebastian Thrun's book, Probabilistic Robotics, goes through this in great detail. Get it, read it, make it your bible."
[6] "This. Such a legendary book. Kalman filters, particle filters, recursive Bayesian filters and a whole bunch of other stuff. I learnt so much. Read these 3 for starts from the book, then come back and ask the questions"
[7] "Do you still need help?"
[8] NA
[9] NA
[10] NA
deep_learning_posts <- data$body[str_which(data$body, "deep learning")]Error in str_which(data$body, "deep learning"): could not find function "str_which"
# View strings in HTML format with the first occurrence of a pattern highlighted
str_view(deep_learning_posts, pattern="deep")Error in str_view(deep_learning_posts, pattern = "deep"): could not find function "str_view"
# View strings in HTML format with all occurrences of a pattern highlighted
str_view_all(deep_learning_posts, pattern="deep")Error in str_view_all(deep_learning_posts, pattern = "deep"): could not find function "str_view_all"
# Format strings into paragraphs of a given width with str_wrap()
wrapped <- str_wrap(data$body[str_which(data$body, "deep learning")][1],
width = 50)Error in str_wrap(data$body[str_which(data$body, "deep learning")][1], : could not find function "str_wrap"
wrapped Error: object 'wrapped' not found
# Print wrapped string with output obeying newlines
wrapped %>% cat()Error in wrapped %>% cat(): could not find function "%>%"
# Display wrapped paragraph as HTML, inserting paragraph breaks
str_wrap(data$body[str_which(data$body, "deep learning")][1], width = 50) %>%
str_replace_all("\n", "<br>") %>%
str_view_all(pattern = "deep")Error in str_wrap(data$body[str_which(data$body, "deep learning")][1], : could not find function "%>%"