17  17. JSON Parsing Practice in R

These exercises ask you to retrieve specific information from JSON that has been processed by the fromJSON() function.

Each exercise includes some JSON and then specific questions that ask you to retrieve specific data from that JSON.

The questions are rated easy, medium, or harder based on the following:

easy questions: data is readily accessible by simply using list, dataframe and matrix notation

medium questions: you might need additional functions such as unlist() or others.

harder questions: require you to use either apply functions or loops to solve the problem.

library(jsonlite)

17.1 Exercise: Basic JSON Object Access

Answer the questions below by referring to the following JSON response from an API.

Click to see JSON
# API response stored in variable 'weather_data'
weather_data <- fromJSON('{
  "location": "Seattle",
  "current": {
    "temperature": 12.5,
    "conditions": "Cloudy",
    "wind": {
      "speed": 15.5,
      "direction": "NW"
    }
  },
  "forecast": [
    {"day": "Monday", "high": 14, "low": 8},
    {"day": "Tuesday", "high": 16, "low": 9},
    {"day": "Wednesday", "high": 15, "low": 7}
  ]
}')
Click to see fromJSON() output
# THE STRUCTURE
str(weather_data)
List of 3
 $ location: chr "Seattle"
 $ current :List of 3
  ..$ temperature: num 12.5
  ..$ conditions : chr "Cloudy"
  ..$ wind       :List of 2
  .. ..$ speed    : num 15.5
  .. ..$ direction: chr "NW"
 $ forecast:'data.frame':   3 obs. of  3 variables:
  ..$ day : chr [1:3] "Monday" "Tuesday" "Wednesday"
  ..$ high: int [1:3] 14 16 15
  ..$ low : int [1:3] 8 9 7
# THE DATA
weather_data
$location
[1] "Seattle"

$current
$current$temperature
[1] 12.5

$current$conditions
[1] "Cloudy"

$current$wind
$current$wind$speed
[1] 15.5

$current$wind$direction
[1] "NW"



$forecast
        day high low
1    Monday   14   8
2   Tuesday   16   9
3 Wednesday   15   7

17.1.1 Question (easy): Write R code to extract the current temperature.

Click for answer
weather_data$current$temperature
[1] 12.5
# or alternatively
weather_data[["current"]][["temperature"]]
[1] 12.5

17.1.2 Question (easy): Write R code to create a vector of all forecasted high temperatures.

Click for answer
weather_data$forecast$high
[1] 14 16 15
# or
weather_data$forecast[,"high"]
[1] 14 16 15

17.2 Exercise: E-commerce Product Data

Answer the questions below by referring to the following JSON response from an e-commerce API.

Click to see JSON
# API response stored in variable 'product_data'
product_data <- fromJSON('{
  "store": "TechStore",
  "products": [
    {
      "id": "p123",
      "name": "Wireless Headphones",
      "price": 89.99,
      "stock": 45,
      "colors": ["black", "white", "blue"],
      "specs": {
        "battery": "20 hours",
        "connectivity": "Bluetooth 5.0",
        "weight": "250g"
      }
    },
    {
      "id": "p124",
      "name": "Smart Watch",
      "price": 199.99,
      "stock": 28,
      "colors": ["black", "silver"],
      "specs": {
        "battery": "48 hours",
        "connectivity": "Bluetooth 5.0",
        "weight": "45g"
      }
    },
    {
      "id": "p125",
      "name": "Wireless Charger",
      "price": 29.99,
      "stock": 120,
      "colors": ["black", "white"],
      "specs": {
        "input": "QC 3.0",
        "output": "15W",
        "weight": "80g"
      }
    }
  ],
  "last_updated": "2024-02-20"
}')
Click to see fromJSON() output
# THE STRUCTURE
str(product_data)
List of 3
 $ store       : chr "TechStore"
 $ products    :'data.frame':   3 obs. of  6 variables:
  ..$ id    : chr [1:3] "p123" "p124" "p125"
  ..$ name  : chr [1:3] "Wireless Headphones" "Smart Watch" "Wireless Charger"
  ..$ price : num [1:3] 90 200 30
  ..$ stock : int [1:3] 45 28 120
  ..$ colors:List of 3
  .. ..$ : chr [1:3] "black" "white" "blue"
  .. ..$ : chr [1:2] "black" "silver"
  .. ..$ : chr [1:2] "black" "white"
  ..$ specs :'data.frame':  3 obs. of  5 variables:
  .. ..$ battery     : chr [1:3] "20 hours" "48 hours" NA
  .. ..$ connectivity: chr [1:3] "Bluetooth 5.0" "Bluetooth 5.0" NA
  .. ..$ weight      : chr [1:3] "250g" "45g" "80g"
  .. ..$ input       : chr [1:3] NA NA "QC 3.0"
  .. ..$ output      : chr [1:3] NA NA "15W"
 $ last_updated: chr "2024-02-20"
# THE DATA
product_data
$store
[1] "TechStore"

$products
    id                name  price stock             colors specs.battery
1 p123 Wireless Headphones  89.99    45 black, white, blue      20 hours
2 p124         Smart Watch 199.99    28      black, silver      48 hours
3 p125    Wireless Charger  29.99   120       black, white          <NA>
  specs.connectivity specs.weight specs.input specs.output
1      Bluetooth 5.0         250g        <NA>         <NA>
2      Bluetooth 5.0          45g        <NA>         <NA>
3               <NA>          80g      QC 3.0          15W

$last_updated
[1] "2024-02-20"

17.2.1 Question (easy): Write R code to extract the name and price of the second product.

Click for answer
# Extract name
product_data$products$name[2]
[1] "Smart Watch"
# or
product_data$products[2, "name"]
[1] "Smart Watch"
# Extract price
product_data$products$price[2]
[1] 199.99
# or
product_data$products[2, "price"]
[1] 199.99
# Combined as a named vector. Note: an atomic vector holds a single type, so
# c() coerces the numeric price to character (hence the quoted "199.99" below).
c(name = product_data$products$name[2], price = product_data$products$price[2])
         name         price 
"Smart Watch"      "199.99" 

17.2.2 Question (easy): Write R code to extract all available colors for the Wireless Headphones product.

Click for answer
# Find the row index for the Wireless Headphones
headphone_index <- which(product_data$products$name == "Wireless Headphones")

# Extract the colors
product_data$products$colors[[headphone_index]]
[1] "black" "white" "blue" 

17.2.3 Question (easy): Write R code to create a data frame showing each product’s name, price, and stock.

Click for answer
data.frame(
  name = product_data$products$name,
  price = product_data$products$price,
  stock = product_data$products$stock
)
                 name  price stock
1 Wireless Headphones  89.99    45
2         Smart Watch 199.99    28
3    Wireless Charger  29.99   120
# Alternatively
product_data$products[, c("name", "price", "stock")]
                 name  price stock
1 Wireless Headphones  89.99    45
2         Smart Watch 199.99    28
3    Wireless Charger  29.99   120

17.3 Exercise: Restaurant Menu API

Answer the questions below by referring to the following JSON response from a restaurant menu API.

Click to see JSON
# API response stored in variable 'menu_data'
menu_data <- fromJSON('{
  "restaurant": {
    "name": "Flavor Haven",
    "location": "123 Main St",
    "hours": "11am-10pm",
    "menu": {
      "appetizers": [
        {"item": "Garlic Bread", "price": 5.99, "vegetarian": true},
        {"item": "Buffalo Wings", "price": 9.99, "vegetarian": false},
        {"item": "Mozzarella Sticks", "price": 7.99, "vegetarian": true}
      ],
      "main_courses": [
        {"item": "Margherita Pizza", "price": 14.99, "vegetarian": true, "sizes": ["small", "medium", "large"]},
        {"item": "Beef Burger", "price": 12.99, "vegetarian": false, "sides": ["fries", "salad"]},
        {"item": "Fettuccine Alfredo", "price": 13.99, "vegetarian": true, "options": ["add chicken"]}
      ],
      "desserts": [
        {"item": "Chocolate Cake", "price": 6.99, "vegetarian": true},
        {"item": "Ice Cream", "price": 4.99, "vegetarian": true, "flavors": ["vanilla", "chocolate", "strawberry"]}
      ]
    }
  }
}')
Click to see fromJSON() output
# THE STRUCTURE
str(menu_data)
List of 1
 $ restaurant:List of 4
  ..$ name    : chr "Flavor Haven"
  ..$ location: chr "123 Main St"
  ..$ hours   : chr "11am-10pm"
  ..$ menu    :List of 3
  .. ..$ appetizers  :'data.frame': 3 obs. of  3 variables:
  .. .. ..$ item      : chr [1:3] "Garlic Bread" "Buffalo Wings" "Mozzarella Sticks"
  .. .. ..$ price     : num [1:3] 5.99 9.99 7.99
  .. .. ..$ vegetarian: logi [1:3] TRUE FALSE TRUE
  .. ..$ main_courses:'data.frame': 3 obs. of  6 variables:
  .. .. ..$ item      : chr [1:3] "Margherita Pizza" "Beef Burger" "Fettuccine Alfredo"
  .. .. ..$ price     : num [1:3] 15 13 14
  .. .. ..$ vegetarian: logi [1:3] TRUE FALSE TRUE
  .. .. ..$ sizes     :List of 3
  .. .. .. ..$ : chr [1:3] "small" "medium" "large"
  .. .. .. ..$ : NULL
  .. .. .. ..$ : NULL
  .. .. ..$ sides     :List of 3
  .. .. .. ..$ : NULL
  .. .. .. ..$ : chr [1:2] "fries" "salad"
  .. .. .. ..$ : NULL
  .. .. ..$ options   :List of 3
  .. .. .. ..$ : NULL
  .. .. .. ..$ : NULL
  .. .. .. ..$ : chr "add chicken"
  .. ..$ desserts    :'data.frame': 2 obs. of  4 variables:
  .. .. ..$ item      : chr [1:2] "Chocolate Cake" "Ice Cream"
  .. .. ..$ price     : num [1:2] 6.99 4.99
  .. .. ..$ vegetarian: logi [1:2] TRUE TRUE
  .. .. ..$ flavors   :List of 2
  .. .. .. ..$ : NULL
  .. .. .. ..$ : chr [1:3] "vanilla" "chocolate" "strawberry"
# THE DATA
menu_data
$restaurant
$restaurant$name
[1] "Flavor Haven"

$restaurant$location
[1] "123 Main St"

$restaurant$hours
[1] "11am-10pm"

$restaurant$menu
$restaurant$menu$appetizers
               item price vegetarian
1      Garlic Bread  5.99       TRUE
2     Buffalo Wings  9.99      FALSE
3 Mozzarella Sticks  7.99       TRUE

$restaurant$menu$main_courses
                item price vegetarian                sizes        sides
1   Margherita Pizza 14.99       TRUE small, medium, large         NULL
2        Beef Burger 12.99      FALSE                 NULL fries, salad
3 Fettuccine Alfredo 13.99       TRUE                 NULL         NULL
      options
1        NULL
2        NULL
3 add chicken

$restaurant$menu$desserts
            item price vegetarian                        flavors
1 Chocolate Cake  6.99       TRUE                           NULL
2      Ice Cream  4.99       TRUE vanilla, chocolate, strawberry

17.3.1 Question (easy): Write R code to extract the name and location of the restaurant.

Click for answer
# Extract name
menu_data$restaurant$name
[1] "Flavor Haven"
# Extract location
menu_data$restaurant$location
[1] "123 Main St"
# Combined as a named vector
c(name = menu_data$restaurant$name, location = menu_data$restaurant$location)
          name       location 
"Flavor Haven"  "123 Main St" 

17.3.2 Question (easy): Write R code to extract all vegetarian items from the appetizers section.

Click for answer
# Short alias for the deeply nested appetizers data frame
appetizers <- menu_data$restaurant$menu$appetizers

# Filter the appetizers data frame for vegetarian items. `vegetarian` is
# already a logical column, so it can index rows directly; comparing it with
# `== TRUE` is redundant.
vegetarian_apps <- appetizers[appetizers$vegetarian, ]

# Just the names of vegetarian appetizers
vegetarian_app_names <- appetizers$item[appetizers$vegetarian]

# Show the result
vegetarian_apps
               item price vegetarian
1      Garlic Bread  5.99       TRUE
3 Mozzarella Sticks  7.99       TRUE
# or
vegetarian_app_names
[1] "Garlic Bread"      "Mozzarella Sticks"

17.3.3 Question (easy): Write R code to calculate the average price of all main courses.

Click for answer
mean(menu_data$restaurant$menu$main_courses$price)
[1] 13.99

17.4 Exercise: Nested Arrays

Answer the questions below by referring to the following JSON response from an API.

Click to see JSON
# API response stored in variable 'library_data'
library_data <- fromJSON('{
  "library": {
    "name": "Central Library",
    "books": [
      {
        "title": "Data Science Basics",
        "authors": ["Smith, J.", "Jones, K."],
        "categories": ["programming", "statistics"],
        "ratings": [4.5, 4.8, 4.2]
      },
      {
        "title": "Statistics Using R",
        "authors": ["Smith, J.", "Jones, K."],
        "categories": ["statistics", "R"],
        "ratings": [4.5, 4.8, 4.2]
      },
      {
        "title": "R Programming Guide",
        "authors": ["Wilson, M."],
        "categories": ["programming", "R"],
        "ratings": [4.7, 4.6, 4.9, 4.5]
      }
    ]
  }
}')
Click to see fromJSON() output
# THE STRUCTURE
str(library_data)
List of 1
 $ library:List of 2
  ..$ name : chr "Central Library"
  ..$ books:'data.frame':   3 obs. of  4 variables:
  .. ..$ title     : chr [1:3] "Data Science Basics" "Statistics Using R" "R Programming Guide"
  .. ..$ authors   :List of 3
  .. .. ..$ : chr [1:2] "Smith, J." "Jones, K."
  .. .. ..$ : chr [1:2] "Smith, J." "Jones, K."
  .. .. ..$ : chr "Wilson, M."
  .. ..$ categories:List of 3
  .. .. ..$ : chr [1:2] "programming" "statistics"
  .. .. ..$ : chr [1:2] "statistics" "R"
  .. .. ..$ : chr [1:2] "programming" "R"
  .. ..$ ratings   :List of 3
  .. .. ..$ : num [1:3] 4.5 4.8 4.2
  .. .. ..$ : num [1:3] 4.5 4.8 4.2
  .. .. ..$ : num [1:4] 4.7 4.6 4.9 4.5
# THE DATA
library_data
$library
$library$name
[1] "Central Library"

$library$books
                title              authors              categories
1 Data Science Basics Smith, J., Jones, K. programming, statistics
2  Statistics Using R Smith, J., Jones, K.           statistics, R
3 R Programming Guide           Wilson, M.          programming, R
             ratings
1      4.5, 4.8, 4.2
2      4.5, 4.8, 4.2
3 4.7, 4.6, 4.9, 4.5

17.4.1 Question (medium): Write code to create a vector of all unique book categories

across all books.

Click for answer
unique(unlist(library_data$library$books$categories))
[1] "programming" "statistics"  "R"          

17.4.2 Question (harder): get named vector of ratings for each book

Calculate the average rating for each book and store it in a named vector where names are book titles.

Click for answer
# Average each book's ratings. vapply() (unlike sapply()) guarantees a numeric
# vector with exactly one value per book.
avg_ratings <- vapply(library_data$library$books$ratings, mean, numeric(1))

# Label each average with its book's title
names(avg_ratings) <- library_data$library$books$title
avg_ratings
Data Science Basics  Statistics Using R R Programming Guide 
              4.500               4.500               4.675 

17.5 Exercise: Complex Nested Structures

Answer the questions below by referring to the following JSON response from a social media API.
The API was called with the following URL (the space in the user's name is URL-encoded as %20): https://api.somesite.com/userinfo/Alex%20Chen

Click to see JSON
# API response stored in variable 'social_data'
# The response includes info about a single user "Alex Chen"
social_data <- fromJSON('{
  "user": {
    "id": "u123",
    "name": "Alex Chen",
    "posts": [
      {
        "id": "p1",
        "content": "Learning R!",
        "timestamp": "2024-02-15",
        "comments": [
          {
            "user": "Carol Wu",
            "text": "Check out tidyverse",
            "likes": 5
          },
          {
            "user": "Bob Smith",
            "text": "Great choice!",
            "likes": 3
          },
          {
            "user": "Carol Wu",
            "text": "tidyverse makes many things easier",
            "likes": 2
          }
        ]
      },
      {
        "id": "p2",
        "content": "JSON parsing is fun",
        "timestamp": "2024-02-16",
        "comments": [
          {
            "user": "Mike Jones",
            "text": "Try jsonlite",
            "likes": 4
          }
        ]
      },
      {
        "id": "p3",
        "content": "Coffee is key :)",
        "timestamp": "2024-02-16",
        "comments": [
          {
            "user": "Carol Wu",
            "text": "I totally agree!",
            "likes": 1
          },
          {
            "user": "Mike Jones",
            "text": "yessss",
            "likes": 0
          }
        ]
      }

    ]
  }
}')
Click to see fromJSON() output
# THE STRUCTURE
str(social_data)
List of 1
 $ user:List of 3
  ..$ id   : chr "u123"
  ..$ name : chr "Alex Chen"
  ..$ posts:'data.frame':   3 obs. of  4 variables:
  .. ..$ id       : chr [1:3] "p1" "p2" "p3"
  .. ..$ content  : chr [1:3] "Learning R!" "JSON parsing is fun" "Coffee is key :)"
  .. ..$ timestamp: chr [1:3] "2024-02-15" "2024-02-16" "2024-02-16"
  .. ..$ comments :List of 3
  .. .. ..$ :'data.frame':  3 obs. of  3 variables:
  .. .. .. ..$ user : chr [1:3] "Carol Wu" "Bob Smith" "Carol Wu"
  .. .. .. ..$ text : chr [1:3] "Check out tidyverse" "Great choice!" "tidyverse makes many things easier"
  .. .. .. ..$ likes: int [1:3] 5 3 2
  .. .. ..$ :'data.frame':  1 obs. of  3 variables:
  .. .. .. ..$ user : chr "Mike Jones"
  .. .. .. ..$ text : chr "Try jsonlite"
  .. .. .. ..$ likes: int 4
  .. .. ..$ :'data.frame':  2 obs. of  3 variables:
  .. .. .. ..$ user : chr [1:2] "Carol Wu" "Mike Jones"
  .. .. .. ..$ text : chr [1:2] "I totally agree!" "yessss"
  .. .. .. ..$ likes: int [1:2] 1 0
# THE DATA
social_data
$user
$user$id
[1] "u123"

$user$name
[1] "Alex Chen"

$user$posts
  id             content  timestamp
1 p1         Learning R! 2024-02-15
2 p2 JSON parsing is fun 2024-02-16
3 p3    Coffee is key :) 2024-02-16
                                                                                                        comments
1 Carol Wu, Bob Smith, Carol Wu, Check out tidyverse, Great choice!, tidyverse makes many things easier, 5, 3, 2
2                                                                                    Mike Jones, Try jsonlite, 4
3                                                           Carol Wu, Mike Jones, I totally agree!, yessss, 1, 0

17.5.1 Question (harder): Create a data frame containing each comment’s user and number of likes.

Click for answer
# Method 1 - extract vectors separately and combine into a dataframe.
# lapply() always returns a list, so unlist() flattens it reliably. sapply()
# would instead return a matrix whenever every post has the same number (> 1)
# of comments, and data.frame() would then produce one column per post.
comments <- social_data$user$posts$comments
users <- unlist(lapply(comments, function(x) x$user))
likes <- unlist(lapply(comments, function(x) x$likes))
data.frame(user = users, likes = likes)
        user likes
1   Carol Wu     5
2  Bob Smith     3
3   Carol Wu     2
4 Mike Jones     4
5   Carol Wu     1
6 Mike Jones     0
# Method 2: Using do.call to combine list of data frames.
# The comments column is already a list of data frames, so it can be handed
# to rbind() directly; no lapply() pass is needed.
do.call(rbind, social_data$user$posts$comments)[, c("user", "likes")]
        user likes
1   Carol Wu     5
2  Bob Smith     3
3   Carol Wu     2
4 Mike Jones     4
5   Carol Wu     1
6 Mike Jones     0
# Method 3: Using tidyverse if available
library(tidyverse)
Warning: package 'stringr' was built under R version 4.4.3
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter()  masks stats::filter()
✖ purrr::flatten() masks jsonlite::flatten()
✖ dplyr::lag()     masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
bind_rows(social_data$user$posts$comments)[,c("user","likes")]
        user likes
1   Carol Wu     5
2  Bob Smith     3
3   Carol Wu     2
4 Mike Jones     4
5   Carol Wu     1
6 Mike Jones     0

17.5.2 Question (harder): function returning all comments by specific user

Create a function that takes the social_data object and a username as parameters, then returns all comments made by that user.

Click for answer
# Return the text of every comment written by `username`.
#
# data:     parsed API response; the comments are read from
#           data$user$posts$comments, a list with one element per post, each
#           holding `user` and `text` columns.
# username: name of the commenting user to look for.
#
# Returns a character vector of comment texts in post order, or character(0)
# when the user has no comments.
find_user_comments <- function(data, username) {

  # comments_per_post is a list with one entry per post. Each entry is a
  # dataframe containing the columns: user, text, likes
  comments_per_post <- data$user$posts$comments

  # For each post keep only the text of this user's comments. Columns are
  # accessed with `$` rather than df[rows, "text"] so that a post without
  # comments (which may arrive as an empty list instead of a dataframe)
  # contributes nothing instead of raising an error. %in% treats a missing
  # user name as "no match" rather than producing NA.
  texts_per_post <- lapply(comments_per_post, function(df) {
    df$text[df$user %in% username]
  })

  # Combine all of the vectors in the list into a single vector.
  # as.character() turns the NULL that unlist() returns for an empty list
  # into character(0), so the return type is always the same.
  as.character(unlist(texts_per_post, use.names = FALSE))
}

# Test the function
find_user_comments(social_data, "Carol Wu")
[1] "Check out tidyverse"                "tidyverse makes many things easier"
[3] "I totally agree!"                  

17.6 Exercise: Working with Lists of Data Frames

Answer the questions below by referring to the following JSON response from a fitness tracking API:

Click to see JSON
# API response stored in variable 'fitness_data'
fitness_data <- fromJSON('{
  "user_id": "f789",
  "tracking_data": {
    "daily_steps": [
      {"date": "2024-02-14", "steps": 8432, "active_minutes": 45},
      {"date": "2024-02-15", "steps": 10234, "active_minutes": 62},
      {"date": "2024-02-16", "steps": 7321, "active_minutes": 38}
    ],
    "workouts": [
      {
        "type": "running",
        "sessions": [
          {"date": "2024-02-14", "duration": 30, "distance": 5.2},
          {"date": "2024-02-14", "duration": 25, "distance": 3.9},
          {"date": "2024-02-16", "duration": 25, "distance": 4.1}
        ]
      },
      {
        "type": "cycling",
        "sessions": [
          {"date": "2024-02-14", "duration": 30, "distance": 9.1},
          {"date": "2024-02-15", "duration": 45, "distance": 15.3}
        ]
      }
    ]
  }
}')
Click to see fromJSON() output
# THE STRUCTURE
str(fitness_data)
List of 2
 $ user_id      : chr "f789"
 $ tracking_data:List of 2
  ..$ daily_steps:'data.frame': 3 obs. of  3 variables:
  .. ..$ date          : chr [1:3] "2024-02-14" "2024-02-15" "2024-02-16"
  .. ..$ steps         : int [1:3] 8432 10234 7321
  .. ..$ active_minutes: int [1:3] 45 62 38
  ..$ workouts   :'data.frame': 2 obs. of  2 variables:
  .. ..$ type    : chr [1:2] "running" "cycling"
  .. ..$ sessions:List of 2
  .. .. ..$ :'data.frame':  3 obs. of  3 variables:
  .. .. .. ..$ date    : chr [1:3] "2024-02-14" "2024-02-14" "2024-02-16"
  .. .. .. ..$ duration: int [1:3] 30 25 25
  .. .. .. ..$ distance: num [1:3] 5.2 3.9 4.1
  .. .. ..$ :'data.frame':  2 obs. of  3 variables:
  .. .. .. ..$ date    : chr [1:2] "2024-02-14" "2024-02-15"
  .. .. .. ..$ duration: int [1:2] 30 45
  .. .. .. ..$ distance: num [1:2] 9.1 15.3
# THE DATA
fitness_data
$user_id
[1] "f789"

$tracking_data
$tracking_data$daily_steps
        date steps active_minutes
1 2024-02-14  8432             45
2 2024-02-15 10234             62
3 2024-02-16  7321             38

$tracking_data$workouts
     type                                                      sessions
1 running 2024-02-14, 2024-02-14, 2024-02-16, 30, 25, 25, 5.2, 3.9, 4.1
2 cycling                     2024-02-14, 2024-02-15, 30, 45, 9.1, 15.3

17.6.1 Question (harder): Create a data frame with total distance covered for each type of workout.

Click for answer
# One row per workout type. vapply() guarantees a numeric vector with exactly
# one total per workout, so the column type does not depend on the input.
workout_summary <- data.frame(
  type = fitness_data$tracking_data$workouts$type,
  total_distance = vapply(
    fitness_data$tracking_data$workouts$sessions,
    function(sessions) sum(sessions$distance),
    numeric(1)
  )
)

workout_summary
     type total_distance
1 running           13.2
2 cycling           24.4