TUTORIAL 6 - APIs and JSON data

Visualizations for Tabular Data

Tabular data…is maybe a bit more nuanced?
Tutorial
JSON
Tables
API
Author

Barrie Robison

Published

February 23, 2024

Code
library(tidyr)
library(dplyr)
library(repurrrsive)
library(tibblify)
library(purrr)
library(tidyverse)
library(readxl)
library(rjson)
library(jsonlite)
library(ggplot2)


# Read the draft records from disk. jsonlite is attached after rjson, so the
# unqualified fromJSON() in this script resolves to jsonlite's version; the
# call is namespaced here to make that explicit.
Bio <- jsonlite::fromJSON("draft.json")

# Flatten the parsed JSON into a data frame; the columns used below carry a
# "data." prefix.
noway <- as.data.frame(Bio)

# Keep only the identifying and physical attributes of each draft pick.
players <- select(
  noway,
  data.draftYear, data.playerId, data.playerName,
  data.roundNumber, data.overallPickNumber, data.pickInRound,
  data.height, data.weight, data.position
)

# Discard incomplete records and expose the id under the shorter name
# `playerId`, which later steps use as the join key.
cleanplayers <- players %>%
  drop_na() %>%
  mutate(playerId = data.playerId)

# One row per player: the first record for each playerId is kept.
cleanplayers2 <- distinct(cleanplayers, playerId, .keep_all = TRUE)

# Skaters are every remaining player whose position is not "G".
skaters <- filter(cleanplayers2, data.position != "G")
Code
# Base URL of the stats API queried for per-player season splits.
# NOTE(review): confirm this host is still reachable before a long run.
api_url <- "https://statsapi.web.nhl.com"

# Skaters drafted after 1999 are the only players whose stats are fetched.
recentdrafts <- skaters %>%
  filter(data.draftYear > 1999)
recentdrafts$playerId <- recentdrafts$data.playerId
playerIds <- as.list(recentdrafts$data.playerId)

# Fetch the year-by-year splits for one player and keep the rows whose league
# name is "National Hockey League".
#
# Returns a tibble with columns season, games, points, goals, assists, name
# and playerId, or NULL when the player has no splits or the request/parsing
# fails. Failures raise a warning instead of aborting, so one bad response
# does not throw away every player fetched before it.
fetch_nhl_seasons <- function(player_id) {
  endpoint <- paste0("/api/v1/people/", player_id, "/stats?stats=yearByYear")
  url_json <- paste0(api_url, endpoint)

  tryCatch(
    {
      response <- httr::GET(url_json)
      # Turn HTTP error statuses into R errors so they are reported below.
      httr::stop_for_status(response)
      raw_json <- httr::content(response)

      splits <- raw_json$stats[[1]]$splits
      if (length(splits) == 0) {
        # Nothing recorded for this player.
        NULL
      } else {
        playerstats <- unnest_wider(tibblify(splits), col = c(stat, league))
        playerstats$playerId <- player_id
        playerstats %>%
          filter(name == "National Hockey League") %>%
          select(season, games, points, goals, assists, name, playerId)
      }
    },
    error = function(e) {
      warning(
        "Skipping player ", player_id, ": ", conditionMessage(e),
        call. = FALSE
      )
      NULL
    }
  )
}

# Collect the per-player tables in a preallocated list and bind them once at
# the end; growing a data frame with rbind() inside the loop re-copies all
# accumulated rows on every iteration.
season_tables <- vector("list", length(playerIds))
i <- 0
for (i in seq_along(playerIds)) {
  player_seasons <- fetch_nhl_seasons(playerIds[[i]])
  # Assigning NULL with [[<- would delete the slot, so only store real results.
  if (!is.null(player_seasons)) {
    season_tables[[i]] <- player_seasons
  }
  print(i)  # progress indicator
}
testloop <- bind_rows(season_tables)

# Without a playerId column the join below cannot work; fail with a clear
# message rather than an obscure join error.
if (!"playerId" %in% names(testloop)) {
  stop(
    "No season statistics could be retrieved for any player; ",
    "check the warnings above and the API URL.",
    call. = FALSE
  )
}

# Attach draft information to every season row. The full join also keeps
# drafted players that have no matching season rows.
d2000on <- full_join(testloop, recentdrafts, by = "playerId")
Code
# Work on a copy of the joined season/draft table and save it to disk so the
# API loop does not have to be re-run.
dataIwant <- d2000on
write.csv(x = dataIwant, file = "NHLskaterstats.csv")
Code
library(dplyr)

# Express each season relative to the draft year.
#   seasonshort: first four characters of `season`, as a number.
#   postdraft:   "dY <n>", where n = seasonshort - draft year. Rows without a
#                season become "dY NA"; later steps filter those out by name.
dataIwant2 <- dataIwant %>%
  mutate(
    seasonshort = as.numeric(str_sub(season, start = 1, end = 4)),
    postdraft = paste("dY", seasonshort - data.draftYear)
  )

# Total games, goals, assists and points per player and post-draft year.
# `.groups = "drop"` returns an ungrouped table, so no grouping leaks into the
# following steps and summarise() does not print its regrouping message.
dataIwant3 <- dataIwant2 %>%
  group_by(
    playerId, data.draftYear, data.playerName, data.roundNumber,
    data.pickInRound, data.overallPickNumber, data.position, data.height,
    data.weight, postdraft
  ) %>%
  summarise(
    totgames = sum(games),
    totgoals = sum(goals),
    totassist = sum(assists),
    totpoint = sum(points),
    .groups = "drop"
  )

# One row per player, one column per statistic and post-draft year
# (e.g. "totgames_dY 3"); combinations with no data are filled with 0.
datawide <- dataIwant3 %>%
  pivot_wider(
    names_from = postdraft,
    values_from = c(totgames, totgoals, totassist, totpoint),
    values_fill = 0
  )

# Join the wide per-player totals back onto every drafted player. Columns
# shared by both tables get ".x" (from cleanplayers) and ".y" (from datawide)
# suffixes.
allplayerswide <- full_join(cleanplayers, datawide, by = "playerId")

# The statistic columns are identified by name rather than by position, so the
# code keeps working when the number of post-draft seasons in the data changes.
stat_col_pattern <- "^tot(games|goals|assist|point)_"

# Players without any fetched statistics get 0 instead of NA.
allplayerswide <- allplayerswide %>%
  mutate(across(matches(stat_col_pattern), ~ replace_na(.x, 0)))

# Short, readable names for the draft attributes taken from cleanplayers.
lookup <- c(
  name = "data.playerName.x",
  draftyear = "data.draftYear.x",
  round = "data.roundNumber.x",
  overall = "data.overallPickNumber.x",
  pickinRound = "data.pickInRound.x",
  height = "data.height.x",
  weight = "data.weight.x",
  position = "data.position.x"
)

testthing <- rename(allplayerswide, all_of(lookup))

# Keep the nine identifying columns followed by the statistic columns; the
# redundant data.playerId and the ".y" duplicates are dropped.
keepthis <- testthing %>%
  select(
    draftyear, name, round, overall, pickinRound,
    height, weight, position, playerId,
    matches(stat_col_pattern)
  )

# Identifying columns carried alongside every statistic (the first nine
# columns of `keepthis`).
id_cols <- c(
  "draftyear", "name", "round", "overall", "pickinRound",
  "height", "weight", "position", "playerId"
)

# Keep the id columns plus every column whose name starts with `stat_prefix`.
select_stat <- function(wide, stat_prefix) {
  select(wide, all_of(id_cols), starts_with(stat_prefix))
}

# Reshape one statistic from wide ("<prefix>_dY <n>" columns) to long form:
# one row per player and post-draft year, with the year in numeric `postdraft`
# and the value in a column named `value_name`. The "dY NA" column, which holds
# rows that had no season, is dropped.
lengthen_stat <- function(stat_wide, stat_prefix, value_name) {
  stat_wide %>%
    pivot_longer(
      cols = starts_with(stat_prefix),
      names_to = "postdraft",
      names_prefix = paste0(stat_prefix, "_dY "),
      values_to = value_name
    ) %>%
    filter(postdraft != "NA") %>%
    mutate(postdraft = as.numeric(postdraft))
}

games <- select_stat(keepthis, "totgames")
gameslong <- lengthen_stat(games, "totgames", "NHLgames")

points <- select_stat(keepthis, "totpoint")
pointslong <- lengthen_stat(points, "totpoint", "points")

assists <- select_stat(keepthis, "totassist")
assistslong <- lengthen_stat(assists, "totassist", "assists")

goals <- select_stat(keepthis, "totgoals")
goalslong <- lengthen_stat(goals, "totgoals", "goals")



# Every long table shares the same identifying columns plus the post-draft
# year; together they form the join key.
join_keys <- c(
  "playerId", "draftyear", "name", "round", "pickinRound",
  "height", "weight", "position", "overall", "postdraft"
)

# Starting from games, bolt on goals, then assists, then points, so each row
# carries all four statistics for one player and post-draft year.
gamegoal <- gameslong %>%
  left_join(goalslong, by = join_keys)
gga <- gamegoal %>%
  left_join(assistslong, by = join_keys)
ggap <- gga %>%
  left_join(pointslong, by = join_keys)




# Position codes that are all reported as "Forward".
forward_codes <- c("C", "LW", "RW", "F", "C/LW", "C/RW", "LW/RW")

# Collapse the raw position codes into three readable labels; any other value
# is left untouched.
statslong <- ggap %>%
  mutate(
    position = replace(position, position == "G", "Goaltender"),
    position = replace(position, position == "D", "Defense"),
    position = replace(position, position %in% forward_codes, "Forward")
  )
  

# Restrict to picks from rounds 1-7 made after the 2000 draft.
# NOTE(review): the API step keeps draft years > 1999 while this keeps > 2000,
# so the 2000 draft class is fetched but dropped here - confirm this is
# intended.
actualdata <- filter(statslong, round < 8 & draftyear > 2000)

# One row per player and post-draft year; the first occurrence is kept.
actualdata2 <- distinct(actualdata, playerId, postdraft, .keep_all = TRUE)

# Final tidy table used for the visualizations.
write.csv(x = actualdata2, file = "NHLdraftstats.csv")