This document describes the process for updating USGDPpresidents {Ecdat}.
Start by checking the span of years in USGDPpresidents:
library(Ecdat)
## Loading required package: Ecfun
##
## Attaching package: 'Ecfun'
## The following object is masked from 'package:base':
##
## sign
##
## Attaching package: 'Ecdat'
## The following object is masked from 'package:datasets':
##
## Orange
# First and last year currently covered by USGDPpresidents;
# assigned first, then displayed.
rngYrs <- range(USGDPpresidents$Year)
print(rngYrs)
## [1] 1610 2018
Next download “GDP - US” and “CPI - US” from Measuring Worth. On 2019-04-06 this produced two csv files, which I downloaded and copied into the working directory:
# Display the working directory that is searched for csv files below.
print(getwd())
## [1] "/private/var/folders/mh/mrm_14nx19g13lsnj9zmvwjr0000gn/T/RtmpVizbKS/Rbuild4b0f13bed01a/Ecfun/vignettes"
# Names of all csv files found in the working directory, then displayed.
csv2 <- dir(pattern='\\.csv$')
print(csv2)
## character(0)
# Continue with the update only when exactly two csv files were found.
Update0 <- (length(csv2)==2)
The following assumes there are only 2 csv files in the working directory at this time, with the CPI being the first (in lexicographical order). If this is not correct, something needs to change for the following to work properly.
Read these two files:
# Update stays FALSE unless the downloaded files reach past the
# last year already present in USGDPpresidents.
Update <- FALSE
if(Update0){
  # First csv file (expected to be the CPI): skip its two leading lines.
  USCPI <- read.csv(csv2[1], skip=2)
  str(USCPI)
  # Second csv file (expected to be the GDP): skip its one leading line.
  USGDP. <- read.csv(csv2[2], skip=1)
  str(USGDP.)
  library(Ecfun)
  # The asNumericDF() result is what all later GDP steps read from.
  USGDP <- asNumericDF(USGDP.)
  rngCPIyrs <- range(USCPI$Year)
  print(rngCPIyrs)
  rngGDPyrs <- range(USGDP$Year)
  print(rngGDPyrs)
  # Latest year found in either download.
  endYr <- max(rngCPIyrs, rngGDPyrs)
  if(endYr>rngYrs[2]){
    Update <- TRUE
    print(Update)
  }
}
If Update, create a local copy of USGDPpresidents with the additional rows required to hold the new data:
if(Update){
  # One additional row is needed per year beyond the current last year.
  rowsNeeded <- (endYr - rngYrs[2])
  Nold <- nrow(USGDPpresidents)
  # Keep rows 1..Nold, then repeat the last row once per new year
  # as a placeholder to be overwritten later.
  iRep <- c(1:Nold, rep.int(Nold, rowsNeeded))
  USGDPp2 <- USGDPpresidents[iRep, ]
}
Fix the Year and insert NAs for all other columns for the new rows:
if(Update){
  # Row positions of the placeholder rows appended for the new years.
  iNew <- Nold + seq(1, rowsNeeded)
  # Number the new rows with the consecutive years after the old last year.
  USGDPp2$Year[iNew] <- seq(rngYrs[2]+1, endYr)
  rownames(USGDPp2) <- USGDPp2$Year
  # Every column except the first is blanked in the new rows.
  USGDPp2[iNew, -1] <- NA
}
Now replace CPI by the new numbers:
if(Update){
  # Rows of the extended table whose Year appears in the new CPI download.
  selCPI <- (USGDPp2$Year %in% USCPI$Year)
  # Refuse to continue if an existing CPI value belongs to a year that
  # the new download does not cover. The column is addressed by name,
  # consistent with the assignment below.
  if(any(!is.na(USGDPp2[!selCPI, 'CPI']))){
    stop('ERROR: There are CPI numbers ',
         'in the current USGDPpresidents ',
         'that are not in the new. ',
         'Manual review required.')
  }
  # Align by Year rather than by position, so extra or reordered rows
  # in the download cannot shift values into the wrong year.
  iCPI <- match(USGDPp2$Year[selCPI], USCPI$Year)
  # The second column of the download holds the CPI values.
  USGDPp2$CPI[selCPI] <- USCPI[iCPI, 2]
}
Does USGDPpresidents.Rd need to be updated to reflect the proper reference years for the CPI?
# Display the first four lines of the CPI file so its header text
# can be read by eye.
if(Update) print(readLines(csv2[1], n=4))
If this says “Average 1982-84 = 100”, it should be good. Otherwise that (and this) should be updated.
Now let’s update GDPdeflator:
if(Update){
  # Rows of the extended table whose Year appears in the GDP download.
  selGDP <- (USGDPp2$Year %in% USGDP$Year)
  # Abort when an existing deflator value belongs to a year that the
  # new download does not cover.
  if(!all(is.na(USGDPp2[!selGDP, 'GDPdeflator']))){
    stop('ERROR: There are GDPdeflator numbers ',
         'in the current USGDPpresidents ',
         'that are not in the new. ',
         'Manual review required.')
  }
  # The deflator column of the download is located by name.
  selDefl <- grep('Deflator', names(USGDP))
  USGDPp2[['GDPdeflator']][selGDP] <- USGDP[, selDefl]
  # Show the matched column name so its index year can be read off.
  print(names(USGDP)[selDefl])
}
Compare the index year of “GDP.Deflator” with that in USGDPpresidents.Rd: If they are different, fix USGDPpresidents.Rd.
Now update population:
if(Update){
  # The population column of the GDP download is located by name.
  selPop <- grep('Population', names(USGDP))
  # selGDP marks the rows whose Year is present in the GDP download.
  USGDPp2[['population.K']][selGDP] <- USGDP[, selPop]
  print(names(USGDP)[selPop])
}
Now realGDPperCapita. This also has a reference year, so we need to make sure we get them all:
if(Update){
  # Guard: every existing realGDPperCapita value must belong to a year
  # covered by the new download (selGDP marks those years).
  # The column name must be spelled exactly: indexing a data frame by a
  # nonexistent column name raises "undefined columns selected".
  if(any(!is.na(USGDPp2[!selGDP, 'realGDPperCapita']))){
    stop('ERROR: There are realGDPperCapita numbers ',
         'in the current USGDPpresidents ',
         'that are not in the new. ',
         'Manual review required.')
  }
  # The real-GDP-per-capita column of the download is located by name.
  selGDPperC <- grep('Real.GDP.per.c', names(USGDP))
  USGDPp2$realGDPperCapita[selGDP] <- USGDP[,selGDPperC]
  # Show the matched column name so its reference year can be read off.
  print(names(USGDP)[selGDPperC])
}
Compare the index year of ‘Real.GDP.per.capita’ with that in USGDPpresidents.Rd: If they are different, fix USGDPpresidents.Rd.
Next: executive:
if(Update){
  exec <- as.character(USGDPp2$executive)
  # Executives for the rows still missing a value, in year order.
  # This vector must be edited by hand for each update.
  newExec <- c('Obama', 'Trump', 'Trump')
  naExec <- which(is.na(exec))
  # Fail loudly instead of silently recycling or truncating the
  # hand-entered names when the number of missing rows differs.
  if(length(naExec) != length(newExec)){
    stop('ERROR: The number of new executive entries ',
         'does not match the number of missing ',
         'values in executive. ',
         'Manual review required.')
  }
  exec[naExec] <- newExec
  # Append only names that are not already levels: a duplicated level
  # makes ordered() fail.
  lvlexec <- unique(c(levels(USGDPp2$executive), newExec))
  USGDPp2$executive <- ordered(exec, lvlexec)
}
Similarly: war
if(Update){
  # The set of levels is carried over unchanged.
  lvlwar <- levels(USGDPp2$war)
  war <- as.character(USGDPp2$war)
  # Rows without an entry receive the empty string.
  war <- replace(war, is.na(war), '')
  USGDPp2$war <- ordered(war, levels=lvlwar)
}
Next: battleDeaths and battleDeathsPMP:
if(Update){
  # The added rows are given zero battle deaths.
  USGDPp2[['battleDeaths']][iNew] <- 0
  # Recompute the derived column for every row from battleDeaths
  # and population.K.
  USGDPp2$battleDeathsPMP <- (1000 * USGDPp2$battleDeaths /
                                USGDPp2$population.K)
}
Keynes?
if(Update){
  # The added rows are given a Keynes value of zero.
  USGDPp2[['Keynes']][iNew] <- 0
}
Unemployment figures came from different sources for different years. Since 1940 the source has been the Bureau of Labor Statistics (BLS), series LNS14000000 from the Current Population Survey. These data are available as a monthly series from the Current Population Survey of the Bureau of Labor Statistics.
Download the most recent years as an Excel file, compute row averages, and manually transfer the numbers for the most recent years here:
if(Update){
  # Hand-entered values, one per added row; these must be edited
  # for each update.
  USGDPp2[['unemployment']][iNew] <- c(4.875, 4.35, 3.89166666666667)
  # The added rows reuse the source label of the row just before them.
  USGDPp2[['unempSource']][iNew] <- USGDPp2[['unempSource']][iNew[1]-1]
  # Show the last rows for a visual check.
  print(tail(USGDPp2))
}
if(Update){
  # Store the extended table under the data set's own name and write it
  # to 'USGDPpresidents.rda' in the current working directory.
  USGDPpresidents <- USGDPp2
  save(list='USGDPpresidents', file='USGDPpresidents.rda')
}
Now copy this file from the current working directory to ~Ecdat, overwriting the previous version.