Financial data aggregator

This post will document an R script that will download and aggregate various financial and economic time series. This data will be used in future posts for modelling and analysis activities.

Packages required and download parameters

Load the required packages.

library("DescTools")
library("tidyquant")
library("timetk")
library("broom")
library("tibbletime")

Then we set parameters that determine the data to download.

# Parameters for binary market indicator
lb = 6                  #  Lookback period for drawdown assessment
pc = 0.2                #  Percent drawdown for binary market in/out indicator cutoff
fr = -0.025             #  Forward return for binary market in/out indicator cutoff

# Start and end dates
s.date = as.Date("1945-01-01")
e.date = as.Date("2019-03-01")

# Time series to download, Quandl and FRED
qndlm = c("NAHB/NWFHMI.1",
          "ISM/MAN_NEWORDERS.5")
fredw = c("IC4WSA")
fredm = c("AAA",       #    Moody's Seasoned Aaa Corporate Bond Yield
          "ACDGNO",    #    Value of Manufacturers' New Orders for Consumer Goods: Consumer
          "AHETPI",    #    Average Hourly Earnings of Production and Nonsupervisory Employees:
          "AWHMAN",    #    Average Weekly Hours of Production and Nonsupervisory Employees:
          "BAA",       #    Moody's Seasoned Baa Corporate Bond Yield
          "BOGMBASE",  #    Monetary Base; Total
          "CFNAIDIFF", #    Chicago Fed National Activity Index: Diffusion Index
          "CPIAUCSL",  #    Consumer Price Index for All Urban Consumers: All Items
          "CPILFESL",  #    Consumer Price Index for All Urban Consumers: All Items Less Food and
          "FEDFUNDS",  #    Effective Federal Funds Rate
          "GS10",      #    10-Year Treasury Constant Maturity Rate
          "GS2",       #    2-Year Treasury Constant Maturity Rate
          "INDPRO",    #    Industrial Production Index
          "ISRATIO",   #    Total Business: Inventories to Sales Ratio
          "KCFSI",     #    Kansas City Financial Stress Index
          "M2SL",      #    M2 Money Stock
          "NEWORDER",  #    Manufacturers' New Orders: Nondefense Capital Goods Excluding Aircraft
          "PERMIT",    #    New Private Housing Units Authorized by Building Permits
          "TB3MS",     #    3-Month Treasury Bill: Secondary Market Rate
          "TWEXMMTH",  #  Trade Weighted U.S. Dollar Index: Major Currencies
          "UNRATE",    #    Civilian Unemployment Rate
          "LOANS")     #    Loans and Leases in Bank Credit, All Commercial Banks 

Downloading external data

This section of code executes the collection of data from various sources. We use the function “tq_get” from the tidyquant package.

# Get S&P500 data
sp_5   <-  tq_get("^GSPC",get = "stock.prices",from = s.date)

# Get fred monthly economic data
econ.m1 <- tq_get(fredm, get = "economic.data",from = s.date)

# Spread fred monthly data to column, fill missing values
econ.m2 <- spread(econ.m1, symbol, price) %>% 
  fill(ACDGNO, CFNAIDIFF, ISRATIO)

# Get fred weekly economic data
econ.w1 <- tq_get(fredw, get = "economic.data", from = s.date)

# Convert weekly data to monthly frequency
econ.m3 <- econ.w1 %>% 
  rename("IC4WSA" = "price") %>%
  group_by(month=floor_date(date, "month")) %>% 
  summarize(IC4WSA = last(IC4WSA)) %>%
  rename("date" = "month") 

# Get quandl monthy data
econ.m4 <- tq_get(qndlm,get="quandl",from="1985-03-01") %>%
  mutate(price = if_else(is.na(value), index, value), 
         date = floor_date(if_else(is.na(date), month, date),"month")) %>%
  select(symbol, date, price) %>% spread(symbol, price) %>%
  rename(HMI = "NAHB/NWFHMI.1", NEWORD = "ISM/MAN_NEWORDERS.5") 

# Get Shiller download: http://www.econ.yale.edu/~shiller/data.htm
econ.m5 <- read.zoo(file = "C:/Users/brent/Documents/R/R_import/Shiller.csv",
                    FUN = as.Date, header = T, sep = ",", 
                    format = "%d/%m/%Y", index.column = 1)
econ.m5 <- tk_tbl(econ.m5, rename_index = "date") %>% 
  mutate(date = floor_date(date, "month"))

# Join all data (except stock data)
econ.m  <- full_join(econ.m2, econ.m3, by = "date")
econ.m  <- full_join(econ.m, econ.m4, by = "date")
econ.m  <- full_join(econ.m, econ.m5, by = "date") %>% 
  filter(date >= s.date & date <= e.date)

Note that the Shiller cyclically adjusted price earnings data is manually saved to a local directory and loaded via a read csv function.

Let’s see what our data looks like now.

str(econ.m)
## Classes 'tbl_df', 'tbl' and 'data.frame':    891 obs. of  36 variables:
##  $ date     : Date, format: "1945-01-01" "1945-02-01" ...
##  $ AAA      : num  2.69 2.65 2.62 2.61 2.62 2.61 2.6 2.61 2.62 2.62 ...
##  $ ACDGNO   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ AHETPI   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ AWHMAN   : num  45.5 45.5 45.3 45 44.3 44.8 44.7 40.9 41.5 41.5 ...
##  $ BAA      : num  3.46 3.41 3.38 3.36 3.32 3.29 3.26 3.26 3.24 3.2 ...
##  $ BOGMBASE : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ CFNAIDIFF: num  NA NA NA NA NA NA NA NA NA NA ...
##  $ CPIAUCSL : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ CPILFESL : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ FEDFUNDS : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ GS10     : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ GS2      : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ INDPRO   : num  16.8 16.8 16.7 16.4 15.9 ...
##  $ ISRATIO  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ KCFSI    : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ LOANS    : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ M2SL     : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ NEWORDER : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ PERMIT   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ TB3MS    : num  0.38 0.38 0.38 0.38 0.38 0.38 0.38 0.38 0.38 0.38 ...
##  $ TWEXMMTH : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ UNRATE   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ IC4WSA   : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ NEWORD   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ HMI      : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ P        : num  13.5 13.9 13.9 14.3 14.8 ...
##  $ D        : num  0.643 0.647 0.65 0.65 0.65 ...
##  $ E        : num  0.94 0.95 0.96 0.973 0.987 ...
##  $ CPI      : num  17.8 17.8 17.8 17.8 17.9 18.1 18.1 18.1 18.1 18.1 ...
##  $ Fraction : num  1945 1945 1945 1945 1945 ...
##  $ Rate.GS10: num  2.37 2.35 2.34 2.33 2.31 ...
##  $ Price    : num  189 196 196 201 207 ...
##  $ Dividend : num  9.03 9.08 9.13 9.13 9.08 ...
##  $ Earnings : num  13.2 13.3 13.5 13.7 13.8 ...
##  $ CAPE     : num  12 12.3 12.3 12.6 13 ...

It is obvious that not all time series start on the specified start date. Everything that is requested is present. Next, we want to download and join the S&P 500 index time series.

Stock market data

We now want to take stock index data (the S&P 500 index), roll it up from a daily to a monthly periodicity, and create a set of rolling attributes.

# Create new attributes
sp_5 <- sp_5 %>% 
  
  # Group to monthly periodicity and create monthly returns
  group_by(month = floor_date(date, "month")) %>%
  summarize(low = min(low), close = last(close), volume = sum(volume)) %>%
  rename("date" = "month") %>% 
  tq_mutate(select = close, mutate_fun = periodReturn, period = "monthly", 
            type = "log", col_rename  = "rtn_m") %>%
  
  # Forward monthly return
  mutate(fwd_rtn_m = lead(rtn_m, 1)) %>% 
  
  # 6 monthly returns
  tq_mutate(select = rtn_m, mutate_fun = rollapply, width = lb, FUN = sum, 
            col_rename  = "rtn_6m") %>%
  
  # 6 monthly low watermark
  tq_mutate(select = low, mutate_fun = rollapply, width = lb, FUN = min, 
            col_rename  = "min_6m") %>%
  
  # 6 monthly max drawdown
  mutate(dd_6m = -lag(log(close), n = lb) + log(min_6m)) %>%
  
  # Binary flag, 1 = 6 monthly return less than specified and drawdown in
  # in excess ofthat specified 
  mutate(flag = ifelse(rtn_6m < fr | dd_6m < -pc , 1, 0)) %>%
  
  # Lead binary flag by 6 months
  mutate(y1 = lead(flag, lb)) %>%
  
  # Indicator for when binary flag changes
  mutate(diff_flag = c(NA, diff(y1)))

The attribute y1 is a binary indicator that looks forward over the next 6 months and returns 1 if the maximum drawdown is more than 20% or the return is less than 2.5% (these parameters can be changed as described above). In all other cases this binary indicator returns 0. This is the independent or forecast variable for future analysis.

If we can forecast this indicator, we can stay out of the market during downturns or when returns are considered insufficient.

To visualize this indicator, it is ideal to do so as a shading over a time series chart. To achieve this, we need to convert the indicator into a series of start and end dates. The following code does just that.

sp_5s    <-  sp_5 %>% filter(diff_flag == 1) %>% select(date) %>% rename(start = date)
sp_5e    <-  sp_5 %>% filter(diff_flag == -1) %>% select(date) %>% rename(end = date)
short    <-  min(count(sp_5s), count(sp_5e))
sp_shade <-  data.frame(head(sp_5s, short), head(sp_5e, short))

All datasets are now created. Finally, these data sets are merged and saved for later use.

# Join economic and stock return time series
econ_fin_data <- inner_join(econ.m, sp_5, by = "date")
# Save time series data and 
saveRDS(econ_fin_data, file="econ_fin_data.Rda")
saveRDS(sp_shade, file="sp_shade.Rda")

That’s it for now, next we will look at visualising these time series.

 
comments powered by Disqus