Leadership Details

16 Apr 2015

To install Systematic Investor Toolbox (SIT) please visit About page.

Let’s continue with Run Leadership Rcpp post and look at results in more details

First, let’s load historical prices for S&P 500:

#*****************************************************************
# Load historical end of day data
#*****************************************************************
library(SIT)
load.packages('quantmod')

filename = 'big.test.Rdata'

if(!file.exists(filename)) {
  tickers = nasdaq.100.components()
  tickers = sp500.components()$tickers

  data = env()
  getSymbols.extra(tickers, src = 'yahoo', from = '1970-01-01', env = data, auto.assign = T)
  
	rm.index = which(sapply(ls(data), function(i) is.null(data[[i]])))
	if(any(rm.index)) env.del(names(rm.index), data)
  
  for(i in ls(data)) data[[i]] = adjustOHLC(data[[i]], use.Adjusted=T)
  #print(bt.start.dates(data))

  bt.prep(data, align='keep.all', dates='2000::', fill.gaps=T)

  # remove ones with little history
  prices = data$prices
  bt.prep.remove.symbols(data, which(
   	count(prices, side = 2) < 3*252 | is.na(last(prices)) 
  ))
       
  # show the ones removed
  print(setdiff(tickers,names(data$prices)))

  prices = data$prices
  save(prices, file=filename)
}

load(file=filename)

#*****************************************************************
# Helper functions
#*****************************************************************
# lead - lag sample function
rtest = function(ret, nwindow) {
	nlag = floor(nwindow/2)
	n = ncol(ret)
	nperiod = nrow(ret)
	out = array(0, c(n,n,nperiod))
	index = !lower.tri(out[,,1])

	for(i in nwindow:nperiod) {
		temp = coredata(ret[(i-nwindow+1):i,])
		temp.out = matrix(0, n,n)
		
		for (c1 in 2:n) {
			data1 = temp[,c1]
			c2.index = 1:(c1-1)
			data2 = temp[,c2.index,drop=F]

			# if c1.cor : c2 -> c1 i.e. cor(data1[(lag+1):nwindow], data2[1:(nwindow-lag),])
			c1.cor = rep(0,ncol(data2))
			for (lag in 0:nlag) {
				cor.temp = cor(data1[(lag+1):nwindow], data2[1:(nwindow-lag),])
				c1.cor = c1.cor + pmax(cor.temp, 0)
			}
			c1.cor = c1.cor / (nlag + 1)
			
			# if c2.cor : c1 -> c2 i.e. cor(data1[1:(nwindow-lag)], data2[(lag+1):nwindow,])
			c2.cor = rep(0,ncol(data2))
			for (lag in 1:nlag) {
				cor.temp = cor(data1[1:(nwindow-lag)], data2[(lag+1):nwindow,])
				c2.cor = c2.cor + pmax(cor.temp, 0)
			}
			c2.cor = c2.cor / nlag
			
			# if c1.cor >= c2.cor hence c2 -> c1 i.e. cor(data1[(lag+1):nwindow], data2[1:(nwindow-lag),])
			index = c1.cor >= c2.cor
			if(any(index))
				temp.out[c2.index[index], c1] = c1.cor[index]
				
			# if c2.cor >= c1.cor hence c1 -> c2 i.e. cor(data1[1:(nwindow-lag)], data2[(lag+1):nwindow,])
			index = !index
			if(any(index))
				temp.out[c1, c2.index[index]] = c2.cor[index]							
		}			
		out[,,i] = temp.out
	}
	out
}


# lead - lag sample function
rtest.one = function(ret, nwindow, c1.name, c2.name) {
	c1 = which(colnames(ret) == c1.name)
	c2 = which(colnames(ret) == c2.name)
	if(c1 < c2) { # switch
		temp = c2; c2 = c1; c1 = temp
		temp = c2.name; c2.name = c1.name; c1.name = temp
	}

	nlag = floor(nwindow/2)
	n = ncol(ret)
	nperiod = nrow(ret)
	out = array(NA, c(n,n,nperiod))
	index = !lower.tri(out[,,1])

	i = nperiod

		temp = coredata(ret[(i-nwindow+1):i,])
		temp.out = matrix(NA, n,n)
		
			data1 = temp[,c1]
			data2 = temp[,c2,drop=F]

			c1.cor = c()
			for (lag in 0:nlag) {
				cor.temp = cor(data1[(lag+1):nwindow], data2[1:(nwindow-lag),])
				c1.cor = c(c1.cor, cor.temp)
			}			
			
			c2.cor = c()
			for (lag in 1:nlag) {
				cor.temp = cor(data1[1:(nwindow-lag)], data2[(lag+1):nwindow,])
				c2.cor = c(c2.cor, cor.temp)
			}


			c1.cor.mean = sum(pmax(c1.cor,0)) / (nlag + 1)
			c2.cor.mean = sum(pmax(c2.cor,0)) / nlag
			# if c1.cor >= c2.cor hence c2 -> c1 i.e. cor(data1[(lag+1):nwindow], data2[1:(nwindow-lag),])				
			# if c2.cor >= c1.cor hence c1 -> c2 i.e. cor(data1[1:(nwindow-lag)], data2[(lag+1):nwindow,])
			relationship = iif(c1.cor.mean >= c2.cor.mean, 'c2 -> c1', 'c1 -> c2')

		list(
			c1 = c1, c2 = c2, c1.name = c1.name, c2.name = c2.name,
			x = -nlag:nlag,
			y = c(c2.cor, c1.cor),
			c1.cor = to.nice(c1.cor.mean),
			c2.cor = to.nice(c2.cor.mean),
			relationship = relationship,
			c1.ret = ret[(i-nwindow+1):i,c1],
			c2.ret = ret[(i-nwindow+1):i,c2]

		)
}
# make plot
rtest.visualize = function(out) {
	par(mar=c(5,4,4,5))	
	plot(out$x, out$y, type = "n", xlab='Lag', ylab='Correlation', las=1,
		main = paste('c2 =', out$c2.name, out$c2.cor, 'vs c1 =', out$c1.name, out$c1.cor,
		'\n', out$relationship)	
	)
	col1 = col.add.alpha('blue',100)
	polygon(c(first(out$x), out$x, last(out$x)), c(0, pmax(out$y,0), 0), col=col1, border=col1)
	col1 = col.add.alpha('gray',200)
	polygon(c(first(out$x), out$x, last(out$x)), c(0, pmin(out$y,0), 0), col=col1, border=col1)
	abline(h=0, v=0)
	lines(out$x, out$y, col='red')
	lines(out$x, out$y, type='b', pch='+', col='red')
}	
rtest.visualize.relationship = function(out) {
	ylim = range(c(cumprod(1 + out$c1.ret), cumprod(1 + out$c2.ret))) 
	plota(cumprod(1 + out$c1.ret), type='l', ylim = ylim, col='black')
		plota.lines(cumprod(1 + out$c2.ret), col='blue')
		plota.legend(paste('c1', out$c1.name, ',c2', out$c2.name, ',', out$relationship), 'black,blue, red')
}




#*****************************************************************
# Run Lead Lag Correlation
#*****************************************************************
prices = prices[,]

n = ncol(prices)
nperiod = nrow(prices)
ret = prices / mlag(prices) - 1

index =  (nperiod-20):nperiod
ret = ret[index,]
nperiod = nrow(ret)
nwindow = 15

#*****************************************************************
# Run Lead Lag Correlation
#*****************************************************************
# make sure to install [Rtools on windows](http://cran.r-project.org/bin/windows/Rtools/)
load.packages('Rcpp')
load.packages('RcppParallel')

# load Rcpp functions
sourceCpp('lead.lag.correlation.cpp')

c.cor = cp_run_leadership_smart(ret, nwindow, T)

# remove zero entries
clean.adj = function(adj.mat, threshold = 0.5) {
	adj.mat[ abs(adj.mat) < threshold] = 0
	keep.index = (rowSums(adj.mat != 0) >= 1) | (colSums(adj.mat != 0) >= 1)	
	adj.mat = adj.mat[keep.index, keep.index]	
	adj.mat
}

adj.mat = c.cor[,,nperiod-1]
	rownames(adj.mat) = colnames(adj.mat) = colnames(prices)
	adj.mat = clean.adj(adj.mat, 0.38)

temp = adj.mat
	temp[temp == 0] = NA
print(to.percent(temp))

	AIG	APA	NEM	PPL	XL
AIG
APA
ED				38.18%
NEM
PPL
PVH	41.76%
RL	42.19%				38.71%
STT			38.28%
XL		38.30%

# examine some pairs
out = rtest.one(ret, nwindow, 'AIG', 'PVH')
	rtest(ret[,c(out$c2,out$c1)], nwindow)[,,nperiod]

      [,1] [,2] [1,] 0.0000000    0 [2,] 0.3668593    0

	rtest.visualize(out)

plot of chunk plot-2

	rtest.visualize.relationship(out)

plot of chunk plot-2

out = rtest.one(ret, nwindow, 'XL', 'RL')
	rtest(ret[,c(out$c2,out$c1)], nwindow)[,,nperiod]

 [,1]      [,2] [1,]    0 0.2261738 [2,]    0 0.0000000

	rtest.visualize(out)

plot of chunk plot-2

	rtest.visualize.relationship(out)

plot of chunk plot-2

out = rtest.one(ret, nwindow, 'APA', 'XL')
	rtest(ret[,c(out$c2,out$c1)], nwindow)[,,nperiod]

      [,1] [,2] [1,] 0.0000000    0 [2,] 0.3136286    0

	rtest.visualize(out)

plot of chunk plot-2

	rtest.visualize.relationship(out)

plot of chunk plot-2

# adj.mat(row,col) is mapped to 
#
# |from |to  |value            |
# |:----|:---|:----------------|
# |row  |col |adj.mat(row,col) |

load.packages('igraph') 
g  = graph.adjacency(adj.mat,weighted=TRUE)
get.data.frame(g)

from to weight 1 ED PPL 0.3817913 2 PVH AIG 0.4175742 3 RL AIG 0.4219342 4 RL XL 0.3870656 5 STT NEM 0.3828356 6 XL APA 0.3830476

print(g)

IGRAPH DNW- 9 6 -- 
+ attr: name (v/c), weight (e/n)

(this report was produced on: 2015-04-24)

Systematic Investor's Blog About Contact

Leadership Details

Related Posts

RFinance 2016 15 May 2016

Benchmark Plus 14 Apr 2016

Exponentially Weighted Volatility using RCPP 10 Apr 2016