Page Rank

To install Systematic Investor Toolbox (SIT) please visit About page.

Let’s continue with Run Leadership Rcpp post and look at results and PageRank

First, let’s load historical prices for S&P 500:

#*****************************************************************
# Load historical end of day data
#*****************************************************************
library(SIT)
load.packages('quantmod')

filename = 'big.test.Rdata'

if(!file.exists(filename)) {
  tickers = nasdaq.100.components()
  tickers = sp500.components()$tickers

  data = env()
  getSymbols.extra(tickers, src = 'yahoo', from = '1970-01-01', env = data, auto.assign = T)
  
	rm.index = which(sapply(ls(data), function(i) is.null(data[[i]])))
	if(any(rm.index)) env.del(names(rm.index), data)
  
  for(i in ls(data)) data[[i]] = adjustOHLC(data[[i]], use.Adjusted=T)
  #print(bt.start.dates(data))

  bt.prep(data, align='keep.all', dates='2000::', fill.gaps=T)

  # remove ones with little history
  prices = data$prices
  bt.prep.remove.symbols(data, which(
   	count(prices, side = 2) < 3*252 | is.na(last(prices)) 
  ))
       
  # show the ones removed
  print(setdiff(tickers,names(data$prices)))

  prices = data$prices
  save(prices, file=filename)
}

load(file=filename)

#*****************************************************************
# Run Lead Lag Correlation
#*****************************************************************
prices = prices[,]

n = ncol(prices)
nperiod = nrow(prices)
ret = prices / mlag(prices) - 1

index =  (nperiod-20):nperiod
ret = ret[index,]
nperiod = nrow(ret)
nwindow = 15

#*****************************************************************
# Run Lead Lag Correlation
#*****************************************************************
# make sure to install [Rtools on windows](http://cran.r-project.org/bin/windows/Rtools/)
load.packages('Rcpp')
load.packages('RcppParallel')

# load Rcpp functions
sourceCpp('lead.lag.correlation.cpp')




c.cor = cp_run_leadership_smart(ret, nwindow, T)

# remove zero entries
clean.adj = function(adj.mat, threshold = 0.5) {
	adj.mat[ abs(adj.mat) < threshold] = 0
	keep.index = (rowSums(adj.mat != 0) >= 1) | (colSums(adj.mat != 0) >= 1)	
	adj.mat = adj.mat[keep.index, keep.index]	
	adj.mat
}

load.packages('igraph') 
adj.mat = c.cor[,,21]
	rownames(adj.mat) = colnames(adj.mat) = colnames(prices)
	adj.mat = clean.adj(adj.mat, 0.35)

# Construct graph G
g  = graph.adjacency(adj.mat,weighted=TRUE)
df = get.data.frame(g)
print(head(df))
rownames(x) from to weight
1 APH DHI 0.3690346
2 BBY APA 0.3584224
3 CAG DLPH 0.3587414
4 DISCA DLPH 0.3615856
5 DLPH XEC 0.3540787
6 ED PPL 0.3722490
#print(get.data.frame(g, what="vertices"))
#print(get.data.frame(g, what="edges"))
#E(g)$weight

par(mar=c(0,0,0,0))	
#V(g)$label.cex = 0.5
plot(g, edge.label=round(E(g)$weight, 3), edge.label.cex = 0.8, edge.width = 1, 
	edge.arrow.size = 0.5,edge.color = 'lightgray', edge.curved=T,
	vertex.label.cex = 0.8,	vertex.size=5, vertex.color=NA, vertex.frame.color=NA)

plot of chunk plot-2

print(to.nice(t(sort(page.rank(g, directed = TRUE)$vector))))
BBY CAG DISCA ED ES GNW HOG KMX NOV PVH RL STT TDC WM XL PSA HSY PHM PPL APH OMC TROW APA DHI AIG DLPH XEC
0.02 0.02 0.02 0.02 0.02 0.02 0.02 0.02 0.02 0.02 0.02 0.02 0.02 0.02 0.02 0.03 0.03 0.03 0.03 0.04 0.04 0.04 0.05 0.08 0.09 0.10 0.11
RemoveDescendant = function(j, stack) {
	if(j == len(p)) return()
		
	for(i in (j+1):len(p)) 
		if(stack$index[i]) 		
			if(adj.mat[L[i],L[j]] != 0) {
				RemoveDescendant(i, stack)
				stack$index[i] = F
			}
}


# ExtractLeaders(G,p)
# Compute PageRank vector p on G
p = page.rank(g, directed = TRUE)$vector
# Sort time series in descending order by p
L = sort.list(p, decreasing = T)
#L = names(sort(p, decreasing = T))

stack = env(index = rep(T, len(p)))

for(i in 1:len(p))
	if(stack$index[i])
		RemoveDescendant(i, stack)
leaders = p[L[stack$index]]

print(leaders)

0.105187239944522 0.0882535811827645 0.0759500693481276 0.0504426740183451 0.0410540915395284 0.0410540915395284 0.0318265356559877 0.0318170418795006 0.0314284504922054 0.0314189567157183

# plot leaders
col = iif(is.na(match(V(g)$name, names(leaders))), 'white', 'red')
size = leaders[match(V(g)$name, names(leaders))]
	size = punif(size, min=min(leaders), max=max(leaders))
	size = iif(is.na(size), 5, round(20*size))


par(mar=c(0,0,0,0))	
#V(g)$label.cex = 0.5
plot(g, edge.label=round(E(g)$weight, 3), edge.label.cex = 0.8, edge.width = 1, 
	edge.arrow.size = 0.8,edge.color = 'lightgray', edge.curved=T,
	vertex.color=col.add.alpha(col,100),
	vertex.label.cex = 0.5,	vertex.size=size, vertex.color=NA, vertex.frame.color=NA)

plot of chunk plot-2

(this report was produced on: 2015-04-24)