Page Rank
21 Apr 2015To install Systematic Investor Toolbox (SIT) please visit About page.
Let’s continue with Run Leadership Rcpp post and look at results and PageRank
First, let’s load historical prices for S&P 500:
#*****************************************************************
# Load historical end of day data
#*****************************************************************
library(SIT)
load.packages('quantmod')
filename = 'big.test.Rdata'
if(!file.exists(filename)) {
tickers = nasdaq.100.components()
tickers = sp500.components()$tickers
data = env()
getSymbols.extra(tickers, src = 'yahoo', from = '1970-01-01', env = data, auto.assign = T)
rm.index = which(sapply(ls(data), function(i) is.null(data[[i]])))
if(any(rm.index)) env.del(names(rm.index), data)
for(i in ls(data)) data[[i]] = adjustOHLC(data[[i]], use.Adjusted=T)
#print(bt.start.dates(data))
bt.prep(data, align='keep.all', dates='2000::', fill.gaps=T)
# remove ones with little history
prices = data$prices
bt.prep.remove.symbols(data, which(
count(prices, side = 2) < 3*252 | is.na(last(prices))
))
# show the ones removed
print(setdiff(tickers,names(data$prices)))
prices = data$prices
save(prices, file=filename)
}
load(file=filename)
#*****************************************************************
# Run Lead Lag Correlation
#*****************************************************************
prices = prices[,]
n = ncol(prices)
nperiod = nrow(prices)
ret = prices / mlag(prices) - 1
index = (nperiod-20):nperiod
ret = ret[index,]
nperiod = nrow(ret)
nwindow = 15
#*****************************************************************
# Run Lead Lag Correlation
#*****************************************************************
# make sure to install [Rtools on windows](http://cran.r-project.org/bin/windows/Rtools/)
load.packages('Rcpp')
load.packages('RcppParallel')
# load Rcpp functions
sourceCpp('lead.lag.correlation.cpp')
c.cor = cp_run_leadership_smart(ret, nwindow, T)
# remove zero entries
clean.adj = function(adj.mat, threshold = 0.5) {
adj.mat[ abs(adj.mat) < threshold] = 0
keep.index = (rowSums(adj.mat != 0) >= 1) | (colSums(adj.mat != 0) >= 1)
adj.mat = adj.mat[keep.index, keep.index]
adj.mat
}
load.packages('igraph')
adj.mat = c.cor[,,21]
rownames(adj.mat) = colnames(adj.mat) = colnames(prices)
adj.mat = clean.adj(adj.mat, 0.35)
# Construct graph G
g = graph.adjacency(adj.mat,weighted=TRUE)
df = get.data.frame(g)
print(head(df))
rownames(x) | from | to | weight |
---|---|---|---|
1 | APH | DHI | 0.3690346 |
2 | BBY | APA | 0.3584224 |
3 | CAG | DLPH | 0.3587414 |
4 | DISCA | DLPH | 0.3615856 |
5 | DLPH | XEC | 0.3540787 |
6 | ED | PPL | 0.3722490 |
#print(get.data.frame(g, what="vertices"))
#print(get.data.frame(g, what="edges"))
#E(g)$weight
par(mar=c(0,0,0,0))
#V(g)$label.cex = 0.5
plot(g, edge.label=round(E(g)$weight, 3), edge.label.cex = 0.8, edge.width = 1,
edge.arrow.size = 0.5,edge.color = 'lightgray', edge.curved=T,
vertex.label.cex = 0.8, vertex.size=5, vertex.color=NA, vertex.frame.color=NA)
print(to.nice(t(sort(page.rank(g, directed = TRUE)$vector))))
BBY | CAG | DISCA | ED | ES | GNW | HOG | KMX | NOV | PVH | RL | STT | TDC | WM | XL | PSA | HSY | PHM | PPL | APH | OMC | TROW | APA | DHI | AIG | DLPH | XEC |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0.02 | 0.02 | 0.02 | 0.02 | 0.02 | 0.02 | 0.02 | 0.02 | 0.02 | 0.02 | 0.02 | 0.02 | 0.02 | 0.02 | 0.02 | 0.03 | 0.03 | 0.03 | 0.03 | 0.04 | 0.04 | 0.04 | 0.05 | 0.08 | 0.09 | 0.10 | 0.11 |
RemoveDescendant = function(j, stack) {
if(j == len(p)) return()
for(i in (j+1):len(p))
if(stack$index[i])
if(adj.mat[L[i],L[j]] != 0) {
RemoveDescendant(i, stack)
stack$index[i] = F
}
}
# ExtractLeaders(G,p)
# Compute PageRank vector p on G
p = page.rank(g, directed = TRUE)$vector
# Sort time series in descending order by p
L = sort.list(p, decreasing = T)
#L = names(sort(p, decreasing = T))
stack = env(index = rep(T, len(p)))
for(i in 1:len(p))
if(stack$index[i])
RemoveDescendant(i, stack)
leaders = p[L[stack$index]]
print(leaders)
0.105187239944522 0.0882535811827645 0.0759500693481276 0.0504426740183451 0.0410540915395284 0.0410540915395284 0.0318265356559877 0.0318170418795006 0.0314284504922054 0.0314189567157183
# plot leaders
col = iif(is.na(match(V(g)$name, names(leaders))), 'white', 'red')
size = leaders[match(V(g)$name, names(leaders))]
size = punif(size, min=min(leaders), max=max(leaders))
size = iif(is.na(size), 5, round(20*size))
par(mar=c(0,0,0,0))
#V(g)$label.cex = 0.5
plot(g, edge.label=round(E(g)$weight, 3), edge.label.cex = 0.8, edge.width = 1,
edge.arrow.size = 0.8,edge.color = 'lightgray', edge.curved=T,
vertex.color=col.add.alpha(col,100),
vertex.label.cex = 0.5, vertex.size=size, vertex.color=NA, vertex.frame.color=NA)
(this report was produced on: 2015-04-24)