Minimum Spanning Tree
08 Mar 2015
To install Systematic Investor Toolbox (SIT) please visit About page.
MKTSTK an another amazing graphics example
to visualize correlation matrix at Stock market visualization: Minimum Spanning Trees
Following their example, I will visualize below the stocks in NASDAQ 100 Index
for the last year using end of the day data and last 5 days using 1 minute data.
I found following references very useful:
#*****************************************************************
# Load historical end of day data
#*****************************************************************
library ( SIT )
load.packages ( 'quantmod' )
tickers = nasdaq.100.components ()
data <- new.env ()
getSymbols ( tickers , src = 'yahoo' , from = '1970-01-01' , env = data , auto.assign = T )
for ( i in ls ( data )) data [[ i ]] = adjustOHLC ( data [[ i ]], use.Adjusted = T )
#print(bt.start.dates(data))
bt.prep ( data , align = 'keep.all' , dates = '2000::' )
# remove ones with little history
bt.prep.remove.symbols.min.history ( data )
# show the ones removed
print ( setdiff ( tickers , names ( data $ prices )))
FB
GOOG
KRFT
LVNTA
LMCA
LMCK
TRIP
#*****************************************************************
# Visualize Correlation Matrix
#*****************************************************************
prices = data $ prices
ret = diff ( log ( prices ))
ret = last ( ret , 252 )
print ( join ( c ( 'Minimum Spanning Tree based on Pearson Correlation for Nasdaq 100 Components' ,
'based on daily returns for' , format ( range ( index ( ret )), '%d-%b-%Y' )), ' ' ))
Minimum Spanning Tree based on Pearson Correlation for Nasdaq 100 Components based on daily returns for 07-Mar-2014 06-Mar-2015
plot.cor ( ret , 0.5 )
Next let’s get intraday 1 minute historical quotes and visualize correlation based on the last 5 days:
#*****************************************************************
# Load historical intraday quotes
#*****************************************************************
tickers = names ( data $ prices )
filename = 'data/nasdaq.100.intraday.Rdata'
if ( ! file.exists ( filename )) {
data1 <- new.env ()
for ( ticker in tickers )
data1 [[ ticker ]] = getSymbol.intraday.google ( ticker , 'NASDAQ' , 60 , '15d' )
save ( data1 , file = filename )
}
load ( file = filename )
#print(bt.start.dates(data1))
bt.prep ( data1 , align = 'keep.all' , fill.gaps = T )
#*****************************************************************
# Visualize Correlation Matrix
#*****************************************************************
prices = data1 $ prices
ret = diff ( log ( prices ))
# there are 391 = 6*60+30+1 entries each day
# i.e. dim(data1$AAPL['2015:03:03'])
# last 5 days
ret = last ( ret , 5 * ( 6 * 60+30+1 ))
print ( join ( c ( 'Minimum Spanning Tree based on Pearson Correlation for Nasdaq 100 Components' ,
'based on 1 minute returns for' , format ( range ( index ( ret )), '%d-%b-%Y %H-%M' )), ' ' ))
Minimum Spanning Tree based on Pearson Correlation for Nasdaq 100 Components based on 1 minute returns for 02-Mar-2015 09-30 06-Mar-2015 16-00
plot.cor ( ret , 0.5 )
Helper functions:
#*****************************************************************
# Helper Function to Create / Clean Correlation Matrix
#*****************************************************************
clean.cor = function ( ret , threshold = 0.5 ) {
cor_mat = cor ( coredata ( ret ), use = 'complete.obs' , method = 'pearson' )
cor_mat [ abs ( cor_mat ) < threshold ] = 0
keep.index = rowSums ( cor_mat != 0 ) > 1
cor_mat = cor_mat [ keep.index , keep.index ]
cor_mat [ lower.tri ( cor_mat , diag = TRUE ) ] = 0
cor_mat
}
#*****************************************************************
# Helper Function to Plot Minimum Spanning Tree
#*****************************************************************
plot.cor = function ( ret , threshold = 0.5 ) {
cor_mat = clean.cor ( ret , threshold )
transform.fn = function ( x ) x
transform.fn = function ( x ) sqrt ( 2.0 * ( 1 - x ) )
transform.fn = function ( x ) 1 - abs ( x )
dist = transform.fn ( cor_mat )
dist [ cor_mat == 0 ] = 0
load.packages ( 'igraph' )
graph = graph.adjacency ( dist , weighted = TRUE , mode = 'upper' )
mst = minimum.spanning.tree ( graph )
breaks = c ( 0 , 55 , 60 , 65 , 70 , 100 )
cols = spl ( 'black,blue,red,green,orange' )
labels = 1 : 5
factor = cut ( E ( mst ) $ weight , breaks = transform.fn ( breaks / 100 ), labels = labels )
for ( i in labels )
E ( mst )[ factor == i ] $ color = col.add.alpha ( cols [ i ], 150 )
set.seed ( 100 )
par ( mar = c ( 1 , 1 , 1 , 1 ))
plot ( mst , vertex.size = 5 , vertex.color = NA , vertex.frame.color = NA , edge.width = 3 )
legend ( 'bottomleft' , title = 'Minimum Spanning Tree' , cex = 0.75 , pch = 16 , bty = 'n' , ncol = 2 ,
col = spl ( 'black,blue,red,green,orange' ),
legend = spl ( '>70%,65-70,60-65,55-60,50-55' )
)
# full graph
factor = cut ( E ( graph ) $ weight , breaks = transform.fn ( breaks / 100 ), labels = labels )
for ( i in labels )
E ( graph )[ factor == i ] $ color = col.add.alpha ( cols [ i ], 100 )
set.seed ( 100 )
plot ( graph , vertex.size = 5 , vertex.color = NA , vertex.frame.color = NA , edge.width = 3 )
legend ( 'bottomleft' , title = 'Full Graph' , cex = 0.75 , pch = 16 , bty = 'n' , ncol = 2 ,
col = spl ( 'black,blue,red,green,orange' ),
legend = spl ( '>70%,65-70,60-65,55-60,50-55' )
)
}
(this report was produced on: 2015-03-08)