Run Quantile in Rcpp

To install Systematic Investor Toolbox (SIT) please visit About page.

In the A ‘Simple’ Tactical Asset Allocation Portfolio with Percentile Channels (for Dummies) post, David Varadi discussed a Channel Breakout system that required to estimate moving window quantile.

This is a time consuming operation that can be speed up with runquantile function in caTools package.

Alternatively one can write a simple Rcpp function to speed up commutations If you are just starting with Rcpp, I found following resources extremely useful:

To get started on Windows you would need to install Rtools

Below are sample functions to compute given quantile. The run_quantile0 function is basic and not very efficient because it sorts all elements in each window. The second function, run_quantile function takes advantage of previous order and only insert new element in the sorted array.

#include <Rcpp.h>
using namespace Rcpp;
using namespace std;

// [[Rcpp::plugins(cpp11)]]

// quantile setup
struct quantile {
	int lo, hi, n, total;
	double hlo, hhi;
};
quantile make_quantile(int n, double prob) {
	quantile res;
	double index = (n - 1) * prob;
	res.lo = floor(index);
	res.hi = res.lo + 1;
	res.hhi = index - res.lo;
	res.hlo = 1 - res.hhi;
	return res;	
}

// index vector
vector<int> make_seq(int n) {
	vector<int> id(n);
	iota(id.begin(), id.end(), 0);
	return id;
}

// [[Rcpp::export]]
NumericVector run_quantile0(NumericVector x, int n, double prob) {
	auto sz = x.size();
	NumericVector res(sz);	
	
	// quantile setup
	auto q = make_quantile(n, prob);

	for(int i = 0; i < (sz-n+1); i++) {
		// can be made a lot faster by not re-sorting each time
		vector<double> z(&x[i], &x[i+n]);
		sort(z.begin(), z.end());    	
		res[i+n-1] = q.hlo * z[q.lo] + q.hhi * z[q.hi];  
	}
    
	// pad the first n-1 elements with NA
	fill(res.begin(), res.end()-sz+n-1, NA_REAL);
	return res;	
}

// [[Rcpp::export]]
NumericVector run_quantile(NumericVector x, int n, double prob) {
	auto sz = x.size();
	NumericVector res(sz);	
	
	// quantile setup
	auto q = make_quantile(n, prob);

	// index vector
	auto id = make_seq(n);
	
	for(int i = 0; i < (sz-n+1); i++) {
		if(i == 0)
			sort(id.begin(), id.end(), 
				[&](int a, int b) { return x[a] < x[b]; });
		else {
	    		// remove index (i-1)
		    	id.erase(find(id.begin(), id.end(), i-1));
		    	// insert keeping sorted order
	    		id.insert(lower_bound(id.begin(), id.end(), i+n-1, 
	    			[&](int a, int b) { return x[a] < x[b]; }), i+n-1);
		}    
		
		res[i+n-1] = q.hlo * x[id[q.lo]] + q.hhi * x[id[q.hi]];  
	}
    
	// pad the first n-1 elements with NA
	fill(res.begin(), res.end()-sz+n-1, NA_REAL);
	return res;	
}

Please save above code in the quantile.cpp file or download quantile.cpp.

Next let’s check for correctness and speed.

#*****************************************************************
# Rcpp Run Quantile
#*****************************************************************
# make sure to install Rtools on windows
# http://cran.r-project.org/bin/windows/Rtools/
library(SIT)
load.packages('quantmod')

load.packages('Rcpp')

# load run quantile functions
sourceCpp('quantile.cpp')

#*****************************************************************
# Test functionality and speed
#*****************************************************************
# inefficient R implementation
run_quantileR = function(x, n, probs) {
	out = c( rep(NA,(n-1)), sapply(n:len(x), function(i) quantile(x[(i- (n-1) ):i], probs) ))
	as.vector(out)
}	


# basic test
load.packages('microbenchmark')

n = 10
probs = 0.5
x = runif(100)

test1 = run_quantileR(x, n, probs)
test2 = run_quantile0(x, n, probs)
test3 = run_quantile(x, n, probs)

print(all.equal(test1, test2))

TRUE

print(all.equal(test1, test3))

TRUE

print(to.nice(summary(microbenchmark(
	run_quantileR(x, n, probs),
	run_quantile0(x, n, probs),
	run_quantile(x, n, probs),
	times = 10
)),0))
  expr min lq mean median uq max neval
1 run_quantileR(x, n, probs) 13,666 13,963 14,566 14,261 15,060 16,278 10
2 run_quantile0(x, n, probs) 40 41 50 54 57 61 10
3 run_quantile(x, n, probs) 13 13 20 17 30 34 10

Second implementation, run_quantile, really shines over larger look back window

n = 1000
probs = 0.9
x = runif(100000)

test1 = run_quantile0(x, n, probs)
test2 = run_quantile(x, n, probs)

print(all.equal(test1, test2))

TRUE

print(to.nice(summary(microbenchmark(
	run_quantile0(x, n, probs),
	run_quantile(x, n, probs),
	times = 1
)),0))
  expr min lq mean median uq max neval
1 run_quantile0(x, n, probs) 5,570 5,570 5,570 5,570 5,570 5,570 1
2 run_quantile(x, n, probs) 82 82 82 82 82 82 1

To be continued.

(this report was produced on: 2015-03-07)