Run Quantile in Rcpp
04 Mar 2015To install Systematic Investor Toolbox (SIT) please visit About page.
In the A ‘Simple’ Tactical Asset Allocation Portfolio with Percentile Channels (for Dummies) post, David Varadi discussed a Channel Breakout system that required to estimate moving window quantile.
This is a time consuming operation that can be speed up with runquantile function in caTools package.
Alternatively one can write a simple Rcpp function to speed up commutations If you are just starting with Rcpp, I found following resources extremely useful:
To get started on Windows you would need to install Rtools
Below are sample functions to compute given quantile. The run_quantile0 function is basic and not very efficient because it sorts all elements in each window. The second function, run_quantile function takes advantage of previous order and only insert new element in the sorted array.
#include <Rcpp.h>
using namespace Rcpp;
using namespace std;
// [[Rcpp::plugins(cpp11)]]
// quantile setup
struct quantile {
int lo, hi, n, total;
double hlo, hhi;
};
quantile make_quantile(int n, double prob) {
quantile res;
double index = (n - 1) * prob;
res.lo = floor(index);
res.hi = res.lo + 1;
res.hhi = index - res.lo;
res.hlo = 1 - res.hhi;
return res;
}
// index vector
vector<int> make_seq(int n) {
vector<int> id(n);
iota(id.begin(), id.end(), 0);
return id;
}
// [[Rcpp::export]]
NumericVector run_quantile0(NumericVector x, int n, double prob) {
auto sz = x.size();
NumericVector res(sz);
// quantile setup
auto q = make_quantile(n, prob);
for(int i = 0; i < (sz-n+1); i++) {
// can be made a lot faster by not re-sorting each time
vector<double> z(&x[i], &x[i+n]);
sort(z.begin(), z.end());
res[i+n-1] = q.hlo * z[q.lo] + q.hhi * z[q.hi];
}
// pad the first n-1 elements with NA
fill(res.begin(), res.end()-sz+n-1, NA_REAL);
return res;
}
// [[Rcpp::export]]
NumericVector run_quantile(NumericVector x, int n, double prob) {
auto sz = x.size();
NumericVector res(sz);
// quantile setup
auto q = make_quantile(n, prob);
// index vector
auto id = make_seq(n);
for(int i = 0; i < (sz-n+1); i++) {
if(i == 0)
sort(id.begin(), id.end(),
[&](int a, int b) { return x[a] < x[b]; });
else {
// remove index (i-1)
id.erase(find(id.begin(), id.end(), i-1));
// insert keeping sorted order
id.insert(lower_bound(id.begin(), id.end(), i+n-1,
[&](int a, int b) { return x[a] < x[b]; }), i+n-1);
}
res[i+n-1] = q.hlo * x[id[q.lo]] + q.hhi * x[id[q.hi]];
}
// pad the first n-1 elements with NA
fill(res.begin(), res.end()-sz+n-1, NA_REAL);
return res;
}
Please save above code in the quantile.cpp
file or download quantile.cpp.
Next let’s check for correctness and speed.
#*****************************************************************
# Rcpp Run Quantile
#*****************************************************************
# make sure to install Rtools on windows
# http://cran.r-project.org/bin/windows/Rtools/
library(SIT)
load.packages('quantmod')
load.packages('Rcpp')
# load run quantile functions
sourceCpp('quantile.cpp')
#*****************************************************************
# Test functionality and speed
#*****************************************************************
# inefficient R implementation
run_quantileR = function(x, n, probs) {
out = c( rep(NA,(n-1)), sapply(n:len(x), function(i) quantile(x[(i- (n-1) ):i], probs) ))
as.vector(out)
}
# basic test
load.packages('microbenchmark')
n = 10
probs = 0.5
x = runif(100)
test1 = run_quantileR(x, n, probs)
test2 = run_quantile0(x, n, probs)
test3 = run_quantile(x, n, probs)
print(all.equal(test1, test2))
TRUE
print(all.equal(test1, test3))
TRUE
print(to.nice(summary(microbenchmark(
run_quantileR(x, n, probs),
run_quantile0(x, n, probs),
run_quantile(x, n, probs),
times = 10
)),0))
expr | min | lq | mean | median | uq | max | neval | |
---|---|---|---|---|---|---|---|---|
1 | run_quantileR(x, n, probs) | 13,666 | 13,963 | 14,566 | 14,261 | 15,060 | 16,278 | 10 |
2 | run_quantile0(x, n, probs) | 40 | 41 | 50 | 54 | 57 | 61 | 10 |
3 | run_quantile(x, n, probs) | 13 | 13 | 20 | 17 | 30 | 34 | 10 |
Second implementation, run_quantile
, really shines over larger look back window
n = 1000
probs = 0.9
x = runif(100000)
test1 = run_quantile0(x, n, probs)
test2 = run_quantile(x, n, probs)
print(all.equal(test1, test2))
TRUE
print(to.nice(summary(microbenchmark(
run_quantile0(x, n, probs),
run_quantile(x, n, probs),
times = 1
)),0))
expr | min | lq | mean | median | uq | max | neval | |
---|---|---|---|---|---|---|---|---|
1 | run_quantile0(x, n, probs) | 5,570 | 5,570 | 5,570 | 5,570 | 5,570 | 5,570 | 1 |
2 | run_quantile(x, n, probs) | 82 | 82 | 82 | 82 | 82 | 82 | 1 |
To be continued.
(this report was produced on: 2015-03-07)