# 41 Rcpp

## 41.1 Getting started with C++

1. Q: With the basics of C++ in hand, it’s now a great time to practice by reading and writing some simple C++ functions. For each of the following functions, read the code and figure out what the corresponding base R function is. You might not understand every part of the code yet, but you should be able to figure out the basics of what the function does.

double f1(NumericVector x) {
int n = x.size();
double y = 0;

for(int i = 0; i < n; ++i) {
y += x[i] / n;
}
return y;
}

NumericVector f2(NumericVector x) {
int n = x.size();
NumericVector out(n);

out[0] = x[0];
for(int i = 1; i < n; ++i) {
out[i] = out[i - 1] + x[i];
}
return out;
}

bool f3(LogicalVector x) {
int n = x.size();

for(int i = 0; i < n; ++i) {
if (x[i]) return true;
}
return false;
}

int f4(Function pred, List x) {
int n = x.size();

for(int i = 0; i < n; ++i) {
LogicalVector res = pred(x[i]);
if (res[0]) return i + 1;
}
return 0;
}

NumericVector f5(NumericVector x, NumericVector y) {
int n = std::max(x.size(), y.size());
NumericVector x1 = rep_len(x, n);
NumericVector y1 = rep_len(y, n);

NumericVector out(n);

for (int i = 0; i < n; ++i) {
out[i] = std::min(x1[i], y1[i]);
}

return out;
}

A: The R equivalents are:

• f1: mean()
• f2: cumsum()
• f3: any()
• f4: Position()
• f5: pmin()
2. Q: To practice your function writing skills, convert the following functions into C++. For now, assume the inputs have no missing values.

1. all()
bool allC(LogicalVector x) {
int n = x.size();

for(int i = 0; i < n; ++i) {
if (!x[i]) return false;
}
return true;
}
1. cumprod(), cummin(), cummax().
NumericVector cumprodC(NumericVector x) {

int n = x.size();
NumericVector out(n);

out[0] = x[0];
for(int i = 1; i < n; ++i) {
out[i]  = out[i - 1] * x[i];
}
return out;
}

NumericVector cumminC(NumericVector x) {
int n = x.size();
NumericVector out(n);

out[0] = x[0];
for(int i = 1; i < n; ++i) {
out[i]  = std::min(out[i - 1], x[i]);
}
return out;
}

NumericVector cummaxC(NumericVector x) {
int n = x.size();
NumericVector out(n);

out[0] = x[0];
for(int i = 1; i < n; ++i) {
out[i]  = std::max(out[i - 1], x[i]);
}
return out;
}
1. diff(). Start by assuming lag 1, and then generalise for lag n.
NumericVector diffC(NumericVector x){
int n = x.size();
NumericVector out(n - 1);

for(int i = 1; i < n; i++){
out[i - 1] = x[i] - x[i - 1];
}
return out ;
}

NumericVector difflagC(NumericVector x, int lag){
int n = x.size();
NumericVector out(n - lag);

for(int i = lag; i < n; i++){
out[i - lag] = x[i] - x[i - lag];
}
return out;
}'
1. range.
NumericVector rangeC(NumericVector x){
double omin, omax;
int n = x.size();
NumericVector out(2);

omin = x[0];
omax = x[0];

for(int i = 1; i < n; i++){
omin = std::min(x[i], omin);
omax = std::max(x[i], omax);
}

out[0] = omin;
out[1] = omax;
return out;
1. var. Read about the approaches you can take on wikipedia. Whenever implementing a numerical algorithm, it’s always good to check what is already known about the problem.

## 41.2 Missing values

1. Q: Rewrite any of the functions from the first exercise to deal with missing values. If na.rm is true, ignore the missing values. If na.rm is false, return a missing value if the input contains any missing values. Some good functions to practice with are min(), max(), range(), mean(), and var().

A:

NumericVector minC(NumericVector x, bool narm){
int n = x.size();
LogicalVector index = is_na(x);
NumericVector omin(1);
bool na_check = false;
bool na_check_all = true;

for(int i = 0; i<n; i++){
if (index[i]) na_check = true;
}

for(int i = 0; i<n; i++){
if (!index[i]) na_check_all = false;
}

if (narm) {
for(int i = n-1; i >= 0; i--){
if (!index[i]) omin[0] = x[i];
}

for(int i = 1; i < n; i++) {
if (!index[i]) omin[0] = std::min(x[i], omin[0]);
}

if (na_check_all) {
omin[0] = NA_REAL;
}
} else if (na_check) {
omin[0] = NA_REAL;
} else {
omin[0] = x[0];
for(int i = 1; i < n; i++){
omin = std::min(x[i], omin[0]);
}
}

return omin;
}

NumericVector maxC(NumericVector x, bool narm){
int n = x.size();
LogicalVector index = is_na(x);
NumericVector omax(1);
bool na_check = false;
bool na_check_all = true;

for(int i = 0; i<n; i++){
if (index[i]) na_check = true;
}

for(int i = 0; i<n; i++){
if (!index[i]) na_check_all = false;
}

if (narm) {
for(int i = n-1; i >= 0; i--){
if (!index[i]) omax[0] = x[i];
}

for(int i = 1; i < n; i++) {
if (!index[i]) omax[0] = std::max(x[i], omax[0]);
}

if (na_check_all) {
omax[0] = NA_REAL;
}
} else if (na_check) {
omax[0] = NA_REAL;
} else {
omax[0] = x[0];
for(int i = 1; i < n; i++){
omax = std::max(x[i], omax[0]);
}
}

return omax;
}

NumericVector rangeC(NumericVector x, bool narm){
NumericVector out(2);
NumericVector omin(1);
NumericVector omax(1);

omin = minC(x, narm);
omax = maxC(x, narm);

out[0] = omin[0];
out[1] = omax[0];
return out;
}
}

NumericVector meanC(NumericVector x, bool narm){
int n = x.size();
LogicalVector index = is_na(x);
bool na_check = false;
bool na_check_all = true;
int n_corrected = 0;
NumericVector out(1);

for(int i = 0; i<n; i++){
if (index[i]) na_check = true;
}

for(int i = 0; i<n; i++){
if (!index[i]) na_check_all = false;
}

for(int i = 0; i<n; i++){
if (!index[i]) n_corrected++;
}

/* narm = T */
if (narm){
if (na_check_all) {
out[0] = NA_REAL;
}  else {
out[0] = 0;
for(int i = 0; i < n; ++i) {
if (!index[i]) out[0] += x[i] / n_corrected;
}
}
}

/* narm = F */
if (!narm){
if (na_check) {
out[0] = NA_REAL;
} else {
for(int i = 0; i < n; ++i) {
out[0] += x[i] / n;
}
}
}

return out;
}
2. Q: Rewrite cumsum() and diff() so they can handle missing values. Note that these functions have slightly more complicated behaviour.

A:

NumericVector cumsumC(NumericVector x) {
int n = x.size();
NumericVector out(n);
LogicalVector index = is_na(x);

out[0] = x[0];
for(int i = 1; i < n; ++i) {
if (index[i - 1]) {
out[i] = NA_REAL;
} else{
out[i] = out[i - 1] + x[i];
}
}

return out;
}

NumericVector difflagC(NumericVector x, int lag){
int n = x.size();
NumericVector out(n - lag);
LogicalVector index = is_na(x);

for(int i = lag; i < n; i++){
if ((index[i]) || (index[i - lag])) {
out[i - lag] = NA_REAL;
} else {
out[i - lag] = x[i] - x[i - lag];
}
}
return out;
}

## 41.3 The STL

To practice using the STL algorithms and data structures, implement the following using R functions in C++, using the hints provided:

1. median.default() using partial_sort.

#include <algorithm>
#include <Rcpp.h>
using namespace Rcpp;

// [[Rcpp::export]]
double medianC(NumericVector x) {
int n = x.size();
double out;
if (n % 2 == 0){
std::partial_sort (x.begin(), x.begin() + n / 2 + 1, x.end());
out = (x[n / 2 - 1] + x[n / 2]) / 2;
} else {
std::partial_sort (x.begin(), x.begin() + (n + 1) / 2, x.end());
out = x[(n + 1) / 2 - 1];
}

return out;
}
2. %in% using unordered_set and the find() or count() methods.

3. unique() using an unordered_set (challenge: do it in one line!).

// [[Rcpp::plugins(cpp11)]]
#include <Rcpp.h>
#include <unordered_set>
using namespace Rcpp;

// [[Rcpp::export]]
NumericVector uniqueC(NumericVector x) {
std::unordered_set<int> seen;
int n = x.size();
std::vector<double> out;

for (int i = 0; i < n; ++i) {
if (seen.insert(x[i]).second) out.push_back(x[i]);
}

return wrap(out);
}
4. min() using std::min(), or max() using std::max().

#include <Rcpp.h>
using namespace Rcpp;

// [[Rcpp::export]]
double minC(NumericVector x){
int n = x.size();
double out = x[0];

for (int i = 0; i < n; i++){
out = std::min(out, x[i]);
}

return out;
}
5. which.min() using min_element, or which.max() using max_element.

#include <Rcpp.h>
#include <algorithm>
#include <iterator>

using namespace Rcpp;

// [[Rcpp::export]]
double which_minC(NumericVector x){
int out;
out = std::distance(x.begin(),std::min_element(x.begin(),x.end()));
out++;

return out;
}
6. setdiff(), union(), and intersect() for integers using sorted ranges and set_union, set_intersection and set_difference.