Skip to content

Commit

Permalink
Move EFTHorner and correction loops into block where n > 1. This way …
Browse files Browse the repository at this point in the history
…nm1 > 0 and creating the pi and sigma matrices will not upset ASAN/UBSAN since they will always have at least one row.
  • Loading branch information
aadler committed Aug 23, 2023
1 parent c291392 commit d8f35d2
Showing 1 changed file with 39 additions and 35 deletions.
74 changes: 39 additions & 35 deletions src/compHorner.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,9 @@

// This is the y component of twoSum; the x component is the sum itself.
double twoSumy(double a, double b) {
volatile double x, y, z;
x = a + b;
z = x - a;
y = (a - (x - z)) + (b - z);
volatile double x = a + b;
volatile double z = x - a;
volatile double y = (a - (x - z)) + (b - z);
return(y);
}

Expand All @@ -36,33 +35,29 @@ double twoPrody(double a, double b) {
extern SEXP compHorner_c(SEXP x, SEXP a) {
const int m = LENGTH(x);
const int n = LENGTH(a);

// ret will be the final value returned to R. In the EFTHorner portion of the
// algorithm it replaces s0. In the HornerSum portion it replaces r0.
SEXP ret = PROTECT(allocVector(REALSXP, m));

// Error-Free-Transformation (EFT) Horner portion of Langlois et al. (2006)
const int nm1 = n - 1;
double piM[nm1][m];
double sigM[nm1][m];
// The first element of twoProd is used more than once so save to Ax.
volatile double Ax;
const int nm1 = n - 1; // Used often

// Per Hadley's suggestion to create helper pointer variable once
// http://adv-r.had.co.nz/C-interface.html#c-vectors - Accessing vector data
double *px = REAL(x);
double *pa = REAL(a);

// ret will be the final value returned to R. In the EFTHorner portion of the
// algorithm it replaces s0. In the HornerSum portion it replaces r0.
SEXP ret = PROTECT(allocVector(REALSXP, m));
double *pret = REAL(ret);

// Correction vector for compensated Horner
double correction[m];

// Since we will return 0 if n = 0---which is technically legal if asking for
// the best constant estimate or using rational to test polynomial---we must
// initialize the ret vector and the pi and sigma matrices with 0.
// initialize the ret vector and the correction vector to 0.
memset(pret, 0, m * sizeof(double));
memset(piM, 0, m * nm1 * sizeof(double));
memset(sigM, 0, m * nm1 * sizeof(double));
memset(correction, 0, m * sizeof(double));

// If n is at least 1, initialize ret with the last value of a. If n == 1 then
// there is no need to calculate piM or sigM.
// there is no need to calculate piM or sigM and correction is already 0.
if (n > 0) {
for (int i = 0; i < m; ++i) {
pret[i] = pa[nm1];
Expand All @@ -73,12 +68,30 @@ extern SEXP compHorner_c(SEXP x, SEXP a) {
// polynomial. The algorithm will follow Langlois et al.(2006), which as a
// Horner method, starts at the end and works backwards.
//
// Here is where piM and sigM are needed, but nm1 > 0 so they will be properly
// initialized.
//
// Also, looking at the algorithm, one doesn't need the entire matrix---just
// the "last" value. And one can simply overwrite old with new since each
// cell, once calculated, is never called on for a new cell. Since in C we are
// working one cell at a time anyway, we can read the old and write the new to
// it directly.
//
// The first loop will calculate the "standard" return and the pi and sigma
// matrices. The second loop applies the correction. Now, ASAN/UBSAN should
// not complain since piM and sigM are only defined when nm1 > 0.
//
// (AA: 2023-08-23)

if (n > 1) {
// x element of twoProdFMA used more than once so define as variable
volatile double Ax;
double piM[nm1][m];
double sigM[nm1][m];
memset(piM, 0, m * nm1 * sizeof(double));
memset(sigM, 0, m * nm1 * sizeof(double));

// Error-Free-Transformation (EFT) Horner portion of Langlois et al. (2006)
for (int j = nm1; j-- > 0; ) {
for (int i = 0; i < m; ++i) {
Ax = pret[i] * px[i];
Expand All @@ -87,25 +100,16 @@ extern SEXP compHorner_c(SEXP x, SEXP a) {
sigM[j][i] = twoSumy(Ax, pa[j]);
}
}
}

// Horner Sum portion of Langlois et al. (2006)
// Use correction vector instead of matrix, as above, and overwrite as we
// traverse, as above. Initialize and set as 0 which will be the result if a
// is of length 1 (constant polynomial).
double correction[m];
memset(correction, 0, m * sizeof(double));

// Now build "upwards" if needed.
if (nm1 > 0) {
for (int j = nm1; j-- > 0; ) {
for (int i = 0; i < m; ++i) {
correction[i] *= px[i];
correction[i] += piM[j][i] + sigM[j][i];
// Horner Sum portion of Langlois et al. (2006)
if (nm1 > 0) {
for (int j = nm1; j-- > 0; ) {
for (int i = 0; i < m; ++i) {
correction[i] *= px[i];
correction[i] += piM[j][i] + sigM[j][i];
}
}
}
}

// Add correction to value
for (int i = 0; i < m; ++i) {
pret[i] += correction[i];
Expand Down

0 comments on commit d8f35d2

Please sign in to comment.