/* Copyright (c) 2007 Massachusetts Institute of Technology
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

/* Generation of Sobol sequences in up to 1111 dimensions, based on
   the algorithms described in: P. Bratley and B. L. Fox, Algorithm
   659, ACM Trans.  Math. Soft. 14 (1), 88-100 (1988), as modified by:
   S. Joe and F. Y. Kuo, ACM Trans. Math. Soft 29 (1), 49-57 (2003).

   Note that the code below was written without even looking at the
   Fortran code from the TOMS paper, which is only semi-free (being
   under the restrictive ACM copyright terms).  Then I went to the
   Fortran code and took out the table of primitive polynomials and
   starting direction #'s ... since this is just a table of numbers
   generated by a deterministic algorithm, it is not copyrightable.
   (Obviously, the format of these tables then necessitated some
   slight modifications to the code.)

   For the test integral of Joe and Kuo (see the main() program
   below), I get exactly the same results for integrals up to 1111
   dimensions compared to the table of published numbers (to the 5
   published significant digits).

   This is not to say that the authors above should not be credited
   for their clear description of the algorithm (and their tabulation
   of the critical numbers).  Please cite them.  Just that I needed a
   free/open-source implementation. */

#include "common.h"

#include <stdlib.h>
#include <math.h>

#include "bu/assert.h"
#include "bu/malloc.h"
#include "bn/rand.h"
#include "bn/sobol.h"
#include "soboldata.h"


/* Period parameters of the Mersenne Twister style pseudo-random
 * generator implemented below (nlopt_init_genrand() and
 * nlopt_genrand_int32()). */
/* number of 32-bit words in the state vector NL_mt[] */
#define NL_N 624
/* offset between the state words combined by the recurrence */
#define NL_M 397
#define NL_MATRIX_A 0x9908b0dfUL   /* constant vector a */
#define NL_UPPER_MASK 0x80000000UL /* most significant w-r bits */
#define NL_LOWER_MASK 0x7fffffffUL /* least significant r bits */

/* Maximum supported dimension of Sobol output array; bn_sobol_create()
 * allocates the result vector (cvec) with this many entries. */
#define SOBOL_MAXDIM 1111


/* State of one Sobol sequence generator.  It also carries the state of
 * the pseudo-random generator that sobol_next_01() falls back on when
 * sobol_gen() reports that no further Sobol points are available. */
struct bn_soboldata {
    unsigned sdim; /* dimension of sequence being generated */
    uint32_t *mdata; /* array of length 32 * sdim */
    uint32_t *m[32]; /* more convenient pointers to mdata, of direction #s;
			m[j] points at mdata + j * sdim */
    uint32_t *x; /* previous x = x_n, array of length sdim */
    unsigned *b; /* position of fixed point in x[i] is after bit b[i] */
    uint32_t n; /* number of x's generated so far */
    uint32_t NL_mt[NL_N]; /* the array for the state vector  */
    int NL_mti; /* index of the next NL_mt[] word to hand out;
		   NL_mti==NL_N+1 means NL_mt[] is not initialized */
    double *cvec;  /* The current sequence vector (allocated with
		      SOBOL_MAXDIM entries by bn_sobol_create()) */
};


/* Seed the pseudo-random state vector sd->NL_mt[] from the 32 low bits
 * of s.  On return sd->NL_mti == NL_N, so the next call to
 * nlopt_genrand_int32() regenerates the whole vector before use. */
static void nlopt_init_genrand(struct bn_soboldata *sd, unsigned long s)
{
    uint32_t *state = sd->NL_mt;
    int idx;

    state[0] = s & 0xffffffffUL;
    for (idx = 1; idx < NL_N; idx++) {
	const uint32_t prev = state[idx - 1];
	/* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
	/* In the previous versions, MSBs of the seed affect   */
	/* only MSBs of the array mt[].                        */
	/* 2002/01/09 modified by Makoto Matsumoto             */
	const unsigned long next = 1812433253UL * (prev ^ (prev >> 30)) + idx;

	/* keep only 32 bits, for machines where unsigned long is wider */
	state[idx] = next & 0xffffffffUL;
    }

    /* mark the vector as seeded but fully consumed */
    sd->NL_mti = NL_N;
}


/* Return the next pseudo-random number on the [0, 0xffffffff] interval.
 * When all NL_N state words have been consumed the whole state vector
 * is regenerated first; an unseeded generator is seeded with 5489. */
static uint32_t nlopt_genrand_int32(struct bn_soboldata *sd)
{
    uint32_t word;

    if (sd->NL_mti >= NL_N) { /* generate N words at one time */
	int idx;

	/* if init_genrand() has not been called, a default seed is used */
	if (sd->NL_mti == NL_N+1)
	    nlopt_init_genrand(sd, 5489UL);

	/* Each word is rebuilt from the top bit of itself, the low bits
	 * of its successor and the word NL_M places ahead, with both
	 * indices wrapping around the end of the vector. */
	for (idx = 0; idx < NL_N; idx++) {
	    const int succ = (idx + 1) % NL_N;
	    const int src = (idx + NL_M) % NL_N;
	    const uint32_t mix = (sd->NL_mt[idx] & NL_UPPER_MASK) | (sd->NL_mt[succ] & NL_LOWER_MASK);
	    const uint32_t twist = (mix & 0x1UL) ? (uint32_t)NL_MATRIX_A : (uint32_t)0x0UL;

	    sd->NL_mt[idx] = sd->NL_mt[src] ^ (mix >> 1) ^ twist;
	}

	sd->NL_mti = 0;
    }

    word = sd->NL_mt[sd->NL_mti++];

    /* Tempering */
    word ^= (word >> 11);
    word ^= (word << 7) & 0x9d2c5680UL;
    word ^= (word << 15) & 0xefc60000UL;
    word ^= (word >> 18);

    return word;
}


/* Return a pseudo-random double on [0, 1) with 53-bit resolution, built
 * from the top 27 bits of one 32-bit draw and the top 26 bits of the
 * next one. */
static double nlopt_genrand_res53(struct bn_soboldata *sd)
{
    const uint32_t hi27 = nlopt_genrand_int32(sd) >> 5;
    const uint32_t lo26 = nlopt_genrand_int32(sd) >> 6;
    const double mantissa = hi27 * 67108864.0 + lo26; /* hi27 * 2^26 + lo26 */

    return mantissa * (1.0/9007199254740992.0); /* scale by 2^-53 */
}
/* The real-valued version above is due to Isaku Wada (added 2002/01/09). */


/* Uniform pseudo-random number in [a, b) with 53-bit resolution (added
 * by SGJ).  Draws from the generator state stored in sd.  Exported only
 * because the libbn tests use it; it is not public API. */
BN_EXPORT double _sobol_urand(struct bn_soboldata *sd, double a, double b)
{
    const double span = b - a;

    return a + span * nlopt_genrand_res53(sd);
}


/* Index (0 = least significant) of the lowest clear bit of n.
 *
 * The problem is turned into "find the lowest set bit" by inverting n.
 * With a recent enough gcc-compatible compiler the count-trailing-zeros
 * builtin answers that directly; otherwise a 32-bit version of the
 * multiply-and-look-up algorithm from Knuth, _The Art of Computer
 * Programming_, volume 4A (draft fascicle), section 7.1.3, "Bitwise
 * tricks and techniques" is used.
 *
 * Assumes n has a zero bit, i.e. n < 2^32 - 1.
 *
 */
static unsigned rightzero32(uint32_t n)
{
#if !defined(__GNUC__) || (__GNUC__ < 3) || (__GNUC__ == 3 && __GNUC_MINOR__ < 4)
    static const unsigned decode[32] = {0, 1, 2, 26, 23, 3, 15, 27, 24, 21, 19, 4, 12, 16, 28, 6, 31, 25, 22, 14, 20, 18, 11, 5, 30, 13, 17, 10, 29, 9, 8, 7};
    const uint32_t magic = 0x05f66a47; /* magic number, found by brute force */
    const uint32_t inverted = ~n; /* change to rightmost-one problem */
    const uint32_t lowest = inverted & (~inverted + 1u); /* isolate that bit */
    const uint32_t hashed = magic * lowest; /* uint32_t keeps the mult. at 32 bits */

    return decode[hashed >> 27];
#else
    const uint32_t inverted = ~n; /* change to rightmost-one problem */

    return __builtin_ctz(inverted); /* gcc builtin for version >= 3.4 */
#endif
}


/* generate the next term x_{n+1} in the Sobol sequence, as an array
   x[sdim] of numbers in (0, 1).  Returns 1 on success, 0 on failure
   (if too many #'s generated).

   sd->x[i] holds the i-th coordinate as a fixed-point integer whose
   binary point sits just above bit sd->b[i]; the value written to x[i]
   is therefore sd->x[i] / 2^(b[i]+1).  The scaling is done with ldexp()
   instead of dividing by (1U << (b+1)): the bit position reaches 31
   once n == 2^31 - 1, and shifting a 32-bit unsigned by 32 is
   undefined.  For smaller positions the result is identical, since
   both forms scale by an exact power of two. */
static int sobol_gen(struct bn_soboldata *sd, double *x)
{
    unsigned c, i, sdim;

    if (sd->n == 4294967295U)
	return 0; /* n == 2^32 - 1 ... we would need to switch to a
		     64-bit version to generate more terms. */

    /* c selects which direction number gets XORed in for this step */
    c = rightzero32(sd->n++);
    sdim = sd->sdim;
    for (i = 0; i < sdim; ++i) {
	unsigned b = sd->b[i];

	if (b >= c) {
	    /* align direction number with the current fixed point */
	    sd->x[i] ^= sd->m[c][i] << (b - c);
	}
	else {
	    /* need more fractional bits: move the fixed point up to c */
	    sd->x[i] = (sd->x[i] << (c - b)) ^ sd->m[c][i];
	    sd->b[i] = c;
	    b = c;
	}
	x[i] = ldexp((double) (sd->x[i]), -(int)(b + 1));
    }
    return 1;
}


/* Store the next Sobol vector, with each component in (0, 1), into
 * s->cvec[0 .. sdim-1]. */
static void sobol_next_01(struct bn_soboldata *s)
{
    unsigned int dim;

    if (sobol_gen(s, s->cvec))
	return;

    /* fall back on pseudo random numbers in the unlikely event
       that we exceed 2^32-1 points */
    for (dim = 0; dim < s->sdim; ++dim)
	s->cvec[dim] = _sobol_urand(s, 0.0, 1.0);
}


/* Set up sd for an sdim-dimensional sequence: allocate and fill the
 * direction-number table, allocate and zero the per-dimension state,
 * and reset the point counter.  A non-zero seed seeds the fallback
 * pseudo-random generator immediately; otherwise it stays marked as
 * unseeded.  Returns 1 on success, or 0 (without touching sd) when
 * sdim is zero or larger than MAXDIM. */
static int sobol_init(struct bn_soboldata *sd, unsigned sdim, unsigned long seed)
{
    unsigned dim, bit;

    if (sdim == 0 || sdim > MAXDIM)
	return 0;

    sd->mdata = (uint32_t *)bu_calloc(sdim * 32, sizeof(uint32_t), "sobol mdata");

    /* mti==N+1 means mt[N] is not initialized */
    sd->NL_mti = NL_N + 1;

    /* carve mdata into 32 rows of sdim entries each */
    for (bit = 0; bit < 32; ++bit) {
	sd->m[bit] = sd->mdata + bit * sdim;
	sd->m[bit][0] = 1; /* special-case Sobol sequence */
    }

    for (dim = 1; dim < sdim; ++dim) {
	const uint32_t poly = sobol_a[dim-1];
	uint32_t rest;
	unsigned deg = 0, term;

	/* count the significant bits of poly; one less is its degree */
	for (rest = poly; rest; rest >>= 1)
	    ++deg;
	deg--;

	/* set initial values of m from table */
	for (bit = 0; bit < deg; ++bit)
	    sd->m[bit][dim] = sobol_minit[bit][dim-1];

	/* fill in remaining values using recurrence */
	for (bit = deg; bit < 32; ++bit) {
	    uint32_t coeffs = poly;
	    uint32_t value = sd->m[bit - deg][dim];

	    for (term = 0; term < deg; ++term) {
		value ^= ((coeffs & 1) * sd->m[bit - deg + term][dim]) << (deg - term);
		coeffs >>= 1;
	    }
	    sd->m[bit][dim] = value;
	}
    }

    sd->x = (uint32_t *)bu_calloc(sdim, sizeof(uint32_t), "sobol x");
    sd->b = (unsigned *)bu_calloc(sdim, sizeof(unsigned), "sobol b");

    for (dim = 0; dim < sdim; ++dim) {
	sd->x[dim] = 0;
	sd->b[dim] = 0;
    }

    sd->n = 0;
    sd->sdim = sdim;

    if (seed)
	nlopt_init_genrand(sd, seed);

    return 1;
}


/********************************************************************/
/* BN API for Sobol sequences */


/* Allocate and initialize a Sobol sequence generator.
 *
 * sdim is the dimension of the generated vectors; it is asserted to be
 * no larger than BN_SOBOL_MAXDIM.  A non-zero seed seeds the fallback
 * pseudo-random generator.
 *
 * Returns the new generator, to be released with bn_sobol_destroy(),
 * or NULL when sobol_init() rejects sdim (e.g. sdim == 0).  Previously
 * the result of sobol_init() was ignored and a partially initialized
 * object with NULL tables was handed back in that case. */
struct bn_soboldata *
bn_sobol_create(unsigned int sdim, unsigned long seed)
{
    struct bn_soboldata *s = NULL;
    BU_ASSERT(sdim <= BN_SOBOL_MAXDIM);

    s = (struct bn_soboldata *)bu_calloc(1, sizeof(struct bn_soboldata), "sobol data");
    s->cvec = (double *)bu_calloc(SOBOL_MAXDIM, sizeof(double), "results array");

    if (!sobol_init(s, sdim, seed)) {
	/* sobol_init() allocates nothing when it fails, so only the two
	 * allocations made above have to be undone */
	bu_free(s->cvec, "sobol cvec");
	bu_free(s, "sobol");
	return NULL;
    }

    return s;
}


/* Release a generator obtained from bn_sobol_create(), including its
 * tables and result vector.  A NULL sd is ignored. */
void
bn_sobol_destroy(struct bn_soboldata *sd)
{
    if (sd) {
	bu_free(sd->mdata, "sobol mdata");
	bu_free(sd->x, "sobol x");
	bu_free(sd->b, "sobol b");
	bu_free(sd->cvec, "sobol cvec");
	bu_free(sd, "sobol");
    }
}


/* Advance the generator and return its next vector.
 *
 * The returned pointer is s->cvec, owned by the generator and
 * overwritten by the next call; its first sdim entries are valid.
 * When both lb and ub are given (arrays of at least sdim entries),
 * component i is mapped from the unit interval onto the range between
 * lb[i] and ub[i]; if either is NULL the unscaled values are returned.
 *
 * Returns NULL when s is NULL, matching the NULL tolerance of
 * bn_sobol_skip() and bn_sobol_destroy(). */
double *
bn_sobol_next(struct bn_soboldata *s, const double *lb, const double *ub)
{
    unsigned int i;

    if (!s)
	return NULL;

    sobol_next_01(s);

    if (lb && ub) {
	for (i = 0; i < s->sdim; ++i) {
	    s->cvec[i] = lb[i] + (ub[i] - lb[i]) * s->cvec[i];
	}
    }

    return s->cvec;
}


/* Discard leading points of the sequence.
 *
 * n is the number of points the caller intends to generate.  The
 * number of points skipped is the largest power of two that is
 * strictly smaller than n, and one point when n <= 2.  A NULL s is
 * ignored.
 *
 * The power of two is found without computing k*2.  The previous
 * "k*2 < n" test wrapped around to 0 for n > 2^31 (32-bit unsigned),
 * which reset k to 0 and looped forever. */
void
bn_sobol_skip(struct bn_soboldata *s, unsigned n)
{
    unsigned int k = 1;

    if (!s)
	return;

    /* for n > 0, k <= (n - 1) / 2 is equivalent to k*2 < n but cannot
     * overflow */
    while (n > 0 && k <= (n - 1) / 2)
	k *= 2;

    while (k-- > 0) {
	/* once the sequence is exhausted further calls do nothing */
	if (!sobol_gen(s, s->cvec))
	    break;
    }
}


/*
 * Local Variables:
 * mode: C
 * tab-width: 8
 * indent-tabs-mode: t
 * c-file-style: "stroustrup"
 * End:
 * ex: shiftwidth=4 tabstop=8
 */

