Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

randomInt function that can uniformly handle the full range of MIN and MAX_SAFE_INTEGER

Requirements and background

I want a generic randomInt function that can handle a range of values upto and including Number.MIN_SAFE_INTEGER to Number.MAX_SAFE_INTEGER and that the values returned are uniformly distributed.

So, I started at MDN and looked at the Math.random page. They give an example, which appears to be uniformly distributed.

// Returns a random integer between min (included) and max (excluded)
// Using Math.round() will give you a non-uniform distribution!
function getRandomInt(min, max) {
  return Math.floor(Math.random() * (max - min)) + min;
}

But it comes with the following note.

Note that as numbers in JavaScript are IEEE 754 floating point numbers with round-to-nearest-even behavior, the ranges claimed for the functions below (excluding the one for Math.random() itself) aren't exact. If extremely large bounds are chosen (2^53 or higher), it's possible in extremely rare cases to calculate the usually-excluded upper bound.

I'm wanting to use the range -(2^53 - 1) and 2^53 - 1, so I believe that this note does not apply. I then notice max - min: this is going to be a problem for the larger ranges that I have specified:

Example - max range

Number.MAX_SAFE_INTEGER - Number.MIN_SAFE_INTEGER > Number.MAX_SAFE_INTEGER

Solution 1 - not a solution

Off I go and have a little play and come up with the following code, based on the MDN example and my requirements.

Number.MAX_SAFE_INTEGER = Number.MAX_SAFE_INTEGER || 9007199254740991;

Number.MIN_SAFE_INTEGER = Number.MIN_SAFE_INTEGER || -Number.MAX_SAFE_INTEGER;

Number.toInteger = Number.toInteger || function (inputArg) {
    var number = +inputArg,
        val = 0;

    if (number === number) {
        if (!number || number === Infinity || number === -Infinity) {
            val = number;
        } else {
            val = (number > 0 || -1) * Math.floor(Math.abs(number));
        }
    }

    return val;
};

function clampSafeInt(number) {
    return Math.min(Math.max(Number.toInteger(number), Number.MIN_SAFE_INTEGER), Number.MAX_SAFE_INTEGER);
}

// Returns a random integer between min (included) and max (included)
// Using Math.round() will give you a non-uniform distribution!
function randomInt(min, max) {
    var tmp,
        val;

    if (arguments.length === 1) {
        max = min;
        min = 0;
    }

    min = clampSafeInt(min);
    max = clampSafeInt(max);
    if (min > max) {
        tmp = min;
        min = max;
        max = tmp;
    }

    tmp = max - min + 1;
    if (tmp > Number.MAX_SAFE_INTEGER) {
        throw new RangeError('Difference of max and min is greater than Number.MAX_SAFE_INTEGER: ' + tmp);
    } else {
        val = Math.floor(Math.random() * tmp) + min;
    }
    
    return val;
}

console.log(randomInt(Number.MIN_SAFE_INTEGER, Number.MAX_SAFE_INTEGER));

But as you can see, this is going to throw an error well before the larger ranges that I require.

Solution 2 - solves the math issue but seems to break uniformity

So I have a fiddle and come up with the following.

Number.MAX_SAFE_INTEGER = Number.MAX_SAFE_INTEGER || 9007199254740991;

Number.MIN_SAFE_INTEGER = Number.MIN_SAFE_INTEGER || -Number.MAX_SAFE_INTEGER;

Number.toInteger = Number.toInteger || function (inputArg) {
    var number = +inputArg,
        val = 0;

    if (number === number) {
        if (!number || number === Infinity || number === -Infinity) {
            val = number;
        } else {
            val = (number > 0 || -1) * Math.floor(Math.abs(number));
        }
    }

    return val;
};

function clampSafeInt(number) {
    return Math.min(Math.max(Number.toInteger(number), Number.MIN_SAFE_INTEGER), Number.MAX_SAFE_INTEGER);
}

// Returns a random integer between min (included) and max (included)
// Using Math.round() will give you a non-uniform distribution!
function randomInt(min, max) {
    var tmp,
        val;

    if (arguments.length === 1) {
        max = min;
        min = 0;
    }

    min = clampSafeInt(min);
    max = clampSafeInt(max);
    if (min > max) {
        tmp = min;
        min = max;
        max = tmp;
    }

    tmp = max - min + 1;
    if (tmp > Number.MAX_SAFE_INTEGER) {
        if (Math.floor(Math.random() * 2)) {
            val = Math.floor(Math.random() * (max - 0 + 1)) + 0;
        } else {
            val = Math.floor(Math.random() * (0 - min + 1)) + min;
        }
    } else {
        val = Math.floor(Math.random() * tmp) + min;
    }
    
    return val;
}

console.log(randomInt(Number.MIN_SAFE_INTEGER, Number.MAX_SAFE_INTEGER));

While we no longer throw an error and the math appears to be within the max safe integer values, I am not sure exactly how this has effected the uniform distribution of the original MDN example (if it was uniformly distributed)?

My testing seems to suggest that this breaks the uniform distribution.

Graph of distribution

function getData() {
  var x = {},
    c = 1000000,
    min = -20,
    max = 20,
    q,
    i;

  for (i = 0; i < c; i += 1) {
    if (Math.floor(Math.random() * 2)) {
      q = Math.floor(Math.random() * (max - 0 + 1)) + 0;
    } else {
      q = Math.floor(Math.random() * (1 - min + 1)) + min;
    }

    if (!x[q]) {
      x[q] = 1;
    } else {
      x[q] += 1;
    }
  };

  return Object.keys(x).sort(function(x, y) {
    return x - y;
  }).map(function(key, index) {
    return {
      'q': +key,
      'p': (x[key] / c) * 100
    };
  });
}

var data = getData(),
  margin = {
    top: 20,
    right: 20,
    bottom: 30,
    left: 50
  },
  width = 960 - margin.left - margin.right,
  height = 500 - margin.top - margin.bottom,
  x = d3.scale.linear().range([0, width]),
  y = d3.scale.linear().range([height, 0]),
  xAxis = d3.svg.axis().scale(x).orient("bottom"),
  yAxis = d3.svg.axis().scale(y).orient("left"),
  line = d3.svg.line().x(function(d) {
    return x(d.q);
  }).y(function(d) {
    return y(d.p);
  }),
  svg = d3.select("body").append("svg")
  .attr("width", width + margin.left + margin.right)
  .attr("height", height + margin.top + margin.bottom)
  .append("g")
  .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

x.domain(d3.extent(data, function(d) {
  return d.q;
}));

y.domain(d3.extent(data, function(d) {
  return d.p;
}));

svg.append("g")
  .attr("class", "x axis")
  .attr("transform", "translate(0," + height + ")")
  .call(xAxis);

svg.append("g")
  .attr("class", "y axis")
  .call(yAxis);

svg.append("path")
  .datum(data)
  .attr("class", "line")
  .attr("d", line);
body {
  font: 10px sans-serif;
}
.axis path,
.axis line {
  fill: none;
  stroke: #000;
  shape-rendering: crispEdges;
}
.line {
  fill: none;
  stroke: steelblue;
  stroke-width: 1.5px;
}
<script src="https://cdnjs.cloudflare.com/ajax/libs/d3/3.4.11/d3.min.js"></script>

Solution 3 - not a solution

So on I press and take a look creating Box-Muller Transform function for creating the random normally distributed range that I thought I required (but my mistake I wanted uniformly distributed). I did some reading and chose rejection sampling as the method to generate observations from a distribution. Found out how to calculate the deviation for a range without having to use Math.sqrt:

If the value of x is negative, Math.sqrt() returns NaN

This is what I came up with.

Number.MAX_SAFE_INTEGER = Number.MAX_SAFE_INTEGER || 9007199254740991;

Number.MIN_SAFE_INTEGER = Number.MIN_SAFE_INTEGER || -Number.MAX_SAFE_INTEGER;

Number.toInteger = Number.toInteger || function (inputArg) {
    var number = +inputArg,
        val = 0;

    if (number === number) {
        if (!number || number === Infinity || number === -Infinity) {
            val = number;
        } else {
            val = (number > 0 || -1) * Math.floor(Math.abs(number));
        }
    }

    return val;
};

function clampSafeInt(number) {
    return Math.min(Math.max(Number.toInteger(number), Number.MIN_SAFE_INTEGER), Number.MAX_SAFE_INTEGER);
}

var boxMullerRandom = (function () {
    var phase = 0,
        RAND_MAX,
        array,
        random,
        x1, x2, w, z;

    if (crypto && crypto.getRandomValues) {
        RAND_MAX = Math.pow(2, 32) - 1;
        array = new Uint32Array(1);
        random = function () {
            crypto.getRandomValues(array);

            return array[0] / RAND_MAX;
        };
    } else {
        random = Math.random;
    }

    return function () {
        if (!phase) {
            do {
                x1 = 2.0 * random() - 1.0;
                x2 = 2.0 * random() - 1.0;
                w = x1 * x1 + x2 * x2;
            } while (w >= 1.0);

            w = Math.sqrt((-2.0 * Math.log(w)) / w);
            z = x1 * w;
        } else {
            z = x2 * w;
        }

        phase ^= 1;

        return z;
    }
}());

function rejectionSample(stdev, mean, from, to) {
    var retVal;
    
    do {
        retVal = (boxMullerRandom() * stdev) + mean;
    } while (retVal < from || to < retVal);

    return retVal;
}

function randomInt(min, max) {
    var tmp,
        val;

    if (arguments.length === 1) {
        max = min;
        min = 0;
    }

    min = clampSafeInt(min);
    max = clampSafeInt(max);
    if (min > max) {
        tmp = min;
        min = max;
        max = tmp;
    }

    tmp = {};
    tmp.mean = (min / 2) + (max / 2);
    tmp.variance = (Math.pow(min - tmp.mean, 2) + Math.pow(max - tmp.mean, 2)) / 2;
    tmp.deviation = Math.sqrt(tmp.variance);
    console.log(tmp);
    return Math.floor(rejectionSample(tmp.deviation, tmp.mean, min, max + 1));
}

console.log(randomInt(Number.MIN_SAFE_INTEGER, Number.MAX_SAFE_INTEGER));

Not sure that I have done everything correctly (haven't broken the normal distribution), but on small integer ranges I am seeing the correct range of random integers generated.

But there is still a problem when I use the maximum limits of the range (or actually before those limits). The math still goes outside of the Number.MAX_SAFE_INTEGER value. Output from above console.log(tmp);

{mean: 0, variance: 8.112963841460666e+31, deviation: 9007199254740991} 

As you can see the calculated variance is not safe. This algorithm can be ignored due to my confusion in distribution types.

Graph of distribution

I've included this so that you can see that I was actually quite close to having this work as a normal distribution, even though this is not what I actually required. It may aid someone that is looking to perform this kind of distribution.

Number.MAX_SAFE_INTEGER = Number.MAX_SAFE_INTEGER || 9007199254740991;

Number.MIN_SAFE_INTEGER = Number.MIN_SAFE_INTEGER || -Number.MAX_SAFE_INTEGER;

Number.toInteger = Number.toInteger || function(inputArg) {
  var number = +inputArg,
    val = 0;

  if (number === number) {
    if (!number || number === Infinity || number === -Infinity) {
      val = number;
    } else {
      val = (number > 0 || -1) * Math.floor(Math.abs(number));
    }
  }

  return val;
};

function clampSafeInt(number) {
  return Math.min(Math.max(Number.toInteger(number), Number.MIN_SAFE_INTEGER), Number.MAX_SAFE_INTEGER);
}

var boxMullerRandom = (function() {
  var phase = 0,
    RAND_MAX,
    array,
    random,
    x1, x2, w, z;

  if (crypto && crypto.getRandomValues) {
    RAND_MAX = Math.pow(2, 32) - 1;
    array = new Uint32Array(1);
    random = function() {
      crypto.getRandomValues(array);

      return array[0] / RAND_MAX;
    };
  } else {
    random = Math.random;
  }

  return function() {
    if (!phase) {
      do {
        x1 = 2.0 * random() - 1.0;
        x2 = 2.0 * random() - 1.0;
        w = x1 * x1 + x2 * x2;
      } while (w >= 1.0);

      w = Math.sqrt((-2.0 * Math.log(w)) / w);
      z = x1 * w;
    } else {
      z = x2 * w;
    }

    phase ^= 1;

    return z;
  }
}());

function rejectionSample(stdev, mean, from, to) {
  var retVal;

  do {
    retVal = (boxMullerRandom() * stdev) + mean;
  } while (retVal < from || to < retVal);

  return retVal;
}

function randomInt(min, max) {
  var tmp,
    val;

  if (arguments.length === 1) {
    max = min;
    min = 0;
  }

  min = clampSafeInt(min);
  max = clampSafeInt(max);
  if (min > max) {
    tmp = min;
    min = max;
    max = tmp;
  }

  tmp = {};
  tmp.mean = (min / 2) + (max / 2);
  tmp.variance = (Math.pow(min - tmp.mean, 2) + Math.pow(max - tmp.mean, 2)) / 2;
  tmp.deviation = Math.sqrt(tmp.variance);

  return Math.floor(rejectionSample(tmp.deviation, tmp.mean, min, max + 1));
}

function getData() {
  var x = {},
    c = 1000000,
    q,
    i;

  for (i = 0; i < c; i += 1) {
    q = randomInt(-9, 3);
    if (!x[q]) {
      x[q] = 1;
    } else {
      x[q] += 1;
    }
  };

  return Object.keys(x).sort(function(x, y) {
    return x - y;
  }).map(function(key) {
    return {
      'q': +key,
      'p': x[key] / c
    };
  });
}

var data = getData(),
  margin = {
    top: 20,
    right: 20,
    bottom: 30,
    left: 50
  },
  width = 960 - margin.left - margin.right,
  height = 500 - margin.top - margin.bottom,
  x = d3.scale.linear().range([0, width]),
  y = d3.scale.linear().range([height, 0]),
  xAxis = d3.svg.axis().scale(x).orient("bottom"),
  yAxis = d3.svg.axis().scale(y).orient("left"),
  line = d3.svg.line().x(function(d) {
    return x(d.q);
  }).y(function(d) {
    return y(d.p);
  }),
  svg = d3.select("body").append("svg")
  .attr("width", width + margin.left + margin.right)
  .attr("height", height + margin.top + margin.bottom)
  .append("g")
  .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

x.domain(d3.extent(data, function(d) {
  return d.q;
}));

y.domain(d3.extent(data, function(d) {
  return d.p;
}));

svg.append("g")
  .attr("class", "x axis")
  .attr("transform", "translate(0," + height + ")")
  .call(xAxis);

svg.append("g")
  .attr("class", "y axis")
  .call(yAxis);

svg.append("path")
  .datum(data)
  .attr("class", "line")
  .attr("d", line);
body {
  font: 10px sans-serif;
}
.axis path,
.axis line {
  fill: none;
  stroke: #000;
  shape-rendering: crispEdges;
}
.line {
  fill: none;
  stroke: steelblue;
  stroke-width: 1.5px;
}
<script src="https://cdnjs.cloudflare.com/ajax/libs/d3/3.4.11/d3.min.js"></script>

What is or is there a solution?

So, what am I missing? Is there some simple method that I have overlooked? Must I use a big number library as a solution? How to test the distribution: I have some graphs that I'm plotting, which is fine for a small range but the larger ranges are just not possible?

Please put me out of my misery on this one. :)

like image 306
Xotic750 Avatar asked Feb 11 '15 18:02

Xotic750


2 Answers

A solution that works, and the route that I have gone down is to use a BigNumber library. I still feel that there must be a solution to this problem without the need to rely on a BigNumber library, but I haven't found any other way as of yet.

console.log(window);
Number.MAX_SAFE_INTEGER = Number.MAX_SAFE_INTEGER || 9007199254740991;

Number.MIN_SAFE_INTEGER = Number.MIN_SAFE_INTEGER || -Number.MAX_SAFE_INTEGER;

Number.toInteger = Number.toInteger || function (inputArg) {
    var number = +inputArg,
        val = 0;

    if (number === number) {
        if (!number || number === Infinity || number === -Infinity) {
            val = number;
        } else {
            val = (number > 0 || -1) * Math.floor(Math.abs(number));
        }
    }

    return val;
};

function clampSafeInt(number) {
    return Math.min(Math.max(Number.toInteger(number), Number.MIN_SAFE_INTEGER), Number.MAX_SAFE_INTEGER);
}

// Returns a random integer between min (included) and max (included)
// Using Math.round() will give you a non-uniform distribution!
function randomInt(min, max) {
    var tmp,
    val;

    if (arguments.length === 1) {
        max = min;
        min = 0;
    }

    min = clampSafeInt(min);
    max = clampSafeInt(max);
    if (min > max) {
        tmp = min;
        min = max;
        max = tmp;
    }

    tmp = max - min + 1;
    if (tmp > Number.MAX_SAFE_INTEGER) {
        tmp = new Big(max).minus(min).plus(1);
        val = Math.floor(tmp.times(Math.random())) + min;
    } else {
        val = Math.floor(Math.random() * tmp) + min;
    }

    return val;
}

console.log(randomInt(Number.MIN_SAFE_INTEGER, Number.MAX_SAFE_INTEGER));
<script src="https://rawgithub.com/MikeMcl/big.js/master/big.min.js"></script>
like image 150
Xotic750 Avatar answered Oct 10 '22 05:10

Xotic750


For anyone else stumbling on this old question: there's a way to completely side-step the numeric boundary issues the OP describes by altogether avoiding interpreting the return value of Math.random() as a floating point type:

const all64RandomBits = Float64Array.of(Math.random()).buffer

The above line simply grabs the binary buffer underlying the Typed Array type that has the same IEEE 754 encoding as the JavaScript Number type: a Float64Array.

Once you have this BufferArray, you can go obtain whatever amount of random bits you need by choosing an appropriate integer view and/or applying bitwise ANDs, ORs, and shifts as needed. For example, want to convert all 64 random bits into 2 numbers each with an unsigned integer value in the range 0 - UINT32_MAX? All you need is this cute little one-liner:

const [x, y] = new Uint32Array(Float64Array.of(Math.random()).buffer)
like image 41
Will Avatar answered Oct 10 '22 04:10

Will