A fast algorithm for maximum normalized subarray sum?

2 Answers

UPDATE

I added a version of estabroo's Kadane-based code to the testing below. It seems to show a difference of up to 10% in my testing (run the snippet for random tests).

(End update)

The best I could come up with so far as an approximation is a binary search on the target with random samples of window-size during the search O(log m * n * num_samples_constant), where m is the range. In testing I saw the variation between brute-force (limited to 5000 element array, ranged ±1000000000) and the latter vary between 0 and 30% with a sample size of 200 window-lengths. (Maybe another routine could further refine?)

The JavaScript code below runs 10 tests and reports smallest and largest diffs, followed by just the binary search on a longer array.

Other thoughts included using FFT to generate the sums but I don't know if there could be an efficient way to then correlate each sum with the subarray-length that generated it; as well as trying to find another representation of the problem:

f = sqrt(i - j) * (si - sj), for j < i
f^2 = sqrt(i - j) * (si - sj) * sqrt(i - j) * (si - sj)
    = (i - j) * (si^2 - 2si*sj + sj^2)
    = i*si^2 - 2i*si*sj + i*sj^2
      -j*si^2 + 2j*si*sj - j*sj^2

    = i*si^2 + 
      (-2sj, sj^2, -j, 2j*sj, -j*sj^2) // known before i
        dot (i*si, 1, si^2, si, 1)

(So if we solved a 5-dimensional convex hull update in log time, the 5-dimensional extreme point problem, and figured out if our candidate was positive or negative, we'd be good to go :)

function prefix_sums(A){
  let ps = new Array(A.length + 1).fill(0)
  for (let i=0; i<A.length; i++)
    ps[i + 1] = A[i] + ps[i]
  return ps
}

function brute_force(ps){
  let best = -Infinity
  let best_idxs = [-1, -1]
  for (let i=1; i<ps.length; i++){
    for (let j=0; j<i; j++){
      let s = (ps[i] - ps[j]) / Math.sqrt(i - j)
      if (s > best){
        best = s
        best_idxs = [j, i - 1]
      }
    }
  }
  return [best, best_idxs]
}

function get_high(A){
  return A.reduce((acc, x) => x > 0 ? acc + x : acc, 0)
}

function get_low(A){
  return Math.min.apply(null, A)
}


function f(A){
  let n = A.length
  let ps = prefix_sums(A)
  let high = get_high(A)
  let low = get_low(A)
  let best = [-1, -1]
  let T = low + (high - low) / 2
  let found = false

  while (low + EPSILON < high){
    T = low + (high - low) / 2
    // Search for T
    found = false

    for (let l=0; l<NUM_SAMPLES; l++){
      let w = Math.max(1, ~~(Math.random() * (n + 1)))

      for (let i=w; i<ps.length; i++){
        let s = (ps[i] - ps[i - w]) / Math.sqrt(w)
        if (s >= T){
          found = true
          best = [i - w, i - 1]
          break
        }
      }
      if (found)
        break
    }
    // Binary search
    if (found)
      low = T
    else
      high = T - EPSILON 
  }

  return [low, best]
}

function max_subarray(A){
    var max_so_far = max_ending_here = A[0]
    var startOld = start = end = 0
    var divb = divbo = 1
    //for i, x in enumerate(A[1:], 1):
    for (let i=1; i<A.length; i++){
        var x = A[i]
        divb = i - start + 1
        divbo = divb - 1
        if (divb <= 1){
            divb = 1
            divbo = 1
        }
        undo = max_ending_here * Math.sqrt(divbo)
        max_ending_here = Math.max(x, (undo + x)/Math.sqrt(divb))
        if (max_ending_here == x)
            start = i
        max_so_far = Math.max(max_so_far, max_ending_here)
        if (max_ending_here < 0)
            start = i + 1
        else if (max_ending_here == max_so_far){
            startOld = start
            end = i
        }
    }
    if (end == A.length-1){
        start = startOld + 1
        var new_max = max_so_far
        divbo = end - startOld + 1
        divb = divbo - 1
        while (start < end){
            new_max = (new_max * Math.sqrt(divbo) - A[start-1])/Math.sqrt(divb)
            if (new_max > max_so_far){
                max_so_far = new_max
                startOld = start
            }
            start += 1
        }
    }
    return [max_so_far , startOld, end]
}

const EPSILON = 1
const NUM_SAMPLES = 200

let m = 1000000000
let n = 5000
let A

let max_diff = 0
let min_diff = Infinity
let max_diff2 = 0
let min_diff2 = Infinity
let num_tests = 10

for (let i=0; i<num_tests; i++){
  A = []
  for (let i=0; i<n; i++)
    A.push([-1, 1][~~(2 * Math.random())] * Math.random() * m + Math.random())

  let f_A = f(A)
  let g_A = brute_force(prefix_sums(A))
  let m_A = max_subarray(A)
  let diff = (g_A[0] - f_A[0]) / g_A[0]
  max_diff = Math.max(max_diff, diff)
  min_diff = Math.min(min_diff, diff)
  let diff2 = (g_A[0] - m_A[0]) / g_A[0]
  max_diff2 = Math.max(max_diff2, diff2)
  min_diff2 = Math.min(min_diff2, diff2)
}

console.log(`${ n } element array`)
console.log(`${ num_tests } tests`)
console.log(`min_diff: ${ min_diff * 100 }%`)
console.log(`max_diff: ${ max_diff * 100 }%`)
console.log(`min_diff (Kadane): ${ min_diff2 * 100 }%`)
console.log(`max_diff (Kadane): ${ max_diff2 * 100 }%`)

n = 100000
A = []
for (let i=0; i<n; i++)
  A.push([-1, 1][~~(2 * Math.random())] * Math.random() * m + Math.random())

var start = +new Date()
console.log(`\n${ n } element array`)
console.log(JSON.stringify(f(A)))
console.log(`${ (new Date() - start) / 1000 } seconds`)

142

answered Sep 28 '22 09:09

גלעד ברקן

Interesting problem. Since you mentioned an interest in approximations, here's a 1−O(ε) approximation scheme that runs in O(nε⁻¹) time. It has the nice property of using only + and max, sidestepping the problem of catastrophic cancellation brought on by subtracting prefix sums. (Since the input contains negative numbers, it's still possible to get catastrophic cancellation, but then we could settle for the subarray containing only the large positive integer involved.)

Let k = ceil(ε⁻¹). In O(nε⁻¹) time we can evaluate every subarray of length between 1 and k using a straightforward algorithm. We evaluate longer subarrays in an approximate manner by iteratively coarsening the input and running basically the same algorithm. Since the input shrinks by a constant factor in each iteration, the total running time is O(nε⁻¹).

The coarsening procedure works as follows. We keep three derived arrays of the same length. Each position in the derived arrays corresponds to a subarray of length 2^ℓ in the original input. The three derived arrays are S (each element is the maximum sum of a suffix of the corresponding subarray), A (each element is the sum of the corresponding subarray), and P (each element is maximum sum of a prefix of the corresponding subarray). Considering S[i] + A[i+1] + … + A[j−1] + P[j] for some i < j, we have the maximum sum of a subarray of the input that starts in the subarray corresponding to position i and ends in the subarray corresponding to position j. The length of the subarray attaining this sum is between 2^ℓ (j−i−1) + 2 and 2^ℓ (j−i+1). This suffices to bound the objective (if the sum is positive, use the latter as the estimate for the number of elements; if the sum is negative, use the former) and can thus can approximately compare it to other subarrays.

To derive S′, A′, P′ from S, A, P in each iteration, we compute S′[i] = max(S[2i]+A[2i+1], S[2i+1]) and A′[i] = A[2i] + A[2i+1] and P′[i] = max(P[2i], A[2i] + P[2i+1]). If index 2i exists but 2i+1 does not, we set S′[2i] = S[2i] and A′[2i] = A[2i] and P′[2i] = P[2i].

The proof sketch of 1−O(ε) approximation is that, given the optimal subarray, we find the least ℓ such that its length is at most 2^ℓ−1k. Then we look at iteration ℓ, find the i and j, observe that S[i] + A[i+1] + … + A[j−1] + P[j] is at least as large as the sum of the optimal subarray, and bound the loss incurred by rounding up the denominator by a multiplicative factor of 1+O(ε).

answered Sep 28 '22 09:09

David Eisenstat

Related questions
                            
                                How to determine string S can be made from string T by deleting some characters, but at most K successive characters
                            
                                Querying on Java Object's without Database
                            
                                Curious memory consumption of pandas.unique()
                            
                                What is the best algorithm for this array-comparison problem?
                            
                                Finding k-nearest neighbors for a given vector?
                            
                                Route problem in a graph: minimize average edge cost instead of total cost
                            
                                Brodal priority queue implementation
                            
                                Which floating-point comparison is more accurate, and why?
                            
                                To find all the repeating substring in a given string
                            
                                amazon interview prob
                            
                                Programming Challenge: Easiest way to code this grid?
                            
                                How to implement solution for Race simulation problems
                            
                                Cycle finding algorithm
                            
                                How to work on big integers that don't fit into any of language's data structures
                            
                                Are there known implementations of the CIEDE2000 or CIE94 Delta-E color difference calculation algorithm?
                            
                                Seating people in a movie theater
                            
                                Find largest cuboid containing only 1's in an NxNxN binary array
                            
                                Difference between DIjkstra and BellmanFord algorithm [closed]
                            
                                Integer division by 7
                            
                                Given array and key, find the number of elements smaller than or equal to itself in its left and right sub array?

Donate For Us

If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!

Donate Us With

A fast algorithm for maximum normalized subarray sum?

Tags:

algorithm

graffe

People also ask

2 Answers

UPDATE

גלעד ברקן

David Eisenstat

Recent Activity

Donate For Us