Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Find all k-size subsets with sum s of an n-size bag of duplicate unsorted positive integers

Please note that this is required for a C# .NET 2.0 project (Linq not allowed).

I know very similar questions have been asked here and I have already produce some working code (see below) but still would like an advice as to how to make the algorithm faster given k and s conditions.

This is what I've learnt so far: Dynamic programming is the most efficient way to finding ONE (not all) subsets. Please correct me if I am wrong. And is there a way of repeatedly calling the DP code to produce newer subsets till the bag (set with duplicates) is exhausted?

If not, then is there a way that may speed up the backtracking recursive algorithm I have below which does produce what I need but runs in O(2^n), I think, by taking s and k into account?

Here is my fixed bag of numbers that will NEVER change with n=114 and number range from 3 to 286:

    int[] numbers = new int[]
    {
        7, 286, 200, 176, 120, 165, 206, 75, 129, 109,
        123, 111, 43, 52, 99, 128, 111, 110, 98, 135,
        112, 78, 118, 64, 77, 227, 93, 88, 69, 60,
        34, 30, 73, 54, 45, 83, 182, 88, 75, 85,
        54, 53, 89, 59, 37, 35, 38, 29, 18, 45,
        60, 49, 62, 55, 78, 96, 29, 22, 24, 13,
        14, 11, 11, 18, 12, 12, 30, 52, 52, 44,
        28, 28, 20, 56, 40, 31, 50, 40, 46, 42,
        29, 19, 36, 25, 22, 17, 19, 26, 30, 20,
        15, 21, 11, 8, 8, 19, 5, 8, 8, 11,
        11, 8, 3, 9, 5, 4, 7, 3, 6, 3,
        5, 4, 5, 6
    };

Requirements

  • Space limit to 2-3GB max but time should be O(n^something) not (something^n).

  • The bag must not be sorted and duplicate must not be removed.

  • The result should be the indices of the numbers in the matching subset, not the numbers themselves (as we have duplicates).

Dynamic Programming Attempt

Here is the C# dynamic programming version adapted from an answer to a similar question here on stackoverflow.com:

using System;
using System.Collections.Generic;

namespace Utilities
{
    public static class Combinations
    {
        private static Dictionary<int, bool> m_memo = new Dictionary<int, bool>();
        private static Dictionary<int, KeyValuePair<int, int>> m_previous = new Dictionary<int, KeyValuePair<int, int>>();
        static Combinations()
        {
            m_memo.Clear();
            m_previous.Clear();
            m_memo[0] = true;
            m_previous[0] = new KeyValuePair<int, int>(-1, 0);

        }

        public static bool FindSubset(IList<int> set, int sum)
        {
            //m_memo.Clear();
            //m_previous.Clear();
            //m_memo[0] = true;
            //m_previous[0] = new KeyValuePair<int, int>(-1, 0);

            for (int i = 0; i < set.Count; ++i)
            {
                int num = set[i];
                for (int s = sum; s >= num; --s)
                {
                    if (m_memo.ContainsKey(s - num) && m_memo[s - num] == true)
                    {
                        m_memo[s] = true;

                        if (!m_previous.ContainsKey(s))
                        {
                            m_previous[s] = new KeyValuePair<int, int>(i, num);
                        }
                    }
                }
            }

            return m_memo.ContainsKey(sum) && m_memo[sum];
        }
        public static IEnumerable<int> GetLastIndex(int sum)
        {
            while (m_previous[sum].Key != -1)
            {
                yield return m_previous[sum].Key;
                sum -= m_previous[sum].Value;
            }
        }

        public static void SubsetSumMain(string[] args)
        {
            int[] numbers = new int[]
        {
            7, 286, 200, 176, 120, 165, 206, 75, 129, 109,
            123, 111, 43, 52, 99, 128, 111, 110, 98, 135,
            112, 78, 118, 64, 77, 227, 93, 88, 69, 60,
            34, 30, 73, 54, 45, 83, 182, 88, 75, 85,
            54, 53, 89, 59, 37, 35, 38, 29, 18, 45,
            60, 49, 62, 55, 78, 96, 29, 22, 24, 13,
            14, 11, 11, 18, 12, 12, 30, 52, 52, 44,
            28, 28, 20, 56, 40, 31, 50, 40, 46, 42,
            29, 19, 36, 25, 22, 17, 19, 26, 30, 20,
            15, 21, 11, 8, 8, 19, 5, 8, 8, 11,
            11, 8, 3, 9, 5, 4, 7, 3, 6, 3,
            5, 4, 5, 6
        };

            int sum = 400;
            //int size = 4; // don't know to use in dynamic programming

            // call dynamic programming
            if (Numbers.FindSubset(numbers, sum))
            {
                foreach (int index in Numbers.GetLastIndex(sum))
                {
                    Console.Write((index + 1) + "." + numbers[index] + "\t");
                }
                Console.WriteLine();
            }
            Console.WriteLine();

            Console.ReadKey();
        }
    }
}

Recursive Programming Attempt

and Here is the C# recursive programming version adapted from an answer to a similar question here on stackoverflow.com:

using System;
using System.Collections.Generic;

namespace Utilities
{
    public static class Combinations
    {
        private static int s_count = 0;
        public static int CountSubsets(int[] numbers, int index, int current, int sum, int size, List<int> result)
        {
            if ((numbers.Length <= index) || (current > sum)) return 0;
            if (result == null) result = new List<int>();

            List<int> temp = new List<int>(result);
            if (current + numbers[index] == sum)
            {
                temp.Add(index);
                if ((size == 0) || (temp.Count == size))
                {
                    s_count++;
                }
            }
            else if (current + numbers[index] < sum)
            {
                temp.Add(index);
                CountSubsets(numbers, index + 1, current + numbers[index], sum, size, temp);
            }

            CountSubsets(numbers, index + 1, current, sum, size, result);
            return s_count;
        }

        private static List<List<int>> m_subsets = new List<List<int>>();
        public static List<List<int>> FindSubsets(int[] numbers, int index, int current, int sum, int size, List<int> result)
        {
            if ((numbers.Length <= index) || (current > sum)) return m_subsets;
            if (result == null) result = new List<int>();

            List<int> temp = new List<int>(result);
            if (current + numbers[index] == sum)
            {
                temp.Add(index);
                if ((size == 0) || (temp.Count == size))
                {
                    m_subsets.Add(temp);
                }
            }
            else if (current + numbers[index] < sum)
            {
                temp.Add(index);
                FindSubsets(numbers, index + 1, current + numbers[index], sum, size, temp);
            }

            FindSubsets(numbers, index + 1, current, sum, size, result);

            return m_subsets;
        }

        public static void SubsetSumMain(string[] args)
        {
            int[] numbers = new int[]
        {
            7, 286, 200, 176, 120, 165, 206, 75, 129, 109,
            123, 111, 43, 52, 99, 128, 111, 110, 98, 135,
            112, 78, 118, 64, 77, 227, 93, 88, 69, 60,
            34, 30, 73, 54, 45, 83, 182, 88, 75, 85,
            54, 53, 89, 59, 37, 35, 38, 29, 18, 45,
            60, 49, 62, 55, 78, 96, 29, 22, 24, 13,
            14, 11, 11, 18, 12, 12, 30, 52, 52, 44,
            28, 28, 20, 56, 40, 31, 50, 40, 46, 42,
            29, 19, 36, 25, 22, 17, 19, 26, 30, 20,
            15, 21, 11, 8, 8, 19, 5, 8, 8, 11,
            11, 8, 3, 9, 5, 4, 7, 3, 6, 3,
            5, 4, 5, 6
        };

            int sum = 17;
            int size = 2;

            // call backtracking recursive programming
            Console.WriteLine("CountSubsets");
            int count = Numbers.CountSubsets(numbers, 0, 0, sum, size, null);
            Console.WriteLine("Count = " + count);
            Console.WriteLine();

            // call backtracking recursive programming
            Console.WriteLine("FindSubsets");
            List<List<int>> subsets = Numbers.FindSubsets(numbers, 0, 0, sum, size, null);
            for (int i = 0; i < subsets.Count; i++)
            {
                if (subsets[i] != null)
                {
                    Console.Write((i + 1).ToString() + ":\t");
                    for (int j = 0; j < subsets[i].Count; j++)
                    {
                        int index = subsets[i][j];
                        Console.Write((index + 1) + "." + numbers[index] + " ");
                    }
                    Console.WriteLine();
                }
            }
            Console.WriteLine("Count = " + subsets.Count);

            Console.ReadKey();
        }
    }
}

Please let me know how to restrict the dynamic programming version to subsets of size k and if I can call it repeatedly so it returns different subsets on every call until there are no more matching subsets.

Also I am not sure where to initialize the memo of the DP algorithm. I did it in the static constructor which auto-runs when accessing any method. Is this the correct initialization place or does it need to be moved to inside the FindSunset() method [commented out]?

As for the recursive version, is it backtracking? and how can we speed it up. It works correctly and takes k and s into account but totally inefficient.

Let's make this thread the mother of all C# SubsetSum related questions!

like image 603
Ali Adams Avatar asked Oct 20 '22 12:10

Ali Adams


2 Answers

My previous answer works on the principle of cutting off the number of combinations to check. But this can be improved significantly once you sort the array. The principle is similar, but since the solution is entirely different, I've decided to put it in a separate answer.

I was careful to use only .Net Framework 2.0 features. Might add a visual explanation later, but the comments should be enough.

class Puzzle
{
    private readonly int[] _tailSums;
    public readonly SubsetElement[] Elements;
    public readonly int N;

    public Puzzle(int[] numbers)
    {
        // Set N and make Elements array
        // (to remember the original index of each element)
        this.N = numbers.Length;
        this.Elements = new SubsetElement[this.N];

        for (var i = 0; i < this.N; i++)
        {
            this.Elements[i] = new SubsetElement(numbers[i], i);
        }

        // Sort Elements descendingly by their Number value
        Array.Sort(this.Elements, (a, b) => b.Number.CompareTo(a.Number));

        // Save tail-sums to allow immediate access by index
        // Allow immedate calculation by index = N, to sum 0
        this._tailSums = new int[this.N + 1];
        var sum = 0;

        for (var i = this.N - 1; i >= 0; i--)
        {
            this._tailSums[i] = sum += this.Elements[i].Number;
        }
    }

    public void Solve(int s, Action<SubsetElement[]> callback)
    {
        for (var k = 1; k <= this.N; k++)
            this.Solve(k, s, callback);
    }

    public void Solve(int k, int s, Action<SubsetElement[]> callback)
    {
        this.ScanSubsets(0, k, s, new List<SubsetElement>(), callback);
    }

    private void ScanSubsets(int startIndex, int k, int s,
                             List<SubsetElement> subset, Action<SubsetElement[]> cb)
    {
        // No more numbers to add, and current subset is guranteed to be valid
        if (k == 0)
        {
            // Callback with current subset
            cb(subset.ToArray());
            return;
        }

        // Sum the smallest k elements
        var minSubsetStartIndex = this.N - k;
        var minSum = this._tailSums[minSubssetStartIndex];

        // Smallest possible sum is greater than wanted sum,
        // so a valid subset cannot be found
        if (minSum > s)
        {
            return;
        }

        // Find largest number that satisfies the condition
        // that a valid subset can be found
        minSum -= this.Elements[minSubsetStartIndex].Number;

        // But remember the last index that satisfies the condition
        var minSubsetEndIndex = minSubsetStartIndex;

        while (minSubsetStartIndex > startIndex &&
               minSum + this.Elements[minSubsetStartIndex - 1].Number <= s)
        {
            minSubsetStartIndex--;
        }

        // Find the first number in the sorted sequence that is
        // the largest number we just found (in case of duplicates)
        while (minSubsetStartIndex > startIndex &&
               Elements[minSubsetStartIndex] == Elements[minSubsetStartIndex - 1])
        {
            minSubsetStartIndex--;
        }

        // [minSubsetStartIndex .. maxSubsetEndIndex] is the
        // full range we must check in recursion

        for (var subsetStartIndex = minSubsetStartIndex;
             subsetStartIndex <= minSubsetEndIndex;
             subsetStartIndex++)
        {
            // Find the largest possible sum, which is the sum of the
            // k first elements, starting at current subsetStartIndex
            var maxSum = this._tailSums[subsetStartIndex] -
                         this._tailSums[subsetStartIndex + k];

            // The largest possible sum is less than the wanted sum,
            // so a valid subset cannot be found
            if (maxSum < s)
            {
                return;
            }

            // Add current number to the subset
            var x = this.Elements[subsetStartIndex];
            subset.Add(x);

            // Recurse through the sub-problem to the right
            this.ScanSubsets(subsetStartIndex + 1, k - 1, s - x.Number, subset, cb);

            // Remove current number and continue loop
            subset.RemoveAt(subset.Count - 1);
        }
    }

    public sealed class SubsetElement
    {
        public readonly int Number;
        public readonly int Index;

        public SubsetElement(int number, int index)
        {
            this.Number = number;
            this.Index = index;
        }

        public override string ToString()
        {
            return string.Format("{0}({1})", this.Number, this.Index);
        }
    }
}

Usage and performance testing:

private static void Main()
{
    var sw = Stopwatch.StartNew();
    var puzzle = new Puzzle(new[]
        {
            7, 286, 200, 176, 120, 165, 206, 75, 129, 109,
            123, 111, 43, 52, 99, 128, 111, 110, 98, 135,
            112, 78, 118, 64, 77, 227, 93, 88, 69, 60,
            34, 30, 73, 54, 45, 83, 182, 88, 75, 85,
            54, 53, 89, 59, 37, 35, 38, 29, 18, 45,
            60, 49, 62, 55, 78, 96, 29, 22, 24, 13,
            14, 11, 11, 18, 12, 12, 30, 52, 52, 44,
            28, 28, 20, 56, 40, 31, 50, 40, 46, 42,
            29, 19, 36, 25, 22, 17, 19, 26, 30, 20,
            15, 21, 11, 8, 8, 19, 5, 8, 8, 11,
            11, 8, 3, 9, 5, 4, 7, 3, 6, 3,
            5, 4, 5, 6
        });

    puzzle.Solve(2, 17, PuzzleOnSubsetFound);

    sw.Stop();
    Console.WriteLine("Subsets found: " + _subsetsCount);
    Console.WriteLine(sw.Elapsed);
}

private static int _subsetsCount;

private static void PuzzleOnSubsetFound(Puzzle.SubsetElement[] subset)
{
    _subsetsCount++;
    return; // Skip prints when speed-testing

    foreach (var el in subset)
    {
        Console.Write(el.ToString());
        Console.Write("  ");
    }

    Console.WriteLine();
}

Output:

Each line is a found subset, numbers in () are the original index of the number used in the subset

14(60) 3(107)
14(60) 3(109)
14(60) 3(102)
13(59) 4(105)
13(59) 4(111)
12(64) 5(96)
12(64) 5(104)
12(64) 5(112)
12(64) 5(110)
12(65) 5(96)
12(65) 5(104)
12(65) 5(112)
12(65) 5(110)
11(100) 6(108)
11(100) 6(113)
11(61) 6(108)
11(61) 6(113)
11(92) 6(108)
11(92) 6(113)
11(62) 6(108)
11(62) 6(113)
11(99) 6(108)
11(99) 6(113)
9(103) 8(93)
9(103) 8(94)
9(103) 8(97)
9(103) 8(98)
9(103) 8(101)
Subsets found: 28
00:00:00.0017020 (measured when no printing is performed)


The higher k is, the more cutoffs can be made. This is when you'll see the major performance difference. Your current code (the recursive version) also performs significantly slower when s is goes higher.

With k=5, s=60
Your current code: found 45070 subsets in 2.8602923 seconds
My code: found 45070 subsets in 0.0116727 seconds
That is 99.6% speed improvement

like image 73
SimpleVar Avatar answered Oct 22 '22 01:10

SimpleVar


There are multiple solutions, but nobody showed how to use dynamic programming to find the answer.

The key is to use dynamic programming to build up a data structure from which all solutions can later be found.

In addition to the requested function, I collected information about how many solutions there are, and wrote FindSolution(node, position) to return the solution at position position starting with node without calculating the rest. If you want all of them, using that function would be inefficient. But, for example, with this function it is doable to calculate the billionth way to express 10000 as a sum of 20 primes. That would not be feasible with the other approaches given.

using System;
using System.Collections.Generic;

public class SubsetSum
{
    public class SolutionNode<T>
    {
        // The index we found the value at
        public int Index {get; set;}
        // The value we add for this solution
        public T Value {get; set;}
        // How many solutions we have found.
        public int Count {get; set;}
        // The rest of this solution.
        public SolutionNode<T> Tail {get; set;}
        // The next solution.
        public SolutionNode<T> Next {get; set;}
    }

    // This uses dynamic programming to create a summary of all solutions.
    public static SolutionNode<int> FindSolution(int[] numbers, int target, int subsetSize)
    {
        // By how many are in our list, by what they sum to, what SolutionNode<int> has our answer?
        List<Dictionary<int, SolutionNode<int>>> solutionOf = new List<Dictionary<int, SolutionNode<int>>>();

        // Initialize empty solutions.
        for (int i = 0; i <= subsetSize; i++)
        {
            solutionOf.Add(new Dictionary<int, SolutionNode<int>>());
        }

        // We discover from the last number in the list forward.
        // So discovering from the last index forward makes them ordered.
        for (int i = numbers.Length - 1; -1 < i; i--)
        {
            int number = numbers[i];
            // Descending here so we don't touch solutionOf[j-1] until after we have solutionOf[j] updated.
            for (int j = subsetSize; 0 < j; j--)
            {
                // All previously found sums with j entries
                Dictionary<int, SolutionNode<int>> after = solutionOf[j];
                // All previously found sums with j-1 entries
                Dictionary<int, SolutionNode<int>> before = solutionOf[j-1];
                foreach (KeyValuePair<int, SolutionNode<int>> pair in before)
                {
                    SolutionNode<int> newSolution = new SolutionNode<int>();
                    int newSum = pair.Key + number;
                    newSolution.Index = i;
                    newSolution.Value = number;
                    newSolution.Count = pair.Value.Count;
                    newSolution.Tail = pair.Value;
                    if (after.ContainsKey(newSum))
                    {
                        newSolution.Next = after[newSum];
                        newSolution.Count = pair.Value.Count + after[newSum].Count;
                    }
                    after[newSum] = newSolution;
                }

                // And special case empty set.
                if (1 == j)
                {
                    SolutionNode<int> newSolution = new SolutionNode<int>();
                    newSolution.Index = i;
                    newSolution.Value = number;
                    newSolution.Count = 1;
                    if (after.ContainsKey(number))
                    {
                        newSolution.Next = after[number];
                        newSolution.Count = after[number].Count;
                    }
                    after[number] = newSolution;
                }
            }
        }

        // Return what we found.
        try
        {
            return solutionOf[subsetSize][target];
        }
        catch
        {
            throw new Exception("No solutions found");
        }
    }

    // The function we were asked for.
    public static IEnumerable<List<int>> ListSolutions (SolutionNode<int> node)
    {
        List<int> solution = new List<int>();
        List<SolutionNode<int>> solutionPath = new List<SolutionNode<int>>();

        // Initialize with starting information.
        solution.Add(0); // This will be removed when we get node
        solutionPath.Add(node); // This will be our start.

        while (0 < solutionPath.Count)
        {
            // Erase the tail of our previous solution
            solution.RemoveAt(solution.Count - 1);
            // Pick up our next.
            SolutionNode<int> current = solutionPath[solutionPath.Count - 1];
            solutionPath.RemoveAt(solutionPath.Count - 1);
            while (current != null)
            {
                solution.Add(current.Index);
                solutionPath.Add(current.Next);
                if (current.Tail == null)
                {
                    yield return solution;
                }
                current = current.Tail;
            }
        }
    }

    // And for fun, a function that dynamic programming makes easy - return any one of them!
    public static List<int> FindSolution(SolutionNode<int> node, int position)
    {
        // Switch to counting from the end.
        position = node.Count - position - 1;
        List<int> solution = new List<int>();
        while (node != null)
        {
            while (node.Next != null && position < node.Next.Count)
            {
                node = node.Next;
            }
            solution.Add(node.Index);
            node = node.Tail;
        }
        return solution;
    }

    public static void Main(string[] args)
    {
        SolutionNode<int> solution = FindSolution(
            new[]{
                7, 286, 200, 176, 120, 165, 206, 75, 129, 109,
            123, 111, 43, 52, 99, 128, 111, 110, 98, 135,
            112, 78, 118, 64, 77, 227, 93, 88, 69, 60,
            34, 30, 73, 54, 45, 83, 182, 88, 75, 85,
            54, 53, 89, 59, 37, 35, 38, 29, 18, 45,
            60, 49, 62, 55, 78, 96, 29, 22, 24, 13,
            14, 11, 11, 18, 12, 12, 30, 52, 52, 44,
            28, 28, 20, 56, 40, 31, 50, 40, 46, 42,
            29, 19, 36, 25, 22, 17, 19, 26, 30, 20,
            15, 21, 11, 8, 8, 19, 5, 8, 8, 11,
            11, 8, 3, 9, 5, 4, 7, 3, 6, 3,
            5, 4, 5, 6}
            , 400, 4);
        IEnumerable<List<int>> listing = ListSolutions(solution);
        foreach (List<int> sum in listing)
        {
            Console.WriteLine ("solution {0}", string.Join(", ", sum.ToArray()));
        }
    }
}

Incidentally this is my first time trying to write C#. It was...painfully verbose.

like image 42
btilly Avatar answered Oct 22 '22 03:10

btilly