Sort elements to maximise amount of positive differences

Question

I have a list of integers. Numbers can be repeated. I would like "sort" them in that way to get as many "jumps" (difference from the very next element to the current one is positive) as possible.

Examples:

[10, 10, 10, 20, 20, 20]  # only one "jump" from 10 to 20
[10, 20, 10, 20, 10, 20]  # three jumps (10->20, 10->20, 10->20) - the correct answer
[20, 10, 20, 10, 20, 10]  # two jumps

[11, 16, 8, 9, 4, 1, 2, 17, 4, 15, 9, 11, 11, 7, 19, 16, 19, 5, 19, 11]  # 9
[9, 11, 2, 19, 4, 11, 15, 5, 7, 11, 16, 19, 1, 4, 8, 11, 16, 19, 9, 17]  # 14
[2, 9, 11, 16, 17, 19, 4, 5, 8, 15, 16, 9, 11, 1, 7, 11, 19, 4, 11, 19]  # 15
[1, 2, 4, 5, 7, 8, 9, 11, 15, 16, 17, 19, 4, 9, 11, 16, 19, 11, 19, 11]  # 16

My totally inefficient (but working) code.:

def sol1(my_list):
    my_list.sort()
    final_list = []
    to_delete = []
    i = 0
    last_element = float('-inf')
    while my_list:
        if i >= len(my_list):
            i = 0
            for index in to_delete[::-1]:
                my_list.pop(index)
            if len(my_list):
                last_element = my_list.pop(0)
                final_list.append(last_element)
            to_delete = []
            continue

        curr_element = my_list[i]
        if curr_element > last_element:
            final_list.append(curr_element)
            last_element = curr_element
            to_delete.append(i)
        i += 1
    return final_list

Does anyone know a way to optimize the solution? For now I'm iterating the list many times. It doesn't need to be in Python.

Kelly Bundy · Accepted Answer

I think this should be equivalent and only take O(n log n) time for sorting and O(n) time for the rest.

from collections import Counter, OrderedDict

arr = [11, 16, 8, 9, 4, 1, 2, 17, 4, 15, 9, 11, 11, 7, 19, 16, 19, 5, 19, 11]
d = OrderedDict(Counter(sorted(arr)))

ans = []
while d:
    ans += d
    for x in list(d):
        d[x] -= 1
        if not d[x]:
            del d[x]
print(ans)

Another, inspired by trincot:

from collections import defaultdict
from itertools import count

arr = [11, 16, 8, 9, 4, 1, 2, 17, 4, 15, 9, 11, 11, 7, 19, 16, 19, 5, 19, 11]

d = defaultdict(count)
arr.sort(key=lambda x: (next(d[x]), x))

print(arr)

Benchmarks along with other solutions, on your own suggested input and on two of mine (for each input, each solution's best three times from multiple attempts are shown):

[randint(1, 10**5) for i in range(10**4)]
 2.14 ms   2.15 ms   2.18 ms  Kelly4c
 2.19 ms   2.24 ms   2.32 ms  Kelly4b
 2.23 ms   2.25 ms   2.37 ms  Kelly4
 5.83 ms   6.02 ms   6.03 ms  original
 7.05 ms   7.12 ms   7.54 ms  Kelly1
 7.82 ms   8.43 ms   8.45 ms  Kelly3b
 8.13 ms   8.15 ms   8.92 ms  Kelly3
 9.06 ms   9.44 ms   9.50 ms  db0
10.25 ms  10.28 ms  10.31 ms  db
11.09 ms  11.11 ms  11.23 ms  trincot
11.19 ms  11.25 ms  11.58 ms  Kelly2
11.29 ms  11.65 ms  11.74 ms  db1
11.64 ms  11.65 ms  12.49 ms  Kelly3c

list(range(n := 1000)) + [n] * n
 0.57 ms   0.60 ms   0.63 ms  Kelly2
 0.64 ms   0.65 ms   0.68 ms  Kelly3
 0.66 ms   0.69 ms   0.69 ms  trincot
 0.69 ms   0.71 ms   0.71 ms  db
 0.69 ms   0.70 ms   0.70 ms  db1
 0.72 ms   0.74 ms   0.75 ms  Kelly3b
 0.99 ms   1.04 ms   1.11 ms  Kelly3c
 1.04 ms   1.08 ms   1.09 ms  Kelly1
28.27 ms  28.56 ms  28.63 ms  Kelly4b
36.58 ms  36.81 ms  37.03 ms  Kelly4c
39.78 ms  40.07 ms  40.37 ms  Kelly4
80.41 ms  80.96 ms  81.99 ms  original
81.00 ms  81.90 ms  82.08 ms  db0

list(range(n := 10000)) + [n] * n
 7.11 ms   7.37 ms   7.42 ms  Kelly2
 7.30 ms   7.62 ms   7.63 ms  db
 7.31 ms   7.31 ms   7.37 ms  Kelly3
 7.52 ms   7.64 ms   7.80 ms  trincot
 7.64 ms   7.82 ms   7.94 ms  db1
 8.81 ms   8.83 ms   8.84 ms  Kelly3b
10.18 ms  10.41 ms  10.52 ms  Kelly1
10.85 ms  10.92 ms  11.16 ms  Kelly3c

Benchmark code (Try it online!):

from timeit import timeit
from collections import Counter, OrderedDict, defaultdict, deque
from itertools import count, chain, repeat
from random import randint, shuffle
from bisect import insort

def Kelly1(arr):
  d = OrderedDict(Counter(sorted(arr)))
  ans = []
  while d:
    ans += d
    for x in list(d):
        d[x] -= 1
        if not d[x]:
            del d[x]
  return ans

def Kelly2(arr):
  d = defaultdict(count)
  arr.sort(key=lambda x: (next(d[x]), x))
  return arr

def Kelly3(arr):
  ctr = Counter(arr)
  rounds = [[] for _ in range(max(ctr.values()))]
  for x, count in sorted(ctr.items()):
    for rnd in rounds[:count]:
      rnd.append(x)
  return list(chain.from_iterable(rounds))

def Kelly3b(arr):
  ctr = Counter(arr)
  rounds = [[] for _ in range(max(ctr.values()))]
  appends = [rnd.append for rnd in rounds]
  for x, count in sorted(ctr.items()):
    for append in appends[:count]:
      append(x)
  return list(chain.from_iterable(rounds))

def Kelly3c(arr):
  ctr = Counter(arr)
  rounds = [[] for _ in range(max(ctr.values()))]
  for x, count in sorted(ctr.items()):
    deque(map(list.append, rounds[:count], repeat(x)), 0)
  return list(chain.from_iterable(rounds))

def Kelly4(arr):
  arr.sort()
  out = [].append
  while arr:
    postpone = [].append
    last = None
    for x in arr:
      if last != x:
        out(x)
      else:
        postpone(x)
      last = x
    arr = postpone.__self__
  return out.__self__

def Kelly4b(arr):
  arr.sort()
  out = [].append
  while arr:
    postpone = [].append
    last = None
    arr = [x 
           for x in arr
           if last == x
           or out(last := x)]
  return out.__self__

def Kelly4c(arr):
  arr.sort()
  out = []
  while arr:
    postpone = [].append
    last = None
    out += [last := x 
            for x in arr
            if last != x
            or postpone(x)]
    arr = postpone.__self__
  return out

def original(my_list):
    my_list.sort()
    final_list = []
    to_delete = []
    i = 0
    last_element = float('-inf')
    while my_list:
        if i >= len(my_list):
            i = 0
            for index in to_delete[::-1]:
                my_list.pop(index)
            if len(my_list):
                last_element = my_list.pop(0)
                final_list.append(last_element)
            to_delete = []
            continue

        curr_element = my_list[i]
        if curr_element > last_element:
            final_list.append(curr_element)
            last_element = curr_element
            to_delete.append(i)
        i += 1
    return final_list

def db(arr):
  cumcount = []
  d = dict.fromkeys(arr, 0)
  for el in arr:
    cumcount.append(d[el])
    d[el] += 1
  return [x[1] for x in sorted(zip(cumcount, arr))]

def db0(arr):
  d = Counter(arr)
  keys = sorted(d.keys())
  ans = []
  while len(ans) < len(arr):
    for k in keys:
        if d.get(k, 0) > 0:
            ans.append(k)
            d[k] -= 1
  return ans

def db1(arr):
  cumcount = []
  d = {k: 0 for k in set(arr)}
  for el in arr:
    cumcount.append(d[el])
    d[el] += 1
  return [x[1] for x in sorted(zip(cumcount, arr))]

def trincot(lst):
  return [num for _,num in sorted(
    (i, num)
    for num, freq in Counter(lst).items()
        for i in range(freq)
  )]

funcs = [Kelly1, Kelly2, Kelly3, Kelly3b, Kelly3c, Kelly4, Kelly4b, Kelly4c, original, db, db0, db1, trincot]

def test(arr, funcs):
  print(arr)
  arr = eval(arr)

  expect = original(arr[:])
  for func in funcs:
    result = func(arr[:])
    if result != expect:
      print(expect[:20])
      print(result[:20])
    assert result == expect, func
  
  times = {func: [] for func in funcs}
  for _ in range(20):
    shuffle(funcs)
    for func in funcs:
      copy = arr[:]
      t = timeit(lambda: func(copy), 'gc.enable()', number=1)
      insort(times[func], t)
  for func in sorted(funcs, key=times.get):
    print(*('%5.2f ms ' % (t * 1e3) for t in times[func][:3]), func.__name__)
  print()

test('[randint(1, 10**5) for i in range(10**4)]',
     funcs)
test('list(range(n := 1000)) + [n] * n',
     funcs)
test('list(range(n := 10000)) + [n] * n',
     list(set(funcs) - {original, Kelly4, Kelly4b, Kelly4c, db0}))

Sort elements to maximise amount of positive differences

Tags:

python

algorithm

sorting

Piotr Wasilewicz

1 Answers

Kelly Bundy

Recent Activity

Donate For Us

Sort elements to maximise amount of positive differences

Tags:

python

algorithm

sorting

Piotr Wasilewicz

1 Answers

Kelly Bundy

Related questions

Recent Activity

Donate For Us