I have a list of integers. Numbers can be repeated. I would like "sort" them in that way to get as many "jumps" (difference from the very next element to the current one is positive) as possible.
Examples:
[10, 10, 10, 20, 20, 20] # only one "jump" from 10 to 20
[10, 20, 10, 20, 10, 20] # three jumps (10->20, 10->20, 10->20) - the correct answer
[20, 10, 20, 10, 20, 10] # two jumps
[11, 16, 8, 9, 4, 1, 2, 17, 4, 15, 9, 11, 11, 7, 19, 16, 19, 5, 19, 11] # 9
[9, 11, 2, 19, 4, 11, 15, 5, 7, 11, 16, 19, 1, 4, 8, 11, 16, 19, 9, 17] # 14
[2, 9, 11, 16, 17, 19, 4, 5, 8, 15, 16, 9, 11, 1, 7, 11, 19, 4, 11, 19] # 15
[1, 2, 4, 5, 7, 8, 9, 11, 15, 16, 17, 19, 4, 9, 11, 16, 19, 11, 19, 11] # 16
My totally inefficient (but working) code.:
def sol1(my_list):
my_list.sort()
final_list = []
to_delete = []
i = 0
last_element = float('-inf')
while my_list:
if i >= len(my_list):
i = 0
for index in to_delete[::-1]:
my_list.pop(index)
if len(my_list):
last_element = my_list.pop(0)
final_list.append(last_element)
to_delete = []
continue
curr_element = my_list[i]
if curr_element > last_element:
final_list.append(curr_element)
last_element = curr_element
to_delete.append(i)
i += 1
return final_list
Does anyone know a way to optimize the solution? For now I'm iterating the list many times. It doesn't need to be in Python.
I think this should be equivalent and only take O(n log n) time for sorting and O(n) time for the rest.
from collections import Counter, OrderedDict
arr = [11, 16, 8, 9, 4, 1, 2, 17, 4, 15, 9, 11, 11, 7, 19, 16, 19, 5, 19, 11]
d = OrderedDict(Counter(sorted(arr)))
ans = []
while d:
ans += d
for x in list(d):
d[x] -= 1
if not d[x]:
del d[x]
print(ans)
Another, inspired by trincot:
from collections import defaultdict
from itertools import count
arr = [11, 16, 8, 9, 4, 1, 2, 17, 4, 15, 9, 11, 11, 7, 19, 16, 19, 5, 19, 11]
d = defaultdict(count)
arr.sort(key=lambda x: (next(d[x]), x))
print(arr)
Benchmarks along with other solutions, on your own suggested input and on two of mine (for each input, each solution's best three times from multiple attempts are shown):
[randint(1, 10**5) for i in range(10**4)]
2.14 ms 2.15 ms 2.18 ms Kelly4c
2.19 ms 2.24 ms 2.32 ms Kelly4b
2.23 ms 2.25 ms 2.37 ms Kelly4
5.83 ms 6.02 ms 6.03 ms original
7.05 ms 7.12 ms 7.54 ms Kelly1
7.82 ms 8.43 ms 8.45 ms Kelly3b
8.13 ms 8.15 ms 8.92 ms Kelly3
9.06 ms 9.44 ms 9.50 ms db0
10.25 ms 10.28 ms 10.31 ms db
11.09 ms 11.11 ms 11.23 ms trincot
11.19 ms 11.25 ms 11.58 ms Kelly2
11.29 ms 11.65 ms 11.74 ms db1
11.64 ms 11.65 ms 12.49 ms Kelly3c
list(range(n := 1000)) + [n] * n
0.57 ms 0.60 ms 0.63 ms Kelly2
0.64 ms 0.65 ms 0.68 ms Kelly3
0.66 ms 0.69 ms 0.69 ms trincot
0.69 ms 0.71 ms 0.71 ms db
0.69 ms 0.70 ms 0.70 ms db1
0.72 ms 0.74 ms 0.75 ms Kelly3b
0.99 ms 1.04 ms 1.11 ms Kelly3c
1.04 ms 1.08 ms 1.09 ms Kelly1
28.27 ms 28.56 ms 28.63 ms Kelly4b
36.58 ms 36.81 ms 37.03 ms Kelly4c
39.78 ms 40.07 ms 40.37 ms Kelly4
80.41 ms 80.96 ms 81.99 ms original
81.00 ms 81.90 ms 82.08 ms db0
list(range(n := 10000)) + [n] * n
7.11 ms 7.37 ms 7.42 ms Kelly2
7.30 ms 7.62 ms 7.63 ms db
7.31 ms 7.31 ms 7.37 ms Kelly3
7.52 ms 7.64 ms 7.80 ms trincot
7.64 ms 7.82 ms 7.94 ms db1
8.81 ms 8.83 ms 8.84 ms Kelly3b
10.18 ms 10.41 ms 10.52 ms Kelly1
10.85 ms 10.92 ms 11.16 ms Kelly3c
Benchmark code (Try it online!):
from timeit import timeit
from collections import Counter, OrderedDict, defaultdict, deque
from itertools import count, chain, repeat
from random import randint, shuffle
from bisect import insort
def Kelly1(arr):
d = OrderedDict(Counter(sorted(arr)))
ans = []
while d:
ans += d
for x in list(d):
d[x] -= 1
if not d[x]:
del d[x]
return ans
def Kelly2(arr):
d = defaultdict(count)
arr.sort(key=lambda x: (next(d[x]), x))
return arr
def Kelly3(arr):
ctr = Counter(arr)
rounds = [[] for _ in range(max(ctr.values()))]
for x, count in sorted(ctr.items()):
for rnd in rounds[:count]:
rnd.append(x)
return list(chain.from_iterable(rounds))
def Kelly3b(arr):
ctr = Counter(arr)
rounds = [[] for _ in range(max(ctr.values()))]
appends = [rnd.append for rnd in rounds]
for x, count in sorted(ctr.items()):
for append in appends[:count]:
append(x)
return list(chain.from_iterable(rounds))
def Kelly3c(arr):
ctr = Counter(arr)
rounds = [[] for _ in range(max(ctr.values()))]
for x, count in sorted(ctr.items()):
deque(map(list.append, rounds[:count], repeat(x)), 0)
return list(chain.from_iterable(rounds))
def Kelly4(arr):
arr.sort()
out = [].append
while arr:
postpone = [].append
last = None
for x in arr:
if last != x:
out(x)
else:
postpone(x)
last = x
arr = postpone.__self__
return out.__self__
def Kelly4b(arr):
arr.sort()
out = [].append
while arr:
postpone = [].append
last = None
arr = [x
for x in arr
if last == x
or out(last := x)]
return out.__self__
def Kelly4c(arr):
arr.sort()
out = []
while arr:
postpone = [].append
last = None
out += [last := x
for x in arr
if last != x
or postpone(x)]
arr = postpone.__self__
return out
def original(my_list):
my_list.sort()
final_list = []
to_delete = []
i = 0
last_element = float('-inf')
while my_list:
if i >= len(my_list):
i = 0
for index in to_delete[::-1]:
my_list.pop(index)
if len(my_list):
last_element = my_list.pop(0)
final_list.append(last_element)
to_delete = []
continue
curr_element = my_list[i]
if curr_element > last_element:
final_list.append(curr_element)
last_element = curr_element
to_delete.append(i)
i += 1
return final_list
def db(arr):
cumcount = []
d = dict.fromkeys(arr, 0)
for el in arr:
cumcount.append(d[el])
d[el] += 1
return [x[1] for x in sorted(zip(cumcount, arr))]
def db0(arr):
d = Counter(arr)
keys = sorted(d.keys())
ans = []
while len(ans) < len(arr):
for k in keys:
if d.get(k, 0) > 0:
ans.append(k)
d[k] -= 1
return ans
def db1(arr):
cumcount = []
d = {k: 0 for k in set(arr)}
for el in arr:
cumcount.append(d[el])
d[el] += 1
return [x[1] for x in sorted(zip(cumcount, arr))]
def trincot(lst):
return [num for _,num in sorted(
(i, num)
for num, freq in Counter(lst).items()
for i in range(freq)
)]
funcs = [Kelly1, Kelly2, Kelly3, Kelly3b, Kelly3c, Kelly4, Kelly4b, Kelly4c, original, db, db0, db1, trincot]
def test(arr, funcs):
print(arr)
arr = eval(arr)
expect = original(arr[:])
for func in funcs:
result = func(arr[:])
if result != expect:
print(expect[:20])
print(result[:20])
assert result == expect, func
times = {func: [] for func in funcs}
for _ in range(20):
shuffle(funcs)
for func in funcs:
copy = arr[:]
t = timeit(lambda: func(copy), 'gc.enable()', number=1)
insort(times[func], t)
for func in sorted(funcs, key=times.get):
print(*('%5.2f ms ' % (t * 1e3) for t in times[func][:3]), func.__name__)
print()
test('[randint(1, 10**5) for i in range(10**4)]',
funcs)
test('list(range(n := 1000)) + [n] * n',
funcs)
test('list(range(n := 10000)) + [n] * n',
list(set(funcs) - {original, Kelly4, Kelly4b, Kelly4c, db0}))
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With