I have a list, which is made up of the following elements,
list1 = [a1,a2,a3]
Where each element of this list can itself be a variable size list, eg,
a1 = [x1,y1,z1], a2 = [w2,x2,y2,z2], a3 = [p3,r3,t3,n3]
It's straight forward for me to set up a generator that loops through list1, and yields the constituents of each element;
array = []
for i in list1:
for j in i:
array.append(j)
yield array
However, is there a way of doing this so I can specify the size of array?
eg - batch size of two;
1st yield : [x1,y1]
2nd yield : [z1,w2]
3rd yield : [x2,y2]
4th yield : [z2,p3]
5th yield : [r3,t3]
6th yield : [n3]
7th yield : repeat 1st
or batch size of 4;
1st yield : [x1,y1,z1,w2]
2nd yield : [x2,y2,z2,p3]
3rd yield : [r3,t3,n3]
4th yield : repeat first
It seems non-trivial to carry this out for different sized lists each containing other different sized lists inside.
This is pretty easy, actually, use itertools
:
>>> a1 = ['x1','y1','z1']; a2 = ['w2','x2','y2','z2']; a3 = ['p3','r3','t3','n3']
>>> list1 = [a1,a2,a3]
>>> from itertools import chain, islice
>>> flatten = chain.from_iterable
>>> def slicer(seq, n):
... it = iter(seq)
... return lambda: list(islice(it,n))
...
>>> def my_gen(seq_seq, batchsize):
... for batch in iter(slicer(flatten(seq_seq), batchsize), []):
... yield batch
...
>>> list(my_gen(list1, 2))
[['x1', 'y1'], ['z1', 'w2'], ['x2', 'y2'], ['z2', 'p3'], ['r3', 't3'], ['n3']]
>>> list(my_gen(list1, 4))
[['x1', 'y1', 'z1', 'w2'], ['x2', 'y2', 'z2', 'p3'], ['r3', 't3', 'n3']]
Note, we can use yield from
in Python 3.3+:
>>> def my_gen(seq_seq, batchsize):
... yield from iter(slicer(flatten(seq_seq), batchsize), [])
...
>>> list(my_gen(list1,2))
[['x1', 'y1'], ['z1', 'w2'], ['x2', 'y2'], ['z2', 'p3'], ['r3', 't3'], ['n3']]
>>> list(my_gen(list1,3))
[['x1', 'y1', 'z1'], ['w2', 'x2', 'y2'], ['z2', 'p3', 'r3'], ['t3', 'n3']]
>>> list(my_gen(list1,4))
[['x1', 'y1', 'z1', 'w2'], ['x2', 'y2', 'z2', 'p3'], ['r3', 't3', 'n3']]
>>>
You could use itertools
here, in your case I would use chain
and islice
import itertools
a1 = ['x1','y1','z1']
a2 = ['w2','x2','y2','z2']
a3 = ['p3','r3','t3','n3']
list1 = [a1,a2,a3]
def flatten_and_batch(lst, size):
    """Flatten *lst* one level and yield successive lists of *size* items.

    The final batch is shorter when the flattened sequence does not
    divide evenly by ``size``.
    """
    flat = itertools.chain.from_iterable(lst)
    # Two-argument iter() keeps calling the lambda until it returns the
    # sentinel [] — i.e. until the flattened iterator is exhausted.
    for chunk in iter(lambda: list(itertools.islice(flat, size)), []):
        yield chunk
list(flatten_and_batch(list1, 2))
# [['x1', 'y1'], ['z1', 'w2'], ['x2', 'y2'], ['z2', 'p3'], ['r3', 't3'], ['n3']]
list(flatten_and_batch(list1, 3))
# [['x1', 'y1', 'z1'], ['w2', 'x2', 'y2'], ['z2', 'p3', 'r3'], ['t3', 'n3']]
If you don't mind an additional dependency you could also use iteration_utilities.grouper
(although it returns tuples not lists) 1 here:
from iteration_utilities import flatten, grouper, Iterable
>>> list(grouper(flatten(list1), 2))
[('x1', 'y1'), ('z1', 'w2'), ('x2', 'y2'), ('z2', 'p3'), ('r3', 't3'), ('n3',)]
>>> list(grouper(flatten(list1), 3))
[('x1', 'y1', 'z1'), ('w2', 'x2', 'y2'), ('z2', 'p3', 'r3'), ('t3', 'n3')]
or the iteration_utilities.Iterable
:
>>> Iterable(list1).flatten().grouper(3).as_list()
[('x1', 'y1', 'z1'), ('w2', 'x2', 'y2'), ('z2', 'p3', 'r3'), ('t3', 'n3')]
>>> Iterable(list1).flatten().grouper(4).map(list).as_list()
[['x1', 'y1', 'z1', 'w2'], ['x2', 'y2', 'z2', 'p3'], ['r3', 't3', 'n3']]
1 Disclaimer: I'm the author of that library.
from itertools import chain, islice
flatten = chain.from_iterable
from iteration_utilities import flatten, grouper, Iterable
def slicer(seq, n):
    """Return a zero-argument callable yielding the next *n* items of *seq*.

    Each call produces a list of up to ``n`` elements; an empty list
    signals exhaustion, which makes the callable usable as the target
    of two-argument ``iter`` with ``[]`` as the sentinel.
    """
    source = iter(seq)

    def take_batch():
        return list(islice(source, n))

    return take_batch
def my_gen(seq_seq, batchsize):
    """Yield lists of up to *batchsize* items from the flattened *seq_seq*.

    NOTE(review): the module-level name ``flatten`` is first bound to
    ``chain.from_iterable`` and then shadowed by the
    ``iteration_utilities`` import above; both appear to flatten a
    single level, so the output should be identical — confirm.
    """
    # Two-argument iter() calls the slicer closure repeatedly until it
    # returns the sentinel [] (underlying iterator exhausted).
    for batch in iter(slicer(flatten(seq_seq), batchsize), []):
        yield batch
def flatten_and_batch(lst, size):
    """Flatten *lst* one level and yield successive lists of up to *size* items.

    NOTE(review): ``flatten`` here resolves to
    ``iteration_utilities.flatten`` because that import shadows the
    earlier ``chain.from_iterable`` alias; both flatten one level, so
    behaviour is presumably the same — confirm.
    """
    it = flatten(lst)
    while True:
        res = list(islice(it, size))
        if not res:
            # Empty slice means the iterator is exhausted.
            break
        else:
            yield res
def iteration_utilities_approach(seq, size):
    """Batch the flattened *seq* into groups of *size* via iteration_utilities.

    Unlike the other contenders, ``grouper`` yields tuples rather than
    lists.
    """
    return grouper(flatten(seq), size)
def partition(lst, c):
    """Flatten *lst* one level, then yield consecutive slices of *c* elements.

    Materialises the whole flattened sequence first, then batches it by
    index; the last slice may hold fewer than ``c`` elements.
    """
    pooled = list(chain.from_iterable(lst))
    offsets = range(0, len(pooled), c)
    for offset in offsets:
        yield pooled[offset:offset + c]
# Thin wrappers, one per answer, that fully consume each contender's
# generator into a list so the benchmark times complete consumption.

def juanpa(seq, size):
    # slicer + two-argument iter() sentinel approach.
    return list(my_gen(seq, size))

def mseifert1(seq, size):
    # Explicit while/islice loop.
    return list(flatten_and_batch(seq, size))

def mseifert2(seq, size):
    # iteration_utilities grouper (yields tuples, not lists).
    return list(iteration_utilities_approach(seq, size))

def JoelCornett(seq, size):
    # Flatten into a list first, then slice by index.
    return list(partition(seq, size))
# Timing setup
# One result list per contender, keyed by the function object itself.
timings = {juanpa: [],
           mseifert1: [],
           mseifert2: [],
           JoelCornett: []}
# Input sizes: powers of two 2**1 .. 2**17, every other exponent.
sizes = [2**i for i in range(1, 18, 2)]
# Timing
# NOTE(review): `%timeit -o` is IPython magic syntax — this cell runs
# only in IPython/Jupyter, not under the plain Python interpreter.
for size in sizes:
    print(size)
    # Benchmark input: `size` copies of the same 3-element sub-list.
    func_input = [['x1','y1','z1']]*size
    for func in timings:
        print(str(func))
        res = %timeit -o func(func_input, 3)
        timings[func].append(res)
# Plot each contender's best times on a log-log scale.
# NOTE(review): `%matplotlib notebook` is IPython magic — Jupyter only.
%matplotlib notebook
import matplotlib.pyplot as plt
import numpy as np
fig = plt.figure(1)
ax = plt.subplot(111)
for func in timings:
    # `.best` is the fastest run %timeit recorded for that input size.
    ax.plot(sizes,
            [time.best for time in timings[func]],
            label=str(func.__name__))
ax.set_xscale('log')
ax.set_yscale('log')
ax.set_xlabel('size')
ax.set_ylabel('time [seconds]')
ax.grid(which='both')
ax.legend()
plt.tight_layout()
It is relatively trivial if you break the task into two steps: (1) flatten the nested lists into one sequence, then (2) slice that sequence into fixed-size batches.
Here is an example implementation:
from itertools import chain

def break_into_batches(items, batch_size):
    """Flatten *items* (an iterable of iterables) one level and yield
    successive lists of at most *batch_size* elements.

    The last batch is shorter when the total element count is not a
    multiple of ``batch_size``.
    """
    # chain.from_iterable is the idiomatic (and lazy) spelling: unlike
    # chain(*items) it does not unpack the outer iterable into call
    # arguments, so it also handles very long outer iterables.
    flattened = list(chain.from_iterable(items))
    for start in range(0, len(flattened), batch_size):
        yield flattened[start:start + batch_size]
Given the objectives of controlling both the batch `size` and the number of repeat `cycles` over the list, `more_itertools` can achieve them as follows:
import more_itertools as mit

def batch(iterable, size=2, cycles=1):
    """Repeat *iterable* ``cycles`` times, flatten one level, and return
    an iterator of lists holding at most *size* elements each."""
    repeated = mit.ncycles(iterable, cycles)
    flattened = mit.flatten(repeated)
    return mit.chunked(flattened, size)
list(batch(list1, 3))
# [["x1", "y1", "z1"], ["w2", "x2", "y2"], ["z2", "p3", "r3"], ["t3", "n3"]]
list(batch(list1, size=3, cycles=2))
# [["x1", "y1", "z1"], ["w2", "x2", "y2"], ["z2", "p3", "r3"],
# ["t3", "n3", "x1"], ["y1", "z1", "w2"], ["x2", "y2", "z2"],
# ["p3", "r3", "t3"], ["n3"]]
See the docs for details on each tool: ncycles
, flatten
and chunked
.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With