I have a 3D numpy array looks like this
shape(3,1000,100)
[[[2,3,0,2,6,...,0,-1,-1,-1,-1,-1],
[1,4,6,1,4,5,3,...,1,2,6,-1,-1],
[7,4,6,3,1,0,1,...,2,0,8,-1,-1],
...
[8,7,6,4,...,2,4,5,2,1,-1]],
...,
[1,5,6,7,...,0,0,0,0,1]]]
Each lane of array end with 0 or multiple(less than 70 I'm sure) -1.
For now, I want to select only 30 values before the -1 for each lane, to make a subset of original numpy array with shape of (3,1000,30)
Should be similar like this,
[[[...,0],
[...,1,2,6],
[...,2,0,8],
...
[...,2,4,5,2,1]],
...,
[...,0,0,0,0,1]]]
Is it possible to do it with some numpy functions? Hope without a for loop:)
Here's one making use of broadcasting
and advanced-indexing
-
def preceedingN(a, N):
# mask of value (minus 1 here) to be found
mask = a==-1
# Get the first index with the value along the last axis.
# In case its not found, choose the last index
idx = np.where(mask.any(-1), mask.argmax(-1), mask.shape[-1])
# Get N ranged indices along the last axis
ind = idx[...,None] + np.arange(-N,0)
# Finally advanced-index and get the ranged indexed elements as the o/p
m,n,r = a.shape
return a[np.arange(m)[:,None,None], np.arange(n)[:,None], ind]
Sample run -
Setup for reproducible input :
import numpy as np
# Setup sample input array
np.random.seed(0)
m,n,r = 2,4,10
a = np.random.randint(11,99,(m,n,r))
# Select N elements off each row
N = 3
idx = np.random.randint(N,a.shape[-1]-1,(m,n))
a[idx[...,None] < np.arange(a.shape[-1])] = -1
a[0,0] = range(r) # set first row of first 2D slice to range (no -1s there)
Input, output :
>>> a
array([[[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
[81, 23, 69, 76, 50, 98, 57, -1, -1, -1],
[88, 83, 20, 31, 91, 80, 90, 58, -1, -1],
[60, 40, 30, 30, 25, 50, 43, 76, -1, -1]],
[[43, 42, 85, 34, 46, 86, 66, 39, -1, -1],
[11, 47, 64, 16, -1, -1, -1, -1, -1, -1],
[42, 12, 76, 52, 68, 46, 22, 57, -1, -1],
[25, 64, 23, 53, 95, 86, 79, -1, -1, -1]]])
>>> preceedingN(a, N=3)
array([[[ 7, 8, 9],
[50, 98, 57],
[80, 90, 58],
[50, 43, 76]],
[[86, 66, 39],
[47, 64, 16],
[46, 22, 57],
[95, 86, 79]]])
This is a solution based on the idea of calculating the indices which should be kept. We use numpy.argmin
and numpy.nonzero
to find the start of the -1
or the end of the row, then use 2-dimensional addition/subtraction to build the indices of the 30 elements that need to be kept.
First off, we create reproducible example data
import numpy as np
np.random.seed(0)
a = np.random.randint(low=0, high=10, size=(3, 1000, 100))
for i in range(3):
for j in range(1000):
a[i, j, np.random.randint(low=30, high=a.shape[2]+1):] = -1
You can of course skip this step, I just added it to allow others to reproduce this solution. :)
Now let's go through the code step-by-step:
Change shape of a to simplify code
a.shape = 3 * 1000, 100
Find index of -1 in each row of a
i = np.argmin(a, axis=1)
Replace any indices for rows of a with no -1 by a 100
i[np.nonzero(a[np.arange(a.shape[0]), i] != -1)] = a.shape[1]
Translate indices to 1D
i = i + a.shape[1] * np.arange(a.shape[0])
Calculate indices for all elements that should be kept. We make the indices two-dimensional so that we get the 30 indices before each -1 index.
i = i.reshape(a.shape[0], 1) - 30 + np.arange(30).reshape(1, 30)
a.shape = 3 * 1000 * 100
Perform filtering
a = a[i]
Return a to desired shape
a.shape = 3, 1000, 30
Here's one using stride_tricks
for efficient retrieval of slices:
import numpy as np
from numpy.lib.stride_tricks import as_strided
# create mock data
data = np.random.randint(0, 9, (3, 1000, 100))
fstmone = np.random.randint(30, 101, (3, 1000))
data[np.arange(100) >= fstmone[..., None]] = -1
# count -1s (ok, this is a bit wasteful compared to @Divakar's)
aux = np.where(data[..., 30:] == -1, 1, -100)
nmones = np.maximum(np.max(np.cumsum(aux[..., ::-1], axis=-1), axis=-1), 0)
# slice (but this I'd expect to be faster)
sliceable = as_strided(data, data.shape[:2] + (71, 30),
data.strides + data.strides[2:])
result = sliceable[np.arange(3)[:, None], np.arange(1000)[None, :], 70-nmones, :]
Benchmarks, best solution is a hybrid of @Divakar's and mine, @Florian Rhiem's is also quite fast:
import numpy as np
from numpy.lib.stride_tricks import as_strided
# create mock data
data = np.random.randint(0, 9, (3, 1000, 100))
fstmone = np.random.randint(30, 101, (3, 1000))
data[np.arange(100) >= fstmone[..., None]] = -1
def pp(data, N):
# count -1s
aux = np.where(data[..., N:] == -1, 1, -data.shape[-1])
nmones = np.maximum(np.max(np.cumsum(aux[..., ::-1], axis=-1), axis=-1), 0)
# slice
sliceable = as_strided(data, data.shape[:2] + (data.shape[-1]-N+1, N),
data.strides + data.strides[2:])
return sliceable[np.arange(data.shape[0])[:, None],
np.arange(data.shape[1])[None, :],
data.shape[-1]-N-nmones, :]
def Divakar(data, N):
# mask of value (minus 1 here) to be found
mask = data==-1
# Get the first index with the value along the last axis.
# In case its not found, choose the last index
idx = np.where(mask.any(-1), mask.argmax(-1), mask.shape[-1])
# Get N ranged indices along the last axis
ind = idx[...,None] + np.arange(-N,0)
# Finally advanced-index and get the ranged indexed elements as the o/p
m,n,r = data.shape
return data[np.arange(m)[:,None,None], np.arange(n)[:,None], ind]
def combined(data, N):
# mix best of Divakar's and mine
mask = data==-1
idx = np.where(mask.any(-1), mask.argmax(-1), mask.shape[-1])
sliceable = as_strided(data, data.shape[:2] + (data.shape[-1]-N+1, N),
data.strides + data.strides[2:])
return sliceable[np.arange(data.shape[0])[:, None],
np.arange(data.shape[1])[None, :],
idx-N, :]
def fr(data, N):
data = data.reshape(-1, data.shape[-1])
i = np.argmin(data, axis=1)
i[np.nonzero(data[np.arange(data.shape[0]), i] != -1)] = data.shape[1]
i = i + data.shape[1] * np.arange(data.shape[0])
i = i.reshape(data.shape[0], 1) - N + np.arange(N).reshape(1, N)
data.shape = -1,
res = data[i]
res.shape = 3, 1000, 30
return res
print(np.all(combined(data, 30) == Divakar(data, 30)))
print(np.all(combined(data, 30) == pp(data, 30)))
print(np.all(combined(data, 30) == fr(data, 30)))
from timeit import timeit
for func in pp, Divakar, combined, fr:
print(timeit('f(data, 30)', number=100, globals={'f':func, 'data':data}))
Sample output:
True
True
True
0.2767702739802189
0.13680238201050088
0.060565065999981016
0.0795100320247002
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With