How to concatenate gathered data using mpi4py library in python

Question

I used to list append of data employing mpi4py and try to save the data sequentially at the source(root==0) node.

As suggested by Alan22, I've modified the code and it works, but the script does not concatenate properly, so I get the output file as shown in attached figure:01.

Can anybody help how to fix the error message? In addition, whatever I've written in python script [shown below], isn't the best way to solve the problem. saved output array

Is there any way to solve this type of problem efficiently? Any help is highly appreciated.

The python script is given as follows:

import numpy as np
from scipy import signal
from mpi4py import MPI  
import random
import cmath, math
import matplotlib.pyplot as plt
import time

#File storing path
save_results_to = 'File storing path'

count_day = 1
count_hour = 1

arr_x = [0, 8.49, 0.0, -8.49, -12.0, -8.49, -0.0, 8.49, 12.0]
arr_y = [0, 8.49, 12.0, 8.49, 0.0, -8.49, -12.0, -8.49, -0.0]
M = len(arr_x)
N = len(arr_y)

np.random.seed(12345)
total_rows = 50000
raw_data=np.reshape(np.random.rand(total_rows*N),(total_rows,N))

# Function of CSD:: Using For Loop
fs = 500;       # Sampling frequency
def csdMat(data):
    dat, cols = data.shape   # For 2D data
    total_csd = []
    for i in range(cols):
        col_csd =[]
        for j in range( cols):
            freq, Pxy = signal.csd(data[:,i], data[:, j], fs=fs, window='hann', nperseg=100, noverlap=70, nfft=5000) 
            col_csd.append(Pxy)  
        total_csd.append(col_csd)
        pxy = np.array(total_csd)
    return freq, pxy

# Finding cross spectral density (CSD)
t0 = time.time()
freq, csd = csdMat(raw_data)
print('The shape of the csd data', csd.shape)
print('Time required {} seconds to execute CSD--For loop'.format(time.time()-t0))

kf=1*2*np.pi/10
resolution = 50 # This is important:: the HIGHER the Resolution, the higher the execution time!!!
grid_size = N * resolution
kx = np.linspace(-kf, kf, )  # space vector
ky = np.linspace(-kf, kf, grid_size)  # space vector

def DFT2D(data):
    P=len(kx)
    Q=len(ky)
    dft2d = np.zeros((P,Q), dtype=complex)
    for k in range(P):
        for l in range(Q):
            sum_log = []
            mat2d = np.zeros((M,N))
            sum_matrix = 0.0
            for m in range(M):
                for n in range(N):
                    e = cmath.exp(-1j*((((dx[m]-dx[n])*kx[l])/1) + (((dy[m]-dy[n])*ky[k])/1)))
                    sum_matrix += data[m, n] * e
            dft2d[k,l] = sum_matrix
    return dft2d

dx = arr_x[:]; dy = arr_y[:]


comm = MPI.COMM_WORLD
size = comm.Get_size()
rank = comm.Get_rank()

data = []
start_freq = 100
end_freq   = 109
freq_range = np.arange(start_freq,end_freq)
no_of_freq = len(freq_range)

for fr_count in range(start_freq, end_freq):
    if fr_count % size == rank:
        dft = np.zeros((grid_size, grid_size))
        spec_csd = csd[:,:, fr_count]
        dft = DFT2D(spec_csd)  # Call the DFT2D function
        spec = np.array(np.real(dft))  # Spectrum or 2D_DFT of data[real part]
        print('Shape of spec', spec.shape)
        data.append(spec)
        #data = np.append(data,spec)
        np.seterr(invalid='ignore')
data = comm.gather(data, root =0)
#    comm.Allreduce(MPI.IN_PLACE,data,op=MPI.MAX)
print("Rank: ", rank, ". Spectrum shape is:
", spec.shape)


if rank == 0:
    output_data = np.concatenate(data, axis = 0)
    #output_data = np.c_(data, axis = 0)
    dft_tot = np.array((output_data), dtype='object')
    res = np.zeros((grid_size, grid_size))
    for k in range(size):
        for i in range(no_of_freq):

            jj = np.around(freq[freq_range[i]], decimals = 2)

            #print('The shape of data after indexing', data1.shape)
            #data_final=data1.reshape(data1.shape[0]*data1.shape[1], data1.shape[2])
            res[i * size + k] = dft_tot[k][i] #np.array(data[k])
            data = np.array(res)
            #print('The shape of the dft at root node', data.shape)
            np.savetxt(save_results_to + f'Day_{count_day}_hour_{count_hour}_f_{jj}_hz.txt', data.view(float))

I use the following bash script command to run the script ( i.e., my_file.sh). I submit the job with command sbatch my_file.sh

#! /bin/bash -l
#SBATCH -J testmvapich2
#SBATCH -N 1 ## Maximum 04 nodes
#SBATCH --ntasks=10
#SBATCH --cpus-per-task=1        # cpu-cores per task
#SBATCH --mem-per-cpu=3000MB
#SBATCH --time=00:20:00
#SBATCH -p para
#SBATCH --output="stdout.txt"
#SBATCH --error="stderr.txt"
#SBATCH -A camk
##SBATCH --mail-type=ALL
##SBATCH --chdir=/work/cluster_computer/my_name/data_work/MMC331/


eval "$(conda shell.bash hook)"
conda activate myenv
#conda activate fast-mpi4py

cd $SLURM_SUBMIT_DIR

#module purge
#module add mpi/mvapich2-2.2-x86_64

mpirun python3 mpi_test.py

Alan22 · Accepted Answer

You can try with this after "data = comm.gather(data, root=0)"

if rank == 0:
    print('Type of data:', type(data))
    dft_tot = np.array((data))#, dtype='object')
    print('shape of DATA array:', dft_tot.shape)
    #print('Type of dft array:', type(dft_tot))
    res = np.zeros((450,450))
    for k in range(size):
#            for i in range(len(data[rank])):
        for i in range(no_of_freq):

            jj = np.around(freq[freq_range[k]], decimals = 2)
            #data1 = np.array(dft_tot[k])
            res[i * size + k] = data[k]
            data = np.array(res)#.reshape(data1.shape[0]*data1.shape[1], data1.shape[2])
            print('The shape of the dft at root node', data.shape)
            np.savetxt(save_results_to + f'Day_{count_day}_hour_{co

Here is the link. Hope it helps mpi4py on HPC: comm.gather

How to concatenate gathered data using mpi4py library in python

Tags:

python-3.x

parallel-processing

cluster-computing

mpi

mpi4py

CEB

1 Answers

Alan22

Recent Activity

Donate For Us

How to concatenate gathered data using mpi4py library in python

Tags:

python-3.x

parallel-processing

cluster-computing

mpi

mpi4py

CEB

1 Answers

Alan22

Related questions

Recent Activity

Donate For Us