I am getting the peak frequency from wav files
My code for getting the peak frequency from a wav file is:
import wave
import struct
import numpy as np
import wave
import contextlib
if __name__ == '__main__':
fname = "test.wav"
frate = 0
data_size = 0
with contextlib.closing(wave.open(fname,'r')) as f:
frate = f.getframerate()
data_size = f.getnframes()
wav_file = wave.open(fname, 'r')
data = wav_file.readframes(data_size)
data_size = data_size * wav_file.getnchannels()
print wav_file.getparams()
wav_file.close()
data = struct.unpack('{n}h'.format(n=data_size), data)
data = np.array(data)
w = np.fft.fft(data)
freqs = np.fft.fftfreq(len(w))
print(freqs.min(), freqs.max())
# Find the peak in the coefficients
idx = np.argmax(np.abs(w))
freq = freqs[idx]
freq_in_hertz = abs(freq * frate)
print(freq_in_hertz)
I recorded a wav file with 48000 sample rate, 16 bitwidth, 2 channels. In that file I have a sine tone with 1000Hz. But the script outputting only 500Hz. I dont know where I went wrong. But for single channel and generated wav file with 48000 sample rate, 16 bitwidth, 2 channels it is working fine.
I generated the wav file using the following script
import math
import wave
import struct
if __name__ == '__main__':
# http://stackoverflow.com/questions/3637350/how-to-write-stereo-wav-files-in-python
# http://www.sonicspot.com/guide/wavefiles.html
freq = 1000
data_size = 454656 * 2
fname = "test.wav"
frate = 48000.0
amp = 64000.0
nchannels = 2
sampwidth = 2
framerate = int(frate)
nframes = data_size
comptype = "NONE"
compname = "not compressed"
data = [math.sin(2 * math.pi * freq * (x / frate))
for x in range(data_size)]
wav_file = wave.open(fname, 'w')
wav_file.setparams(
(nchannels, sampwidth, framerate, nframes, comptype, compname))
for v in data:
wav_file.writeframes(struct.pack('h', int(v * amp / 2)))
wav_file.close()
I dont know where I did wrong. I uploaded my wav files at script generated wav script_gen.wav with 48000 sample rate, 2 channels, 16 bit. Recorded wavs: 2 channel wav with 48000 sample rate, 2 channels, 16 bit 1 channel wav(Not allowing to post the link here, so will post in the comments) with 48000 sample rate, 1 channel, 16 bit.
I checked all these peak frequency in audacity, it showing 1000Khz only.
But when I tried with my scirpt I am getting correct output for 1 channel wav and failing for 2 channel wav.
update: I am getting the half of the peak frequency as the output for 2 channels.
I am feeling that I missed something. Can anyone help me in this?
Why so complicated? Consider the following
#!/usr/bin/env python3
import numpy as np
from numpy import fft
import scipy.io.wavfile as wf
import matplotlib.pyplot as plt
sr = 44100 # sample rate
len_sig = 2 # length of resulting signal in seconds
f = 1000 # frequency in Hz
# set you time axis
t = np.linspace(0, len_sig, sr*len_sig)
# set your signal
mono_data = np.sin(2*np.pi*t*f)
# write single channel .wav file
wf.write('mono.wav', sr, mono_data)
# write two-channel .wav file
stereo_data = np.vstack((mono_data, mono_data)).T
wf.write('stereo.wav', sr, stereo_data)
Now test it by loading and analyzing the data
# Load data
mono_sr, mono_data = wf.read('mono.wav')
stereo_sr, stereo_data = wf.read('stereo.wav')
# analyze the data
X_mono = fft.fft(mono_data) / len(mono_data) # remember to normalize your amplitudes
# Remember that half of energy of the signal is distributed over the
# positive frequencies and the other half over the negative frequencies.
#
# Commonly you want see a magnitude spectrum. That means, we ignore the phases. Hence, we
# simply multiply the spectrum by 2 and consider ONLY the first half of it.
freq_nq = len(X_mono) // 2
X_mono = abs(X_mono[:freq_nq]) * 2
freqs_mono = fft.fftfreq(len(mono_data), 1/mono_sr)[:freq_nq]
# in order the analyze a stereo signal you first have to add both channels
sum_stereo = stereo_data.sum(axis=1) / 2
# and now the same way as above
freq_nq = len(sum_stereo) // 2
X_stereo= abs(fft.fft(sum_stereo))[:freq_nq] / len(stereo_data) * 2
freqs_stereo = fft.fftfreq(len(stereo_data), 1/stereo_sr)[:freq_nq]
Peak picking:
freqs_mono[np.argmax(X_mono)] # == 1000.0
freqs_stereo[np.argmax(X_stereo)] # == 1000.0
Plot the result:
fig, (ax1, ax2) = plt.subplots(2, figsize=(10,5), sharex=True, sharey=True)
ax1.set_title('mono signal')
ax1.set_xlim([0, 2000])
ax1.plot(freqs_mono, X_mono, 'b', lw=2)
ax2.set_title('stereo signal')
ax2.plot(freqs_stereo, X_stereo, 'g', lw=2)
ax2.set_xlim([0, 2000])
plt.tight_layout()
plt.show()

If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With