selcal/scripts/selcal-fft.py

#! /usr/bin/env python3
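"""Detect SELCAL tone pairs in a WAV recording.

The recording is band-limited (via the local anti_alias helper), then analysed
with a short-time FFT every 0.2 s. For each segment the magnitude in a narrow
band around each SELCAL tone frequency (from tones.TONES) is scored in dB, and
the two dominant tones are printed whenever they clearly stand out from the
rest. Finally the per-segment spectra are shown as a spectrogram-style image
with the tone frequencies marked.

Usage: selcal-fft.py <recording.wav>
"""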
import numpy as np
from scipy.io import wavfile
from scipy.fft import fft
from filters import anti_alias
from tones import TONES
import sys
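# Minimal argument check (assumption: the script is meant to take exactly one WAV path).
if len(sys.argv) != 2:
    sys.exit(f"usage: {sys.argv[0]} <recording.wav>")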
file_name = sys.argv[1]
def decibels(f):
    return 10 * np.log10(f)


def find_top_two_keys(d, threshold):
    """Return the two highest-scoring keys if both clear the third-best score by at
    least `threshold`; return None when fewer than three scores are available or the
    margin is too small."""
    sorted_items = sorted(d.items(), key=lambda item: item[1], reverse=True)
    if len(sorted_items) < 3:
        return None
    top_two = sorted_items[:2]
    third_value = sorted_items[2][1]
    if top_two[0][1] - third_value < threshold or top_two[1][1] - third_value < threshold:
        return None
    return top_two[0][0], top_two[1][0]
# Step 1: Read the WAV file
sample_rate, data = wavfile.read(file_name)
# Handle stereo audio by converting to mono if needed
if len(data.shape) == 2:
    data = data.mean(axis=1)
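# Step 2: Band-limit the audio. anti_alias (local helper) is expected to low-pass
# filter and resample the signal so only content below max_freq remains.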
max_freq = 1600.0
data, sample_rate = anti_alias(data, sample_rate, max_freq)
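# Step 3: Short-time FFT set-up: a 1024-point FFT is taken every 0.2 s, and only
# the bins below max_freq are kept for scoring and display.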
fft_size = 1024 # Must be larger than max_freq TODO JMT: fix this, zero-pad
frequency_resolution = sample_rate / fft_size
high_bin = int(max_freq / frequency_resolution)
segment_interval = 0.2 # seconds
samples_per_interval = int(sample_rate * segment_interval)
num_segments = len(data) // samples_per_interval
fft_results = np.zeros((num_segments, high_bin))
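# Step 4: Score every segment. Each spectrum is normalised to unit energy, the
# magnitude within +/- tone_width Hz of each SELCAL tone is summed and converted
# to dB, and the two strongest tones are printed when both beat the third-best
# score by at least 3 dB (see find_top_two_keys).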
for i in range(num_segments):
    # Segment window is current position to fft_size samples in the past. As such some
    # segments will have overlap in which samples are used when fft_size > samples_per_interval.
    end = (i + 1) * samples_per_interval
    start = end - fft_size
    if start < 0:
        # Not enough history yet for a full FFT window; leave this row of fft_results as zeros.
        continue
    segment = data[start:end]
    fft_result = fft(segment)
    magnitudes = np.abs(fft_result)
    total_energy = np.sum(magnitudes ** 2)
    normalized_magnitudes = magnitudes / np.sqrt(total_energy)
    # Store the normalised magnitude spectrum only for the desired frequency range
    fft_results[i, :] = normalized_magnitudes[:high_bin]

    tone_width = 6.25  # Hz

    def band(centre_frequency, width=tone_width):
        return np.array([centre_frequency - width, centre_frequency + width])

    def magnitude_in_band(freq_band):
        return np.sum(
            normalized_magnitudes[int(freq_band[0] / frequency_resolution):int(freq_band[1] / frequency_resolution)]
        )

    scores = {
        tone: decibels(magnitude_in_band(band(frequency)))
        for tone, frequency in TONES.items()
    }
    active_tones = find_top_two_keys(scores, 3.0)
    if active_tones:
        print(active_tones)
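# Step 5: Display the per-segment spectra as a spectrogram-style image (time on x,
# frequency on y) with each SELCAL tone frequency marked by a dashed line.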
# These imports are pretty heavy, so they are deferred until we are actually about to plot.
import pyqtgraph as pg
from pyqtgraph.Qt import QtWidgets, QtCore

app = pg.mkQApp("SELCAL FFT")
## Create a window holding a GraphicsLayoutWidget for the spectrogram plot
win = QtWidgets.QMainWindow()
win.resize(1280, 720)
imv = pg.GraphicsLayoutWidget(show=True)
win.setCentralWidget(imv)
correlogram = pg.ImageItem()
correlogram.setImage(fft_results)
# Map image pixels to (seconds, Hz) so the axes show real time and frequency.
img_rect = QtCore.QRectF(0, 0, len(data) / sample_rate, max_freq)
correlogram.setRect(img_rect)
plotItem = imv.addPlot() # add PlotItem to the main GraphicsLayoutWidget
#plotItem.setDefaultPadding(0.0) # plot without padding data range
plotItem.addItem(correlogram) # display correlogram
#plotItem.showAxes(True, showValues=(True, True, False, False), size=20)
freq_pen = pg.mkPen(color=(20, 20, 20), width=1, style=QtCore.Qt.DashLine)
for freq in TONES.values():
horizontal_line = pg.InfiniteLine(pos=freq, angle=0, pen=freq_pen)
plotItem.addItem(horizontal_line)
yticks = [(frequency, tone) for tone,frequency in TONES.items()]
plotItem.getAxis('left').setTicks([yticks])
colorMap = pg.colormap.get("CMRmap", source='matplotlib')  # matplotlib's CMRmap color map
# generate an adjustable color bar, initially spanning 0 to 1:
bar = pg.ColorBarItem(values=(0, 1), colorMap=colorMap)
# link color bar and color map to correlogram, and show it in plotItem:
bar.setImageItem(correlogram, insert_in=plotItem)
win.show()
win.setWindowTitle('SELCAL FFT spectrogram')
pg.exec()