#! /usr/bin/env python3
"""Detect pairs of active tones in a WAV file via short-time FFT analysis.

Usage: script.py <file.wav>
Prints a (tone, tone) pair for every 0.2 s segment in which exactly two
tones stand out clearly from the rest of the spectrum.
"""
import sys

import numpy as np
from scipy.io import wavfile
from scipy.fft import fft
import matplotlib.pyplot as plt

from filters import anti_alias
from tones import TONES

file_name = sys.argv[1]


def decibels(f):
    """Convert a linear power/magnitude value to decibels (10*log10)."""
    return 10 * np.log10(f)


def find_top_two_keys(d, threshold):
    """Return the two highest-scoring keys of *d*, or None.

    The top two only count as "active" when each of them exceeds the
    third-best score by at least *threshold* — i.e. the pair must stand
    out unambiguously. Also returns None when *d* has fewer than three
    entries, since no third score exists to compare against.
    """
    sorted_items = sorted(d.items(), key=lambda item: item[1], reverse=True)
    if len(sorted_items) < 3:
        return None
    top_two = sorted_items[:2]
    third_value = sorted_items[2][1]
    if (top_two[0][1] - third_value < threshold
            or top_two[1][1] - third_value < threshold):
        return None
    return top_two[0][0], top_two[1][0]


# Step 1: Read the WAV file
sample_rate, data = wavfile.read(file_name)

# Handle stereo audio by converting to mono if needed
if len(data.shape) == 2:
    data = data.mean(axis=1)

max_freq = 1600.0
data, sample_rate = anti_alias(data, sample_rate, max_freq)

fft_size = 1024  # FFT length; short segments are zero-padded to this size.
frequency_resolution = sample_rate / fft_size
high_bin = int(max_freq / frequency_resolution)

segment_interval = 0.2  # seconds
samples_per_interval = int(sample_rate * segment_interval)
num_segments = len(data) // samples_per_interval

tone_width = 6.25  # Hz — half-width of the band summed around each tone.


def band(centre_frequency, width=tone_width):
    """Frequency band [centre - width, centre + width] in Hz."""
    return np.array([centre_frequency - width, centre_frequency + width])


def magnitude_in_band(magnitudes, band):
    """Sum of the spectral magnitudes whose bins fall inside *band* (Hz)."""
    lo = int(band[0] / frequency_resolution)
    hi = int(band[1] / frequency_resolution)
    return np.sum(magnitudes[lo:hi])


fft_results = np.zeros((num_segments, high_bin))
for i in range(num_segments):
    # Segment window is current position to fft_size samples in the past.
    # As such some segments will have overlap in which samples are used
    # when fft_size > samples_per_interval.
    end = (i + 1) * samples_per_interval
    # BUG FIX: the original `start = end - fft_size` could go negative for
    # the first few segments, and a negative start silently wraps around
    # to the tail of the array (the resulting shape error was then hidden
    # by a bare `except: pass`). Clamp to 0 and let fft() zero-pad the
    # short early windows up to fft_size instead.
    start = max(0, end - fft_size)
    segment = data[start:end]
    fft_result = fft(segment, n=fft_size)
    magnitudes = np.abs(fft_result)
    total_energy = np.sum(magnitudes ** 2)
    if total_energy == 0:
        continue  # all-zero (silent) segment: nothing to normalise or score
    normalized_magnitudes = magnitudes / np.sqrt(total_energy)
    # Store the normalised magnitude spectrum only for the desired frequency range
    fft_results[i, :] = normalized_magnitudes[:high_bin]
    scores = {
        tone: decibels(magnitude_in_band(normalized_magnitudes, band(frequency)))
        for tone, frequency in TONES.items()
    }
    active_tones = find_top_two_keys(scores, 3.0)
    if active_tones:
        print(active_tones)

# Only import if we're actually plotting, these imports are pretty heavy.
import pyqtgraph as pg
from pyqtgraph.Qt import QtWidgets, QtCore  # QtGui/mkQApp were unused leftovers

# Display the per-segment spectra as a spectrogram-style image, with each
# known tone frequency overlaid as a labelled dashed horizontal line.
app = pg.mkQApp("ImageView Example")

## Create window with ImageView widget
win = QtWidgets.QMainWindow()
win.resize(1280, 720)
# BUG FIX: show=True popped the layout widget up as its own top-level
# window before it was reparented into `win`; win.show() below suffices.
imv = pg.GraphicsLayoutWidget()
win.setCentralWidget(imv)

# Image rows are time segments, columns are frequency bins.
correlogram = pg.ImageItem()
correlogram.setImage(fft_results)
# Map image pixels onto real units: x in seconds, y in Hz.
# BUG FIX: use true division for the duration — the original floor
# division truncated the time axis to whole seconds.
img_rect = QtCore.QRectF(0, 0, len(data) / sample_rate, max_freq)
correlogram.setRect(img_rect)

plotItem = imv.addPlot()  # add PlotItem to the main GraphicsLayoutWidget
plotItem.addItem(correlogram)  # display correlogram

# Dashed guide line at each known tone frequency.
freq_pen = pg.mkPen(color=(20, 20, 20), width=1, style=QtCore.Qt.DashLine)
for freq in TONES.values():
    plotItem.addItem(pg.InfiniteLine(pos=freq, angle=0, pen=freq_pen))

# Label the y axis with tone names instead of raw frequencies.
yticks = [(frequency, tone) for tone, frequency in TONES.items()]
plotItem.getAxis('left').setTicks([yticks])

colorMap = pg.colormap.get("CMRmap", source='matplotlib')  # choose perceptually uniform, diverging color map
# generate an adjustable color bar spanning the normalised range [0, 1]:
bar = pg.ColorBarItem(values=(0, 1), colorMap=colorMap)
# link color bar and color map to correlogram, and show it in plotItem:
bar.setImageItem(correlogram, insert_in=plotItem)

win.show()
win.setWindowTitle('pyqtgraph example: ImageView')
pg.exec()