Reworked to be much neater, something is broken though

2024-05-27 00:13:42 +09:30 · 2024-05-27 00:13:42 +09:30 · eb2926f525
commit eb2926f525
parent e2be6823bd
3 changed files with 96 additions and 98 deletions
--- a/scripts/.gitignore
+++ b/scripts/.gitignore
@ -1 +1,2 @@
 *.wav
 __pycache__/
--- a/scripts/filters.py
+++ b/scripts/filters.py
@ -0,0 +1,42 @@
 from scipy.signal import butter, lfilter, decimate
 def anti_alias(data, sample_rate, max_frequency):
    nyquist_rate = 2 * max_frequency
    if sample_rate > nyquist_rate:
        filtered_data = lowpass_filter(data, max_frequency, sample_rate)
        downsample_factor = int(sample_rate / nyquist_rate)
        filtered_data = decimate(filtered_data, downsample_factor)
        sample_rate = sample_rate // downsample_factor
    else:
        filtered_data = data
    return filtered_data, sample_rate
 # These originally came from https://scipy.github.io/old-wiki/pages/Cookbook/ButterworthBandpass,
 # but they've been copied around the internet so many times that ChatGPT now produces them verbatim.
 def butter_bandpass(lowcut, highcut, fs, order=5):
    nyquist = 0.5 * fs
    low = lowcut / nyquist
    high = highcut / nyquist
    b, a = butter(order, [low, high], btype='band')
    return b, a
 def bandpass_filter(data, lowcut, highcut, fs, order=5):
    b, a = butter_bandpass(lowcut, highcut, fs, order=order)
    y = lfilter(b, a, data)
    return y
 def butter_lowpass(cutoff, fs, order=5):
    nyquist = 0.5 * fs
    normal_cutoff = cutoff / nyquist
    b, a = butter(order, normal_cutoff, btype='low', analog=False)
    return b, a
 def lowpass_filter(data, cutoff, fs, order=5):
    b, a = butter_lowpass(cutoff, fs, order=order)
    y = lfilter(b, a, data)
    return y
--- a/scripts/selcal-fft.py
+++ b/scripts/selcal-fft.py
@ -4,57 +4,30 @@
 import numpy as np
 from scipy.io import wavfile
 from scipy.fft import fft
-from scipy.signal import butter, lfilter, decimate
+from filters import anti_alias
 from tones import TONES
 import matplotlib.pyplot as plt
 import sys
 file_name = sys.argv[1]
 frequencies = np.array([10 ** ((n + 22) * 0.0225 + 2) for n in range(32)])
 widths = np.array([6.25 * 10 ** (n * 0.0225) for n in range(32)])
 widths = np.array([6.25 for n in range(32)])
-def butter_bandpass(lowcut, highcut, fs, order=5):
+def decibels(f):
-    nyquist = 0.5 * fs
+    return 10 * np.log10(f)
    low = lowcut / nyquist
    high = highcut / nyquist
    b, a = butter(order, [low, high], btype='band')
    return b, a
-def bandpass_filter(data, lowcut, highcut, fs, order=5):
+def find_top_two_keys(d, threshold):
-    b, a = butter_bandpass(lowcut, highcut, fs, order=order)
+    sorted_items = sorted(d.items(), key=lambda item: item[1], reverse=True)
    y = lfilter(b, a, data)
    return y
-def butter_lowpass(cutoff, fs, order=5):
+    if len(sorted_items) < 3:
    nyquist = 0.5 * fs
    normal_cutoff = cutoff / nyquist
    b, a = butter(order, normal_cutoff, btype='low', analog=False)
    return b, a
 def lowpass_filter(data, cutoff, fs, order=5):
    b, a = butter_lowpass(cutoff, fs, order=order)
    y = lfilter(b, a, data)
    return y
 def get_largest_two_indices(numbers, threshold):
    # Check if the list has at least three numbers
    if len(numbers) < 3:
        return None
-    # Find the indices of the three largest numbers in the list
+    top_two = sorted_items[:2]
-    indices = sorted(range(len(numbers)), key=lambda i: numbers[i], reverse=True)
+    third_value = sorted_items[2][1]
-    largest_index = indices[0]
+    if top_two[0][1] - third_value < threshold or top_two[1][1] - third_value < threshold:
    second_largest_index = indices[1]
    third_largest_index = indices[2]
    # Check if the largest and second largest numbers are at least threshold larger than the third largest
    if numbers[largest_index] - numbers[third_largest_index] >= threshold and \
       numbers[second_largest_index] - numbers[third_largest_index] >= threshold:
        return largest_index, second_largest_index
    else:
        return None
    return top_two[0][0], top_two[1][0]
 # Step 1: Read the WAV file
 sample_rate, data = wavfile.read(file_name)
@ -62,74 +35,56 @@ sample_rate, data = wavfile.read(file_name)
 if len(data.shape) == 2:
    data = data.mean(axis=1)
-# Define the maximum frequency of interest and Nyquist rate
+max_freq = 1600.0
-max_freq = 1600.0  # 2000 Hz
+data, sample_rate = anti_alias(data, sample_rate, max_freq)
 nyquist_rate = 2 * max_freq  # Nyquist rate to prevent aliasing
-# If the sample rate is higher than the Nyquist rate, downsample
+fft_size = 1024 # Must be larger than max_freq TODO JMT: fix this, zero-pad
-if sample_rate > nyquist_rate:
+frequency_resolution = sample_rate / fft_size
-    # Apply a lowpass filter before downsampling
+high_bin = int(max_freq / frequency_resolution)
    cutoff = max_freq #+ 500  # Lowpass filter cutoff slightly above max_freq to prevent aliasing
    filtered_data = lowpass_filter(data, cutoff, sample_rate)
-    # Calculate the downsampling factor
+segment_interval = 0.2 # seconds
-    downsample_factor = int(sample_rate / nyquist_rate)
+samples_per_interval = int(sample_rate * segment_interval)
 num_segments = len(data) // samples_per_interval
    # Downsample the filtered signal
    filtered_data = decimate(filtered_data, downsample_factor)
    sample_rate = sample_rate // downsample_factor
 else:
    filtered_data = data
-# Step 2: Define the increment (0.1 seconds) and segment size
+fft_results = np.zeros((num_segments, high_bin))
 #increment = 0.2  # in seconds
 #segment_size = int(sample_rate * increment)
 segment_size = 1024
 increment = segment_size / sample_rate
 print(f"Segment size: {segment_size}")
-# Step 3: Process each segment
+for i in range(num_segments):
-num_segments = len(filtered_data) // segment_size
+    # Segment window is current position to fft_size samples in the past. As such some segments
-
+    # will have overlap in which samples are used when fft_size > samples_per_interval
-# Calculate the frequency resolution
+    end = (i + 1) * samples_per_interval
-delta_f = sample_rate / segment_size
+    start = end - fft_size
 # Determine the bin range for desired frequency range (100 Hz to 2000 Hz)
 high_bin = int(max_freq / delta_f)
 seg_off = int(sample_rate * 0.1)
 act_segs = len(filtered_data) // seg_off
 # Initialize a 2D array to store DFT results (magnitude spectrum)
 # Only store the bins within the desired frequency range
 dft_results = np.zeros((act_segs, high_bin))
 for i in range(act_segs):
    end = (i+1) * seg_off
    start = end - segment_size
    try:
-        segment = filtered_data[start:end]
+        segment = data[start:end]
-        # Step 4: Apply the DFT
+        fft_result = fft(segment)
        dft_result = fft(segment)
-        magnitudes = np.abs(dft_result)
+        magnitudes = np.abs(fft_result)
        total_energy = np.sum(magnitudes ** 2)
        normalized_magnitudes = magnitudes / np.sqrt(total_energy)
        normalized_magnitude = np.mean(normalized_magnitudes)
-        # Store the magnitude spectrum in the 2D array, only for the desired frequency range
+        # Store the normalised magnitude spectrum only for the desired frequency range
-        dft_results[i, :] = normalized_magnitudes[:high_bin]
+        fft_results[i, :] = normalized_magnitudes[:high_bin]
-        scores = [
+        tone_width = 6.25 # Hz
-            10 * np.log10(np.sum(normalized_magnitudes[int((f-w)/delta_f):int((f+w)/delta_f)]))
+        def band(centre_frequency, width=tone_width):
-            for f,w in zip(frequencies, widths)
+            return np.array([centre_frequency - width, centre_frequency + width])
-        ]
+
        def magnitude_in_band(band):
            return np.sum(normalized_magnitudes[int(band[0]):int(band[1])])
        scores = {
            tone:decibels(magnitude_in_band(band(frequency)))
            for tone,frequency in TONES.items()
        }
        print(scores)
        active_tones = find_top_two_keys(scores)
        if active_tones:
            print(active_tones)
        codes = get_largest_two_indices(scores, 3.0)
        if codes:
            print([frequencies[code] for code in sorted(codes)])
    except:
        pass
@ -148,8 +103,8 @@ win.setCentralWidget(imv)
 correlogram = pg.ImageItem()
-correlogram.setImage(dft_results)
+correlogram.setImage(fft_results)
-img_rect = QtCore.QRectF(0, 0, len(filtered_data) // sample_rate, max_freq)
+img_rect = QtCore.QRectF(0, 0, len(data) // sample_rate, max_freq)
 correlogram.setRect(img_rect)
 plotItem = imv.addPlot()      # add PlotItem to the main GraphicsLayoutWidget
@ -158,11 +113,11 @@ plotItem.addItem(correlogram)    # display correlogram
 #plotItem.showAxes(True, showValues=(True, True, False, False), size=20)
 freq_pen = pg.mkPen(color=(20, 20, 20), width=1, style=QtCore.Qt.DashLine)
-for freq in frequencies:
+for freq in TONES.values():
    horizontal_line = pg.InfiniteLine(pos=freq, angle=0, pen=freq_pen)
    plotItem.addItem(horizontal_line)
-yticks = [(i, str(round(i))) for i in frequencies]
+yticks = [(frequency, tone) for tone,frequency in TONES.items()]
 plotItem.getAxis('left').setTicks([yticks])
 colorMap = pg.colormap.get("CMRmap", source='matplotlib')     # choose perceptually uniform, diverging color map