# SoundLocal/Decision Tree.py
#
# 423 lines
# 21 KiB
# Python
# Raw Permalink Blame History
#
# This file contains ambiguous Unicode characters
#
# This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import matplotlib.patches as patches
import librosa
import random
import pandas as pd
import threading
import time
from dataclasses import dataclass
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from scipy.interpolate import interp1d
from scipy.signal import windows
# Constants
SOUND_SPEED = 343.2 # speed of sound (m/s)
MIC_DISTANCE = 0.06 # distance between the two microphones (m)
ROOM_WIDTH = 3.0 # room width (m)
ROOM_HEIGHT = 2.0 # room height (m)
SAMPLE_RATE = 48000 # sampling rate (Hz)
CHUNK = 32768 # buffer size (samples taken from the recording per processing step)
RMS_THRESHOLD = 0.1 # RMS threshold above which a channel counts as "sound present"
SILENCE_TIMEOUT = 0.5 # time in seconds for which the last detected angle is kept
MOVE_INTERVAL = 0.5 # interval between moves of the simulated sound source (s)
@dataclass
class Microphone:
    """Position of a microphone in the 2-D scene."""

    x: float  # horizontal coordinate
    y: float  # vertical coordinate
@dataclass
class SoundSource:
    """Position of the sound source in the 2-D scene."""

    x: float  # horizontal coordinate
    y: float  # vertical coordinate
class TreeRegressionDirectionFinder:
    """Simulated two-microphone direction finder based on a decision-tree regressor.

    A mono recording is replayed chunk by chunk; for every chunk the two
    microphone signals are synthesised by delaying the chunk according to the
    distance from the current (simulated) source to each microphone and adding
    Gaussian noise.  The GCC-PHAT cross-correlation peak of the two signals is
    fed to a ``DecisionTreeRegressor`` that was trained on synthetic data in
    ``__init__`` and predicts the direction angle in degrees.

    NOTE(review): the model's only feature is the *height* of the
    cross-correlation peak, not the estimated delay itself - confirm that this
    is the intended design.
    """

    # Distance from the array centre at which simulated sources are placed.
    SOURCE_DISTANCE = 1.5
    # Width in degrees of each angular sector visited by the source generator.
    ANGLE_STEP = 20

    def __init__(self, mic_distance: float, audio_file: str):
        """Load the recording, place the microphones and train the model.

        Args:
            mic_distance: Spacing between the two microphones; they are placed
                symmetrically around the origin on the x axis.
            audio_file: Path of the recording replayed as the source signal.

        Raises:
            ValueError: If the recording cannot be loaded or is too short.
        """
        self.mic_distance = mic_distance
        self.mic1 = Microphone(x=-mic_distance / 2, y=0.0)
        self.mic2 = Microphone(x=mic_distance / 2, y=0.0)
        self.sound_source = self._generate_random_sound_source()
        self.running = True
        self.current_angle = 0.0
        self.sound_detected = False
        self.last_sound_time = 0
        self.last_detected_angle = None
        self.show_arrow = False
        self.rms_left = 0.0
        self.rms_right = 0.0
        self.audio_data, self.sample_rate = self.load_audio(audio_file)
        self.audio_index = 0
        self.noise_level = 0.001  # standard deviation of the additive noise
        self.results = []
        self.source_positions = [(self.sound_source.x, self.sound_source.y, 0.0)]
        self.last_move_time = time.time()
        # Largest delay the geometry allows: sound travelling along the mic axis.
        self.max_physical_delay = self.mic_distance / SOUND_SPEED
        self.model = self.train_regression_model()
        print("TreeRegressionDirectionFinder инициализирован")

    def _generate_random_sound_source(self, *, verbose: bool = True) -> "SoundSource":
        """Return a new source position, sweeping the angle from -90 to 90 degrees.

        The angle is drawn uniformly from the current sector; after ten draws
        the generator advances to the next sector and wraps around at the end.
        The last sector starts at 90 and is clamped to 90, so it always yields
        exactly 90 degrees.

        Args:
            verbose: Print the generated position.  Bulk callers (training)
                pass ``False`` to avoid flooding stdout.
        """
        if not hasattr(self, 'angle_ranges'):
            # Sweep state is created lazily on the first call.
            self.angle_ranges = list(range(-90, 91, self.ANGLE_STEP))
            self.current_range_idx = 0
            self.angle_count = 0

        start_angle = self.angle_ranges[self.current_range_idx]
        if self.current_range_idx < len(self.angle_ranges) - 1:
            end_angle = start_angle + self.ANGLE_STEP
        else:
            end_angle = 90
        angle_deg = random.uniform(start_angle, end_angle)
        angle_rad = np.radians(angle_deg)
        # The angle is measured from the +y axis, positive towards +x.
        source_x = self.SOURCE_DISTANCE * np.sin(angle_rad)
        source_y = self.SOURCE_DISTANCE * np.cos(angle_rad)

        self.angle_count += 1
        if self.angle_count >= 10:
            self.angle_count = 0
            self.current_range_idx = (self.current_range_idx + 1) % len(self.angle_ranges)

        if verbose:
            print(f"Генерация новой позиции: угол={angle_deg:.2f}°, x={source_x:.2f}, y={source_y:.2f}")
        return SoundSource(x=source_x, y=source_y)

    def load_audio(self, filename: str) -> tuple:
        """Load ``filename`` as mono audio resampled to ``SAMPLE_RATE``.

        Returns:
            ``(audio_data, sample_rate)`` as returned by ``librosa.load``.

        Raises:
            ValueError: If loading fails (the original error is chained) or the
                file holds fewer than two samples, which is too little to
                interpolate.
        """
        try:
            audio_data, sample_rate = librosa.load(filename, sr=SAMPLE_RATE, mono=True)
        except Exception as e:
            raise ValueError(f"Ошибка загрузки аудиофайла: {e}") from e
        if len(audio_data) < 2:
            raise ValueError(f"Ошибка загрузки аудиофайла: файл '{filename}' слишком короткий")
        return audio_data, sample_rate

    def _distances_to(self, source) -> tuple:
        """Return the distances from ``source`` to microphone 1 and microphone 2."""
        l1 = np.hypot(source.x - self.mic1.x, source.y - self.mic1.y)
        l2 = np.hypot(source.x - self.mic2.x, source.y - self.mic2.y)
        return l1, l2

    def calculate_distances(self) -> tuple:
        """Return the distances from the current sound source to both microphones."""
        return self._distances_to(self.sound_source)

    def _simulate_mic_signals(self, signal: np.ndarray, t1: float, t2: float) -> tuple:
        """Delay ``signal`` by ``t1`` and ``t2`` seconds and add Gaussian noise.

        The delay is applied by linear interpolation; samples requested before
        the start of ``signal`` are linearly extrapolated.
        """
        time_points = np.arange(len(signal)) / self.sample_rate
        interp_func = interp1d(time_points, signal, kind='linear', fill_value="extrapolate")
        delayed1 = interp_func(time_points - t1)
        delayed2 = interp_func(time_points - t2)
        noise1 = np.random.normal(0, self.noise_level, delayed1.shape)
        noise2 = np.random.normal(0, self.noise_level, delayed2.shape)
        return delayed1 + noise1, delayed2 + noise2

    def process_signals_with_delay(self, signal: np.ndarray) -> tuple:
        """Synthesise both microphone signals for the current source position.

        Returns:
            ``(S1, S2, t1, t2)``: the two noisy delayed signals and the
            propagation times (seconds) from the source to each microphone.
        """
        l1, l2 = self.calculate_distances()
        t1 = l1 / SOUND_SPEED
        t2 = l2 / SOUND_SPEED
        S1, S2 = self._simulate_mic_signals(signal, t1, t2)
        return S1, S2, t1, t2

    def capture_audio(self):
        """Emulate audio capture by taking the next ``CHUNK`` samples of the recording.

        Playback wraps to the beginning when fewer than ``CHUNK`` samples
        remain; a recording shorter than ``CHUNK`` is returned whole.
        """
        if self.audio_index + CHUNK >= len(self.audio_data):
            self.audio_index = 0
        chunk = self.audio_data[self.audio_index:self.audio_index + CHUNK]
        self.audio_index += CHUNK
        return self.process_signals_with_delay(chunk)

    def calculate_rms(self, signal: np.ndarray) -> float:
        """Return the root-mean-square of ``signal`` (0.0 for an empty signal)."""
        samples = np.asarray(signal, dtype=float)
        if samples.size == 0:
            return 0.0
        return float(np.sqrt(np.mean(samples ** 2)))

    def calculate_time_delay_fft(self, signal1: np.ndarray, signal2: np.ndarray) -> tuple:
        """Estimate the delay between two signals with Hann-windowed GCC-PHAT.

        Both signals are normalised, windowed and cross-correlated through the
        FFT with phase-transform weighting.  The peak is searched only among
        lags that are physically possible for the microphone spacing, then
        refined by parabolic interpolation when it is not on the window edge.

        Returns:
            ``(time_delay, peak_correlation)``: the delay in seconds (positive
            when ``signal1`` lags ``signal2``), clipped to
            ``+/- self.max_physical_delay``, and the correlation value at the
            integer-lag peak.

        Raises:
            ValueError: If the signals are empty or differ in length.
        """
        signal1 = np.asarray(signal1, dtype=float)
        signal2 = np.asarray(signal2, dtype=float)
        n = len(signal1)
        if n == 0 or len(signal2) != n:
            raise ValueError("signal1 and signal2 must be non-empty and of equal length")

        window = windows.hann(n)
        signal1 = (signal1 - np.mean(signal1)) / (np.std(signal1) + 1e-10) * window
        signal2 = (signal2 - np.mean(signal2)) / (np.std(signal2) + 1e-10) * window

        cross_spectrum = np.fft.rfft(signal1) * np.conj(np.fft.rfft(signal2))
        # PHAT weighting: keep only the phase of the cross spectrum.
        cross_spectrum = cross_spectrum / (np.abs(cross_spectrum) + 1e-10)
        # Pass n explicitly so odd-length inputs keep their length.
        correlation = np.fft.irfft(cross_spectrum, n=n)
        middle_point = len(correlation) // 2
        # Move zero lag to the middle of the array.
        correlation = np.roll(correlation, middle_point)

        max_delay_samples = int(self.max_physical_delay * self.sample_rate)
        # Inclusive, symmetric search window, clamped to the array bounds.
        start_idx = max(0, middle_point - max_delay_samples)
        end_idx = min(len(correlation) - 1, middle_point + max_delay_samples)
        peak_idx = start_idx + int(np.argmax(correlation[start_idx:end_idx + 1]))
        peak_correlation = correlation[peak_idx]

        refined_idx = float(peak_idx)
        # Interpolate only for interior peaks: an edge maximum need not be a
        # local maximum, so the parabola vertex would be meaningless there.
        if start_idx < peak_idx < end_idx:
            y0 = correlation[peak_idx - 1]
            y1 = correlation[peak_idx]
            y2 = correlation[peak_idx + 1]
            denom = 2 * (y0 - 2 * y1 + y2)
            if denom != 0:
                refined_idx += (y0 - y2) / denom

        time_delay = (refined_idx - middle_point) / self.sample_rate
        time_delay = np.clip(time_delay, -self.max_physical_delay, self.max_physical_delay)
        return time_delay, peak_correlation

    def _simulated_peak_correlation(self, source) -> float:
        """Return the GCC-PHAT peak for white noise emitted from ``source``."""
        l1, l2 = self._distances_to(source)
        signal = np.random.normal(0, 1, CHUNK)
        S1, S2 = self._simulate_mic_signals(signal, l1 / SOUND_SPEED, l2 / SOUND_SPEED)
        _, peak_correlation = self.calculate_time_delay_fft(S1, S2)
        return peak_correlation

    def train_regression_model(self, n_samples: int = 50000, n_additional_samples: int = 20000):
        """Train the decision tree on synthetic data; the feature is the correlation peak.

        Args:
            n_samples: Number of samples drawn with the sweeping source generator.
            n_additional_samples: Extra samples drawn uniformly from the outer
                angle ranges -90..-50 and 50..90 degrees.

        Returns:
            The fitted ``DecisionTreeRegressor``.  The mean squared error on a
            20 % hold-out split is printed.
        """
        X = []
        y = []
        # Standard training data from the sweeping generator.
        for _ in range(n_samples):
            source = self._generate_random_sound_source(verbose=False)
            X.append([self._simulated_peak_correlation(source)])
            y.append(np.arctan2(source.x, source.y) * 180 / np.pi)
        # Additional data for the angle ranges -90..-50 and 50..90 degrees.
        for _ in range(n_additional_samples):
            if random.choice([True, False]):
                angle_deg = random.uniform(-90, -50)
            else:
                angle_deg = random.uniform(50, 90)
            angle_rad = np.radians(angle_deg)
            source = SoundSource(x=self.SOURCE_DISTANCE * np.sin(angle_rad),
                                 y=self.SOURCE_DISTANCE * np.cos(angle_rad))
            X.append([self._simulated_peak_correlation(source)])
            y.append(angle_deg)

        X = np.array(X)
        y = np.array(y)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
        model = DecisionTreeRegressor(max_depth=10, random_state=42)
        model.fit(X_train, y_train)
        mse = mean_squared_error(y_test, model.predict(X_test))
        print(f"Среднеквадратичная ошибка модели на тестовых данных: {mse:.4f}")
        return model

    def calculate_direction(self, peak_correlation: float) -> float:
        """Predict the direction angle (degrees, clipped to [-90, 90]) with the model."""
        features = np.array([[peak_correlation]])
        angle = self.model.predict(features)[0]
        return float(np.clip(angle, -90, 90))

    def _process_next_chunk(self, current_time: float) -> None:
        """Move the source if due, analyse one chunk and update the shared state."""
        if current_time - self.last_move_time >= MOVE_INTERVAL:
            self.sound_source = self._generate_random_sound_source()
            self.source_positions.append((self.sound_source.x, self.sound_source.y, current_time))
            print(f"Источник звука перемещен в: x={self.sound_source.x:.2f}, y={self.sound_source.y:.2f}")
            self.last_move_time = current_time

        left, right, _, _ = self.capture_audio()
        self.rms_left = self.calculate_rms(left)
        self.rms_right = self.calculate_rms(right)

        if self.rms_left > RMS_THRESHOLD and self.rms_right > RMS_THRESHOLD:
            peak_diff = np.max(np.abs(left)) - np.max(np.abs(right))
            print(f"RMS left: {self.rms_left:.4f}, RMS right: {self.rms_right:.4f}")
            print(f"Peak difference: {peak_diff:.4f}")
            time_delay, peak_correlation = self.calculate_time_delay_fft(left, right)
            print(f"Time delay: {time_delay * 1000:.2f} ms")
            print(f"Peak correlation: {peak_correlation:.4f}")
            angle = self.calculate_direction(peak_correlation)
            print(f"Calculated angle: {angle:.1f}°")
            self.current_angle = angle
            self.last_detected_angle = angle
            self.last_sound_time = current_time
            self.sound_detected = True
            self.show_arrow = True
            true_angle = np.arctan2(self.sound_source.x, self.sound_source.y) * 180 / np.pi
            self.results.append({
                'Time Delay (ms)': time_delay * 1000,
                'Peak Correlation': peak_correlation,
                'Detected Angle (°)': self.current_angle,
                'True Angle (°)': true_angle,
                'Source X': self.sound_source.x,
                'Source Y': self.sound_source.y
            })
            return

        # Silence: keep showing the last angle for SILENCE_TIMEOUT seconds.
        self.sound_detected = False
        self.show_arrow = (self.last_detected_angle is not None
                           and current_time - self.last_sound_time < SILENCE_TIMEOUT)

    def run(self):
        """Process the recording chunk by chunk until ``self.running`` is cleared.

        Each iteration is followed by a sleep of one chunk duration, also after
        a failed iteration, so a persistent error cannot turn into a busy loop.
        """
        while self.running:
            try:
                self._process_next_chunk(time.time())
            except Exception as e:
                print(f"Ошибка в run: {e}")
            time.sleep(CHUNK / self.sample_rate)

    def get_coordinates_dataframe(self):
        """Return a DataFrame with the microphone positions and every source position.

        The ``Time`` column holds the third element of each recorded source
        position; the microphones get 0.0.
        """
        positions = self.source_positions
        data = {
            'Object': ['Mic1', 'Mic2'] + [f'SoundSource_{i}' for i, _ in enumerate(positions)],
            'X': [self.mic1.x, self.mic2.x] + [pos[0] for pos in positions],
            'Y': [self.mic1.y, self.mic2.y] + [pos[1] for pos in positions],
            'Time': [0.0, 0.0] + [pos[2] for pos in positions]
        }
        return pd.DataFrame(data)

    def get_results_dataframe(self):
        """Return a DataFrame with one row per detection recorded by ``run``."""
        return pd.DataFrame(self.results)
def main():
    """Run the simulation with a live matplotlib view and save the results.

    A ``TreeRegressionDirectionFinder`` processes the recording in a worker
    thread while the animation shows the simulated source, the true direction
    and the predicted direction.  When the window is closed the worker is
    stopped and the coordinates and detection results are printed and written
    to ``coordinates_tree.csv`` and ``results_tree.csv``.
    """
    try:
        finder = TreeRegressionDirectionFinder(MIC_DISTANCE, "my_recording1.wav")
        print("Аудиофайл загружен, частота дискретизации:", finder.sample_rate)
        thread = threading.Thread(target=finder.run)
        thread.start()
        try:
            fig, ax = plt.subplots(figsize=(9, 6))
            ax.set_xlim(-ROOM_WIDTH / 2, ROOM_WIDTH / 2)
            ax.set_ylim(-0.5, ROOM_HEIGHT)
            ax.set_aspect('equal')
            ax.set_title("Определение направления на источник звука (Tree Regression)", fontsize=12)
            ax.set_xlabel("X (м)", fontsize=10)
            ax.set_ylabel("Y (м)", fontsize=10)
            ax.grid(True)
            ax.plot(finder.mic1.x, finder.mic1.y, 'bs', markersize=12, label='Микрофон 1')
            ax.plot(finder.mic2.x, finder.mic2.y, 'bs', markersize=12, label='Микрофон 2')
            source_plot, = ax.plot(finder.sound_source.x, finder.sound_source.y, 'ro', markersize=12,
                                   label='Источник звука')
            arrow_length = min(ROOM_WIDTH, ROOM_HEIGHT) / 4
            arrow = ax.arrow(0, 0, 0, 0, head_width=0.2, head_length=0.3,
                             fc='r', ec='r', label='Расчетное направление')
            arrow_true = ax.arrow(0, 0, 0, 0, head_width=0.2, head_length=0.3,
                                  fc='g', ec='g', linestyle=':', label='Истинное направление')
            sound_bar = ax.axhline(y=ROOM_HEIGHT - 1, color='green', linewidth=15, visible=False)
            angle_text = ax.text(0, ROOM_HEIGHT - 0.3, "", ha='center', va='center', fontsize=10)
            # Small per-microphone level indicators drawn just below the microphones.
            left_indicator = patches.Rectangle((finder.mic1.x - 0.03, -0.15), 0.06, 0.08, facecolor='gray')
            right_indicator = patches.Rectangle((finder.mic2.x - 0.03, -0.15), 0.06, 0.08, facecolor='gray')
            ax.add_patch(left_indicator)
            ax.add_patch(right_indicator)

            def update(frame):
                """Redraw all artists from the finder's current state."""
                source_plot.set_data([finder.sound_source.x], [finder.sound_source.y])
                dx = finder.sound_source.x
                dy = finder.sound_source.y
                # Angles are measured from the +y axis, positive towards +x.
                true_angle_rad = np.arctan2(dx, dy)
                true_end_x = arrow_length * np.sin(true_angle_rad)
                true_end_y = arrow_length * np.cos(true_angle_rad)
                arrow_true.set_data(x=0, y=0, dx=true_end_x, dy=true_end_y)
                if finder.show_arrow:
                    angle = finder.current_angle
                    calc_angle_rad = np.radians(angle)
                    calc_end_x = arrow_length * np.sin(calc_angle_rad)
                    calc_end_y = arrow_length * np.cos(calc_angle_rad)
                    arrow.set_data(x=0, y=0, dx=calc_end_x, dy=calc_end_y)
                    angle_text.set_text(f"Расчетный угол: {angle:.1f}°\nИстинный угол: {np.degrees(true_angle_rad):.1f}°")
                    if finder.sound_detected:
                        sound_bar.set_visible(True)
                        ax.set_title("Активное обнаружение звука (Tree Regression)", fontsize=12)
                    else:
                        sound_bar.set_visible(False)
                        ax.set_title("Последнее зафиксированное направление (Tree Regression)", fontsize=12)
                else:
                    arrow.set_data(x=0, y=0, dx=0, dy=0)
                    angle_text.set_text("")
                    sound_bar.set_visible(False)
                    ax.set_title("Звук не обнаружен", fontsize=12)
                left_indicator.set_facecolor('green' if finder.rms_left > RMS_THRESHOLD else 'gray')
                right_indicator.set_facecolor('green' if finder.rms_right > RMS_THRESHOLD else 'gray')
                # NOTE(review): the axes title is not among the returned artists,
                # so with blit=True its changes may not be redrawn - confirm.
                return [source_plot, arrow, arrow_true, sound_bar,
                        left_indicator, right_indicator, angle_text]

            # Keep a reference to the animation for as long as the window is open.
            ani = FuncAnimation(fig, update, frames=None, interval=10, blit=True, cache_frame_data=False)
            plt.legend(loc='upper left', fontsize=8)
            plt.tight_layout()
            plt.show()
        finally:
            # Always stop the worker, even if plotting failed; otherwise the
            # non-daemon thread would keep the process alive forever.
            finder.running = False
            thread.join()
        coords_df = finder.get_coordinates_dataframe()
        results_df = finder.get_results_dataframe()
        print("\nКоординаты микрофонов и всех позиций источника звука:")
        print(coords_df.to_string(index=False))
        print("\nРезультаты вычислений:")
        print(results_df.to_string(index=False))
        coords_df.to_csv('coordinates_tree.csv', index=False)
        results_df.to_csv('results_tree.csv', index=False)
        print("\nДанные сохранены в 'coordinates_tree.csv' и 'results_tree.csv'")
    except Exception as e:
        print(f"Ошибка в main: {e}")
# Run the simulation only when the file is executed as a script.
if __name__ == "__main__":
    main()