Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Experimental Auto Detect Batuk, Thread Record #1

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
/.cache
__pycache__
63 changes: 63 additions & 0 deletions coughgui/pysources/experimental/auto_detect.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#pip install xgboost==0.90 scipy==0.22.1

import alsaaudio as alsa
import numpy as np
from queue import Queue
from datetime import datetime, timedelta
from time import sleep
import threading
import os
import wave
import soundfile as sf

from src.DSP import classify_cough
import pickle

# Load the pre-trained cough classifier and its feature scaler.
# BUGFIX: pickle.load(open(...)) leaked the file handles; use context managers.
# NOTE(review): pickle deserialization runs arbitrary code — only load trusted model files.
with open(os.path.join('./models', 'cough_classifier'), 'rb') as f:
    model = pickle.load(f)
with open(os.path.join('./models', 'cough_classification_scaler'), 'rb') as f:
    scaler = pickle.load(f)

# Number of frames per ALSA capture period.
AudioLong = 1024
# Scratch buffer of three periods of signed 16-bit samples
# (not referenced elsewhere in this script; kept for compatibility).
to_checkAudio = np.zeros(3 * AudioLong, dtype='i2')

# Open the default capture device: stereo, 44.1 kHz, 16-bit little-endian PCM.
# (sic: the name "aslsa_pcm" is kept as-is — the rest of the script references it.)
aslsa_pcm = alsa.PCM(alsa.PCM_CAPTURE, alsa.PCM_NORMAL, channels=2, rate=44100,
                     format=alsa.PCM_FORMAT_S16_LE, periodsize=AudioLong)

# Hand-off queue between the capture thread (producer) and the main loop (consumer).
data_queue = Queue()
phrase_time = None       # unused at module level; capture_audio keeps its own local
phrase_timeout = 3       # seconds of audio accumulated before a chunk is flushed

def capture_audio():
    """Continuously read from the ALSA PCM device and batch the raw bytes into
    ~phrase_timeout-second chunks, pushing each completed chunk onto data_queue.

    Runs forever; intended to be started as a daemon thread.
    """
    phrase_time = datetime.utcnow()
    total_indata = bytearray()
    while True:
        now = datetime.utcnow()
        length, indata = aslsa_pcm.read()
        # BUGFIX: the original discarded the read length. A non-positive length
        # signals an overrun/empty read, in which case the buffer is not valid
        # audio and must not be appended.
        if length <= 0:
            continue

        if now - phrase_time > timedelta(seconds=phrase_timeout):
            # Chunk complete: hand the accumulated audio to the consumer and
            # start a new chunk with the frame we just read.
            phrase_time = now
            data_queue.put(bytes(total_indata))
            total_indata = bytearray(indata)
        else:
            total_indata += indata

# Run the capture loop in the background; daemon=True so Ctrl+C can end the process.
capture_thread = threading.Thread(target=capture_audio)
capture_thread.daemon = True
capture_thread.start()

while True:
    try:
        if not data_queue.empty():
            audio_data = data_queue.get()
            # Drop any backlog so we always classify the most recent chunk.
            # NOTE(review): clearing the internal deque is not a public Queue
            # API; acceptable here with a single consumer thread.
            data_queue.queue.clear()

            # 16-bit PCM -> float32 in [-1, 1), then split the interleaved
            # stereo stream into shape (frames, 2).
            audio_np = np.frombuffer(audio_data, dtype=np.int16).astype(np.float32) / 32768.0
            reshape_np = audio_np.reshape(-1, 2)

            prob = classify_cough(reshape_np, 44100, model, scaler)
            if prob > 0.75:
                print("Batuk")  # Indonesian for "cough"

        # Poll at 4 Hz instead of spinning the CPU.
        sleep(0.25)
    except KeyboardInterrupt:
        break
63 changes: 63 additions & 0 deletions coughgui/pysources/experimental/background_record.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import alsaaudio as alsa
import numpy as np
from queue import Queue
from datetime import datetime, timedelta
from time import sleep
import threading
import os
import wave
import soundfile as sf

# Frames read from ALSA per capture period.
AudioLong = 1024

# Scratch buffer sized for three capture periods of signed 16-bit samples
# (not referenced elsewhere in this script; kept for compatibility).
to_checkAudio = np.zeros(3 * AudioLong, dtype='i2')

# Open the default capture device: stereo, 44.1 kHz, 16-bit little-endian PCM.
# (sic: the name "aslsa_pcm" is kept as-is — the rest of the script references it.)
aslsa_pcm = alsa.PCM(
    alsa.PCM_CAPTURE,
    alsa.PCM_NORMAL,
    channels=2,
    rate=44100,
    format=alsa.PCM_FORMAT_S16_LE,
    periodsize=AudioLong,
)

# Hand-off queue between the capture thread (producer) and the main loop (consumer).
data_queue = Queue()
phrase_time = None       # unused at module level; capture_audio keeps its own local
phrase_timeout = 3       # seconds of audio accumulated before a chunk is flushed

def capture_audio():
    """Continuously read from the ALSA PCM device and batch the raw bytes into
    ~phrase_timeout-second chunks, pushing each completed chunk onto data_queue.

    Runs forever; intended to be started as a daemon thread.
    """
    phrase_time = datetime.utcnow()
    total_indata = bytearray()
    while True:
        now = datetime.utcnow()
        length, indata = aslsa_pcm.read()
        # BUGFIX: the original discarded the read length. A non-positive length
        # signals an overrun/empty read, in which case the buffer is not valid
        # audio and must not be appended.
        if length <= 0:
            continue

        if now - phrase_time > timedelta(seconds=phrase_timeout):
            # Chunk complete: hand the accumulated audio to the consumer and
            # start a new chunk with the frame we just read.
            phrase_time = now
            data_queue.put(bytes(total_indata))
            total_indata = bytearray(indata)
        else:
            total_indata += indata

# Run the capture loop in the background; daemon=True so Ctrl+C can end the process.
capture_thread = threading.Thread(target=capture_audio)
capture_thread.daemon = True
capture_thread.start()

# BUGFIX: sf.write fails if the output directory does not exist; create it up front.
os.makedirs("temp", exist_ok=True)

i = 0  # sequence number for the output files

while True:
    try:
        if not data_queue.empty():
            audio_data = data_queue.get()
            # Drop any backlog so we always save the most recent chunk only.
            # NOTE(review): clearing the internal deque is not a public Queue
            # API; acceptable here with a single consumer thread.
            data_queue.queue.clear()

            # 16-bit PCM -> float32 scaled into [-1, 1), then split the
            # interleaved stereo stream into shape (frames, 2).
            # (The old comments here described a "model" and a "32768hz clamp";
            # this loop only normalizes and writes the audio to disk.)
            audio_np = np.frombuffer(audio_data, dtype=np.int16).astype(np.float32) / 32768.0
            reshape_np = audio_np.reshape(-1, 2)
            sf.write(f"temp/coba_{i}.wav", reshape_np, 44100)

            i += 1

        # Poll at 4 Hz instead of spinning the CPU.
        sleep(0.25)
    except KeyboardInterrupt:
        break
Binary file not shown.
Binary file not shown.
79 changes: 79 additions & 0 deletions coughgui/pysources/experimental/src/DSP.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import numpy as np
import librosa
from scipy import signal
from scipy.io import wavfile
from scipy.signal import butter,filtfilt
from scipy.signal import cwt
from scipy.signal import hilbert
from scipy.signal import resample
from scipy.signal import decimate
from scipy.signal import spectrogram
from scipy.signal.windows import get_window

import os

from .feature_class import features


def classify_cough(x, fs, model, scaler):
    """Classify whether an input signal is a cough using filtering, feature
    extraction, and an ML classifier.

    Inputs:
        x: (float array) raw cough signal
        fs: (int) sampling rate of the raw signal in Hz
        model: cough-classification ML model loaded from file
        scaler: feature scaler fitted alongside the model
    Outputs:
        result: (float) probability that the signal is a cough; 0 when feature
                extraction fails (e.g. completely silent audio)
    """
    try:
        x, fs = preprocess_cough(x, fs)
        data = (fs, x)
        # Frequency bands (Hz) used by the band-limited features.
        FREQ_CUTS = [(0, 200), (300, 425), (500, 650), (950, 1150),
                     (1400, 1800), (2300, 2400), (2850, 2950), (3800, 3900)]
        features_fct_list = ['EEPD', 'ZCR', 'RMSP', 'DF', 'spectral_features',
                             'SF_SSTD', 'SSL_SD', 'MFCC', 'CF', 'LGTH', 'PSD']
        feature_values_vec = []
        obj = features(FREQ_CUTS)
        for feature in features_fct_list:
            feature_values, feature_names = getattr(obj, feature)(data)
            for value in feature_values:
                # Some extractors return length-1 arrays; flatten to scalars.
                if isinstance(value, np.ndarray):
                    feature_values_vec.append(value[0])
                else:
                    feature_values_vec.append(value)
        feature_values_scaled = scaler.transform(np.array(feature_values_vec).reshape(1, -1))
        result = model.predict_proba(feature_values_scaled)[:, 1]
        return result[0]
    except Exception:
        # Feature extraction fails when the audio is completely silent; report
        # "not a cough" rather than crashing the caller.
        # BUGFIX: was a bare `except:`, which also swallowed KeyboardInterrupt
        # and SystemExit; the message string was a no-op statement, now a comment.
        return 0

def preprocess_cough(x, fs, cutoff=6000, normalize=True, filter_=True, downsample=True):
    """
    Normalize, lowpass filter, and downsample a cough signal.

    Inputs: x*: (float array) time-series cough signal; a multi-channel (n, ch)
                input is mixed down to mono
            fs*: (int) sampling frequency of the cough signal in Hz
            cutoff: (int) cutoff frequency of the lowpass filter in Hz
            normalize: (bool) peak normalization on or off
            filter_: (bool) filtering on or off
            downsample: (bool) downsampling on or off
            *: mandatory input

    Outputs: x: (float32 array) preprocessed cough signal
             fs: (int) actual sampling frequency of the returned signal
    """
    fs_downsample = cutoff * 2  # target rate = Nyquist rate for the cutoff

    # Preprocess Data
    if len(x.shape) > 1:
        x = np.mean(x, axis=1)  # convert to mono
    if normalize:
        # Peak-normalize into [-1, 1]; epsilon avoids division by zero on silence.
        x = x / (np.max(np.abs(x)) + 1e-17)
    if filter_:
        # 4th-order Butterworth lowpass. Wn is relative to Nyquist (fs/2),
        # so fs_downsample/fs == cutoff/(fs/2).
        b, a = butter(4, fs_downsample / fs, btype='lowpass')
        x = filtfilt(b, a, x)

    fs_new = fs  # BUGFIX: when not downsampling, the rate is unchanged (was fs_downsample)
    if downsample:
        q = int(fs / fs_downsample)  # integer decimation factor
        # BUGFIX: guard q < 2 — the original crashed (q=0) for fs < fs_downsample
        # and wastefully decimated by 1 for fs just above it.
        if q > 1:
            x = signal.decimate(x, q)  # anti-aliased downsampling
            # BUGFIX: the post-decimation rate is fs/q, not fs_downsample
            # (e.g. fs=44100, cutoff=6000 -> q=3 -> 14700 Hz, not 12000 Hz).
            fs_new = int(fs / q)

    return np.float32(x), fs_new
Empty file.
24 changes: 24 additions & 0 deletions coughgui/pysources/experimental/src/convert_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import numpy as np
import pandas as pd
import os
import subprocess
from pathlib import Path


def convert_files(folder):
    """Convert every cough recording in *folder* from .webm or .ogg to .wav.

    folder: path to the coughvid database containing the audio files and the
            metadata_compiled.csv index. BUGFIX: paths are now built with
            os.path.join, so a trailing path separator is no longer required.
    """
    df = pd.read_csv(os.path.join(folder, 'metadata_compiled.csv'))
    names_to_convert = df.uuid.to_numpy()
    for counter, name in enumerate(names_to_convert):
        # Progress report every 1000 files; converting the full dataset is slow.
        if counter % 1000 == 0:
            print("Finished {0}/{1}".format(counter, len(names_to_convert)))
        webm_path = os.path.join(folder, name + '.webm')
        ogg_path = os.path.join(folder, name + '.ogg')
        wav_path = os.path.join(folder, name + '.wav')
        if os.path.isfile(webm_path):
            # List-form argv (shell=False): safe even if the uuid contains
            # shell metacharacters.
            subprocess.call(["ffmpeg", "-i", webm_path, wav_path])
        elif os.path.isfile(ogg_path):
            subprocess.call(["ffmpeg", "-i", ogg_path, wav_path])
        else:
            print("Error: No file name {0}".format(name))


Loading