diff --git a/__pycache__/predictor.cpython-39.pyc b/__pycache__/predictor.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..da07262fc88a72fd60765a9a82a10acede77a399 Binary files /dev/null and b/__pycache__/predictor.cpython-39.pyc differ diff --git a/configs/.parameters.yaml.swp b/configs/.parameters.yaml.swp deleted file mode 100644 index efc1e793a79f99c8e8ae9bcca89838319bd02be9..0000000000000000000000000000000000000000 Binary files a/configs/.parameters.yaml.swp and /dev/null differ diff --git a/configs/parameters.yaml b/configs/parameters.yaml index 59dfc1033327feb5b618a0491817145b70fd1baa..555945ad96a73aa993252ab8147571395b26037b 100644 --- a/configs/parameters.yaml +++ b/configs/parameters.yaml @@ -1,12 +1,12 @@ # PATHS wav_path: '../data/wavs/SPEECOMCO/' data_path: './data/' -embedding_path: './data/embeddings/ecapa/' +embedding_path: './data/embeddings/xvec/' model_path: './models/' -train_set_file: './data/TRAIN_LEC_SEGAUG_C2SI_98_ECAPA.csv' #'./data/TRAINING.txt' -validation_set_file: './data/VALIDATION_LEC_SEGAUG_C2SI_10_ECAPA.csv' -test_set_file: './data/TEST_DAP_UNSEGMENTED_ECAPA.csv' +train_set_file: './data/TRAIN_LEC_SEGAUG_C2SI_98_XVEC.csv' #'./data/TRAINING.txt' +validation_set_file: './data/VALIDATION_LEC_SEGAUG_C2SI_10_XVEC.csv' +test_set_file: './data/TEST_DAP_UNSEGMENTED_XVEC.csv' @@ -21,7 +21,7 @@ dropout: 0.2 # MODEL PARAMETERS -first_layer: 192 +first_layer: 512 second_layer: 128 third_layer: 64 @@ -30,4 +30,4 @@ third_layer: 64 # Types of embeddings supported: 'ecapa_tdnn' or 'x-vector' # ecapa_tdnn: dim = 192 (change first_layer dim) # x-vector: dim = 512 (change first_layer dim) -embedding_type: ecapa_tdnn +embedding_type: x-vector diff --git a/data/.~lock.FULL_TRAIN_LEC_SEGAUG_C2SI_108_ECAPA.csv# b/data/.~lock.FULL_TRAIN_LEC_SEGAUG_C2SI_108_ECAPA.csv# deleted file mode 100644 index 6e55e4fb7cc97ee097132904ae50b5dbd16b01af..0000000000000000000000000000000000000000 --- 
a/data/.~lock.FULL_TRAIN_LEC_SEGAUG_C2SI_108_ECAPA.csv# +++ /dev/null @@ -1 +0,0 @@ -,sebastiao,Valinor,10.01.2023 13:49,file:///homelocal/sebastiao/.config/libreoffice/4; \ No newline at end of file diff --git a/data/.~lock.VALIDATION_LEC_SEGAUG_C2SI_10_ECAPA.csv# b/data/.~lock.VALIDATION_LEC_SEGAUG_C2SI_10_ECAPA.csv# deleted file mode 100644 index ba8fcf1126aad696f9b33cfac7d69c0a7387dd13..0000000000000000000000000000000000000000 --- a/data/.~lock.VALIDATION_LEC_SEGAUG_C2SI_10_ECAPA.csv# +++ /dev/null @@ -1 +0,0 @@ -,sebastiao,Valinor,10.01.2023 13:54,file:///homelocal/sebastiao/.config/libreoffice/4; \ No newline at end of file diff --git a/predictor/PMB18-AAA-16k_mono.wav b/predictor/PMB18-AAA-16k_mono.wav new file mode 100644 index 0000000000000000000000000000000000000000..133cf36e00cea0a0494b959f46e305ba64f18160 Binary files /dev/null and b/predictor/PMB18-AAA-16k_mono.wav differ diff --git a/predictor/PMB18-CSN-16k_mono.wav b/predictor/PMB18-CSN-16k_mono.wav new file mode 100644 index 0000000000000000000000000000000000000000..ddd21ff8b4b1115a33d1e1ca5f0327afbf739953 Binary files /dev/null and b/predictor/PMB18-CSN-16k_mono.wav differ diff --git a/predictor/PMB18-DAP-16k_mono.wav b/predictor/PMB18-DAP-16k_mono.wav new file mode 100644 index 0000000000000000000000000000000000000000..e67334917519623ffe2b2b8b56f5383e9486c862 Binary files /dev/null and b/predictor/PMB18-DAP-16k_mono.wav differ diff --git a/predictor/PMB18-PHR-16k_mono.wav b/predictor/PMB18-PHR-16k_mono.wav new file mode 100644 index 0000000000000000000000000000000000000000..bb82577d5539fc2ad0c1c8f597894e5a0f750610 Binary files /dev/null and b/predictor/PMB18-PHR-16k_mono.wav differ diff --git a/predictor/PMB18-TXT-16k_mono.wav b/predictor/PMB18-TXT-16k_mono.wav new file mode 100644 index 0000000000000000000000000000000000000000..70f6a08d7ea08ae0ff81c1db0e35d7d38dacae52 Binary files /dev/null and b/predictor/PMB18-TXT-16k_mono.wav differ diff --git a/predictor/TTT117_LEC_seg_8.wav 
class model_embedding_snn(nn.Module):
    """Shared-trunk feed-forward network with six single-value output heads.

    The trunk is ``BatchNorm -> Linear -> Dropout -> ReLU`` applied twice,
    followed by a final batch normalisation.  Six independent ``Linear``
    heads (each followed by a ReLU, so every output is non-negative) read
    the trunk output.

    Args:
        config: Optional mapping providing the keys ``'first_layer'``,
            ``'second_layer'``, ``'third_layer'`` and ``'dropout'``.  When
            omitted, the module-level ``cfg`` mapping is used, which keeps
            the original zero-argument construction working.
    """

    def __init__(self, config=None):
        super().__init__()

        # Fall back to the module-level configuration so that existing
        # callers constructing the model without arguments are unaffected.
        if config is None:
            config = cfg

        in_dim = config['first_layer']
        hidden_dim = config['second_layer']
        trunk_dim = config['third_layer']

        # NOTE: attribute names and creation order are kept exactly as in the
        # original so previously saved state dicts still load and seeded
        # initialisation is unchanged.
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(config['dropout'])

        self.batch_norm1 = nn.BatchNorm1d(in_dim)
        self.batch_norm2 = nn.BatchNorm1d(hidden_dim)
        self.batch_norm3 = nn.BatchNorm1d(trunk_dim)

        self.fc1 = nn.Linear(in_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, trunk_dim)

        # Output heads.  NOTE(review): presumably voice / resonance /
        # prosody / "pd" sub-scores -- confirm the meaning with the authors.
        self.fc_voix = nn.Linear(trunk_dim, 1)
        self.fc_res = nn.Linear(trunk_dim, 1)
        self.fc_pros = nn.Linear(trunk_dim, 1)
        self.fc_pd = nn.Linear(trunk_dim, 1)

        # Heads for the severity ("sev") and intelligibility ("int") scores.
        self.fc_sev = nn.Linear(trunk_dim, 1)
        self.fc_int = nn.Linear(trunk_dim, 1)

    def forward(self, input_embs):
        """Run the network on a batch of embeddings.

        Args:
            input_embs: Float tensor whose feature dimension equals
                ``first_layer`` (assumed layout ``(batch, first_layer)``).

        Returns:
            Tuple ``(SEV, INT, v, r, p, pd)`` with one tensor per head, each
            produced by a ``Linear(third_layer, 1)`` followed by a ReLU.
        """
        # Shared trunk.  Dropout is applied before the ReLU, as in the
        # original network definition.
        x = self.batch_norm1(input_embs)
        x = self.relu(self.dropout(self.fc1(x)))
        x = self.batch_norm2(x)
        x = self.relu(self.dropout(self.fc2(x)))
        x = self.batch_norm3(x)

        # Independent heads on top of the shared representation.
        v = self.relu(self.fc_voix(x))
        r = self.relu(self.fc_res(x))
        p = self.relu(self.fc_pros(x))
        pd = self.relu(self.fc_pd(x))

        INT = self.relu(self.fc_int(x))
        SEV = self.relu(self.fc_sev(x))

        return SEV, INT, v, r, p, pd
class PythonPredictor():
    """Predict speech-disorder severity and intelligibility from a wav file.

    An x-vector embedding is extracted with the SpeechBrain
    ``speechbrain/spkrec-xvect-voxceleb`` encoder and passed to the
    ``model_embedding_snn`` regressor whose weights are read from
    ``MODEL_PATH``.

    Args:
        device: Optional device (string or ``torch.device``) for the
            regressor.  Defaults to CUDA when available, otherwise CPU, so
            the predictor no longer crashes on machines without a GPU.
    """

    # Sampling rate the pipeline is configured for (``sampling_rate: 16000``
    # in parameters.yaml).  Audio at another rate is resampled to this value.
    EXPECTED_SAMPLE_RATE = 16000

    def __init__(self, device=None):
        super().__init__()

        if device is None:
            device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.device = torch.device(device)

        self.model = model.model_embedding_snn().to(self.device)
        # map_location lets a checkpoint saved from a GPU run load on CPU.
        self.model.load_state_dict(
            torch.load(MODEL_PATH, map_location=self.device))
        self.model.eval()

        self.emb_extractor = EncoderClassifier.from_hparams(
            source="speechbrain/spkrec-xvect-voxceleb",
            savedir="pretrained_models/spkrec-xvect-voxceleb")

    def predict(self, wavfile):
        """Print and return the predicted scores for one audio file.

        Args:
            wavfile: Path (or file-like object) accepted by
                ``torchaudio.load``.

        Returns:
            Tuple ``(severity, intelligibility)`` of Python floats rounded to
            three decimals -- the same values that are printed.  Earlier
            versions returned ``None``; callers that ignore the return value
            are unaffected.
        """
        signal, fs = torchaudio.load(wavfile)

        # Down-mix multi-channel audio: the extractor would otherwise treat
        # each channel as a separate batch item and the regressor input
        # would no longer be a single embedding row.
        if signal.size(0) > 1:
            signal = signal.mean(dim=0, keepdim=True)

        # The sample rate used to be ignored; resample so that audio recorded
        # at another rate is not silently mis-scored.
        if fs != self.EXPECTED_SAMPLE_RATE:
            signal = torchaudio.functional.resample(
                signal, fs, self.EXPECTED_SAMPLE_RATE)

        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            emb = self.emb_extractor.encode_batch(signal)
            emb = emb.squeeze().unsqueeze(0).to(self.device)
            # The remaining four heads are not reported by this predictor.
            sev_, int_, *_ = self.model(emb)

        severity = round(sev_.item(), 3)
        intelligibility = round(int_.item(), 3)

        print('\nFilename: {}\n'.format(wavfile))

        print('Speech Disorder Severity: {}'.format(severity))
        print('Speech Intelligibility: {}\n'.format(intelligibility))

        return severity, intelligibility
b/predictor/pretrained_models/spkrec-xvect-voxceleb/embedding_model.ckpt @@ -0,0 +1 @@ +/homelocal/sebastiao/.cache/huggingface/hub/models--speechbrain--spkrec-xvect-voxceleb/snapshots/e2cc27f853f99bd5d539432f0cba3f124c059f71/embedding_model.ckpt \ No newline at end of file diff --git a/predictor/pretrained_models/spkrec-xvect-voxceleb/hyperparams.yaml b/predictor/pretrained_models/spkrec-xvect-voxceleb/hyperparams.yaml new file mode 120000 index 0000000000000000000000000000000000000000..58c459b9e9399bbffcae8bce744df30d58931e15 --- /dev/null +++ b/predictor/pretrained_models/spkrec-xvect-voxceleb/hyperparams.yaml @@ -0,0 +1 @@ +/homelocal/sebastiao/.cache/huggingface/hub/models--speechbrain--spkrec-xvect-voxceleb/snapshots/e2cc27f853f99bd5d539432f0cba3f124c059f71/hyperparams.yaml \ No newline at end of file diff --git a/predictor/pretrained_models/spkrec-xvect-voxceleb/label_encoder.ckpt b/predictor/pretrained_models/spkrec-xvect-voxceleb/label_encoder.ckpt new file mode 120000 index 0000000000000000000000000000000000000000..567a918ea978991d10c9e9a4de7278a5c11eef70 --- /dev/null +++ b/predictor/pretrained_models/spkrec-xvect-voxceleb/label_encoder.ckpt @@ -0,0 +1 @@ +/homelocal/sebastiao/.cache/huggingface/hub/models--speechbrain--spkrec-xvect-voxceleb/snapshots/e2cc27f853f99bd5d539432f0cba3f124c059f71/label_encoder.txt \ No newline at end of file diff --git a/predictor/pretrained_models/spkrec-xvect-voxceleb/mean_var_norm_emb.ckpt b/predictor/pretrained_models/spkrec-xvect-voxceleb/mean_var_norm_emb.ckpt new file mode 120000 index 0000000000000000000000000000000000000000..0a0a06ed4c7da2ceeb78d24cdd7b22a95956e984 --- /dev/null +++ b/predictor/pretrained_models/spkrec-xvect-voxceleb/mean_var_norm_emb.ckpt @@ -0,0 +1 @@ +/homelocal/sebastiao/.cache/huggingface/hub/models--speechbrain--spkrec-xvect-voxceleb/snapshots/e2cc27f853f99bd5d539432f0cba3f124c059f71/mean_var_norm_emb.ckpt \ No newline at end of file diff --git a/utils/__pycache__/predictor.cpython-39.pyc 
b/utils/__pycache__/predictor.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8a6e226b99c791f46c18fcbcb542d5b66d1f92f4 Binary files /dev/null and b/utils/__pycache__/predictor.cpython-39.pyc differ diff --git a/utils/int_xvec_lec.png b/utils/plots/int_xvec_lec.png similarity index 100% rename from utils/int_xvec_lec.png rename to utils/plots/int_xvec_lec.png diff --git a/utils/int_xvec_phr.png b/utils/plots/int_xvec_phr.png similarity index 100% rename from utils/int_xvec_phr.png rename to utils/plots/int_xvec_phr.png diff --git a/utils/sev_ecapa_dap.png b/utils/plots/sev_ecapa_dap.png similarity index 100% rename from utils/sev_ecapa_dap.png rename to utils/plots/sev_ecapa_dap.png diff --git a/utils/sev_xvec_lec.png b/utils/plots/sev_xvec_lec.png similarity index 100% rename from utils/sev_xvec_lec.png rename to utils/plots/sev_xvec_lec.png