# To install the library for the recognition and organization of speech and audio (librosa):
# pip install librosa
# https://github.com/librosa/librosa/blob/master/examples/LibROSA%20demo.ipynb

import librosa
import numpy as np
from io import TextIOWrapper
from zipfile import ZipFile


def _clipFeatures(archive, member):
    """Return the transposed MFCC matrix (frames x coefficients) of one clip.

    Parameters:
        archive: an open ZipFile that contains the audio clip.
        member: path of the .wav file inside the archive.

    Returns:
        The MFCC matrix computed by librosa, transposed so that the first
        axis indexes frames.
    """
    with archive.open(member) as file:
        # sr=None keeps the clip's native sampling rate instead of resampling.
        timeSeries, samplingRate = librosa.load(file, sr=None)
    if timeSeries.shape[0] < samplingRate:
        # Clips shorter than one second are slowed down (rate < 1) by exactly
        # the factor that brings them to about samplingRate samples.
        timeSeries = librosa.effects.time_stretch(
            timeSeries, rate=timeSeries.shape[0] / samplingRate
        )
    # Slicing past the end is harmless, so this keeps at most one second of
    # audio without needing an explicit length clamp.
    mfcc = librosa.feature.mfcc(y=timeSeries[:samplingRate], sr=samplingRate)
    return np.transpose(mfcc)


def _readLabels(archive, partition, n):
    """Read the first n integer values of the second column of <partition>.csv.

    Parameters:
        archive: an open ZipFile that contains "<partition>.csv".
        partition: partition name, used as the CSV file's base name.
        n: number of data rows to read after the header row.

    Returns:
        An int32 array of length n, filled in file order.

    Raises:
        IndexError: if a row (or a missing row) has no second column.
    """
    Y = np.zeros(n, dtype="int32")
    with TextIOWrapper(archive.open(partition + ".csv", "r")) as file:
        file.readline()  # skip the header row
        for index in range(n):
            Y[index] = np.int32(file.readline().strip("\r\n").split(",")[1])
    return Y


def makeTensors(partition, n, archivePath="ml530-2022-fall-speech.zip"):
    """Convert one partition of the speech archive into saved NumPy tensors.

    Writes "<partition>X.npy", a float32 array of shape (n, 32, 20) holding
    the transposed MFCCs of clips 00000.wav .. (n-1).wav found under
    "<partition>/<partition>/" in the archive.  Unless the partition is
    "tst", also writes "<partition>Y.npy", an int32 array of length n read
    from the second column of "<partition>.csv".

    Parameters:
        partition: partition name ("trn", "val" or "tst" in this script).
        n: number of clips (and CSV rows) in the partition.
        archivePath: path of the zip archive to read.

    Raises:
        ValueError: if a clip's MFCC matrix is not 32 x 20.
    """
    # NOTE(review): librosa's MFCC defaults give 20 coefficients per frame, and
    # 32 frames per second presumably corresponds to 16 kHz clips with the
    # default hop length of 512 samples -- confirm against the dataset.
    # Allocating with dtype= avoids building a float64 array and copying it.
    X = np.zeros((n, 32, 20), dtype="float32")
    with ZipFile(archivePath, "r") as archive:
        for index in range(n):
            member = "{0}/{0}/{1}.wav".format(partition, str(index).zfill(5))
            features = _clipFeatures(archive, member)
            if features.shape != X.shape[1:]:
                # Fail with the offending file name rather than an anonymous
                # broadcasting error (or a silent broadcast of a single frame).
                raise ValueError(
                    "{0}: expected MFCC shape {1}, got {2}".format(
                        member, X.shape[1:], features.shape
                    )
                )
            X[index] = features
        # X is saved before the labels are read, so it is still written even
        # if the CSV turns out to be missing or malformed.
        np.save(partition + "X.npy", X)
        if partition != "tst":
            np.save(partition + "Y.npy", _readLabels(archive, partition, n))


makeTensors("trn", 51088)
makeTensors("val", 6798)
makeTensors("tst", 6835)