-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmodel.py
More file actions
118 lines (96 loc) · 3.75 KB
/
model.py
File metadata and controls
118 lines (96 loc) · 3.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import os
from scipy.io import wavfile
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from keras.layers import Conv2D, MaxPool2D, Flatten, LSTM
from keras.layers import Dropout, Dense, TimeDistributed
from keras.models import Sequential
from keras.utils import to_categorical
from sklearn.utils.class_weight import compute_class_weight
from tqdm import tqdm
from python_speech_features import mfcc
def build_rand_feat():
X = []
y = []
_min, _max = float('inf'), float('-inf')
for _ in tqdm(range(n_samples)):
rand_class = np.random.choice(class_dist.index, p=prob_dist)
f = np.random.choice(df[df.label==rand_class].index)
rate, wav = wavfile.read('clean/'+f)
label = df.at[f, 'label']
rand_index = np.random.randint(0, wav.shape[0]-config.step)
sample = wav[rand_index:rand_index+config.step]
X_sample = mfcc(sample, rate, numcep=config.nfeat,
nfilt=config.nfilt, nfft=config.nfft).T
_min = min(np.amin(X_sample), _min)
_max = max(np.amax(X_sample), _max)
X.append(X_sample if config.mode == 'conv' else X_sample.T)
y.append(classes.index(label))
X, y = np.array(X), np.array(y)
X = (X - _min) / (_max - _min)
if config.mode == 'conv':
X = X.reshape(X.shape[0], X.shape[1], X.shape[2], 1)
elif config.mode == 'time':
X = X.reshape(X.shape[0], X.shape[1], X.shape[2])
y = to_categorical(y, num_classes=10)
return X, y
def get_conv_model():
model = Sequential()
model.add(Conv2D(16, (3, 3), activation='relu', strides=(1, 1),
padding='same', input_shape=input_shape))
model.add(Conv2D(32, (3, 3), activation='relu', strides=(1, 1),
padding='same'))
model.add(Conv2D(64, (3, 3), activation='relu', strides=(1, 1),
padding='same'))
model.add(Conv2D(128, (3, 3), activation='relu', strides=(1, 1),
padding='same'))
model.add(MaxPool2D((2, 2)))
model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(10, activation='softmax'))
model.summary()
model.compile(loss='categorical_crossentropy',
optimizer='adam', metrics=['acc'])
return model
class Config:
def __init__(self, mode='conv', nfilt=26, nfeat=13, nfft=512, rate=16000):
self.mode = mode
self.nfilt = nfilt
self.nfeat = nfeat
self.nfft = nfft
self.rate = rate
self.step = int(rate/12)
df = pd.read_csv('instruments.csv')
df.set_index('fname', inplace=True)
for f in df.index:
rate, signal = wavfile.read('clean/'+f)
df.at[f, 'length'] = signal.shape[0]/rate
classes = list(np.unique(df.label))
class_dist = df.groupby(['label'])['length'].mean()
n_samples = 2 * int(df['length'].sum() / 0.1)
prob_dist = class_dist / class_dist.sum()
choices = np.random.choice(class_dist.index, p=prob_dist)
fig, ax = plt.subplots()
ax.set_title('Class Distribution', y=1.08)
ax.pie(class_dist, labels=class_dist.index, autopct='%1.1f%%',
shadow=False, startangle=90)
ax.axis('equal')
plt.show()
config = Config(mode='conv')
if config.mode == 'conv':
X, y = build_rand_feat()
y_flat = np.argmax(y, axis=1)
input_shape = (X.shape[1], X.shape[2], 1)
model = get_conv_model()
elif config.mode == 'time':
X, y = build_rand_feat()
y_flat = np.argmax(y, axis=1)
input_shape = (X.shape[1], X.shape[2])
model = get_recurrent_model()
# clss_weight = compute_class_weight('balanced',
# np.unique(y_flat),
# y_flat)
model.fit(X, y, epochs=10, batch_size=32, shuffle=True)