-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathNeural_Network_IDS.py
More file actions
139 lines (111 loc) · 6.1 KB
/
Neural_Network_IDS.py
File metadata and controls
139 lines (111 loc) · 6.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn import svm
from sklearn.metrics import confusion_matrix
from keras.models import Sequential
from keras.layers import Dense,Dropout
from keras.optimizers import RMSprop, Adam
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, roc_curve, auc, f1_score
from sklearn.svm import SVC, LinearSVC
import matplotlib.pyplot as plt
plt.style.use('bmh')
# Column labels for the NSL-KDD files, which are read without a header row.
# The last two entries ("attack_type" and "other") are not model inputs:
# "attack_type" is turned into the class label and both are dropped later on.
names = [
    "duration",
    "protocol",
    "service",
    "flag",
    "src_bytes",
    "dst_bytes",
    "land",
    "wrong_fragment",
    "urgent",
    "hot",
    "num_failed_logins",
    "logged_in",
    "num_compromised",
    "root_shell",
    "su_attempted",
    "num_root",
    "num_file_creations",
    "num_shells",
    "num_access_files",
    "num_outbound_cmds",
    "is_host_login",
    "is_guest_login",
    "count",
    "srv_count",
    "serror_rate",
    "srv_serror_rate",
    "rerror_rate",
    "srv_rerror_rate",
    "same_srv_rate",
    "diff_srv_rate",
    "srv_diff_host_rate",
    "dst_host_count",
    "dst_host_srv_count",
    "dst_host_same_srv_rate",
    "dst_host_diff_srv_rate",
    "dst_host_same_src_port_rate",
    "dst_host_srv_diff_host_rate",
    "dst_host_serror_rate",
    "dst_host_srv_serr_rate",
    "dst_host_rerror_rate",
    "dst_host_srv_rerror_rate",
    "attack_type",
    "other",
]
# Relative locations of the NSL-KDD training and testing files.
train_path = "Dataset/NSL-KDD/KDDTrain+.txt"
test_path = "Dataset/NSL-KDD/KDDTest+.txt"

# Both files are parsed the same way: no header row, column labels from `names`.
df_train, df_test = (
    pd.read_csv(csv_path, names=names, header=None)
    for csv_path in (train_path, test_path)
)
print("Shapes of training and testing are:", df_train.shape, df_test.shape)

# Stack the training rows on top of the testing rows so that both go through
# the same label mapping and one-hot encoding. The original row indices are
# kept (no ignore_index), so index labels repeat across the two parts.
full_dataset = pd.concat([df_train, df_test])
# Coarse attack category -> the individual attack names that belong to it.
# Any attack_type value not listed here (e.g. 'normal') keeps its original
# value as its label.
_ATTACK_FAMILIES = {
    'DOS': (
        'neptune', 'back', 'land', 'pod', 'smurf', 'teardrop',
        'mailbomb', 'processtable', 'udpstorm', 'apache2', 'worm',
    ),
    'U2R': (
        'buffer_overflow', 'loadmodule', 'perl', 'rootkit',
        'sqlattack', 'xterm', 'ps',
    ),
    'R2L': (
        'ftp_write', 'guess_passwd', 'imap', 'multihop', 'phf', 'spy',
        'warezclient', 'warezmaster', 'xlock', 'xsnoop', 'snmpgetattack',
        'httptunnel', 'snmpguess', 'sendmail', 'named',
    ),
    'Probe': (
        'satan', 'ipsweep', 'nmap', 'portsweep', 'saint', 'mscan',
    ),
}

# Inverted view used for the actual lookup: attack name -> category.
_ATTACK_TO_CATEGORY = {
    attack: category
    for category, attacks in _ATTACK_FAMILIES.items()
    for attack in attacks
}

# One pass over the column instead of one boolean scan per attack name.
# A callable is used (rather than passing the dict to map) so that names
# missing from the table fall through unchanged instead of becoming NaN.
full_dataset['label'] = full_dataset['attack_type'].map(
    lambda attack: _ATTACK_TO_CATEGORY.get(attack, attack)
)

# The raw attack name is now redundant with 'label', and 'other' is not used
# as a feature; drop both before encoding.
full_dataset = full_dataset.drop(['other', 'attack_type'], axis=1)
print("Unique Labels", full_dataset.label.unique())
# One Hot Encoding
# get_dummies expands every non-numeric column into indicator columns; the
# 'label' column becomes label_<category> columns.
full_dataset = pd.get_dummies(full_dataset, drop_first=False)

# Train test split
# Target columns, in the order the network's output units will follow.
label_columns = ['label_normal', 'label_DOS', 'label_Probe', 'label_R2L', 'label_U2R']
# Everything that is not a label indicator is a model input. Selecting by
# prefix (rather than "all but the last five columns") keeps label columns
# out of the features regardless of how many label categories exist.
features = [column for column in full_dataset.columns
            if not column.startswith('label_')]

# full_dataset is df_train stacked on top of df_test, so the first n_train
# rows are the training set and the remaining rows are the test set.
# (Slicing the test set as [:n_test] would re-use training rows for
# evaluation and never touch the real test data.)
n_train = df_train.shape[0]
train_rows = full_dataset.iloc[:n_train]
test_rows = full_dataset.iloc[n_train:]

y_train = np.array(train_rows[label_columns])
X_train = train_rows[features]
y_test = np.array(test_rows[label_columns])
X_test = test_rows[features]
# Scaling data
# The scaler is fitted on the training features only and then applied to
# both splits.
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train_scaled = np.array(scaler.transform(X_train))
X_test_scaled = np.array(scaler.transform(X_test))
print("No of parameters before PCA are ", X_test_scaled.shape[1])

# A fractional n_components asks PCA to keep as many components as are needed
# to explain that share of the variance; adjust to keep more or fewer.
pca_instance = PCA(n_components=.85)
pca_instance.fit(X_train_scaled)
print("\n--------------------PCA Done ------------------------\n")
print("No of parameters after PCA are ", pca_instance.n_components_)

# Project both splits onto the components learned from the training data.
X_train_scaled, X_test_scaled = (
    pca_instance.transform(split)
    for split in (X_train_scaled, X_test_scaled)
)

print()
print(y_train.shape)
print()
def NN_model(input_dim=None, num_classes=None, hidden_units=256, dropout_rate=0.4):
    """Build and compile the feed-forward classifier.

    The network is two ReLU hidden layers, each followed by dropout, and a
    softmax output layer. It is compiled with categorical cross-entropy, the
    Adam optimizer and an accuracy metric, and its summary is printed.

    Args:
        input_dim: Number of input features. When None, taken from the
            module-level ``X_train_scaled`` (its second dimension).
        num_classes: Number of output units. When None, taken from the
            module-level ``y_train`` (its second dimension).
        hidden_units: Width of each of the two hidden layers.
        dropout_rate: Dropout rate applied after each hidden layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Fall back to the shapes of the prepared training data so that a bare
    # NN_model() call keeps working exactly as before.
    if input_dim is None:
        input_dim = X_train_scaled.shape[1]
    if num_classes is None:
        num_classes = y_train.shape[1]

    model = Sequential()
    model.add(Dense(hidden_units, activation='relu', input_shape=(input_dim,)))
    model.add(Dropout(dropout_rate))
    model.add(Dense(hidden_units, activation='relu'))
    model.add(Dropout(dropout_rate))
    # One softmax unit per one-hot label column.
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='Adam', metrics=['accuracy'])
    model.summary()
    return model
# Build the network and train it on the PCA-reduced training features.
Neural_Network_Model = NN_model()
Neural_Network_Model.fit(
    X_train_scaled,
    y_train,
    batch_size=32,
    epochs=5,
    verbose=1,
)

# Evaluate on X_test_scaled / y_test. The model is compiled with a single
# 'accuracy' metric, so index 1 of the result is read as the accuracy
# (index 0 being the loss).
scores = Neural_Network_Model.evaluate(X_test_scaled, y_test)
test_accuracy = scores[1]
print("Accuracy : ", test_accuracy * 100)