fork download
  1. import pandas as pd
  2. import numpy as np
  3. import pickle
  4. import matplotlib.pyplot as plt
  5. from scipy import stats
  6. import tensorflow as tf
  7. from tensorflow import keras
  8. import seaborn as sns
  9. # In[ ]:
  10. from tensorflow.python.keras.layers import Conv2D, MaxPooling2D, ReLU
  11. # In[ ]:
  12. def cnn_model(img_rows, img_cols, img_channels):
  13. model = Sequential()
  14. model.add(Conv2D(64, (3, 3),activation='linear',kernel_initializer='he_uniform',
  15. input_shape=(img_rows, img_cols, img_channels)))
  16. model.add(ReLU()) # add an advanced activation
  17. model.add(MaxPooling2D(pool_size=(5, 5)))
  18. model.add(Conv2D(32, (3, 3),activation='linear',kernel_initializer='he_uniform'))
  19. model.add(ReLU()) # add an advanced activation
  20. model.add(MaxPooling2D(pool_size=(3, 3)))
  21. model.add(Conv2D(16, (3, 3),activation='linear',kernel_initializer='he_uniform'))
  22. model.add(ReLU()) # add an advanced activation
  23. model.add(MaxPooling2D(pool_size=(3, 3)))
  24. model.add(Flatten())
  25. model.add(Dense(1024))
  26. model.add(Dense(1024))
  27. model.add(ReLU()) # add an advanced activation
  28. model.add(Dense(4))
  29. model.add(Activation('softmax'))
  30. return model
  31. # In[ ]:
  32. from pylab import rcParams
  33. from sklearn.model_selection import train_test_split
  34. from sklearn.preprocessing import MinMaxScaler
  35. from sklearn.linear_model import LogisticRegression
  36. from sklearn.manifold import TSNE
  37. from sklearn.metrics import classification_report, accuracy_score
  38. # In[ ]:
  39. from tensorflow.keras.preprocessing.image import load_img
  40. from tensorflow.keras.preprocessing.image import img_to_array
  41. from tensorflow.keras.applications.vgg16 import preprocess_input
  42. from tensorflow.keras.applications.vgg16 import decode_predictions
  43. from tensorflow.keras.applications.vgg16 import VGG16
  44. # In[ ]:
  45. from tensorflow.python.keras.layers import Input, Dense
  46. from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
  47. from tensorflow.keras import regularizers, Sequential
  48. # In[ ]:
  49. get_ipython().run_line_magic('matplotlib', 'inline')
  50. sns.set(style='whitegrid', palette='muted', font_scale=1.5)
  51. rcParams['figure.figsize'] = 14, 8
  52. RANDOM_SEED = 42
  53. # In[ ]:
  54. model = tf.keras.Sequential([
  55. tf.keras.layers.Dense(5, input_shape=(3,)),
  56. tf.keras.layers.Softmax()])
  57. model.save('/tmp/model')
  58. loaded_model = tf.keras.models.load_model('/tmp/model')
  59. x = tf.random.uniform((10, 3))
  60. assert np.allclose(model.predict(x), loaded_model.predict(x))
  61. # In[ ]:
  62. LABELS = ["Normal", "Fraud"]
  63. # In[ ]:
  64. df = pd.read_csv("creditcard.csv")
  65. df.head()
  66. # In[ ]:
  67. df.shape
  68. # In[ ]:
  69. df.isnull().values.any()
  70. # In[ ]:
  71. count_classes = pd.value_counts(df['Class'], sort = True)
  72. count_classes.plot(kind = 'bar', rot=0)
  73. plt.title("Transaction class distribution")
  74. plt.xticks(range(2), LABELS)
  75. plt.xlabel("Class")
  76. plt.ylabel("Frequency")
  77. # In[ ]:
  78. frauds = df[df.Class == 1]
  79. normal = df[df.Class == 0]
  80. frauds.shape
  81. # In[ ]:
  82. normal.shape
  83. # In[ ]:
  84. frauds.Amount.describe()
  85. # In[ ]:
  86. normal.Amount.describe()
  87. # In[ ]:
  88. f, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
  89. f.suptitle('Amount per transaction by class')
  90. bins = 50
  91. ax1.hist(frauds.Amount, bins = bins)
  92. ax1.set_title('Fraud')
  93. ax2.hist(normal.Amount, bins = bins)
  94. ax2.set_title('Normal')
  95. plt.xlabel('Amount ($)')
  96. plt.ylabel('Number of Transactions')
  97. plt.xlim((0, 20000))
  98. plt.yscale('log')
  99. plt.show()
  100. # In[ ]:
  101. f, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
  102. f.suptitle('Time of transaction vs Amount by class')
  103. ax1.scatter(frauds.Time, frauds.Amount)
  104. ax1.set_title('Fraud')
  105. ax2.scatter(normal.Time, normal.Amount)
  106. ax2.set_title('Normal')
  107. plt.xlabel('Time (in Seconds)')
  108. plt.ylabel('Amount')
  109. plt.show()
  110. # In[ ]:
  111. data = df.drop(['Time'], axis=1)
  112. # In[ ]:
  113. from sklearn.preprocessing import StandardScaler
  114. data['Amount'] = StandardScaler().fit_transform(data['Amount'].values.reshape(-1, 1))
  115. # In[ ]:
  116. non_fraud = data[data['Class'] == 0] #.sample(1000)
  117. fraud = data[data['Class'] == 1]
  118. df = non_fraud.append(fraud).sample(frac=1).reset_index(drop=True)
  119. X = df.drop(['Class'], axis = 1).values
  120. Y = df["Class"].values
  121. # In[ ]:
  122. X_train, X_test = train_test_split(data, test_size=0.2, random_state=RANDOM_SEED)
  123. X_train_fraud = X_train[X_train.Class == 1]
  124. X_train = X_train[X_train.Class == 0]
  125. X_train = X_train.drop(['Class'], axis=1)
  126. y_test = X_test['Class']
  127. X_test = X_test.drop(['Class'], axis=1)
  128. X_train = X_train.values
  129. X_test = X_test.values
  130. X_train.shape
  131. # In[ ]:
  132. input_layer = Input(shape=(X.shape[1],))
  133. ## encoding part
  134. encoded = Dense(100, activation='tanh', activity_regularizer=regularizers.l1(10e-5))(input_layer)
  135. encoded = Dense(50, activation='relu')(encoded)
  136. ## decoding part
  137. decoded = Dense(50, activation='tanh')(encoded)
  138. decoded = Dense(100, activation='tanh')(decoded)
  139. ## output layer
  140. output_layer = Dense(X.shape[1], activation='relu')(decoded)
  141. # In[ ]:
  142. model.save('model.h5')
  143. # In[ ]:
  144. from tensorflow.keras import models
  145. # In[ ]:
  146. from tensorflow.keras import models
  147. # In[ ]:
  148. from tensorflow.keras.models import Model, load_model
  149. # In[ ]:
  150. autoencoder = Model(input_layer, output_layer)
  151. autoencoder.compile(optimizer="adadelta", loss="mse")
  152. # In[ ]:
  153. x = data.drop(["Class"], axis=1)
  154. y = data["Class"].values
  155. x_scale = MinMaxScaler().fit_transform(x.values)
  156. x_norm, x_fraud = x_scale[y == 0], x_scale[y == 1]
  157. autoencoder.fit(x_norm[0:2000], x_norm[0:2000],
  158. batch_size = 256, epochs = 10,
  159. shuffle = True, validation_split = 0.20);
  160. # In[ ]:
  161. hidden_representation = Sequential()
  162. hidden_representation.add(autoencoder.layers[0])
  163. hidden_representation.add(autoencoder.layers[1])
  164. hidden_representation.add(autoencoder.layers[2])
  165. # In[ ]:
  166. norm_hid_rep = hidden_representation.predict(x_norm[:3000])
  167. fraud_hid_rep = hidden_representation.predict(x_fraud)
  168. # In[ ]:
  169. rep_x = np.append(norm_hid_rep, fraud_hid_rep, axis = 0)
  170. y_n = np.zeros(norm_hid_rep.shape[0])
  171. y_f = np.ones(fraud_hid_rep.shape[0])
  172. rep_y = np.append(y_n, y_f)
  173. # In[ ]:
  174. train_x, val_x, train_y, val_y = train_test_split(rep_x, rep_y, test_size=0.25)
  175. # In[ ]:
  176. clf = LogisticRegression(solver="lbfgs").fit(train_x, train_y)
  177. pred_y = clf.predict(val_x)
  178. print ("")
  179. print ("Classification Report: ")
  180. print (classification_report(val_y, pred_y))
  181. print ("")
  182. print ("Accuracy Score: ", accuracy_score(val_y, pred_y))
  183.  
Success #stdin #stdout 0.03s 25848KB
stdin
Standard input is empty
stdout
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
from scipy import stats
import tensorflow as tf
from tensorflow import keras
import seaborn as sns
# In[ ]:
from tensorflow.python.keras.layers import Conv2D, MaxPooling2D, ReLU
# In[ ]:
def cnn_model(img_rows, img_cols, img_channels):
    """Build an uncompiled CNN ending in a 4-way softmax.

    The network stacks three Conv2D -> ReLU -> MaxPooling2D stages
    (64, 32 and 16 filters), flattens the result and feeds it through two
    1024-unit dense layers into a 4-unit softmax output.

    Args:
        img_rows: First spatial dimension of the input images.
        img_cols: Second spatial dimension of the input images.
        img_channels: Number of channels per image.

    Returns:
        A ``tensorflow.keras.Sequential`` model. It is not compiled here.
    """
    # Import from the public Keras namespace inside the function so that every
    # class used below is in scope (Flatten and Activation were never imported
    # at file level) and comes from the same package as Sequential.
    from tensorflow.keras import Sequential
    from tensorflow.keras.layers import (
        Activation,
        Conv2D,
        Dense,
        Flatten,
        MaxPooling2D,
        ReLU,
    )

    model = Sequential()

    # Convolutional stage 1: the only layer that needs the input shape.
    model.add(Conv2D(64, (3, 3), activation='linear',
                     kernel_initializer='he_uniform',
                     input_shape=(img_rows, img_cols, img_channels)))
    model.add(ReLU())
    model.add(MaxPooling2D(pool_size=(5, 5)))

    # Convolutional stage 2.
    model.add(Conv2D(32, (3, 3), activation='linear',
                     kernel_initializer='he_uniform'))
    model.add(ReLU())
    model.add(MaxPooling2D(pool_size=(3, 3)))

    # Convolutional stage 3.
    model.add(Conv2D(16, (3, 3), activation='linear',
                     kernel_initializer='he_uniform'))
    model.add(ReLU())
    model.add(MaxPooling2D(pool_size=(3, 3)))

    # Classifier head.
    model.add(Flatten())
    # NOTE(review): there is no activation between the two 1024-unit layers,
    # so together they form a single linear map - confirm this is intended.
    model.add(Dense(1024))
    model.add(Dense(1024))
    model.add(ReLU())
    model.add(Dense(4))
    model.add(Activation('softmax'))
    return model
# In[ ]:
from pylab import rcParams
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.manifold import TSNE
from sklearn.metrics import classification_report, accuracy_score
# In[ ]:
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.applications.vgg16 import decode_predictions
from tensorflow.keras.applications.vgg16 import VGG16
# In[ ]:
from tensorflow.python.keras.layers import Input, Dense
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
from tensorflow.keras import regularizers, Sequential
# In[ ]:
# Notebook-wide plotting configuration and the shared random seed.
try:
    # get_ipython only exists under IPython/Jupyter; skip the inline-backend
    # magic when this exported notebook is run as a plain script.
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass
sns.set(style='whitegrid', palette='muted', font_scale=1.5)
rcParams['figure.figsize'] = 14, 8
RANDOM_SEED = 42
# In[ ]:
# Round-trip check: a tiny softmax model must give the same predictions
# after being saved to disk and loaded back.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(5, input_shape=(3,)))
model.add(tf.keras.layers.Softmax())
model.save('/tmp/model')
loaded_model = tf.keras.models.load_model('/tmp/model')
# Ten random 3-feature rows serve as the probe input.
x = tf.random.uniform((10, 3))
assert np.allclose(
    model.predict(x),
    loaded_model.predict(x),
)
# In[ ]:
# Tick labels for the Class column, in the order Class == 0, Class == 1.
LABELS = ["Normal", "Fraud"]
# In[ ]:
df = pd.read_csv("creditcard.csv")
df.head()
# In[ ]:
df.shape
# In[ ]:
df.isnull().values.any()
# In[ ]:
# Series.value_counts replaces the deprecated top-level pd.value_counts.
# Sorting by index (0, then 1) keeps the bars aligned with LABELS regardless
# of which class happens to be more frequent.
count_classes = df['Class'].value_counts().sort_index()
count_classes.plot(kind='bar', rot=0)
plt.title("Transaction class distribution")
plt.xticks(range(2), LABELS)
plt.xlabel("Class")
plt.ylabel("Frequency")
# In[ ]:
# Split the rows by label: Class == 1 is fraud, Class == 0 is normal.
frauds = df[df['Class'] == 1]
normal = df[df['Class'] == 0]
frauds.shape
# In[ ]:
normal.shape
# In[ ]:
frauds['Amount'].describe()
# In[ ]:
normal['Amount'].describe()
# In[ ]:
# Histogram of transaction amounts, fraud on top and normal below.
f, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, sharex=True)
f.suptitle('Amount per transaction by class')
bins = 50
ax1.hist(frauds['Amount'], bins=bins)
ax1.set_title('Fraud')
ax2.hist(normal['Amount'], bins=bins)
ax2.set_title('Normal')
# The bottom axes is the current pyplot axes here, so labels, limits and the
# log scale are set on it directly (the x limits propagate through sharex).
ax2.set_xlabel('Amount ($)')
ax2.set_ylabel('Number of Transactions')
ax2.set_xlim((0, 20000))
ax2.set_yscale('log')
plt.show()
# In[ ]:
# Scatter of amount against time, fraud on top and normal below.
f, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, sharex=True)
f.suptitle('Time of transaction vs Amount by class')
ax1.scatter(frauds['Time'], frauds['Amount'])
ax1.set_title('Fraud')
ax2.scatter(normal['Time'], normal['Amount'])
ax2.set_title('Normal')
ax2.set_xlabel('Time (in Seconds)')
ax2.set_ylabel('Amount')
plt.show()
# In[ ]:
# Drop the Time column; the remaining columns are the features plus Class.
data = df.drop(['Time'], axis=1)
# In[ ]:
from sklearn.preprocessing import StandardScaler
# Standardise Amount in place; ravel() turns the (n, 1) scaler output back
# into a flat column.
data['Amount'] = StandardScaler().fit_transform(
    data['Amount'].values.reshape(-1, 1)
).ravel()
# In[ ]:
non_fraud = data[data['Class'] == 0]
fraud = data[data['Class'] == 1]
# DataFrame.append was removed in pandas 2.0, so stack the frames with
# pd.concat. The shuffle is seeded so reruns produce the same row order.
df = (
    pd.concat([non_fraud, fraud])
    .sample(frac=1, random_state=RANDOM_SEED)
    .reset_index(drop=True)
)
X = df.drop(['Class'], axis=1).values
Y = df["Class"].values
# In[ ]:
# 80/20 split; the training side keeps only normal rows, while the test side
# keeps both classes together with their labels.
X_train, X_test = train_test_split(data, test_size=0.2, random_state=RANDOM_SEED)
X_train_fraud = X_train[X_train.Class == 1]
X_train = X_train[X_train.Class == 0]
X_train = X_train.drop(['Class'], axis=1)
y_test = X_test['Class']
X_test = X_test.drop(['Class'], axis=1)
X_train = X_train.values
X_test = X_test.values
X_train.shape
# In[ ]:
# Take the layer classes from the public Keras API: objects built from the
# private tensorflow.python.keras package are not interchangeable with the
# tensorflow.keras Model/Sequential classes this file wires them into.
from tensorflow.keras.layers import Dense, Input

# Symmetric dense autoencoder over the feature columns of X.
input_layer = Input(shape=(X.shape[1],))
## encoding part
# L1 activity penalty on the first encoder layer (10e-5 == 1e-4).
encoded = Dense(100, activation='tanh', activity_regularizer=regularizers.l1(10e-5))(input_layer)
encoded = Dense(50, activation='relu')(encoded)
## decoding part
decoded = Dense(50, activation='tanh')(encoded)
decoded = Dense(100, activation='tanh')(decoded)
## output layer
# The reconstruction has the same width as the input.
output_layer = Dense(X.shape[1], activation='relu')(decoded)
# In[ ]:
from tensorflow.keras import models
# In[ ]:
from tensorflow.keras.models import Model, load_model
# In[ ]:
# Wrap the layer graph into a trainable model with a reconstruction loss.
autoencoder = Model(input_layer, output_layer)
autoencoder.compile(optimizer="adadelta", loss="mse")
# In[ ]:
# Scale every feature to [0, 1] and separate normal rows from fraud rows.
x = data.drop(["Class"], axis=1)
y = data["Class"].values
x_scale = MinMaxScaler().fit_transform(x.values)
x_norm, x_fraud = x_scale[y == 0], x_scale[y == 1]
# Fit on the first 2000 normal rows only (input == target); validation_split
# holds out 20% of those rows for validation.
autoencoder.fit(
    x_norm[0:2000], x_norm[0:2000],
    batch_size=256, epochs=10,
    shuffle=True, validation_split=0.20,
)
# In[ ]:
# Persist the trained autoencoder. The original saved `model` here, which is
# the unrelated toy network from the save/load round-trip check, and did so
# before the autoencoder had even been built.
autoencoder.save('model.h5')
# In[ ]:
# Encoder-only model that reuses (and shares weights with) the first three
# layers of the autoencoder.
hidden_representation = Sequential()
for layer_index in (0, 1, 2):
    hidden_representation.add(autoencoder.layers[layer_index])
# In[ ]:
# Encode the first 3000 normal rows and every fraud row.
norm_hid_rep = hidden_representation.predict(x_norm[:3000])
fraud_hid_rep = hidden_representation.predict(x_fraud)
# In[ ]:
# Stack the encodings row-wise and build matching labels: 0 = normal, 1 = fraud.
rep_x = np.concatenate((norm_hid_rep, fraud_hid_rep), axis=0)
y_n = np.zeros(len(norm_hid_rep))
y_f = np.ones(len(fraud_hid_rep))
rep_y = np.concatenate((y_n, y_f))
# In[ ]:
# Hold out 25% of the encoded rows for validation. The split is seeded with
# the notebook-wide RANDOM_SEED, like the earlier train/test split, so the
# reported metrics are reproducible.
train_x, val_x, train_y, val_y = train_test_split(
    rep_x, rep_y, test_size=0.25, random_state=RANDOM_SEED
)
# In[ ]:
# Fit a logistic regression on the encoded features and report its metrics.
clf = LogisticRegression(solver="lbfgs").fit(train_x, train_y)
pred_y = clf.predict(val_x)
print("")
print("Classification Report: ")
print(classification_report(val_y, pred_y))
print("")
print("Accuracy Score: ", accuracy_score(val_y, pred_y))