# NOTE(review): this region originally held a garbled single-line duplicate of
# the whole notebook (an export/paste artifact with truncated statements,
# followed by the stray judge output "Standard input is empty"). Every
# statement it contained appears, complete and properly formatted, below.
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
from scipy import stats
import tensorflow as tf
from tensorflow import keras
import seaborn as sns
# In[ ]:
from tensorflow.python.keras.layers import Conv2D, MaxPooling2D, ReLU
# In[ ]:
def cnn_model(img_rows, img_cols, img_channels):
    """Build a small three-block CNN classifier ending in a 4-way softmax.

    Parameters
    ----------
    img_rows, img_cols, img_channels : int
        Height, width and channel count of the input images.

    Returns
    -------
    tensorflow.keras.Sequential
        Uncompiled model; caller is expected to compile/fit it.
    """
    # Local imports: Flatten and Activation are never imported at module
    # level in this file, and Sequential is only imported further down, so
    # resolve everything here to keep the function self-contained.
    from tensorflow.keras import Sequential
    from tensorflow.keras.layers import (Activation, Conv2D, Dense, Flatten,
                                         MaxPooling2D, ReLU)

    model = Sequential()
    # activation='linear' plus a separate ReLU layer keeps the "advanced
    # activation" slot explicit (original author's pattern).
    model.add(Conv2D(64, (3, 3), activation='linear',
                     kernel_initializer='he_uniform',
                     input_shape=(img_rows, img_cols, img_channels)))
    model.add(ReLU())
    model.add(MaxPooling2D(pool_size=(5, 5)))
    model.add(Conv2D(32, (3, 3), activation='linear',
                     kernel_initializer='he_uniform'))
    model.add(ReLU())
    model.add(MaxPooling2D(pool_size=(3, 3)))
    model.add(Conv2D(16, (3, 3), activation='linear',
                     kernel_initializer='he_uniform'))
    model.add(ReLU())
    model.add(MaxPooling2D(pool_size=(3, 3)))
    model.add(Flatten())
    model.add(Dense(1024))
    model.add(Dense(1024))
    model.add(ReLU())
    model.add(Dense(4))
    model.add(Activation('softmax'))
    return model
# In[ ]:
from pylab import rcParams
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.manifold import TSNE
from sklearn.metrics import classification_report, accuracy_score
# In[ ]:
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.applications.vgg16 import decode_predictions
from tensorflow.keras.applications.vgg16 import VGG16
# In[ ]:
from tensorflow.python.keras.layers import Input, Dense
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
from tensorflow.keras import regularizers, Sequential
# In[ ]:
# Notebook display setup: inline plots plus seaborn/matplotlib styling.
get_ipython().run_line_magic('matplotlib', 'inline')
sns.set(style='whitegrid', palette='muted', font_scale=1.5)
rcParams['figure.figsize'] = 14, 8
# Fixed seed so the train/test split further down is reproducible.
RANDOM_SEED = 42
# In[ ]:
# Sanity check of the Keras save/load round trip on a throwaway model:
# the reloaded model must reproduce the original's predictions exactly.
model = tf.keras.Sequential([
tf.keras.layers.Dense(5, input_shape=(3,)),
tf.keras.layers.Softmax()])
model.save('/tmp/model')
loaded_model = tf.keras.models.load_model('/tmp/model')
x = tf.random.uniform((10, 3))
assert np.allclose(model.predict(x), loaded_model.predict(x))
# In[ ]:
# Bar labels for the two transaction classes (0 = Normal, 1 = Fraud).
LABELS = ["Normal", "Fraud"]
# In[ ]:
# Load the credit-card fraud dataset; assumes creditcard.csv sits next to
# this script -- TODO confirm the path.
df = pd.read_csv("creditcard.csv")
df.head()
# In[ ]:
df.shape
# In[ ]:
df.isnull().values.any()
# In[ ]:
# Class distribution bar chart. pd.value_counts() was deprecated and then
# removed in pandas 2.0; the Series method is the supported equivalent and
# produces identical output (descending counts).
count_classes = df['Class'].value_counts(sort=True)
count_classes.plot(kind='bar', rot=0)
plt.title("Transaction class distribution")
plt.xticks(range(2), LABELS)
plt.xlabel("Class")
plt.ylabel("Frequency")
# In[ ]:
# Split the raw frame by label for the exploratory plots; `frauds` and
# `normal` are reused by the scatter plots in the next cell.
frauds = df[df.Class == 1]
normal = df[df.Class == 0]
frauds.shape
# In[ ]:
normal.shape
# In[ ]:
frauds.Amount.describe()
# In[ ]:
normal.Amount.describe()
# In[ ]:
# Amount histograms, fraud vs normal, shared x axis and log-scaled counts
# so the rare fraud class stays visible next to the bulk of normal rows.
f, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
f.suptitle('Amount per transaction by class')
bins = 50
ax1.hist(frauds.Amount, bins = bins)
ax1.set_title('Fraud')
ax2.hist(normal.Amount, bins = bins)
ax2.set_title('Normal')
plt.xlabel('Amount ($)')
plt.ylabel('Number of Transactions')
plt.xlim((0, 20000))
plt.yscale('log')
plt.show()
# In[ ]:
# Transaction time vs amount, plotted separately per class on a shared
# time axis. Note: plt.xlabel/ylabel apply to the last (bottom) axes only.
f, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
f.suptitle('Time of transaction vs Amount by class')
ax1.scatter(frauds.Time, frauds.Amount)
ax1.set_title('Fraud')
ax2.scatter(normal.Time, normal.Amount)
ax2.set_title('Normal')
plt.xlabel('Time (in Seconds)')
plt.ylabel('Amount')
plt.show()
# In[ ]:
# Drop the Time column; remaining features are the PCA components plus
# Amount and the Class label.
data = df.drop(['Time'], axis=1)
# In[ ]:
from sklearn.preprocessing import StandardScaler
# Standardize Amount so it is on a scale comparable to the other features.
data['Amount'] = StandardScaler().fit_transform(data['Amount'].values.reshape(-1, 1))
# In[ ]:
non_fraud = data[data['Class'] == 0] #.sample(1000)
fraud = data[data['Class'] == 1]
# DataFrame.append() was removed in pandas 2.0; pd.concat is the supported
# equivalent. Shuffle the combined frame and rebuild a clean index.
# NOTE(review): this rebinds `df`, shadowing the raw frame loaded above.
df = pd.concat([non_fraud, fraud]).sample(frac=1).reset_index(drop=True)
X = df.drop(['Class'], axis=1).values
Y = df["Class"].values
# In[ ]:
# Split the scaled data, then keep only normal transactions for training
# (the autoencoder learns to reconstruct "normal" behaviour); the test set
# keeps both classes, with its labels peeled off into y_test.
X_train, X_test = train_test_split(data, test_size=0.2, random_state=RANDOM_SEED)
X_train_fraud = X_train.loc[X_train.Class == 1]
X_train = X_train.loc[X_train.Class == 0].drop(['Class'], axis=1)
y_test = X_test['Class']
X_test = X_test.drop(['Class'], axis=1)
# Work with plain numpy arrays from here on.
X_train = X_train.values
X_test = X_test.values
X_train.shape
# In[ ]:
# Autoencoder built with the functional API:
# input-dim -> 100 -> 50 (encoder) -> 50 -> 100 -> input-dim (decoder).
input_layer = Input(shape=(X.shape[1],))
## encoding part
# NOTE(review): 10e-5 equals 1e-4 -- if an L1 weight of 1e-5 was intended,
# the exponent needs fixing.
encoded = Dense(100, activation='tanh', activity_regularizer=regularizers.l1(10e-5))(input_layer)
encoded = Dense(50, activation='relu')(encoded)
## decoding part
decoded = Dense(50, activation='tanh')(encoded)
decoded = Dense(100, activation='tanh')(decoded)
## output layer
output_layer = Dense(X.shape[1], activation='relu')(decoded)
# In[ ]:
# NOTE(review): this saves the small demo `model` from the save/load check
# above, NOT the autoencoder (which is assembled from these layers further
# down) -- confirm which model was meant to be persisted here.
model.save('model.h5')
# In[ ]:
from tensorflow.keras import models
# In[ ]:
from tensorflow.keras import models
# In[ ]:
from tensorflow.keras.models import Model, load_model
# In[ ]:
autoencoder = Model(input_layer, output_layer)
# Reconstruction objective (MSE); trained below on non-fraud rows only so
# fraud rows should reconstruct poorly / encode differently.
autoencoder.compile(optimizer="adadelta", loss="mse")
# In[ ]:
# Min-max scale every feature, then split the rows by label.
# NOTE(review): `x` here rebinds the tf tensor from the save/load cell.
x = data.drop(["Class"], axis=1)
y = data["Class"].values
x_scale = MinMaxScaler().fit_transform(x.values)
x_norm, x_fraud = x_scale[y == 0], x_scale[y == 1]
# Train on the first 2000 normal transactions; input == target because the
# network learns the identity mapping (reconstruction).
autoencoder.fit(x_norm[0:2000], x_norm[0:2000],
batch_size = 256, epochs = 10,
shuffle = True, validation_split = 0.20);
# In[ ]:
# Reuse the trained encoder half -- the input layer plus the two encoding
# Dense layers (100 then 50 units) -- as a fixed feature extractor.
hidden_representation = Sequential()
hidden_representation.add(autoencoder.layers[0])
hidden_representation.add(autoencoder.layers[1])
hidden_representation.add(autoencoder.layers[2])
# In[ ]:
# Encode a sample of normal rows and every fraud row into the 50-dim
# hidden space.
norm_hid_rep = hidden_representation.predict(x_norm[:3000])
fraud_hid_rep = hidden_representation.predict(x_fraud)
# In[ ]:
# Stack the encoded normal and fraud rows into one labelled dataset:
# label 0 for normal representations, label 1 for fraud ones.
rep_x = np.concatenate((norm_hid_rep, fraud_hid_rep), axis=0)
y_n = np.zeros(norm_hid_rep.shape[0])
y_f = np.ones(fraud_hid_rep.shape[0])
rep_y = np.concatenate((y_n, y_f))
# In[ ]:
# Hold out a quarter of the encoded rows for validation.
train_x, val_x, train_y, val_y = train_test_split(rep_x, rep_y, test_size=0.25)
# In[ ]:
# Fit a logistic-regression classifier on the encoded features and report
# its performance on the held-out rows.
clf = LogisticRegression(solver="lbfgs")
clf.fit(train_x, train_y)
pred_y = clf.predict(val_x)
print("")
print("Classification Report: ")
print(classification_report(val_y, pred_y))
print("")
print("Accuracy Score: ", accuracy_score(val_y, pred_y))