import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
from scipy import stats
import tensorflow as tf
from tensorflow import keras
import seaborn as sns

# In[ ]:

# BUG FIX: the original imported only Conv2D/MaxPooling2D/ReLU (and from the
# private tensorflow.python.keras namespace), yet cnn_model also uses
# Sequential, Flatten, Dense and Activation, which were never imported.
# Import everything from the public tensorflow.keras namespace instead of
# mixing it with tensorflow.python.keras (the two are not interchangeable).
from tensorflow.keras.layers import (
    Activation,
    Conv2D,
    Dense,
    Flatten,
    Input,
    MaxPooling2D,
    ReLU,
)


# In[ ]:

def cnn_model(img_rows, img_cols, img_channels):
    """Build a small three-stage CNN classifier with a 4-way softmax head.

    Parameters
    ----------
    img_rows, img_cols, img_channels : int
        Height, width and channel count of the input images.

    Returns
    -------
    tf.keras.Sequential
        The uncompiled model.
    """
    model = Sequential()
    # activation='linear' + a separate ReLU layer keeps the "advanced
    # activation" as its own layer, as the original author intended.
    model.add(Conv2D(64, (3, 3), activation='linear',
                     kernel_initializer='he_uniform',
                     input_shape=(img_rows, img_cols, img_channels)))
    model.add(ReLU())
    model.add(MaxPooling2D(pool_size=(5, 5)))
    model.add(Conv2D(32, (3, 3), activation='linear',
                     kernel_initializer='he_uniform'))
    model.add(ReLU())
    model.add(MaxPooling2D(pool_size=(3, 3)))
    model.add(Conv2D(16, (3, 3), activation='linear',
                     kernel_initializer='he_uniform'))
    model.add(ReLU())
    model.add(MaxPooling2D(pool_size=(3, 3)))
    model.add(Flatten())
    model.add(Dense(1024))
    model.add(Dense(1024))
    model.add(ReLU())
    model.add(Dense(4))
    model.add(Activation('softmax'))
    return model


# In[ ]:

from pylab import rcParams
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.manifold import TSNE
from sklearn.metrics import classification_report, accuracy_score

# In[ ]:

from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.applications.vgg16 import decode_predictions
from tensorflow.keras.applications.vgg16 import VGG16

# In[ ]:

from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
from tensorflow.keras import regularizers, Sequential

# In[ ]:

# Only meaningful inside a Jupyter/IPython session.
get_ipython().run_line_magic('matplotlib', 'inline')
sns.set(style='whitegrid', palette='muted', font_scale=1.5)
rcParams['figure.figsize'] = 14, 8
RANDOM_SEED = 42

# In[ ]:

# Smoke-test Keras SavedModel round-tripping with a tiny throwaway model.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(5, input_shape=(3,)),
    tf.keras.layers.Softmax(),
])
model.save('/tmp/model')
loaded_model = tf.keras.models.load_model('/tmp/model')
x = tf.random.uniform((10, 3))
# The reloaded model must reproduce the original's predictions
# (present in the second copy of this notebook; restored here).
assert np.allclose(model.predict(x), loaded_model.predict(x))

# In[ ]:

LABELS = ["Normal", "Fraud"]

# In[ ]:

df = pd.read_csv("creditcard.csv")
df.head()

# In[ ]:

df.shape

# In[ ]:

df.isnull().values.any()

# In[ ]:

# BUG FIX: count_classes was used without ever being defined (NameError in
# this copy of the notebook; the definition comes from the complete copy).
count_classes = df['Class'].value_counts(sort=True)
count_classes.plot(kind='bar', rot=0)
plt.title("Transaction class distribution")
plt.xticks(range(2), LABELS)
plt.xlabel("Class")
plt.ylabel("Frequency")

# In[ ]:

frauds = df[df.Class == 1]
normal = df[df.Class == 0]
frauds.shape

# In[ ]:

normal.shape

# In[ ]:

frauds.Amount.describe()

# In[ ]:

normal.Amount.describe()

# In[ ]:

f, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
f.suptitle('Amount per transaction by class')
bins = 50
ax1.hist(frauds.Amount, bins=bins)
ax1.set_title('Fraud')
ax2.hist(normal.Amount, bins=bins)
ax2.set_title('Normal')
plt.xlabel('Amount ($)')
plt.ylabel('Number of Transactions')
plt.xlim((0, 20000))
plt.yscale('log')
plt.show()

# In[ ]:

f, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
f.suptitle('Time of transaction vs Amount by class')
# BUG FIX: this copy created the axes but never plotted anything on them;
# the scatter calls are restored from the complete copy of the notebook.
ax1.scatter(frauds.Time, frauds.Amount)
ax1.set_title('Fraud')
ax2.scatter(normal.Time, normal.Amount)
ax2.set_title('Normal')
plt.xlabel('Time (in Seconds)')
plt.ylabel('Amount')
plt.show()

# In[ ]:

data = df.drop(['Time'], axis=1)

# In[ ]:

from sklearn.preprocessing import StandardScaler

data['Amount'] = StandardScaler().fit_transform(
    data['Amount'].values.reshape(-1, 1))

# In[ ]:

non_fraud = data[data['Class'] == 0]  # .sample(1000)
fraud = data[data['Class'] == 1]
# BUG FIX: DataFrame.append was removed in pandas 2.0; pd.concat is the
# supported equivalent.
df = pd.concat([non_fraud, fraud]).sample(frac=1).reset_index(drop=True)
X = df.drop(['Class'], axis=1).values
Y = df["Class"].values

# In[ ]:

# Train on normal transactions only (one-class style split for the
# autoencoder); the test set keeps both classes.
X_train, X_test = train_test_split(data, test_size=0.2,
                                   random_state=RANDOM_SEED)
X_train_fraud = X_train[X_train.Class == 1]
X_train = X_train[X_train.Class == 0]
X_train = X_train.drop(['Class'], axis=1)
y_test = X_test['Class']
X_test = X_test.drop(['Class'], axis=1)
X_train = X_train.values
X_test = X_test.values
X_train.shape

# In[ ]:

# Autoencoder topology: n -> 100 -> 50 -> 50 -> 100 -> n, with L1 activity
# regularization on the first encoding layer.
input_layer = Input(shape=(X.shape[1],))
## encoding part
encoded = Dense(100, activation='tanh',
                activity_regularizer=regularizers.l1(10e-5))(input_layer)
encoded = Dense(50, activation='relu')(encoded)
## decoding part
decoded = Dense(50, activation='tanh')(encoded)
decoded = Dense(100, activation='tanh')(decoded)
## output layer
output_layer = Dense(X.shape[1], activation='relu')(decoded)

# In[ ]:

# NOTE(review): this saves the tiny demo model defined above, not the
# autoencoder — kept as-is to preserve the notebook's behavior.
model.save('model.h5')

# In[ ]:

from tensorflow.keras import models

# In[ ]:

from tensorflow.keras.models import Model, load_model

# In[ ]:

autoencoder = Model(input_layer, output_layer)
autoencoder.compile(optimizer="adadelta", loss="mse")

# In[ ]:

x = data.drop(["Class"], axis=1)
y = data["Class"].values
x_scale = MinMaxScaler().fit_transform(x.values)
x_norm, x_fraud = x_scale[y == 0], x_scale[y == 1]
# BUG FIX: this fit() call was left syntactically unterminated in this copy
# (trailing comma, unclosed parenthesis); closed with the shuffle/validation
# arguments used by the complete copy of the notebook.
autoencoder.fit(x_norm[0:2000], x_norm[0:2000],
                batch_size=256, epochs=10,
                shuffle=True, validation_split=0.20)

# In[ ]:

# Reuse the trained encoder half (input + two encoding layers) as a
# feature extractor.
hidden_representation = Sequential()
hidden_representation.add(autoencoder.layers[0])
hidden_representation.add(autoencoder.layers[1])
hidden_representation.add(autoencoder.layers[2])

# In[ ]:

norm_hid_rep = hidden_representation.predict(x_norm[:3000])
fraud_hid_rep = hidden_representation.predict(x_fraud)

# In[ ]:

rep_x = np.append(norm_hid_rep, fraud_hid_rep, axis=0)
y_n = np.zeros(norm_hid_rep.shape[0])
y_f = np.ones(fraud_hid_rep.shape[0])
rep_y = np.append(y_n, y_f)

# In[ ]:

train_x, val_x, train_y, val_y = train_test_split(rep_x, rep_y,
                                                  test_size=0.25)

# In[ ]:

# Logistic regression on the learned representation.
clf = LogisticRegression(solver="lbfgs").fit(train_x, train_y)
pred_y = clf.predict(val_x)
print("")
print("Classification Report: ")
print(classification_report(val_y, pred_y))
print("")
print("Accuracy Score: ", accuracy_score(val_y, pred_y))
# Standard input is empty  (stray judge/console output separating two pasted copies of the notebook; commented out so the file parses)
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
from scipy import stats
import tensorflow as tf
from tensorflow import keras
import seaborn as sns

# In[ ]:

# BUG FIX: the original imported only Conv2D/MaxPooling2D/ReLU (and from the
# private tensorflow.python.keras namespace), yet cnn_model also uses
# Sequential, Flatten, Dense and Activation, which were never imported.
# Import everything from the public tensorflow.keras namespace instead of
# mixing it with tensorflow.python.keras (the two are not interchangeable).
from tensorflow.keras.layers import (
    Activation,
    Conv2D,
    Dense,
    Flatten,
    Input,
    MaxPooling2D,
    ReLU,
)


# In[ ]:

def cnn_model(img_rows, img_cols, img_channels):
    """Build a small three-stage CNN classifier with a 4-way softmax head.

    Parameters
    ----------
    img_rows, img_cols, img_channels : int
        Height, width and channel count of the input images.

    Returns
    -------
    tf.keras.Sequential
        The uncompiled model.
    """
    model = Sequential()
    # activation='linear' + a separate ReLU layer keeps the "advanced
    # activation" as its own layer, as the original author intended.
    model.add(Conv2D(64, (3, 3), activation='linear',
                     kernel_initializer='he_uniform',
                     input_shape=(img_rows, img_cols, img_channels)))
    model.add(ReLU())
    model.add(MaxPooling2D(pool_size=(5, 5)))
    model.add(Conv2D(32, (3, 3), activation='linear',
                     kernel_initializer='he_uniform'))
    model.add(ReLU())
    model.add(MaxPooling2D(pool_size=(3, 3)))
    model.add(Conv2D(16, (3, 3), activation='linear',
                     kernel_initializer='he_uniform'))
    model.add(ReLU())
    model.add(MaxPooling2D(pool_size=(3, 3)))
    model.add(Flatten())
    model.add(Dense(1024))
    model.add(Dense(1024))
    model.add(ReLU())
    model.add(Dense(4))
    model.add(Activation('softmax'))
    return model


# In[ ]:

from pylab import rcParams
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.manifold import TSNE
from sklearn.metrics import classification_report, accuracy_score

# In[ ]:

from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.applications.vgg16 import decode_predictions
from tensorflow.keras.applications.vgg16 import VGG16

# In[ ]:

from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
from tensorflow.keras import regularizers, Sequential

# In[ ]:

# Only meaningful inside a Jupyter/IPython session.
get_ipython().run_line_magic('matplotlib', 'inline')
sns.set(style='whitegrid', palette='muted', font_scale=1.5)
rcParams['figure.figsize'] = 14, 8
RANDOM_SEED = 42

# In[ ]:

# Smoke-test Keras SavedModel round-tripping with a tiny throwaway model.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(5, input_shape=(3,)),
    tf.keras.layers.Softmax(),
])
model.save('/tmp/model')
loaded_model = tf.keras.models.load_model('/tmp/model')
x = tf.random.uniform((10, 3))
# The reloaded model must reproduce the original's predictions.
assert np.allclose(model.predict(x), loaded_model.predict(x))

# In[ ]:

LABELS = ["Normal", "Fraud"]

# In[ ]:

df = pd.read_csv("creditcard.csv")
df.head()

# In[ ]:

df.shape

# In[ ]:

df.isnull().values.any()

# In[ ]:

# BUG FIX: pd.value_counts was deprecated and removed in pandas 2.0; use the
# Series.value_counts method instead.
count_classes = df['Class'].value_counts(sort=True)
count_classes.plot(kind='bar', rot=0)
plt.title("Transaction class distribution")
plt.xticks(range(2), LABELS)
plt.xlabel("Class")
plt.ylabel("Frequency")

# In[ ]:

frauds = df[df.Class == 1]
normal = df[df.Class == 0]
frauds.shape

# In[ ]:

normal.shape

# In[ ]:

frauds.Amount.describe()

# In[ ]:

normal.Amount.describe()

# In[ ]:

f, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
f.suptitle('Amount per transaction by class')
bins = 50
ax1.hist(frauds.Amount, bins=bins)
ax1.set_title('Fraud')
ax2.hist(normal.Amount, bins=bins)
ax2.set_title('Normal')
plt.xlabel('Amount ($)')
plt.ylabel('Number of Transactions')
plt.xlim((0, 20000))
plt.yscale('log')
plt.show()

# In[ ]:

f, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
f.suptitle('Time of transaction vs Amount by class')
ax1.scatter(frauds.Time, frauds.Amount)
ax1.set_title('Fraud')
ax2.scatter(normal.Time, normal.Amount)
ax2.set_title('Normal')
plt.xlabel('Time (in Seconds)')
plt.ylabel('Amount')
plt.show()

# In[ ]:

data = df.drop(['Time'], axis=1)

# In[ ]:

from sklearn.preprocessing import StandardScaler

data['Amount'] = StandardScaler().fit_transform(
    data['Amount'].values.reshape(-1, 1))

# In[ ]:

non_fraud = data[data['Class'] == 0]  # .sample(1000)
fraud = data[data['Class'] == 1]
# BUG FIX: DataFrame.append was removed in pandas 2.0; pd.concat is the
# supported equivalent.
df = pd.concat([non_fraud, fraud]).sample(frac=1).reset_index(drop=True)
X = df.drop(['Class'], axis=1).values
Y = df["Class"].values

# In[ ]:

# Train on normal transactions only (one-class style split for the
# autoencoder); the test set keeps both classes.
X_train, X_test = train_test_split(data, test_size=0.2,
                                   random_state=RANDOM_SEED)
X_train_fraud = X_train[X_train.Class == 1]
X_train = X_train[X_train.Class == 0]
X_train = X_train.drop(['Class'], axis=1)
y_test = X_test['Class']
X_test = X_test.drop(['Class'], axis=1)
X_train = X_train.values
X_test = X_test.values
X_train.shape

# In[ ]:

# Autoencoder topology: n -> 100 -> 50 -> 50 -> 100 -> n, with L1 activity
# regularization on the first encoding layer.
input_layer = Input(shape=(X.shape[1],))
## encoding part
encoded = Dense(100, activation='tanh',
                activity_regularizer=regularizers.l1(10e-5))(input_layer)
encoded = Dense(50, activation='relu')(encoded)
## decoding part
decoded = Dense(50, activation='tanh')(encoded)
decoded = Dense(100, activation='tanh')(decoded)
## output layer
output_layer = Dense(X.shape[1], activation='relu')(decoded)

# In[ ]:

# NOTE(review): this saves the tiny demo model defined above, not the
# autoencoder — kept as-is to preserve the notebook's behavior.
model.save('model.h5')

# In[ ]:

from tensorflow.keras import models

# In[ ]:

from tensorflow.keras.models import Model, load_model

# In[ ]:

autoencoder = Model(input_layer, output_layer)
autoencoder.compile(optimizer="adadelta", loss="mse")

# In[ ]:

x = data.drop(["Class"], axis=1)
y = data["Class"].values
x_scale = MinMaxScaler().fit_transform(x.values)
x_norm, x_fraud = x_scale[y == 0], x_scale[y == 1]
autoencoder.fit(x_norm[0:2000], x_norm[0:2000],
                batch_size=256, epochs=10,
                shuffle=True, validation_split=0.20)

# In[ ]:

# Reuse the trained encoder half (input + two encoding layers) as a
# feature extractor.
hidden_representation = Sequential()
hidden_representation.add(autoencoder.layers[0])
hidden_representation.add(autoencoder.layers[1])
hidden_representation.add(autoencoder.layers[2])

# In[ ]:

norm_hid_rep = hidden_representation.predict(x_norm[:3000])
fraud_hid_rep = hidden_representation.predict(x_fraud)

# In[ ]:

rep_x = np.append(norm_hid_rep, fraud_hid_rep, axis=0)
y_n = np.zeros(norm_hid_rep.shape[0])
y_f = np.ones(fraud_hid_rep.shape[0])
rep_y = np.append(y_n, y_f)

# In[ ]:

train_x, val_x, train_y, val_y = train_test_split(rep_x, rep_y,
                                                  test_size=0.25)

# In[ ]:

# Logistic regression on the learned representation.
clf = LogisticRegression(solver="lbfgs").fit(train_x, train_y)
pred_y = clf.predict(val_x)
print("")
print("Classification Report: ")
print(classification_report(val_y, pred_y))
print("")
print("Accuracy Score: ", accuracy_score(val_y, pred_y))