import numpy as np
import pandas as pd


              
                # Question 1: share of fraudulent rows in the dataset
def answer_one():
    """Return the fraction of rows in ``fraud_data.csv`` whose ``Class`` is 1.

    The CSV is read from the current working directory on every call.

    Returns:
        float: count of rows with ``Class == 1`` divided by the total number
        of rows (``nan`` if the file has no data rows).
    """
    df = pd.read_csv(r'fraud_data.csv')
    # The mean of a boolean mask is the proportion of True entries.
    return float((df['Class'] == 1).mean())


              
                # Shared setup: X_train, X_test, y_train, y_test are used by all of the following questions
from sklearn.model_selection import train_test_split

# Load the dataset once at module level so every question works on the same frame.
df = pd.read_csv('fraud_data.csv')

# All columns except the last form the feature matrix; the last column is the label vector.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

# A fixed random_state keeps the split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)


              
                # Question 2: majority-class baseline
def answer_two():
    """Score a majority-class dummy classifier on the test split.

    Fits ``DummyClassifier(strategy='most_frequent')`` on the module-level
    ``X_train``/``y_train`` and predicts on ``X_test``.

    Returns:
        tuple: ``(accuracy, recall)`` of the predictions against ``y_test``.
    """
    from sklearn.dummy import DummyClassifier
    from sklearn.metrics import accuracy_score, recall_score

    baseline = DummyClassifier(strategy='most_frequent').fit(X_train, y_train)
    predictions = baseline.predict(X_test)

    return (
        accuracy_score(y_test, predictions),
        recall_score(y_test, predictions),
    )


              
                # Question 3: SVC with default parameters
def answer_three():
    """Score a default-parameter ``SVC`` on the test split.

    Fits ``SVC()`` on the module-level ``X_train``/``y_train`` and predicts
    on ``X_test``.

    Returns:
        tuple: ``(accuracy, recall, precision)`` of the predictions against
        ``y_test``.
    """
    from sklearn.metrics import accuracy_score, precision_score, recall_score
    from sklearn.svm import SVC

    classifier = SVC().fit(X_train, y_train)
    predictions = classifier.predict(X_test)

    return (
        accuracy_score(y_test, predictions),
        recall_score(y_test, predictions),
        precision_score(y_test, predictions),
    )


              
                # Question 4: confusion matrix at a custom decision threshold
def answer_four(threshold=-220):
    """Return the confusion matrix of an SVC at a custom decision threshold.

    Fits ``SVC(C=1e9, gamma=1e-07)`` on the module-level
    ``X_train``/``y_train``, then labels a test sample as class 1 when its
    decision-function score is greater than or equal to ``threshold``.

    Args:
        threshold: cut-off applied to ``decision_function`` scores
            (defaults to -220, the value the original code hard-coded).

    Returns:
        numpy.ndarray: ``confusion_matrix(y_test, predictions)``; by
        scikit-learn convention rows are true labels and columns are
        predicted labels.
    """
    from sklearn.metrics import confusion_matrix
    from sklearn.svm import SVC

    classifier = SVC(C=1e9, gamma=1e-07).fit(X_train, y_train)
    scores = classifier.decision_function(X_test)

    # Threshold the raw scores directly instead of filling a zero array by index.
    predictions = (scores >= threshold).astype(int)

    return confusion_matrix(y_test, predictions)


              
                # Question 5: read points off the precision-recall and ROC curves
def answer_five():
    """Read one point each off the precision-recall and ROC curves.

    Fits a default ``LogisticRegression`` on the module-level
    ``X_train``/``y_train`` and scores ``X_test`` with ``decision_function``.

    Returns:
        tuple: ``(recall_value, tpr_value)`` where ``recall_value`` is the
        recall at the curve point whose precision is closest to 0.75 and
        ``tpr_value`` is the true-positive rate at the ROC point whose
        false-positive rate is closest to 0.16.
    """
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import precision_recall_curve, roc_curve

    model = LogisticRegression().fit(X_train, y_train)
    scores = model.decision_function(X_test)

    precision, recall, _ = precision_recall_curve(y_test, scores)
    fpr, tpr, _ = roc_curve(y_test, scores)

    # Curve points are floats computed from counts, so an exact ``== 0.75``
    # match (or a point inside a hand-picked FPR window) is not guaranteed to
    # exist, and may exist more than once. Taking the nearest point always
    # yields exactly one value; ties resolve to the first occurrence.
    precision_idx = np.argmin(np.abs(precision - 0.75))
    fpr_idx = np.argmin(np.abs(fpr - 0.16))

    recall_value = float(recall[precision_idx])
    tpr_value = float(tpr[fpr_idx])

    return recall_value, tpr_value


              
                # Question 6: grid search over C and penalty, scored by recall
def answer_six():
    """Grid-search ``LogisticRegression`` over ``C`` and ``penalty`` by recall.

    Runs a 3-fold ``GridSearchCV`` with ``scoring='recall'`` on the
    module-level ``X_train``/``y_train``.

    Returns:
        numpy.ndarray: mean cross-validated test scores reshaped to two
        columns. With this grid that is 5 rows (one per ``C`` value) by
        2 columns (``l1``, ``l2``), assuming GridSearchCV enumerates
        candidates with ``penalty`` varying fastest.
    """
    from sklearn.model_selection import GridSearchCV
    from sklearn.linear_model import LogisticRegression

    param_grid = {'C': [0.01, 0.1, 1, 10, 100], 'penalty': ['l1', 'l2']}

    # The default 'lbfgs' solver does not support the l1 penalty, so every
    # l1 candidate would fail to fit. 'liblinear' handles both l1 and l2.
    estimator = LogisticRegression(solver='liblinear')

    search = GridSearchCV(estimator, param_grid, scoring="recall", cv=3)
    search.fit(X_train, y_train)

    mean_scores = search.cv_results_["mean_test_score"]
    return mean_scores.reshape(-1, len(param_grid['penalty']))


              
                # Use the following function to help visualize results from the grid search
def GridSearch_Heatmap(scores):
    """Draw a heatmap of grid-search scores (rows: C values, columns: penalty).

    Args:
        scores: array with 10 elements that supports ``reshape(5, 2)``,
            e.g. the value returned by ``answer_six``.
    """
    # ``%matplotlib notebook`` is IPython-only syntax and a SyntaxError in a
    # plain Python file. Issue the same magic through the IPython API when
    # running inside IPython/Jupyter, and skip it everywhere else.
    try:
        get_ipython().run_line_magic('matplotlib', 'notebook')
    except NameError:
        pass

    import seaborn as sns
    import matplotlib.pyplot as plt

    plt.figure()
    sns.heatmap(scores.reshape(5, 2), xticklabels=['l1', 'l2'], yticklabels=[0.01, 0.1, 1, 10, 100])
    # Keep the C-value tick labels horizontal.
    plt.yticks(rotation=0)

#GridSearch_Heatmap(answer_six())

# Assignment 3 - Evaluation

# Question 1

# Question 2

# Question 3

# Question 4

# Question 5

# Question 6