- 2nd Jun 2022
- 06:03 am

# Logistic-regression experiment on the iris data set.
#
# The script evaluates 11 feature configurations (all four columns, every pair
# of columns and every triple of columns). For each configuration it
#   1. runs a grid search over LogisticRegression hyper-parameters, and
#   2. compares L1 and L2 regularisation for a fixed list of C values.

# Importing the dependencies
import warnings

import matplotlib.pyplot as plt
import numpy as np
from sklearn import preprocessing, datasets
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_curve, roc_auc_score
from sklearn.model_selection import train_test_split

warnings.filterwarnings("ignore")

# Loading the data set into data and target variables
X, Y = datasets.load_iris(return_X_y=True)

# 10 different sets of features considering 2 and 3 columns at a time
X1 = X[:, [0, 1]]
X2 = X[:, [0, 2]]
X3 = X[:, [0, 3]]
X4 = X[:, [1, 2]]
X5 = X[:, [1, 3]]
X6 = X[:, [2, 3]]
X7 = X[:, [0, 1, 2]]
X8 = X[:, [0, 1, 3]]
X9 = X[:, [0, 2, 3]]
X10 = X[:, [1, 2, 3]]

# Case 0 uses all four columns; cases 1-10 use the subsets above.
feature_sets = [X, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10]

# Data split into 80% for training and 20% for testing. The same random_state
# is used for every case, so all cases share the same train/test rows.
splits = [
    train_test_split(features, Y, test_size=0.2, random_state=42)
    for features in feature_sets
]
X_train = [split[0] for split in splits]
X_test = [split[1] for split in splits]
Y_train = [split[2] for split in splits]
Y_test = [split[3] for split in splits]

# Per-case names kept so that code relying on them keeps working.
(x_train, x_train1, x_train2, x_train3, x_train4, x_train5,
 x_train6, x_train7, x_train8, x_train9, x_train10) = X_train
(x_test, x_test1, x_test2, x_test3, x_test4, x_test5,
 x_test6, x_test7, x_test8, x_test9, x_test10) = X_test
(y_train, y_train1, y_train2, y_train3, y_train4, y_train5,
 y_train6, y_train7, y_train8, y_train9, y_train10) = Y_train
(y_test, y_test1, y_test2, y_test3, y_test4, y_test5,
 y_test6, y_test7, y_test8, y_test9, y_test10) = Y_test

# Scaling data: the scaler is fitted on the training split only and then
# applied to both splits. StandardScaler.transform returns a new array, so the
# result has to be kept; calling it without using the return value (as the
# script used to) leaves the data unscaled.
X_train_scaled = []
X_test_scaled = []
for train_features, test_features in zip(X_train, X_test):
    scaler = preprocessing.StandardScaler().fit(train_features)
    X_train_scaled.append(scaler.transform(train_features))
    X_test_scaled.append(scaler.transform(test_features))

C = [10, 1, .1, .001]  # a list of C values
max_iter_values = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 250, 500, 1000]

# Training the model
logistic = LogisticRegression()

# Create grid search using 3-fold cross validation.
# The grid is split by penalty because only 'liblinear' and 'saga' support
# the L1 penalty; crossing 'l1' with 'lbfgs', 'newton-cg' or 'sag' only
# produces failed fits.
param_grid = [
    {
        'penalty': ['l1'],
        'C': C,
        'solver': ['liblinear', 'saga'],
        'max_iter': max_iter_values,
    },
    {
        'penalty': ['l2'],
        'C': C,
        'solver': ['lbfgs', 'newton-cg', 'liblinear', 'sag', 'saga'],
        'max_iter': max_iter_values,
    },
]
clf = GridSearchCV(logistic, param_grid=param_grid, cv=3, verbose=True, n_jobs=-1)

# One [accuracy, best estimator] entry per case.
lst = []
for case_train, case_labels in zip(X_train_scaled, Y_train):
    # finding best model for the data by grid search
    best_clf = clf.fit(case_train, case_labels)
    # NOTE(review): this accuracy is measured on the training split, not on
    # the held-out test split - confirm that this is what should be reported.
    lst.append([best_clf.score(case_train, case_labels), best_clf.best_estimator_])

# printing best accuracy and other important results for 11 cases
for i, (train_accuracy, best_estimator) in enumerate(lst):
    print("for case: ", i)
    print(train_accuracy)
    print(best_estimator)
    print("\n")

# Test accuracy per case for L1 (lstl1) and L2 (lstl2) regularisation; each
# entry is a list with one accuracy per value in C.
lstl1 = []
lstl2 = []
for case_train, case_test, train_labels, test_labels in zip(
        X_train_scaled, X_test_scaled, Y_train, Y_test):
    lst_l1 = []
    lst_l2 = []
    for c in C:
        # Applying l1 regularization for all c values
        model_l1 = LogisticRegression(penalty='l1', C=c, solver='liblinear')
        model_l1.fit(case_train, train_labels)
        lst_l1.append(model_l1.score(case_test, test_labels))

        # Applying l2 regularization for all c values
        model_l2 = LogisticRegression(C=c, solver='liblinear')
        model_l2.fit(case_train, train_labels)
        lst_l2.append(model_l2.score(case_test, test_labels))
    # Appended once per case (after all C values) so that lstl1[i] / lstl2[i]
    # line up with case i in the report below.
    lstl1.append(lst_l1)
    lstl2.append(lst_l2)

# printing accuracy values for analysis: L1 scores first, then L2 scores
for i, (l1_scores, l2_scores) in enumerate(zip(lstl1, lstl2)):
    print("for case: ", i)
    print("\n")
    for score in l1_scores:
        print(score)
    print("\n")
    for score in l2_scores:
        print(score)
    print("\n")