avatar
Untitled

Guest 34 4th Dec, 2019

PYTHON 3.01 KB
                                           
                         """
This file contains code for lab 5b. 

Simply call the app to run it.

'python main.py'

Kevin Tran 000375580, Mohawk College, 2019

It looks like Linear Regression had better results than the Decision Tree.
LR frequently achieves a higher correlation and a lower RSS value.

This could be because the data I used has a more linear shape.
Also DTs are more classifier based rather than being numerically driven.

I had to convert the target values from floats to integers (3.333 -> 3333),
since DecisionTreeClassifier treats targets as discrete class labels and rejects continuous floats.

Kevin Tran, Mohawk College, 2019

"""

import numpy as np
import csv
from sklearn import tree
from sklearn.linear_model import LinearRegression
import random
import sys
import csv
import matplotlib.pyplot as plt

def _load_dataset(path):
    """Read a headerless, all-numeric CSV file into feature and target arrays.

    Every column except the last is a feature; the last column is the
    target.  Targets are scaled by 1000 and rounded to the nearest integer
    so they can be used as class labels by a classifier.

    Args:
        path: Path of the CSV file to read.

    Returns:
        A ``(features, targets)`` tuple of NumPy arrays.

    Raises:
        ValueError: If a non-blank cell cannot be parsed as a float.
    """
    features = []
    targets = []
    with open(path, newline="") as handle:
        for row in csv.reader(handle):
            # Skip blank lines (e.g. a trailing newline at the end of the
            # file) instead of crashing on float("").
            if not any(cell.strip() for cell in row):
                continue
            values = [float(cell) for cell in row]
            features.append(values[:-1])
            # round() rather than bare int(): int() truncates, so float
            # representation error (1.001 * 1000 == 1000.9999999999999)
            # would otherwise shift a label down by one.
            targets.append(int(round(values[-1] * 1000)))
    return np.array(features), np.array(targets)


def _print_fit_quality(predicted, actual):
    """Print the Pearson correlation and residual sum of squares."""
    print("Correlation (r):", np.corrcoef(predicted, actual)[0, 1])
    print("Residual Sum of Squares:", ((predicted - actual) ** 2).sum())


def main():
    """Run lab 5b: compare linear regression with a decision tree.

    Loads ``fish2.csv`` from the current working directory, holds out a
    random 25% of the rows for testing, fits both models on the remaining
    rows, and prints each model's correlation and residual sum of squares
    on the held-out rows.

    Raises:
        ValueError: If ``fish2.csv`` contains no data rows or a cell is not
            numeric.
    """
    print("Welcome to Lab 5b\n")
    print("Kevin Tran 000375580\n")

    ## Load the data and compute a random train/test split
    data, targets = _load_dataset("fish2.csv")
    if len(data) == 0:
        # Fail with a clear message instead of an IndexError further down.
        raise ValueError("fish2.csv contains no data rows")

    indexes = np.random.permutation(len(data))
    split = round(len(data) * 0.25)
    test_rows = indexes[:split]
    train_rows = indexes[split:]

    testdata = data[test_rows]
    testtargets = targets[test_rows]
    traindata = data[train_rows]
    traintargets = targets[train_rows]

    ## Print the Size of Test and Training Data
    print("Dataset -> QSAR fish toxicity Data Set\n")
    print("Training Size: ", len(traindata))
    print("Testing Size: ", len(testdata))
    print("# of features: ", len(traindata[0]))

    print("----------------------------------------\n")
    print("Linear Regression")

    ## Create and train linear regressor
    # The `normalize` argument was deprecated in scikit-learn 1.0 and removed
    # in 1.2, so passing it raises TypeError on current releases.  For plain
    # least squares it did not change the predictions, so it is dropped.
    rgr = LinearRegression(fit_intercept=True)
    rgr.fit(traindata, traintargets)

    ## Print the coefficients and intercepts of the model
    print("Coefficients:", rgr.coef_)
    print("Intercept:", rgr.intercept_)

    ## Print Correlation and RSS
    _print_fit_quality(rgr.predict(testdata), testtargets)

    print("Decision Tree")

    ## Create DT classifier
    # NOTE(review): this is a classifier applied to a numeric target, which
    # is why the targets are integer-scaled; DecisionTreeRegressor would
    # accept the float targets directly -- confirm whether the lab requires
    # the classifier before switching.
    dt = tree.DecisionTreeClassifier()
    dt = dt.fit(traindata, traintargets)

    ## Print Correlation and RSS
    _print_fit_quality(dt.predict(testdata), testtargets)


# Script entry point: run the lab only when this file is executed directly
# (e.g. `python main.py`), not when it is imported as a module.
if __name__ == "__main__":
    main()
                      
                                       
To share this paste please copy this url and send to your friends
RAW Paste Data
Recent Pastes