Psst.. new poll here.
Psst.. new forums here.
Microsoft is blocking us again (TY IP Reputation!) so just use oauth login instead. :)
Paste
Pasted as Python profiler results by 123 ( 6 years ago )
import numpy as np
from sklearn.metrics import mean_squared_error
def linearRegression(x, y):
    """Return the least-squares weights of the linear model ``x @ W ~= y``.

    Args:
        x: Design matrix; anything ``np.matrix`` accepts. Rows are samples
            and columns are features (include a column of ones for a bias).
        y: Targets; anything ``np.matrix`` accepts, with one row per sample.

    Returns:
        An ``np.matrix`` of weights with one row per column of ``x`` and one
        column per column of ``y``.
    """
    X = np.matrix(x)
    Y = np.matrix(y)
    # pinv(X) equals inv(X.T X) X.T whenever X has full column rank, so the
    # result matches the normal-equation solution in that case.  Unlike the
    # explicit inverse it does not raise on a rank-deficient X; it returns the
    # minimum-norm least-squares solution instead.
    Wlin = np.dot(np.linalg.pinv(X), Y)
    return Wlin
# Get data
filename = 'hw3_train.dat.txt'
filename_test = 'hw3_test.dat.txt'


def _parse_rows(lines):
    """Turn tab-separated text lines into a float array, skipping empty lines."""
    rows = []
    for raw in lines:
        fields = raw.strip('\n').split('\t')
        # A line that is empty once its newline is removed splits to [''].
        if fields == ['']:
            continue
        rows.append(fields)
    return np.array(rows, dtype=float)


with open(filename) as f:
    input_data = _parse_rows(f)
with open(filename_test) as t:
    test_data = _parse_rows(t)
# Train
# Every row of input_data holds the feature values followed by the target in
# the last column.
Sample = input_data.shape[0]  # 1000
Feature = input_data.shape[1] - 1  # 10

# Prepend the constant column x_0 = 1 so the model learns a bias term.
X_0 = np.ones((Sample, 1))
train_data = np.concatenate((X_0, input_data), axis=1)

# Columns 0..Feature of train_data are the bias plus the features; column
# Feature + 1 is the target.  Deriving the index from Feature (rather than a
# literal 11) keeps this correct for any number of features.
X = train_data[:, :Feature + 1].copy()
Y = train_data[:, Feature + 1:Feature + 2].copy()  # kept 2-D: (Sample, 1)

Wlin = linearRegression(X, Y)
# print(Wlin.shape)
# print(Wlin)
# Test
# Every row of test_data holds the feature values followed by the target in
# the last column.
test_Sample = test_data.shape[0]  # 3000
test_Feature = test_data.shape[1] - 1  # 10

# Prepend the constant column x_0 = 1, matching the training layout.
t_X_0 = np.ones((test_Sample, 1))
t_data = np.concatenate((t_X_0, test_data), axis=1)

# Columns 0..test_Feature are the bias plus the features; column
# test_Feature + 1 is the target (derived instead of a literal 11).
t_X = t_data[:, :test_Feature + 1].copy()
t_Y = t_data[:, test_Feature + 1:test_Feature + 2].copy()  # (test_Sample, 1)

# linearRegression may return an np.matrix, in which case np.dot returns one
# as well.  Recent scikit-learn input validation rejects np.matrix, so hand
# it a plain ndarray.
Y_hat = np.asarray(np.dot(t_X, Wlin))
Error = mean_squared_error(t_Y, Y_hat)
print('Error = {}'.format(Error))
# sample = len(input_data)
# feature = len(input_data[0])
# X = np.zeros((sample, feature-1))
# Y = np.zeros((sample, 1))
# for i in range(sample) :
# for j in range(feature-1) :
# X[i][j] = input_data[i][j]
# Y[i][0] = input_data[i][10]
# X_0 = np.ones((sample, 1))
# # print(X.shape)
# X = np.concatenate((X_0, X), axis=1)
# Wlin = linearRegression(X, Y)
# # print(X.shape)
# # print(Y.shape)
# # print(Wlin.shape)
# # print(Wlin)
# # Test
# test_sample = len(test_data) # 1000
# test_feature = len(test_data[0]) # 11
# X_test = np.zeros((test_sample, test_feature - 1))
# Y_test = np.zeros((test_sample, 1))
# for i in range(test_sample) :
# for j in range(test_feature - 1) :
# X_test[i][j] = test_data[i][j]
# Y_test[i][0] = test_data[i][10]
# X_0 = np.ones((test_sample, 1))
# # print(X_test.shape)
# print(X_test)
# X_test = np.concatenate((X_0, X_test), axis=1)
# print(X_test)
# # print(X_test.shape)
# # print(Y_test.shape)
# # print(Wlin.shape)
# Y_hat = np.dot(X_test, Wlin)
# # print(Y_hat.shape)
# # Error = np.square(np.subtract(Y_hat, Y_test)).mean()
# Error = mean_squared_error(Y_test, Y_hat)
# print('Error = {}'.format(Error))
Revise this Paste