-
Notifications
You must be signed in to change notification settings - Fork 0
/
regression_project.py
105 lines (75 loc) · 2.35 KB
/
regression_project.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# -*- coding: utf-8 -*-
"""Regression_Project.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1BQOKR_LzwpHEUoc5UbVu0ywmbJg8RDsP
"""
#IMPORTING DEPENDENCIES
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.datasets import load_boston
# UNDERSTANDING THE DATASET
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this script only runs against an older scikit-learn release.
boston = load_boston()
print(boston.DESCR)
# ACCESS DATA ATTRIBUTES
dataset = boston.data
# enumerate() yields (position, value) pairs, so unpack them in that order.
# Each line printed is the feature name followed by its column position in
# `dataset`, which is the position used to select a column further down.
for index, name in enumerate(boston.feature_names):
    print(name, index)
# RESHAPING THE SIZE
# Take column 12 of the feature matrix and turn it into a single-column 2-D
# array, which is the layout passed to the estimators later in the script.
data = np.reshape(dataset[:, 12], (-1, 1))
# SHAPE OF THE DATA
# Bare expression: only displayed when run as a notebook cell.
dataset.shape
# TARGET VALUES
# The target vector gets the same single-column layout.
target = np.reshape(boston.target, (-1, 1))
# SHAPE OF THE TARGET
# Bare expression: only displayed when run as a notebook cell.
target.shape
# Commented out IPython magic to ensure Python compatibility.
# ENSURING THAT MATPLOTLIB IS WORKING INSIDE THE NOTEBOOK
# %matplotlib inline
# Raw data: the selected feature on the x axis against the target on the y axis.
plt.scatter(x=data, y=target, color="green")
plt.xlabel(xlabel="Lower Income Population")
plt.ylabel(ylabel="Cost of House")
plt.show()
# LINEAR REGRESSION
from sklearn.linear_model import LinearRegression  # Lasso, Ridge
# CREATING A REGRESSION MODEL AND FITTING IT
# fit() returns the estimator itself, so construction and fitting are chained.
reg = LinearRegression().fit(data, target)
# PREDICTION
# Predictions are made on the same samples the model was fitted on.
pred = reg.predict(data)
# Commented out IPython magic to ensure Python compatibility.
# ENSURING THAT MATPLOTLIB IS WORKING INSIDE THE NOTEBOOK
# %matplotlib inline
# Two figures are shown in sequence: first the scatter on its own, then the
# same scatter with the fitted line drawn on top of it.
for _overlay_fit in (False, True):
    plt.scatter(data, target, color='red')
    if _overlay_fit:
        plt.plot(data, pred, color='green')
    plt.xlabel('Lower Income Population')
    plt.ylabel('Cost of House')
    plt.show()
# CIRCUMVENTING CURVE ISSUE USING POLYNOMIAL MODEL
from sklearn.preprocessing import PolynomialFeatures
# TO ALLOW MERGING OF MODELS
from sklearn.pipeline import make_pipeline
# The pipeline gets its own LinearRegression instance. Passing the already
# fitted `reg` would refit that same object on the degree-4 features and
# silently change what `reg` predicts afterwards.
model = make_pipeline(PolynomialFeatures(4), LinearRegression())
model.fit(data, target)
# `pred` keeps the original row order so it stays aligned with `target`.
pred = model.predict(data)
# Commented out IPython magic to ensure Python compatibility.
# ENSURING THAT MATPLOTLIB IS WORKING INSIDE THE NOTEBOOK
# %matplotlib inline
# plt.plot joins points in the order given. Sorting by x (for drawing only)
# makes the fitted curve a single smooth line instead of criss-crossing chords.
order = np.argsort(data.ravel())
plt.scatter(data, target, color='red')
plt.plot(data[order], pred[order], color='green')
plt.xlabel('Lower Income Population')
plt.ylabel('Cost of House')
plt.show()
# r_2 metric
from sklearn.metrics import r2_score
# PREDICT
# r2_score takes (y_true, y_pred) in that order. The score is not symmetric,
# so the observed targets must come first and the predictions second.
# Bare expression: the value is only displayed when run as a notebook cell.
r2_score(target, pred)