Project 8 Predictive Analytics - Ipynb - Colaboratory
Project 8 Predictive Analytics - Ipynb - Colaboratory
Project 8 Predictive Analytics - Ipynb - Colaboratory
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# Location of the MPG dataset (a CSV file served from GitHub).
MPG_CSV_URL = 'https://github.com/YBI-Foundation/Dataset/raw/main/MPG.csv'
# Download and parse the CSV into a DataFrame.
df = pd.read_csv(MPG_CSV_URL)
# Preview the first rows to check that the load worked.
df.head()
Saved successfully!
# Count the distinct values in each column.
df.nunique()
mpg 129
cylinders 5
displacement 82
horsepower 93
weight 351
acceleration 95
model_year 13
origin 3
name 305
dtype: int64
# Print column dtypes and non-null counts to spot missing values.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 398 entries, 0 to 397
Data columns (total 9 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 mpg 398 non-null float64
1 cylinders 398 non-null int64
2 displacement 398 non-null float64
3 horsepower 392 non-null float64
4 weight 398 non-null int64
5 acceleration 398 non-null float64
6 model_year 398 non-null int64
7 origin 398 non-null object
8 name 398 non-null object
dtypes: float64(4), int64(3), object(2)
memory usage: 28.1+ KB
# Display summary statistics for the DataFrame.
df.describe()
Saved successfully!
# Pairwise correlation between the numeric columns. `origin` and `name` are
# object (string) columns, so restrict the computation explicitly:
# pandas >= 2.0 raises on non-numeric columns instead of silently dropping them.
df.corr(numeric_only=True)
# Drop every row that has at least one missing value.
df = df.dropna()
# Re-check dtypes and non-null counts after the rows were removed.
df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 392 entries, 0 to 397
Data columns (total 9 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 mpg 392 non-null float64
1 cylinders 392 non-null int64
2 displacement 392 non-null float64
3 horsepower 392 non-null float64
4 weight 392 non-null int64
5 acceleration 392 non-null float64
6 model_year 392 non-null int64
7 origin 392 non-null object
8 name 392 non-null object
dtypes: float64(4), int64(3), object(2)
memory usage: 30.6+ KB
# Grid of pairwise plots with the listed columns on the x-axes.
# NOTE(review): no y_vars is passed, so seaborn uses every numeric column for
# the rows — confirm whether the rows were meant to be restricted to 'mpg'.
sns.pairplot(df,x_vars=['displacement','horsepower','weight','acceleration','mpg'])
<seaborn.axisgrid.PairGrid at 0x7f2fcdd60190>
Saved successfully!
# Column used as the regression target.
target_column = 'mpg'
y = df[target_column]
# Show the target's dimensions.
y.shape
(392,)
# Columns used as model inputs.
feature_columns = ['displacement', 'horsepower', 'weight', 'acceleration']
X = df[feature_columns]
# Show the feature matrix's dimensions.
X.shape
(392, 4)
# Display the selected feature columns.
X
Saved successfully!
displacement horsepower weight acceleration
# Standardize the features; X is replaced by the scaler's array output.
# NOTE(review): the scaler is fit on the full X here; the train/test split is
# not visible in this section — if it happens afterwards, test rows influence
# the scaling statistics. Confirm the intended order.
ss = StandardScaler()
X = ss.fit_transform(X)
# Summary statistics of the scaled columns (now labelled 0..3).
pd.DataFrame(X).describe()
0 1 2 3
# Fit a linear regression on the training data.
# NOTE(review): X_train / y_train are not defined in this section — presumably
# they come from a train/test split of X and y; confirm.
lr = LinearRegression()
lr.fit(X_train, y_train)
LinearRegression()
# Intercept term of the fitted linear model.
lr.intercept_
23.485738559737584
# Coefficients of the fitted linear model.
lr.coef_
# Predict the target for the test features and display the predictions.
y_pred = lr.predict(X_test)
y_pred
# Mean absolute error on the test set, reported with the same metric used for
# the polynomial model below so the two fits can be compared like-for-like.
mean_absolute_error(y_test, y_pred)
3.3286968643244106
# Mean absolute percentage error of the linear model on the test set
# (returned as a fraction, not multiplied by 100).
mean_absolute_percentage_error(y_test, y_pred)
0.14713035779536746
# R^2 (coefficient of determination) of the linear model on the test set.
r2_score(y_test, y_pred)
0.7031250746717692
# Expand the training features with `poly`, fitting the transformer on the
# training data only.
# NOTE(review): `poly` is created outside this section — presumably a
# PolynomialFeatures instance; confirm.
X_train2 = poly.fit_transform(X_train)
# Apply the already-fitted transformer to the test set. Refitting on test data
# (fit_transform) would let held-out rows influence the preprocessing.
X_test2 = poly.transform(X_test)
# Refit the same estimator object on the expanded features; this overwrites
# the plain linear fit held in `lr`.
lr.fit(X_train2, y_train)
LinearRegression()
# Intercept term of the model after refitting on the expanded features.
lr.intercept_
Saved successfully!
21.27336450063766
# Coefficients of the model after refitting on the expanded features.
lr.coef_
# Predict the target for the expanded test features.
y_pred_poly = lr.predict(X_test2)
# Mean absolute error of those predictions on the test set.
mean_absolute_error(y_test, y_pred_poly)
2.7887147720295977
# Mean absolute percentage error of the expanded-feature model on the test set
# (returned as a fraction, not multiplied by 100).
mean_absolute_percentage_error(y_test, y_pred_poly)
0.1207401834293869
# R^2 (coefficient of determination) of the expanded-feature model on the test set.
r2_score(y_test, y_pred_poly)
0.7461731314563803
Saved successfully!