Nothing Special   »   [go: up one dir, main page]

TITANIC EJERCICIO - Ipynb - Colab

Download as pdf or txt
Download as pdf or txt
You are on page 1of 3

19/6/24, 10:37 TITANIC EJERCICIO.ipynb - Colab

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
import seaborn as sns
from matplotlib.colors import ListedColormap

# Load the Titanic dataset from a CSV file in the working directory
# (I/O side effect: defines the module-level `dataset` DataFrame used below).
dataset = pd.read_csv('titanic.csv')

# Show the first 10 rows to sanity-check the raw columns
# (only meaningful as the cell's displayed value in a notebook).
dataset.head(10)

PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked

Braund,
0 1 0 3 Mr. Owen male 22.0 1 0 A/5 21171 7.2500 NaN S
Harris

Cumings,
Mrs. John
1 2 1 1 Bradley female 38.0 1 0 PC 17599 71.2833 C85 C
(Florence
Briggs Th...

Heikkinen, STON/O2.
2 3 1 3 female 26.0 0 0 7.9250 NaN S
Miss. Laina 3101282

Futrelle,
Mrs.
3 4 1 1 Jacques female 35.0 1 0 113803 53.1000 C123 S
Heath (Lily
May Peel)

# --- Data preprocessing ---
# Drop columns that are not usable as numeric features:
#  - Name / Ticket / Cabin: free-text or high-cardinality identifiers.
#  - PassengerId: an arbitrary row identifier. FIX: the original kept it in X,
#    which injects pure noise into the standardized feature matrix and the
#    PCA projection (it carries no information about survival).
dataset = dataset.drop(['PassengerId', 'Name', 'Ticket', 'Cabin'], axis=1)

# Drop rows with missing values (mostly missing Age after removing Cabin).
dataset = dataset.dropna()

# One-hot encode the categorical columns; drop_first avoids the redundant
# (perfectly collinear) dummy column per category.
dataset = pd.get_dummies(dataset, columns=['Sex', 'Embarked'], drop_first=True)

# Separate features (X) and target variable (y = Survived).
X = dataset.drop(['Survived'], axis=1).values
y = dataset['Survived'].values

# Dividir en conjunto de entrenamiento y prueba


# Split into train/test sets (80/20); fixed random_state for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Standardize the features (zero mean, unit variance).
# The scaler is fit on the training set only and then applied to the test
# set, so no information from the test split leaks into the transform.
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)

# Project onto 2 principal components so the decision boundary can be
# visualized in a 2-D plot below.
pca = PCA(n_components=2)

# Fit PCA on the (standardized) training data only, then apply the same
# learned projection to the test data.
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

# Fraction of total variance captured by each retained component; the sum
# tells how much information the 2-D projection keeps.
explained_variance = pca.explained_variance_ratio_
print("Varianza explicada por cada componente principal:", explained_variance)
print("Varianza total explicada:", explained_variance.sum())

Varianza explicada por cada componente principal: [0.21438928 0.19939672]


Varianza total explicada: 0.4137860001368372

# Train a logistic-regression classifier on the 2-D PCA features
# (random_state fixed for reproducibility of the solver).
classifier = LogisticRegression(random_state=0)
classifier.fit(X_train_pca, y_train)

▾ LogisticRegression
LogisticRegression(random_state=0)

# Predict on the held-out test projection and summarize performance with a
# confusion matrix (rows: true class, columns: predicted class).
y_pred = classifier.predict(X_test_pca)
cm = confusion_matrix(y_test, y_pred)
print("Matriz de confusión:\n", cm)

Matriz de confusión:
[[71 11]
[31 30]]

# Visualizar el conjunto de entrenamiento


https://colab.research.google.com/drive/1uDMt-3vec1LUAVAFQgGliskRYoJ0hl3T#scrollTo=zXLzvwog3Ehd 1/3
19/6/24, 10:37 TITANIC EJERCICIO.ipynb - Colab
# Visualize the training set: decision regions of the classifier in PCA space.
X_set, y_set = X_train_pca, y_train
# Dense grid covering the data range (±1 margin) at 0.01 resolution.
X1, X2 = np.meshgrid(np.arange(start=X_set[:, 0].min() - 1, stop=X_set[:, 0].max() + 1, step=0.01),
                     np.arange(start=X_set[:, 1].min() - 1, stop=X_set[:, 1].max() + 1, step=0.01))

# Classify every grid point and shade the two decision regions.
plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha=0.75, cmap=ListedColormap(('white', 'blue')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
# Overlay the training points, one colour per class.
# FIX: pass the single colour through `color=` instead of `c=`; `c=` with an
# RGBA tuple triggers matplotlib's "single numeric RGB or RGBA sequence"
# UserWarning seen in the original run.
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                color=ListedColormap(('red', 'orange'))(i), label=j)
plt.title('Clasificador (Conjunto de Entrenamiento)')
plt.xlabel('CP1')
plt.ylabel('CP2')
plt.legend()
plt.show()

# Visualize the test set: same decision-region plot, now with the held-out
# points overlaid, so generalization can be judged visually.
X_set, y_set = X_test_pca, y_test
# Dense grid covering the test-data range (±1 margin) at 0.01 resolution.
X1, X2 = np.meshgrid(np.arange(start=X_set[:, 0].min() - 1, stop=X_set[:, 0].max() + 1, step=0.01),
                     np.arange(start=X_set[:, 1].min() - 1, stop=X_set[:, 1].max() + 1, step=0.01))

# Classify every grid point and shade the two decision regions.
plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha=0.75, cmap=ListedColormap(('yellow', 'purple')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
# Overlay the test points, one colour per class.
# FIX: use `color=` rather than `c=` for a single colour spec to avoid the
# matplotlib UserWarning reproduced in the original output.
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                color=ListedColormap(('red', 'green'))(i), label=j)
plt.title('Clasificador (Conjunto de Prueba)')
plt.xlabel('CP1')
plt.ylabel('CP2')
plt.legend()
plt.show()

<ipython-input-23-ec96e7272e76>:11: UserWarning: *c* argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have prec
plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],

<ipython-input-23-ec96e7272e76>:29: UserWarning: *c* argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have prec
plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],

https://colab.research.google.com/drive/1uDMt-3vec1LUAVAFQgGliskRYoJ0hl3T#scrollTo=zXLzvwog3Ehd 2/3
19/6/24, 10:37 TITANIC EJERCICIO.ipynb - Colab

https://colab.research.google.com/drive/1uDMt-3vec1LUAVAFQgGliskRYoJ0hl3T#scrollTo=zXLzvwog3Ehd 3/3

You might also like