Book a Demo!
CoCalc Logo Icon
Store · Features · Docs · Share · Support · News · About · Policies · Sign Up · Sign In
Download
108 views
ubuntu2004
Kernel: Python 3 (system-wide)
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
)
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier

Step 1: Load the data

# Load the heart-failure clinical records into a DataFrame. The path is
# relative, so the CSV must be in the notebook's working directory.
dataset_file = 'heart_failure_clinical_records_dataset.csv'
df = pd.read_csv(dataset_file)

Step 2: Analyze the data

Descriptive statistics

# Column names, dtypes and non-null counts (info() prints its own report).
df.info()
# Summary statistics as computed by DataFrame.describe().
df.describe()
# Number of missing values in each column; isna() is equivalent to isnull().
df.isna().sum()

Data visualization

# Pairwise relationships between all columns, coloured by the DEATH_EVENT
# outcome. corner=True draws only the lower triangle of the grid.
sns.pairplot(
    data=df,
    hue='DEATH_EVENT',
    palette="inferno",
    corner=True,
)
plt.show()
Image in a Jupyter notebook
# Correlation matrix
# Heat map of the pairwise column correlations, with every cell annotated
# by its coefficient. The statements are on their own lines so that they
# actually run instead of being part of the comment above.
plt.figure(figsize=(12, 10))
sns.heatmap(df.corr(), annot=True)
plt.show()
Image in a Jupyter notebook

Step 3: Prepare the data

# Separate the target column from the predictors: y is the DEATH_EVENT
# column, X is every remaining column.
y = df['DEATH_EVENT']
X = df.drop(columns=['DEATH_EVENT'])
# Split the data into training and testing sets
# 80 % of the rows go to training and 20 % to testing; the fixed seed keeps
# the split reproducible between runs. The call is on its own line so that
# X_train / X_test / y_train / y_test are actually defined for later cells.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

Step 4: Build the models

Logistic regression

# Logistic regression baseline.
# The features are standardised inside a pipeline before fitting: the
# default lbfgs solver can hit its iteration cap without converging on
# unscaled inputs, and the L2 penalty treats all coefficients alike.
# NOTE(review): the raw columns are presumably on very different scales;
# confirm with df.describe().
# The scaler is fitted on the training rows only, so no test-set statistics
# leak into the model, and log_reg.predict() still accepts raw features.
log_reg = make_pipeline(StandardScaler(), LogisticRegression())
log_reg.fit(X_train, y_train)
y_pred = log_reg.predict(X_test)

# Report accuracy (as a percentage), the confusion matrix and the
# per-class precision/recall/F1 on the held-out test set.
log_reg_accuracy = accuracy_score(y_test, y_pred) * 100
print('Logistic Regression Accuracy: {:.2f}%'.format(log_reg_accuracy))
print('Confusion Matrix:\n', confusion_matrix(y_test, y_pred))
print('Classification Report:\n', classification_report(y_test, y_pred))

KNN model

# k-nearest-neighbours classifier (k = 5).
# KNN ranks neighbours by distance, so any feature with a large numeric
# range dominates the metric when the inputs are unscaled. Standardising
# inside a pipeline gives every feature comparable weight.
# NOTE(review): the raw columns are presumably on very different scales;
# confirm with df.describe().
# The scaler is fitted on the training rows only, and knn.predict() still
# accepts raw features.
knn = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=5))
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)

# Report accuracy (as a percentage), the confusion matrix and the
# per-class precision/recall/F1 on the held-out test set.
knn_accuracy = accuracy_score(y_test, y_pred) * 100
print('KNN Accuracy: {:.2f}%'.format(knn_accuracy))
print('Confusion Matrix:\n', confusion_matrix(y_test, y_pred))
print('Classification Report:\n', classification_report(y_test, y_pred))

Decision Tree model

# Shallow decision tree (depth capped at 3).
# scikit-learn permutes the candidate features at every split, so without
# a seed the fitted tree, and therefore the reported metrics, can change
# from run to run. random_state uses the same seed as the train/test split
# to make the result reproducible.
dtc = DecisionTreeClassifier(max_depth=3, random_state=42)
dtc.fit(X_train, y_train)
y_pred = dtc.predict(X_test)

# Report accuracy (as a percentage), the confusion matrix and the
# per-class precision/recall/F1 on the held-out test set.
dtc_accuracy = accuracy_score(y_test, y_pred) * 100
print('Decision Tree Accuracy: {:.2f}%'.format(dtc_accuracy))
print('Confusion Matrix:\n', confusion_matrix(y_test, y_pred))
print('Classification Report:\n', classification_report(y_test, y_pred))