146 views
ubuntu2004

Kernel: Python 3 (system-wide)

In [1]:

import os

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

Step 1: Load the data

In [3]:

# Read the clinical-records CSV into `df`. The path is relative, so the file
# has to sit in the kernel's working directory.
# NOTE(review): the recorded run of this cell raised FileNotFoundError.
df = pd.read_csv(
    filepath_or_buffer='heart_failure_clinical_records_dataset.csv',
)

Out[3]:

---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
/tmp/ipykernel_2446/3368122559.py in <cell line: 1>()
----> 1 df = pd.read_csv('heart_failure_clinical_records_dataset.csv')

/usr/local/lib/python3.8/dist-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
    209                 else:
    210                     kwargs[new_arg_name] = new_arg_value
--> 211             return func(*args, **kwargs)
    212 
    213         return cast(F, wrapper)
/usr/local/lib/python3.8/dist-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
    329                     stacklevel=find_stack_level(),
    330                 )
--> 331             return func(*args, **kwargs)
    332 
    333         # error: "Callable[[VarArg(Any), KwArg(Any)], Any]" has no
/usr/local/lib/python3.8/dist-packages/pandas/io/parsers/readers.py in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, error_bad_lines, warn_bad_lines, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)
    948     kwds.update(kwds_defaults)
    949 
--> 950     return _read(filepath_or_buffer, kwds)
    951 
    952 
/usr/local/lib/python3.8/dist-packages/pandas/io/parsers/readers.py in _read(filepath_or_buffer, kwds)
    603 
    604     # Create the parser.
--> 605     parser = TextFileReader(filepath_or_buffer, **kwds)
    606 
    607     if chunksize or iterator:
/usr/local/lib/python3.8/dist-packages/pandas/io/parsers/readers.py in __init__(self, f, engine, **kwds)
   1440 
   1441         self.handles: IOHandles | None = None
-> 1442         self._engine = self._make_engine(f, self.engine)
   1443 
   1444     def close(self) -> None:
/usr/local/lib/python3.8/dist-packages/pandas/io/parsers/readers.py in _make_engine(self, f, engine)
   1733                 if "b" not in mode:
   1734                     mode += "b"
-> 1735             self.handles = get_handle(
   1736                 f,
   1737                 mode,
/usr/local/lib/python3.8/dist-packages/pandas/io/common.py in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)
    854         if ioargs.encoding and "b" not in ioargs.mode:
    855             # Encoding
--> 856             handle = open(
    857                 handle,
    858                 ioargs.mode,
FileNotFoundError: [Errno 2] No such file or directory: 'heart_failure_clinical_records_dataset.csv'

In [4]:

# Switch the working directory to the user's home directory and echo it.
# Written as plain Python instead of the bare `cd` automagic: IPython only
# expands automagics when automagic is enabled and the cell is a single line,
# so the original form breaks as soon as anything else is added to the cell.
# NOTE(review): this changes the working directory for every later cell.
os.chdir(os.path.expanduser("~"))
print(os.getcwd())

Out[4]:

/home/user

Step 2: Analyze the data

Descriptive statistics

In [3]:

# Print pandas' structural summary of `df` (DataFrame.info()).
# NOTE(review): relies on `df` from the load cell, which failed in the recorded run.
df.info()

In [0]:

# Show pandas' descriptive statistics for `df`; left as the bare last
# expression so the notebook renders the resulting table as the cell output.
df.describe()

In [5]:

# Count missing entries per column (`isna` is the canonical name of `isnull`).
df.isna().sum()

Data visualization

In [0]:

# Pairwise relationship grid for the frame's variables, coloured by the
# DEATH_EVENT column; corner=True is passed so only one triangle is drawn.
sns.pairplot(
    data=df,
    hue='DEATH_EVENT',
    palette="inferno",
    corner=True,
)
plt.show()

In [12]:

# Annotated heatmap of the pairwise column correlations.
corr_matrix = df.corr()
plt.figure(figsize=(12, 10))
sns.heatmap(data=corr_matrix, annot=True)
plt.show()

Out[12]:

Step 3: Prepare the data

In [0]:

# Target is the DEATH_EVENT column; the features are every remaining column.
y = df['DEATH_EVENT']
X = df.drop(columns=['DEATH_EVENT'])

In [0]:

# Hold out 20% of the rows as a test set; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [0]:

# Step 4: Build the models
# Logistic regression
# The features are standardised before fitting: LogisticRegression's default
# lbfgs solver stops after 100 iterations and can fail to converge on inputs
# with very different scales. Wrapping scaler and model in one pipeline means
# the scaler is fitted on the training split only and is re-applied
# automatically whenever `log_reg.predict` is called on raw features.
# `log_reg` is therefore a Pipeline; the fitted estimator itself is
# `log_reg.named_steps['logisticregression']`.
log_reg = make_pipeline(StandardScaler(), LogisticRegression())
log_reg.fit(X_train, y_train)
y_pred = log_reg.predict(X_test)
print('Logistic Regression Accuracy: {:.2f}%'.format(accuracy_score(y_test, y_pred) * 100))
print('Confusion Matrix:\n', confusion_matrix(y_test, y_pred))
print('Classification Report:\n', classification_report(y_test, y_pred))

In [0]:

# KNN model
# k-nearest-neighbours classifies by distance, so a column with a much larger
# numeric range than the others would dominate the metric. The features are
# therefore standardised first. Scaler and classifier live in one pipeline so
# the scaler is fitted on the training split only and `knn.predict` keeps
# accepting raw, unscaled features.
# `knn` is therefore a Pipeline; the fitted classifier itself is
# `knn.named_steps['kneighborsclassifier']`.
knn = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=5))
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
print('KNN Accuracy: {:.2f}%'.format(accuracy_score(y_test, y_pred) * 100))
print('Confusion Matrix:\n', confusion_matrix(y_test, y_pred))
print('Classification Report:\n', classification_report(y_test, y_pred))

In [0]:

# Decision Tree model
# random_state is pinned (same seed as the train/test split) because the tree
# builder permutes candidate features at each split; without a seed, ties can
# be broken differently on every run and the reported metrics would not be
# reproducible under Restart & Run All.
dtc = DecisionTreeClassifier(max_depth=3, random_state=42)
dtc.fit(X_train, y_train)
y_pred = dtc.predict(X_test)
print('Decision Tree Accuracy: {:.2f}%'.format(accuracy_score(y_test, y_pred) * 100))
print('Confusion Matrix:\n', confusion_matrix(y_test, y_pred))
print('Classification Report:\n', classification_report(y_test, y_pred))

Step 1: Load the data

Step 2: Analyze the data

Descriptive statistics

Data visualization

Step 3: Prepare the data

Product

Resources

Company