Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download
146 views
ubuntu2004
Kernel: Python 3 (system-wide)
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier

Step 1: Load the data

# The dataset is looked up relative to the current working directory.
data_path = Path('heart_failure_clinical_records_dataset.csv')

# Fail early with an actionable message: the plain read_csv failure names the
# file but not the directory that was searched, which is what is needed to
# fix a missing-upload / wrong-directory problem.
if not data_path.is_file():
    raise FileNotFoundError(
        f"'{data_path}' was not found in the working directory "
        f"'{Path.cwd()}'. Upload the CSV there (or change directory) "
        "before running this cell."
    )

# Load the clinical records into the DataFrame used by every later step.
df = pd.read_csv(data_path)
--------------------------------------------------------------------------- FileNotFoundError Traceback (most recent call last) /tmp/ipykernel_2446/3368122559.py in <cell line: 1>() ----> 1 df = pd.read_csv('heart_failure_clinical_records_dataset.csv') /usr/local/lib/python3.8/dist-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs) 209 else: 210 kwargs[new_arg_name] = new_arg_value --> 211 return func(*args, **kwargs) 212 213 return cast(F, wrapper) /usr/local/lib/python3.8/dist-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs) 329 stacklevel=find_stack_level(), 330 ) --> 331 return func(*args, **kwargs) 332 333 # error: "Callable[[VarArg(Any), KwArg(Any)], Any]" has no /usr/local/lib/python3.8/dist-packages/pandas/io/parsers/readers.py in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, error_bad_lines, warn_bad_lines, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options) 948 kwds.update(kwds_defaults) 949 --> 950 return _read(filepath_or_buffer, kwds) 951 952 /usr/local/lib/python3.8/dist-packages/pandas/io/parsers/readers.py in _read(filepath_or_buffer, kwds) 603 604 # Create the parser. 
--> 605 parser = TextFileReader(filepath_or_buffer, **kwds) 606 607 if chunksize or iterator: /usr/local/lib/python3.8/dist-packages/pandas/io/parsers/readers.py in __init__(self, f, engine, **kwds) 1440 1441 self.handles: IOHandles | None = None -> 1442 self._engine = self._make_engine(f, self.engine) 1443 1444 def close(self) -> None: /usr/local/lib/python3.8/dist-packages/pandas/io/parsers/readers.py in _make_engine(self, f, engine) 1733 if "b" not in mode: 1734 mode += "b" -> 1735 self.handles = get_handle( 1736 f, 1737 mode, /usr/local/lib/python3.8/dist-packages/pandas/io/common.py in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options) 854 if ioargs.encoding and "b" not in ioargs.mode: 855 # Encoding --> 856 handle = open( 857 handle, 858 ioargs.mode, FileNotFoundError: [Errno 2] No such file or directory: 'heart_failure_clinical_records_dataset.csv'
# NOTE(review): a bare `cd` does nothing useful as plain Python; this presumably
# relies on IPython's automagic form of `%cd` — confirm before reusing outside a
# notebook. The recorded output that follows shows /home/user.
cd
/home/user

Step 2: Analyze the data

Descriptive statistics

# NOTE(review): presumably each of the three calls below was its own notebook
# cell; describe() and isnull().sum() only display their result when they are
# the last expression of a cell — confirm if these are merged into one cell.

# Print an overview of the frame: columns, non-null counts and dtypes.
df.info()
# Summary statistics for the numeric columns.
df.describe()
# Number of missing values in each column.
df.isnull().sum()

Data visualization

# Pairwise plots of the columns, coloured by the DEATH_EVENT outcome.
# corner=True keeps only the lower triangle of the plot grid.
sns.pairplot(
    df,
    hue='DEATH_EVENT',
    palette="inferno",
    corner=True,
)
plt.show()
# Correlation matrix
# Open a 12x10 figure first, then draw the annotated heatmap of the pairwise
# column correlations into it.
plt.figure(figsize=(12, 10))
sns.heatmap(
    df.corr(),
    annot=True,
)
plt.show()
Image in a Jupyter notebook

Step 3: Prepare the data

# Target: the DEATH_EVENT column. Features: every other column.
y = df['DEATH_EVENT']
X = df.drop(columns=['DEATH_EVENT'])

# Split the data into training and testing sets
# (20 % held out for testing; fixed seed so the split is reproducible).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
# Step 4: Build the models

# Logistic regression
# The features are standardised before they reach the classifier: the solver
# converges more reliably, and the default L2 penalty treats all coefficients
# evenly, when the columns share a comparable scale. Wrapping both steps in a
# pipeline means the scaler is fitted on the training split only and the same
# transform is applied automatically inside predict(), so no test-set
# statistics leak into training.
# NOTE: log_reg is now a Pipeline; fit/predict/score work as before and the
# underlying LogisticRegression is available as log_reg[-1].
log_reg = make_pipeline(StandardScaler(), LogisticRegression())
log_reg.fit(X_train, y_train)
y_pred = log_reg.predict(X_test)

# Report hold-out performance.
print('Logistic Regression Accuracy: {:.2f}%'.format(accuracy_score(y_test, y_pred) * 100))
print('Confusion Matrix:\n', confusion_matrix(y_test, y_pred))
print('Classification Report:\n', classification_report(y_test, y_pred))
# KNN model
# k-nearest-neighbours ranks neighbours by distance, so on raw data the column
# with the largest numeric range dominates the metric and the remaining
# features are effectively ignored. Standardising inside a pipeline puts every
# feature on the same footing; the scaler is fitted on the training split only
# and re-applied automatically inside predict().
# NOTE: knn is now a Pipeline; fit/predict/score work as before and the
# underlying KNeighborsClassifier is available as knn[-1].
knn = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=5))
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)

# Report hold-out performance.
print('KNN Accuracy: {:.2f}%'.format(accuracy_score(y_test, y_pred) * 100))
print('Confusion Matrix:\n', confusion_matrix(y_test, y_pred))
print('Classification Report:\n', classification_report(y_test, y_pred))
# Decision Tree model
# A tree limited to three levels, trained on the training split and then
# scored on the held-out test split.
dtc = DecisionTreeClassifier(max_depth=3).fit(X_train, y_train)
y_pred = dtc.predict(X_test)

# Report hold-out performance.
print(
    'Decision Tree Accuracy: {:.2f}%'.format(
        accuracy_score(y_test, y_pred) * 100
    )
)
print(
    'Confusion Matrix:\n',
    confusion_matrix(y_test, y_pred),
)
print(
    'Classification Report:\n',
    classification_report(y_test, y_pred),
)