From e103ba7c848669d6bf7e494ead1f8810c60d785b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BB=D1=91=D0=BD=D0=B0=20=D0=9F=D0=B8=D1=81=D0=BA?= =?UTF-8?q?=D1=83=D0=BD?= Date: Thu, 15 May 2025 19:51:41 +0000 Subject: [PATCH] =?UTF-8?q?=D0=A1=D0=BE=D0=B7=D0=B4=D0=B0=D0=B9=D1=82?= =?UTF-8?q?=D0=B5=20=D0=BD=D0=BE=D0=B2=D1=8B=D0=B9=20Notebook?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- week2_analysis.ipynb | 427 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 427 insertions(+) create mode 100644 week2_analysis.ipynb diff --git a/week2_analysis.ipynb b/week2_analysis.ipynb new file mode 100644 index 0000000..654670a --- /dev/null +++ b/week2_analysis.ipynb @@ -0,0 +1,427 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 37, + "id": "3347bb59-0e96-4dc9-9be6-fb447147d50e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Первые 5 строк данных:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
4503Allen, Mr. William Henrymale35.0003734508.0500NaNS
\n", + "
" + ], + "text/plain": [ + " PassengerId Survived Pclass \\\n", + "0 1 0 3 \n", + "1 2 1 1 \n", + "2 3 1 3 \n", + "3 4 1 1 \n", + "4 5 0 3 \n", + "\n", + " Name Sex Age SibSp \\\n", + "0 Braund, Mr. Owen Harris male 22.0 1 \n", + "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", + "2 Heikkinen, Miss. Laina female 26.0 0 \n", + "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", + "4 Allen, Mr. William Henry male 35.0 0 \n", + "\n", + " Parch Ticket Fare Cabin Embarked \n", + "0 0 A/5 21171 7.2500 NaN S \n", + "1 0 PC 17599 71.2833 C85 C \n", + "2 0 STON/O2. 3101282 7.9250 NaN S \n", + "3 0 113803 53.1000 C123 S \n", + "4 0 373450 8.0500 NaN S " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import pandas as pd\n", + "from tqdm import tqdm \n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "df = pd.read_csv(\"train.csv\")\n", + "print(\"Первые 5 строк данных:\")\n", + "display(df.head())" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "03484d47-e142-43ff-94a3-5b40e2a308e2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Информация о датасете:\n", + "\n", + "RangeIndex: 891 entries, 0 to 890\n", + "Data columns (total 12 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 PassengerId 891 non-null int64 \n", + " 1 Survived 891 non-null int64 \n", + " 2 Pclass 891 non-null int64 \n", + " 3 Name 891 non-null object \n", + " 4 Sex 891 non-null object \n", + " 5 Age 714 non-null float64\n", + " 6 SibSp 891 non-null int64 \n", + " 7 Parch 891 non-null int64 \n", + " 8 Ticket 891 non-null object \n", + " 9 Fare 891 non-null float64\n", + " 10 Cabin 204 non-null object \n", + " 11 Embarked 889 non-null object \n", + "dtypes: float64(2), int64(5), object(5)\n", + "memory usage: 83.7+ KB\n" + ] + }, + { + "data": { + "text/plain": [ + "None" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Описательная статистика:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PassengerIdSurvivedPclassAgeSibSpParchFare
count891.000000891.000000891.000000714.000000891.000000891.000000891.000000
mean446.0000000.3838382.30864229.6991180.5230080.38159432.204208
std257.3538420.4865920.83607114.5264971.1027430.80605749.693429
min1.0000000.0000001.0000000.4200000.0000000.0000000.000000
25%223.5000000.0000002.00000020.1250000.0000000.0000007.910400
50%446.0000000.0000003.00000028.0000000.0000000.00000014.454200
75%668.5000001.0000003.00000038.0000001.0000000.00000031.000000
max891.0000001.0000003.00000080.0000008.0000006.000000512.329200
\n", + "
" + ], + "text/plain": [ + " PassengerId Survived Pclass Age SibSp \\\n", + "count 891.000000 891.000000 891.000000 714.000000 891.000000 \n", + "mean 446.000000 0.383838 2.308642 29.699118 0.523008 \n", + "std 257.353842 0.486592 0.836071 14.526497 1.102743 \n", + "min 1.000000 0.000000 1.000000 0.420000 0.000000 \n", + "25% 223.500000 0.000000 2.000000 20.125000 0.000000 \n", + "50% 446.000000 0.000000 3.000000 28.000000 0.000000 \n", + "75% 668.500000 1.000000 3.000000 38.000000 1.000000 \n", + "max 891.000000 1.000000 3.000000 80.000000 8.000000 \n", + "\n", + " Parch Fare \n", + "count 891.000000 891.000000 \n", + "mean 0.381594 32.204208 \n", + "std 0.806057 49.693429 \n", + "min 0.000000 0.000000 \n", + "25% 0.000000 7.910400 \n", + "50% 0.000000 14.454200 \n", + "75% 0.000000 31.000000 \n", + "max 6.000000 512.329200 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Пропущенные значения:\n" + ] + }, + { + "data": { + "text/plain": [ + "PassengerId 0\n", + "Survived 0\n", + "Pclass 0\n", + "Name 0\n", + "Sex 0\n", + "Age 177\n", + "SibSp 0\n", + "Parch 0\n", + "Ticket 0\n", + "Fare 0\n", + "Cabin 687\n", + "Embarked 2\n", + "dtype: int64" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "print(\"\\nИнформация о датасете:\")\n", + "display(df.info())\n", + "print(\"\\nОписательная статистика:\")\n", + "display(df.describe())\n", + "print(\"\\nПропущенные значения:\")\n", + "display(df.isnull().sum())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f71e6a06-5a32-4b41-a043-b03b3f7e4a56", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}