391 lines
12 KiB
Plaintext
391 lines
12 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"id": "5da9c564-63b2-42ab-9dfc-036b8ab8eb96",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Survived</th>\n",
|
||
" <th>Pclass</th>\n",
|
||
" <th>Name</th>\n",
|
||
" <th>Sex</th>\n",
|
||
" <th>Age</th>\n",
|
||
" <th>SibSp</th>\n",
|
||
" <th>Parch</th>\n",
|
||
" <th>Ticket</th>\n",
|
||
" <th>Fare</th>\n",
|
||
" <th>Cabin</th>\n",
|
||
" <th>Embarked</th>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>PassengerId</th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>Braund, Mr. Owen Harris</td>\n",
|
||
" <td>male</td>\n",
|
||
" <td>22.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>A/5 21171</td>\n",
|
||
" <td>7.2500</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>S</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
|
||
" <td>female</td>\n",
|
||
" <td>38.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>PC 17599</td>\n",
|
||
" <td>71.2833</td>\n",
|
||
" <td>C85</td>\n",
|
||
" <td>C</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>Heikkinen, Miss. Laina</td>\n",
|
||
" <td>female</td>\n",
|
||
" <td>26.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>STON/O2. 3101282</td>\n",
|
||
" <td>7.9250</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>S</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n",
|
||
" <td>female</td>\n",
|
||
" <td>35.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>113803</td>\n",
|
||
" <td>53.1000</td>\n",
|
||
" <td>C123</td>\n",
|
||
" <td>S</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>Allen, Mr. William Henry</td>\n",
|
||
" <td>male</td>\n",
|
||
" <td>35.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>373450</td>\n",
|
||
" <td>8.0500</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>S</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Survived Pclass \\\n",
|
||
"PassengerId \n",
|
||
"1 0 3 \n",
|
||
"2 1 1 \n",
|
||
"3 1 3 \n",
|
||
"4 1 1 \n",
|
||
"5 0 3 \n",
|
||
"\n",
|
||
" Name Sex Age \\\n",
|
||
"PassengerId \n",
|
||
"1 Braund, Mr. Owen Harris male 22.0 \n",
|
||
"2 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 \n",
|
||
"3 Heikkinen, Miss. Laina female 26.0 \n",
|
||
"4 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 \n",
|
||
"5 Allen, Mr. William Henry male 35.0 \n",
|
||
"\n",
|
||
" SibSp Parch Ticket Fare Cabin Embarked \n",
|
||
"PassengerId \n",
|
||
"1 1 0 A/5 21171 7.2500 NaN S \n",
|
||
"2 1 0 PC 17599 71.2833 C85 C \n",
|
||
"3 0 0 STON/O2. 3101282 7.9250 NaN S \n",
|
||
"4 1 0 113803 53.1000 C123 S \n",
|
||
"5 0 0 373450 8.0500 NaN S "
|
||
]
|
||
},
|
||
"execution_count": 3,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"import pandas\n",
|
||
"# Read the passenger table, using PassengerId as the row index.\n",
|
||
"data = pandas.read_csv(\n",
|
||
"    'titanic.csv',\n",
|
||
"    index_col='PassengerId',\n",
|
||
")\n",
|
||
"# Preview the first five rows.\n",
|
||
"data.head(n=5)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "9e165b4e-12c3-41a2-b770-23fae8e7b9b5",
|
||
"metadata": {},
|
||
"source": [
|
||
"### 1. Количество мужчин и женщин на корабле:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 24,
|
||
"id": "9ad76526-673a-4c80-8656-6328ea3e2b6d",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Количество мужчин - 577\n",
|
||
"Количество женщин - 314\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Number of passengers for each value of the Sex column.\n",
|
||
"gender_counts = data['Sex'].value_counts()\n",
|
||
"\n",
|
||
"# Look the labels up by key instead of attribute access: attribute lookup\n",
|
||
"# fails for labels that clash with Series attributes, and .get() yields 0\n",
|
||
"# rather than raising when a category does not occur in the data.\n",
|
||
"male_count = gender_counts.get('male', 0)\n",
|
||
"female_count = gender_counts.get('female', 0)\n",
|
||
"\n",
|
||
"print(f'Количество мужчин - {male_count}')\n",
|
||
"print(f'Количество женщин - {female_count}')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "daea4619-0011-4219-998d-ce5ce13bffcf",
|
||
"metadata": {},
|
||
"source": [
|
||
"### 2. Доля выживших пассажиров:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 20,
|
||
"id": "5bd61c4c-4c75-4772-a2be-72fe2f23fcc8",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Доля выживших - 0.3838383838383838\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Survived appears as 0/1 in the preview above, so the column mean is\n",
|
||
"# reported as the share of survivors.\n",
|
||
"survived_flags = data['Survived']\n",
|
||
"survived_ratio = survived_flags.mean()\n",
|
||
"\n",
|
||
"print(f'Доля выживших - {survived_ratio}')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "240911e6-9dc7-4595-97a8-f78f98e43ec9",
|
||
"metadata": {},
|
||
"source": [
|
||
"### 3. Доля пассажиров первого класса:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 21,
|
||
"id": "d61d936c-5220-479e-a5a8-ba4e56dd1e2d",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Доля пассажиров первого класса - 0.24242424242424243\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Boolean mask of rows with Pclass equal to 1; the mean of the mask is the\n",
|
||
"# fraction of rows where it is True.\n",
|
||
"is_first_class = data['Pclass'].eq(1)\n",
|
||
"first_class_ratio = is_first_class.mean()\n",
|
||
"\n",
|
||
"print(f'Доля пассажиров первого класса - {first_class_ratio}')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "62ec3de5-ae84-4354-b192-b173732691ad",
|
||
"metadata": {},
|
||
"source": [
|
||
"### 4. Возраст пассажиров. Среднее и медиана возраста пассажиров:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 22,
|
||
"id": "ccf95bd7-1e31-4076-b7f1-702ca35078a7",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Среднее возраста пассажиров - 29.69911764705882\n",
|
||
"Медиана возраста пассажиров - 28.0\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Mean and median of the Age column.\n",
|
||
"passenger_ages = data['Age']\n",
|
||
"mean_age = passenger_ages.mean()\n",
|
||
"median_age = passenger_ages.median()\n",
|
||
"\n",
|
||
"print(f'Среднее возраста пассажиров - {mean_age}')\n",
|
||
"print(f'Медиана возраста пассажиров - {median_age}')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "43451392-bacd-4258-9b75-5be81c226e97",
|
||
"metadata": {},
|
||
"source": [
|
||
"### 5. Корреляция Пирсона между признаками SibSp и Parch:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 23,
|
||
"id": "cc932c9a-09c0-4592-a2d3-055c4bba7923",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Корреляция Пирсона между признаками SibSp и Parch - 0.4148376986201567\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Pearson correlation between the SibSp and Parch columns; the method is\n",
|
||
"# spelled out so the statistic matches the section heading at a glance.\n",
|
||
"sibsp = data['SibSp']\n",
|
||
"parch = data['Parch']\n",
|
||
"correlation_sibsp_parch = sibsp.corr(parch, method='pearson')\n",
|
||
"\n",
|
||
"print(f'Корреляция Пирсона между признаками SibSp и Parch - {correlation_sibsp_parch}')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "27d94998-fea3-476a-aa70-a97d36686d30",
|
||
"metadata": {},
|
||
"source": [
|
||
"### 6. Самое популярное женское имя:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 28,
|
||
"id": "e1e9e18d-e28d-42c1-86ac-794824d37ccb",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"First_Name\n",
|
||
"Anna 15\n",
|
||
"Mary 14\n",
|
||
"Elizabeth 11\n",
|
||
"Margaret 10\n",
|
||
"Alice 6\n",
|
||
"Name: count, dtype: int64\n",
|
||
"\n",
|
||
"Самое популярное женское имя - Anna\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"import re\n",
|
||
"\n",
|
||
"# Helper that pulls a given name out of a Name value.\n",
|
||
"def extract_first_name(full_name):\n",
|
||
"    \"\"\"Return the given name parsed from a passenger Name value.\n",
|
||
"\n",
|
||
"    The expected layout is 'Surname, Title. Given names (Own name)'.\n",
|
||
"    The first word of a parenthesised group is preferred. Groups that open\n",
|
||
"    with a double quote are treated as aliases and skipped, because their\n",
|
||
"    first word is a quoted title rather than a name.\n",
|
||
"    Without a usable group, the word after the title is returned.\n",
|
||
"\n",
|
||
"    Args:\n",
|
||
"        full_name: Raw value of the Name column.\n",
|
||
"\n",
|
||
"    Returns:\n",
|
||
"        The given name as a string, or None when the value is not a string\n",
|
||
"        or contains no recognisable given name, so that one malformed row\n",
|
||
"        does not abort Series.apply.\n",
|
||
"    \"\"\"\n",
|
||
"    if not isinstance(full_name, str):\n",
|
||
"        return None\n",
|
||
"\n",
|
||
"    # Check every parenthesised group in order, not only the first one.\n",
|
||
"    for group in re.findall(r\"\\(([^)]+)\\)\", full_name):\n",
|
||
"        words = group.split()\n",
|
||
"        if words and not words[0].startswith('\"'):\n",
|
||
"            return words[0]\n",
|
||
"\n",
|
||
"    # Fallback: second word of the text between the first two commas,\n",
|
||
"    # i.e. the word that follows the title.\n",
|
||
"    parts = full_name.split(',')\n",
|
||
"    if len(parts) > 1:\n",
|
||
"        words = parts[1].split()\n",
|
||
"        if len(words) > 1:\n",
|
||
"            return words[1]\n",
|
||
"    return None\n",
|
||
"\n",
|
||
"# Build a separate frame of female passengers with an added First_Name\n",
|
||
"# column, leaving the original `data` frame untouched.\n",
|
||
"female_passengers = data[data['Sex'] == 'female'].assign(\n",
|
||
"    First_Name=lambda frame: frame['Name'].apply(extract_first_name)\n",
|
||
")\n",
|
||
"\n",
|
||
"# Count each extracted name once and reuse the counts below.\n",
|
||
"first_name_counts = female_passengers['First_Name'].value_counts()\n",
|
||
"\n",
|
||
"# The most frequent name is the label with the highest count.\n",
|
||
"popular_female_name = first_name_counts.idxmax()\n",
|
||
"print(first_name_counts.head())\n",
|
||
"print(f'\\nСамое популярное женское имя - {popular_female_name}')"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.12.4"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|