Cognitive_technologies/лр6/lab6.ipynb

391 lines
12 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"id": "5da9c564-63b2-42ab-9dfc-036b8ab8eb96",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Survived</th>\n",
" <th>Pclass</th>\n",
" <th>Name</th>\n",
" <th>Sex</th>\n",
" <th>Age</th>\n",
" <th>SibSp</th>\n",
" <th>Parch</th>\n",
" <th>Ticket</th>\n",
" <th>Fare</th>\n",
" <th>Cabin</th>\n",
" <th>Embarked</th>\n",
" </tr>\n",
" <tr>\n",
" <th>PassengerId</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Braund, Mr. Owen Harris</td>\n",
" <td>male</td>\n",
" <td>22.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>A/5 21171</td>\n",
" <td>7.2500</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
" <td>female</td>\n",
" <td>38.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>PC 17599</td>\n",
" <td>71.2833</td>\n",
" <td>C85</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>Heikkinen, Miss. Laina</td>\n",
" <td>female</td>\n",
" <td>26.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>STON/O2. 3101282</td>\n",
" <td>7.9250</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n",
" <td>female</td>\n",
" <td>35.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>113803</td>\n",
" <td>53.1000</td>\n",
" <td>C123</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Allen, Mr. William Henry</td>\n",
" <td>male</td>\n",
" <td>35.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>373450</td>\n",
" <td>8.0500</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Survived Pclass \\\n",
"PassengerId \n",
"1 0 3 \n",
"2 1 1 \n",
"3 1 3 \n",
"4 1 1 \n",
"5 0 3 \n",
"\n",
" Name Sex Age \\\n",
"PassengerId \n",
"1 Braund, Mr. Owen Harris male 22.0 \n",
"2 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 \n",
"3 Heikkinen, Miss. Laina female 26.0 \n",
"4 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 \n",
"5 Allen, Mr. William Henry male 35.0 \n",
"\n",
" SibSp Parch Ticket Fare Cabin Embarked \n",
"PassengerId \n",
"1 1 0 A/5 21171 7.2500 NaN S \n",
"2 1 0 PC 17599 71.2833 C85 C \n",
"3 0 0 STON/O2. 3101282 7.9250 NaN S \n",
"4 1 0 113803 53.1000 C123 S \n",
"5 0 0 373450 8.0500 NaN S "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas\n",
"# Load the passenger table; the PassengerId column becomes the row index.\n",
"data = pandas.read_csv(\n",
"    'titanic.csv',\n",
"    index_col='PassengerId',\n",
")\n",
"# Preview the first five rows as the cell's rich output.\n",
"data.head()"
]
},
{
"cell_type": "markdown",
"id": "9e165b4e-12c3-41a2-b770-23fae8e7b9b5",
"metadata": {},
"source": [
"### 1. Количество мужчин и женщин на корабле:"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "9ad76526-673a-4c80-8656-6328ea3e2b6d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Количество мужчин - 577\n",
"Количество женщин - 314\n"
]
}
],
"source": [
"# Tally passengers by the value of the Sex column.\n",
"gender_counts = data['Sex'].value_counts()\n",
"\n",
"# Look the counts up by label rather than by attribute access on the Series.\n",
"male_count = gender_counts['male']\n",
"female_count = gender_counts['female']\n",
"\n",
"print(f'Количество мужчин - {male_count}')\n",
"print(f'Количество женщин - {female_count}')"
]
},
{
"cell_type": "markdown",
"id": "daea4619-0011-4219-998d-ce5ce13bffcf",
"metadata": {},
"source": [
"### 2. Доля выживших пассажиров:"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "5bd61c4c-4c75-4772-a2be-72fe2f23fcc8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Доля выживших - 0.3838383838383838\n"
]
}
],
"source": [
"# Survived is shown as 0/1 in the preview, so its mean is the fraction of survivors.\n",
"survived_flags = data['Survived']\n",
"survived_ratio = survived_flags.mean()\n",
"\n",
"print(f'Доля выживших - {survived_ratio}')"
]
},
{
"cell_type": "markdown",
"id": "240911e6-9dc7-4595-97a8-f78f98e43ec9",
"metadata": {},
"source": [
"### 3. Доля пассажиров первого класса:"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "d61d936c-5220-479e-a5a8-ba4e56dd1e2d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Доля пассажиров первого класса - 0.24242424242424243\n"
]
}
],
"source": [
"# Averaging the boolean mask gives the share of rows with Pclass equal to 1.\n",
"is_first_class = data['Pclass'].eq(1)\n",
"first_class_ratio = is_first_class.mean()\n",
"\n",
"print(f'Доля пассажиров первого класса - {first_class_ratio}')"
]
},
{
"cell_type": "markdown",
"id": "62ec3de5-ae84-4354-b192-b173732691ad",
"metadata": {},
"source": [
"### 4. Возраст пассажиров. Среднее и медиана возраста пассажиров:"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "ccf95bd7-1e31-4076-b7f1-702ca35078a7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Среднее возраста пассажиров - 29.69911764705882\n",
"Медиана возраста пассажиров - 28.0\n"
]
}
],
"source": [
"# pandas skips missing values (NaN) by default in both statistics.\n",
"passenger_ages = data['Age']\n",
"mean_age, median_age = passenger_ages.mean(), passenger_ages.median()\n",
"\n",
"print(f'Среднее возраста пассажиров - {mean_age}')\n",
"print(f'Медиана возраста пассажиров - {median_age}')"
]
},
{
"cell_type": "markdown",
"id": "43451392-bacd-4258-9b75-5be81c226e97",
"metadata": {},
"source": [
"### 5. Корреляция Пирсона между признаками SibSp и Parch:"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "cc932c9a-09c0-4592-a2d3-055c4bba7923",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Корреляция Пирсона между признаками SibSp и Parch - 0.4148376986201567\n"
]
}
],
"source": [
"# Series.corr defaults to Pearson; the method is spelled out to match the task.\n",
"correlation_sibsp_parch = data['SibSp'].corr(data['Parch'], method='pearson')\n",
"\n",
"print(f'Корреляция Пирсона между признаками SibSp и Parch - {correlation_sibsp_parch}')"
]
},
{
"cell_type": "markdown",
"id": "27d94998-fea3-476a-aa70-a97d36686d30",
"metadata": {},
"source": [
"### 6. Самое популярное женское имя:"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "e1e9e18d-e28d-42c1-86ac-794824d37ccb",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"First_Name\n",
"Anna 15\n",
"Mary 14\n",
"Elizabeth 11\n",
"Margaret 10\n",
"Alice 6\n",
"Name: count, dtype: int64\n",
"\n",
"Самое популярное женское имя - Anna\n"
]
}
],
"source": [
"import re\n",
"\n",
"def extract_first_name(full_name):\n",
"    \"\"\"Return a passenger's own given name from a ``Name`` value.\n",
"\n",
"    Values look like ``'Surname, Title. Given names'``. When the string also\n",
"    holds a parenthesised name, e.g.\n",
"    ``'Futrelle, Mrs. Jacques Heath (Lily May Peel)'``, the first word inside\n",
"    the parentheses is used instead of the words after the title.\n",
"\n",
"    Parenthesised groups that start with a double quote, such as\n",
"    ``(\"Mrs Kate Louise Phillips Marshall\")``, are aliases and are skipped, so\n",
"    ``'\"Mrs'`` is never reported as a first name.\n",
"\n",
"    Args:\n",
"        full_name: Passenger name string.\n",
"\n",
"    Returns:\n",
"        The given name, or ``None`` when the string has no comma or no word\n",
"        after the title (``value_counts`` ignores such missing entries).\n",
"    \"\"\"\n",
"    # Prefer a parenthesised name, skipping quoted alias groups.\n",
"    for group in re.findall(r\"\\(([^)]+)\\)\", full_name):\n",
"        words = group.split()\n",
"        if words and not words[0].startswith('\"'):\n",
"            return words[0]\n",
"\n",
"    segments = full_name.split(',')\n",
"    if len(segments) < 2:\n",
"        return None\n",
"    # Drop any remaining (quoted) parenthesised text, then skip the title word.\n",
"    after_surname = re.sub(r\"\\([^)]*\\)\", ' ', segments[1])\n",
"    words = after_surname.split()\n",
"    return words[1] if len(words) > 1 else None\n",
"\n",
"# Take an independent copy of the female rows so the new column is not\n",
"# written into a slice of `data`.\n",
"female_passengers = data.loc[data['Sex'] == 'female'].copy()\n",
"# Derive each woman's first name from the Name column.\n",
"female_passengers['First_Name'] = female_passengers['Name'].apply(extract_first_name)\n",
"\n",
"\n",
"# Count the names once; the top entry is the most popular one.\n",
"first_name_counts = female_passengers['First_Name'].value_counts()\n",
"popular_female_name = first_name_counts.idxmax()\n",
"print(first_name_counts.head())\n",
"print(f'\\nСамое популярное женское имя - {popular_female_name}')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}