{ "cells": [ { "cell_type": "code", "execution_count": 3, "id": "5da9c564-63b2-42ab-9dfc-036b8ab8eb96", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
PassengerId
103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
503Allen, Mr. William Henrymale35.0003734508.0500NaNS
\n", "
" ], "text/plain": [ " Survived Pclass \\\n", "PassengerId \n", "1 0 3 \n", "2 1 1 \n", "3 1 3 \n", "4 1 1 \n", "5 0 3 \n", "\n", " Name Sex Age \\\n", "PassengerId \n", "1 Braund, Mr. Owen Harris male 22.0 \n", "2 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 \n", "3 Heikkinen, Miss. Laina female 26.0 \n", "4 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 \n", "5 Allen, Mr. William Henry male 35.0 \n", "\n", " SibSp Parch Ticket Fare Cabin Embarked \n", "PassengerId \n", "1 1 0 A/5 21171 7.2500 NaN S \n", "2 1 0 PC 17599 71.2833 C85 C \n", "3 0 0 STON/O2. 3101282 7.9250 NaN S \n", "4 1 0 113803 53.1000 C123 S \n", "5 0 0 373450 8.0500 NaN S " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas\n", "data = pandas.read_csv('titanic.csv', index_col='PassengerId')\n", "data.head()" ] }, { "cell_type": "markdown", "id": "9e165b4e-12c3-41a2-b770-23fae8e7b9b5", "metadata": {}, "source": [ "### 1. Количество мужчин и женщин на корабле:" ] }, { "cell_type": "code", "execution_count": 24, "id": "9ad76526-673a-4c80-8656-6328ea3e2b6d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Количество мужчин - 577\n", "Количество женщин - 314\n" ] } ], "source": [ "gender_counts = data['Sex'].value_counts()\n", "\n", "print(f'Количество мужчин - {gender_counts.male}')\n", "print(f'Количество женщин - {gender_counts.female}')" ] }, { "cell_type": "markdown", "id": "daea4619-0011-4219-998d-ce5ce13bffcf", "metadata": {}, "source": [ "### 2. Доля выживших пассажиров:" ] }, { "cell_type": "code", "execution_count": 20, "id": "5bd61c4c-4c75-4772-a2be-72fe2f23fcc8", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Доля выживших - 0.3838383838383838\n" ] } ], "source": [ "survived_ratio = data['Survived'].mean()\n", "\n", "print(f'Доля выживших - {survived_ratio}')" ] }, { "cell_type": "markdown", "id": "240911e6-9dc7-4595-97a8-f78f98e43ec9", "metadata": {}, "source": [ "### 3. Доля пассажиров первого класса:" ] }, { "cell_type": "code", "execution_count": 21, "id": "d61d936c-5220-479e-a5a8-ba4e56dd1e2d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Доля пассажиров первого класса - 0.24242424242424243\n" ] } ], "source": [ "first_class_ratio = (data['Pclass'] == 1).mean()\n", "\n", "print(f'Доля пассажиров первого класса - {first_class_ratio}')" ] }, { "cell_type": "markdown", "id": "62ec3de5-ae84-4354-b192-b173732691ad", "metadata": {}, "source": [ "### 4. Возраст пассажиров. Среднее и медиана возраста пассажиров:" ] }, { "cell_type": "code", "execution_count": 22, "id": "ccf95bd7-1e31-4076-b7f1-702ca35078a7", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Среднее возраста пассажиров - 29.69911764705882\n", "Медиана возраста пассажиров - 28.0\n" ] } ], "source": [ "mean_age = data['Age'].mean()\n", "median_age = data['Age'].median()\n", "\n", "print(f'Среднее возраста пассажиров - {mean_age}')\n", "print(f'Медиана возраста пассажиров - {median_age}')" ] }, { "cell_type": "markdown", "id": "43451392-bacd-4258-9b75-5be81c226e97", "metadata": {}, "source": [ "### 5. Корреляция Пирсона между признаками SibSp и Parch:" ] }, { "cell_type": "code", "execution_count": 23, "id": "cc932c9a-09c0-4592-a2d3-055c4bba7923", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Корреляция Пирсона между признаками SibSp и Parch - 0.4148376986201567\n" ] } ], "source": [ "correlation_sibsp_parch = data['SibSp'].corr(data['Parch'])\n", "\n", "print(f'Корреляция Пирсона между признаками SibSp и Parch - {correlation_sibsp_parch}')" ] }, { "cell_type": "markdown", "id": "27d94998-fea3-476a-aa70-a97d36686d30", "metadata": {}, "source": [ "### 6. Самое популярное женское имя:" ] }, { "cell_type": "code", "execution_count": 28, "id": "e1e9e18d-e28d-42c1-86ac-794824d37ccb", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "First_Name\n", "Anna 15\n", "Mary 14\n", "Elizabeth 11\n", "Margaret 10\n", "Alice 6\n", "Name: count, dtype: int64\n", "\n", "Самое популярное женское имя - Anna\n" ] } ], "source": [ "import re\n", "\n", "# Функция для извлечения имени\n", "def extract_first_name(full_name):\n", " match = re.search(r\"\\(([^)]+)\\)\", full_name)\n", " if match:\n", " first_name = match.group(1).split()[0]\n", " else:\n", " first_name = full_name.split(',')[1].split()[1]\n", " return first_name\n", "\n", "# Копируем DataFrame перед изменением\n", "female_passengers = data[data['Sex'] == 'female'].copy()\n", "# Применяем функцию для извлечения имен\n", "female_passengers['First_Name'] = female_passengers['Name'].apply(extract_first_name)\n", "\n", "\n", "# Найдем самое популярное имя\n", "popular_female_name = female_passengers['First_Name'].value_counts().idxmax()\n", "print(female_passengers['First_Name'].value_counts().head())\n", "print(f'\\nСамое популярное женское имя - {popular_female_name}')" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.4" } }, "nbformat": 4, "nbformat_minor": 5 }