From dc651b13a50aeee9397574109f7e66d741e7af37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BB=D0=B5=D0=BA=D0=B5=D0=B9=20=D0=A1=D1=8B=D1=80?= =?UTF-8?q?=D1=87=D0=B8=D0=BD?= Date: Mon, 17 Mar 2025 12:38:33 +0300 Subject: [PATCH] =?UTF-8?q?=D0=A7=D0=B0=D1=81=D1=82=D1=8C=203=20=D0=BF?= =?UTF-8?q?=D1=80=D0=BE=D0=B4=D0=B5=D0=BB=D0=B0=D0=BD=D0=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- week2_analysis.ipynb | 660 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 658 insertions(+), 2 deletions(-) diff --git a/week2_analysis.ipynb b/week2_analysis.ipynb index f77b53f..8e36dcc 100644 --- a/week2_analysis.ipynb +++ b/week2_analysis.ipynb @@ -30,7 +30,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 11, "id": "286caee8-913b-4fa5-ae6e-5be0ba523cb7", "metadata": {}, "outputs": [ @@ -38,6 +38,7 @@ "name": "stdout", "output_type": "stream", "text": [ + "1\n", "Hello Sailor!\n" ] } @@ -56,13 +57,668 @@ "else:\n", " zz = aa + cc\n", "\n", + "print(r)\n", "print(zz)" ] }, + { + "cell_type": "markdown", + "id": "8757d814-8118-4250-9fe6-8d4c847f4791", + "metadata": {}, + "source": [ + "Часть 3" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "6e2f0daf-10fe-4892-8438-7a10f5cc05bd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Первый взгляд на данные:\n", + " Имя Возраст Баллы\n", + "0 Анна 21 89\n", + "1 Борис 22 76\n", + "2 Виктор 23 95\n", + "3 Галина 24 82\n", + "\n", + "RangeIndex: 4 entries, 0 to 3\n", + "Data columns (total 3 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Имя 4 non-null object\n", + " 1 Возраст 4 non-null int64 \n", + " 2 Баллы 4 non-null int64 \n", + "dtypes: int64(2), object(1)\n", + "memory usage: 228.0+ bytes\n", + "None\n", + " Возраст Баллы\n", + "count 4.000000 4.000000\n", + "mean 22.500000 85.500000\n", + "std 1.290994 8.266398\n", + "min 21.000000 76.000000\n", + "25% 21.750000 80.500000\n", + "50% 22.500000 85.500000\n", + "75% 23.250000 90.500000\n", + "max 24.000000 95.000000\n", + "Имя 0\n", + "Возраст 0\n", + "Баллы 0\n", + "dtype: int64\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "# Создадим DataFrame\n", + "data = {\n", + " \"Имя\": [\"Анна\", \"Борис\", \"Виктор\", \"Галина\"],\n", + " \"Возраст\": [21, 22, 23, 24],\n", + " \"Баллы\": [89, 76, 95, 82]\n", + "}\n", + "df = pd.DataFrame(data)\n", + "\n", + "print(\"Первый взгляд на данные:\")\n", + "print(df.head())\n", + "print(df.info())\n", + "print(df.describe())\n", + "print(df.isnull().sum())" + ] + }, + { + "cell_type": "markdown", + "id": "3b801627-8797-4b4d-8f3d-c3ea9911e1bf", + "metadata": {}, + "source": [ + "Часть 3 с изменениями" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "f9b40974-e401-451a-8dcb-3aa296931280", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Первый взгляд на данные:\n", + " Имя Возраст Баллы\n", + "0 Анна 21 89\n", + "1 Борис 22 76\n", + "2 Виктор 23 95\n", + "3 Галина 24 82\n", + "\n", + "RangeIndex: 4 entries, 0 to 3\n", + "Data columns (total 3 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Имя 4 non-null object\n", + " 1 Возраст 4 non-null int64 \n", + " 2 Баллы 4 non-null int64 \n", + "dtypes: int64(2), object(1)\n", + "memory usage: 228.0+ bytes\n", + "None\n", + " Возраст Баллы\n", + "count 4.000000 4.000000\n", + "mean 22.500000 85.500000\n", + "std 1.290994 8.266398\n", + "min 21.000000 76.000000\n", + "25% 21.750000 80.500000\n", + "50% 22.500000 85.500000\n", + "75% 23.250000 90.500000\n", + "max 24.000000 95.000000\n", + "Имя 0\n", + "Возраст 0\n", + "Баллы 0\n", + "dtype: int64\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ИмяВозрастБаллы
0Анна2189
1Борис2276
2Виктор2395
3Галина2482
\n", + "
" + ], + "text/plain": [ + " Имя Возраст Баллы\n", + "0 Анна 21 89\n", + "1 Борис 22 76\n", + "2 Виктор 23 95\n", + "3 Галина 24 82" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "# Создадим DataFrame\n", + "data = {\n", + " \"Имя\": [\"Анна\", \"Борис\", \"Виктор\", \"Галина\"],\n", + " \"Возраст\": [21, 22, 23, 24],\n", + " \"Баллы\": [89, 76, 95, 82]\n", + "}\n", + "df = pd.DataFrame(data)\n", + "\n", + "print(\"Первый взгляд на данные:\")\n", + "print(df.head())\n", + "print(df.info())\n", + "print(df.describe())\n", + "print(df.isnull().sum())\n", + "\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "9ee10d5f-ba28-45bd-9766-89491bb8d9a1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Первый взгляд на данные:\n", + " Имя Возраст Баллы Новый столбец\n", + "0 Анна 21 89 97.9\n", + "1 Борис 22 76 83.6\n", + "2 Виктор 23 95 104.5\n", + "3 Галина 24 82 90.2\n", + "\n", + "RangeIndex: 4 entries, 0 to 3\n", + "Data columns (total 4 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Имя 4 non-null object \n", + " 1 Возраст 4 non-null int64 \n", + " 2 Баллы 4 non-null int64 \n", + " 3 Новый столбец 4 non-null float64\n", + "dtypes: float64(1), int64(2), object(1)\n", + "memory usage: 260.0+ bytes\n", + "None\n", + " Возраст Баллы Новый столбец\n", + "count 4.000000 4.000000 4.000000\n", + "mean 22.500000 85.500000 94.050000\n", + "std 1.290994 8.266398 9.093038\n", + "min 21.000000 76.000000 83.600000\n", + "25% 21.750000 80.500000 88.550000\n", + "50% 22.500000 85.500000 94.050000\n", + "75% 23.250000 90.500000 99.550000\n", + "max 24.000000 95.000000 104.500000\n", + "Имя 0\n", + "Возраст 0\n", + "Баллы 0\n", + "Новый столбец 0\n", + "dtype: int64\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ИмяВозрастБаллыНовый столбец
0Анна218997.9
1Борис227683.6
2Виктор2395104.5
3Галина248290.2
\n", + "
" + ], + "text/plain": [ + " Имя Возраст Баллы Новый столбец\n", + "0 Анна 21 89 97.9\n", + "1 Борис 22 76 83.6\n", + "2 Виктор 23 95 104.5\n", + "3 Галина 24 82 90.2" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "# Создадим DataFrame\n", + "data = {\n", + " \"Имя\": [\"Анна\", \"Борис\", \"Виктор\", \"Галина\"],\n", + " \"Возраст\": [21, 22, 23, 24],\n", + " \"Баллы\": [89, 76, 95, 82]\n", + "}\n", + "df = pd.DataFrame(data)\n", + "\n", + "df[\"Новый столбец\"] = df[\"Баллы\"] * 1.1\n", + "\n", + "print(\"Первый взгляд на данные:\")\n", + "print(df.head())\n", + "print(df.info())\n", + "print(df.describe())\n", + "print(df.isnull().sum())\n", + "\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "841cffd3-b0f4-4d76-918e-822044d8cda1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Первый взгляд на данные:\n", + " Имя Возраст Баллы Новый столбец\n", + "0 Анна 21 89 97.9\n", + "1 Борис 22 76 83.6\n", + "2 Виктор 23 95 104.5\n", + "3 Галина 24 82 90.2\n", + "\n", + "RangeIndex: 4 entries, 0 to 3\n", + "Data columns (total 4 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Имя 4 non-null object \n", + " 1 Возраст 4 non-null int64 \n", + " 2 Баллы 4 non-null int64 \n", + " 3 Новый столбец 4 non-null float64\n", + "dtypes: float64(1), int64(2), object(1)\n", + "memory usage: 260.0+ bytes\n", + "None\n", + " Возраст Баллы Новый столбец\n", + "count 4.000000 4.000000 4.000000\n", + "mean 22.500000 85.500000 94.050000\n", + "std 1.290994 8.266398 9.093038\n", + "min 21.000000 76.000000 83.600000\n", + "25% 21.750000 80.500000 88.550000\n", + "50% 22.500000 85.500000 94.050000\n", + "75% 23.250000 90.500000 99.550000\n", + "max 24.000000 95.000000 104.500000\n", + "Имя 0\n", + "Возраст 0\n", + "Баллы 0\n", + "Новый столбец 0\n", + "dtype: int64\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
БаллыВозраст
meansummaxminmean
Баллы
7676.076767622.0
8282.082828224.0
8989.089898921.0
9595.095959523.0
\n", + "
" + ], + "text/plain": [ + " Баллы Возраст\n", + " mean sum max min mean\n", + "Баллы \n", + "76 76.0 76 76 76 22.0\n", + "82 82.0 82 82 82 24.0\n", + "89 89.0 89 89 89 21.0\n", + "95 95.0 95 95 95 23.0" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "# Создадим DataFrame\n", + "data = {\n", + " \"Имя\": [\"Анна\", \"Борис\", \"Виктор\", \"Галина\"],\n", + " \"Возраст\": [21, 22, 23, 24],\n", + " \"Баллы\": [89, 76, 95, 82]\n", + "}\n", + "df = pd.DataFrame(data)\n", + "\n", + "df[\"Новый столбец\"] = df[\"Баллы\"] * 1.1\n", + "\n", + "grouped_df = df.groupby(\"Баллы\").agg({\n", + " \"Баллы\": [\"mean\", \"sum\", \"max\", \"min\"],\n", + " \"Возраст\": \"mean\"\n", + "})\n", + "\n", + "print(\"Первый взгляд на данные:\")\n", + "print(df.head())\n", + "print(df.info())\n", + "print(df.describe())\n", + "print(df.isnull().sum())\n", + "\n", + "grouped_df" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "c7b256e1-c871-4131-bd8d-a0608a18ec28", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Первый взгляд на данные:\n", + " Имя Возраст Баллы Новый столбец\n", + "0 Анна 21 89 97.9\n", + "1 Борис 22 76 83.6\n", + "2 Виктор 23 95 104.5\n", + "3 Галина 24 82 90.2\n", + "\n", + "RangeIndex: 4 entries, 0 to 3\n", + "Data columns (total 4 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Имя 4 non-null object \n", + " 1 Возраст 4 non-null int64 \n", + " 2 Баллы 4 non-null int64 \n", + " 3 Новый столбец 4 non-null float64\n", + "dtypes: float64(1), int64(2), object(1)\n", + "memory usage: 260.0+ bytes\n", + "None\n", + " Возраст Баллы Новый столбец\n", + "count 4.000000 4.000000 4.000000\n", + "mean 22.500000 85.500000 94.050000\n", + "std 1.290994 8.266398 9.093038\n", + "min 21.000000 76.000000 83.600000\n", + "25% 21.750000 80.500000 88.550000\n", + "50% 22.500000 85.500000 94.050000\n", + "75% 23.250000 90.500000 99.550000\n", + "max 24.000000 95.000000 104.500000\n", + "Имя 0\n", + "Возраст 0\n", + "Баллы 0\n", + "Новый столбец 0\n", + "dtype: int64\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ИмяВозрастБаллыНовый столбец
0Анна218997.9
1Борис227683.6
2Виктор2395104.5
3Галина248290.2
\n", + "
" + ], + "text/plain": [ + " Имя Возраст Баллы Новый столбец\n", + "0 Анна 21 89 97.9\n", + "1 Борис 22 76 83.6\n", + "2 Виктор 23 95 104.5\n", + "3 Галина 24 82 90.2" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "# Создадим DataFrame\n", + "data = {\n", + " \"Имя\": [\"Анна\", \"Борис\", \"Виктор\", \"Галина\"],\n", + " \"Возраст\": [21, 22, 23, 24],\n", + " \"Баллы\": [89, 76, 95, 82]\n", + "}\n", + "df = pd.DataFrame(data)\n", + "\n", + "df[\"Новый столбец\"] = df[\"Баллы\"] * 1.1\n", + "df[df[\"Возраст\"] > 21]\n", + "\n", + "print(\"Первый взгляд на данные:\")\n", + "print(df.head())\n", + "print(df.info())\n", + "print(df.describe())\n", + "print(df.isnull().sum())\n", + "\n", + "df" + ] + }, { "cell_type": "code", "execution_count": null, - "id": "6e2f0daf-10fe-4892-8438-7a10f5cc05bd", + "id": "5a137f99-a29a-4c32-8727-8fe71e93f82f", "metadata": {}, "outputs": [], "source": []