MasteringJupyterLab/week2_analysis.ipynb

261 lines
7.6 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"metadata": {
"ExecuteTime": {
"end_time": "2026-05-05T16:56:23.391024900Z",
"start_time": "2026-05-05T16:56:23.052361300Z"
}
},
"cell_type": "code",
"source": [
"import pandas as pd\n",
"\n",
"# Build a small DataFrame of students: name, age and score.\n",
"data = {\n",
"    \"Имя\": [\"Анна\", \"Борис\", \"Виктор\", \"Галина\", \"Саша\"],\n",
"    \"Возраст\": [21, 22, 23, 24, 22],\n",
"    \"Баллы\": [89, 76, 95, 82, 80],\n",
"}\n",
"df = pd.DataFrame(data)\n",
"\n",
"print(\"Первый взгляд на данные:\")\n",
"print(df.head())\n",
"# DataFrame.info() writes its report to stdout itself and returns None,\n",
"# so wrapping it in print() only appends a stray \"None\" line.\n",
"df.info()\n",
"print(df.describe())\n",
"print(df.isnull().sum())\n",
"\n",
"# Derived column: every score multiplied by 1.1.\n",
"df[\"Новый столбец\"] = df[\"Баллы\"] * 1.1\n",
"\n",
"# Mean score per age, and the subset of students older than 21.\n",
"groups = df.groupby(\"Возраст\")[\"Баллы\"].mean()\n",
"df_filter = df[df[\"Возраст\"] > 21]\n",
"\n",
"print(\"Студенты старше 21 года:\")\n",
"display(df_filter)\n",
"\n",
"print(\"Средние баллы по возрастам:\")\n",
"# By default only the last expression of a cell is rendered, so `groups`\n",
"# is the single trailing expression.\n",
"groups"
],
"id": "c6257ab155ac3a98",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Первый взгляд на данные:\n",
" Имя Возраст Баллы\n",
"0 Анна 21 89\n",
"1 Борис 22 76\n",
"2 Виктор 23 95\n",
"3 Галина 24 82\n",
"4 Саша 22 80\n",
"<class 'pandas.DataFrame'>\n",
"RangeIndex: 5 entries, 0 to 4\n",
"Data columns (total 3 columns):\n",
" # Column Non-Null Count Dtype\n",
"--- ------ -------------- -----\n",
" 0 Имя 5 non-null str \n",
" 1 Возраст 5 non-null int64\n",
" 2 Баллы 5 non-null int64\n",
"dtypes: int64(2), str(1)\n",
"memory usage: 252.0 bytes\n",
" Возраст Баллы\n",
"count 5.000000 5.000000\n",
"mean 22.400000 84.400000\n",
"std 1.140175 7.569676\n",
"min 21.000000 76.000000\n",
"25% 22.000000 80.000000\n",
"50% 22.000000 82.000000\n",
"75% 23.000000 89.000000\n",
"max 24.000000 95.000000\n",
"Имя 0\n",
"Возраст 0\n",
"Баллы 0\n",
"dtype: int64\n",
"Студенты старше 21 года:\n"
]
},
{
"data": {
"text/plain": [
" Имя Возраст Баллы Новый столбец\n",
"1 Борис 22 76 83.6\n",
"2 Виктор 23 95 104.5\n",
"3 Галина 24 82 90.2\n",
"4 Саша 22 80 88.0"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Имя</th>\n",
" <th>Возраст</th>\n",
" <th>Баллы</th>\n",
" <th>Новый столбец</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Борис</td>\n",
" <td>22</td>\n",
" <td>76</td>\n",
" <td>83.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Виктор</td>\n",
" <td>23</td>\n",
" <td>95</td>\n",
" <td>104.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Галина</td>\n",
" <td>24</td>\n",
" <td>82</td>\n",
" <td>90.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Саша</td>\n",
" <td>22</td>\n",
" <td>80</td>\n",
" <td>88.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
]
},
"metadata": {},
"output_type": "display_data",
"jetTransient": {
"display_id": null
}
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Средние баллы по возрастам:\n"
]
},
{
"data": {
"text/plain": [
"Возраст\n",
"21 89.0\n",
"22 78.0\n",
"23 95.0\n",
"24 82.0\n",
"Name: Баллы, dtype: float64"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 22
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2026-05-05T16:56:23.914438700Z",
"start_time": "2026-05-05T16:56:23.527643100Z"
}
},
"cell_type": "code",
"source": [
"import numpy as np\n",
"\n",
"# Seeded generator so the random matrix below is the same on every run.\n",
"SEED = 42\n",
"rng = np.random.default_rng(SEED)\n",
"\n",
"arr = np.array([1, 2, 3, 4, 5])\n",
"X = np.array([[1, 2], [3, 4]])\n",
"\n",
"# Five evenly spaced values from the smallest to the largest element of X.\n",
"# Deliberately not named `range`: that would shadow the built-in for the\n",
"# rest of the kernel session.\n",
"even_steps = np.linspace(np.min(X), np.max(X), 5)\n",
"print(\"\\nИнтервал от min(X) до max(X):\\n\", even_steps)\n",
"\n",
"# Standard normal samples with the same shape as X.\n",
"norm_random_array = rng.standard_normal(X.shape)\n",
"print(\"\\nСлучайный массив того же размера, что и X:\\n\", norm_random_array)\n",
"\n",
"# Matrix product of X with itself.\n",
"res_dot = np.dot(X, X)\n",
"print(\"\\nРезультат умножения матриц:\\n\", res_dot)\n",
"\n",
"# Summary statistics of the 1-D array (np.std defaults to ddof=0,\n",
"# i.e. the population standard deviation).\n",
"print(\"\\nСумма элементов массива:\", np.sum(arr))\n",
"print(\"Среднее значение:\", np.mean(arr))\n",
"print(\"Медиана:\", np.median(arr))\n",
"print(\"Стандартное отклонение:\", np.std(arr))\n"
],
"id": "159535e1a3ef473a",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Интервал от min(X) до max(X):\n",
" [1. 1.75 2.5 3.25 4. ]\n",
"\n",
"Случайный массив того же размера, что и X:\n",
" [[ 0.30471708 -1.03998411]\n",
" [ 0.7504512   0.94056472]]\n",
"\n",
"Результат умножения матриц:\n",
" [[ 7 10]\n",
" [15 22]]\n",
"\n",
"Сумма элементов массива: 15\n",
"Среднее значение: 3.0\n",
"Медиана: 3.0\n",
"Стандартное отклонение: 1.4142135623730951\n"
]
}
],
"execution_count": 23
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.14.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}