Добавлен анализ данных в JupyterLab

This commit is contained in:
Дмитрий Аглямов 2026-05-28 00:30:25 +03:00
parent beea950236
commit c6fb265a2f
3 changed files with 274 additions and 0 deletions

21
movies.csv Normal file
View File

@ -0,0 +1,21 @@
title,year,country,rating,duration,genre
The Shawshank Redemption,1994,USA,9.3,142,Drama
The Godfather,1972,USA,9.2,175,Crime
The Dark Knight,2008,USA,9.0,152,Action
Pulp Fiction,1994,USA,8.9,154,Crime
Forrest Gump,1994,USA,8.8,142,Drama
Inception,2010,USA,8.8,148,Sci-Fi
Fight Club,1999,USA,8.8,139,Drama
Interstellar,2014,USA,8.7,169,Sci-Fi
The Matrix,1999,USA,8.7,136,Sci-Fi
Gladiator,2000,USA,8.5,155,Action
The Green Mile,1999,USA,8.6,189,Drama
Whiplash,2014,USA,8.5,106,Drama
Parasite,2019,South Korea,8.5,132,Thriller
Joker,2019,USA,8.4,122,Drama
Avengers Endgame,2019,USA,8.4,181,Action
Django Unchained,2012,USA,8.5,165,Western
The Departed,2006,USA,8.5,151,Crime
The Prestige,2006,USA,8.5,130,Drama
Coco,2017,USA,8.4,105,Animation
WALL-E,2008,USA,8.4,98,Animation
1 title year country rating duration genre
2 The Shawshank Redemption 1994 USA 9.3 142 Drama
3 The Godfather 1972 USA 9.2 175 Crime
4 The Dark Knight 2008 USA 9.0 152 Action
5 Pulp Fiction 1994 USA 8.9 154 Crime
6 Forrest Gump 1994 USA 8.8 142 Drama
7 Inception 2010 USA 8.8 148 Sci-Fi
8 Fight Club 1999 USA 8.8 139 Drama
9 Interstellar 2014 USA 8.7 169 Sci-Fi
10 The Matrix 1999 USA 8.7 136 Sci-Fi
11 Gladiator 2000 USA 8.5 155 Action
12 The Green Mile 1999 USA 8.6 189 Drama
13 Whiplash 2014 USA 8.5 106 Drama
14 Parasite 2019 South Korea 8.5 132 Thriller
15 Joker 2019 USA 8.4 122 Drama
16 Avengers Endgame 2019 USA 8.4 181 Action
17 Django Unchained 2012 USA 8.5 165 Western
18 The Departed 2006 USA 8.5 151 Crime
19 The Prestige 2006 USA 8.5 130 Drama
20 Coco 2017 USA 8.4 105 Animation
21 WALL-E 2008 USA 8.4 98 Animation

View File

@ -1,5 +1,102 @@
anyio==4.13.0
argon2-cffi==25.1.0
argon2-cffi-bindings==25.1.0
arrow==1.4.0
asttokens==3.0.1
async-lru==2.3.0
attrs==26.1.0
babel==2.18.0
beautifulsoup4==4.14.3
bleach==6.3.0
certifi==2026.5.20
cffi==2.0.0
charset-normalizer==3.4.7
colorama==0.4.6
comm==0.2.3
contourpy==1.3.3
cycler==0.12.1
debugpy==1.8.20
decorator==5.3.1
defusedxml==0.7.1
executing==2.2.1
fastjsonschema==2.21.2
fonttools==4.63.0
fqdn==1.5.1
h11==0.16.0
httpcore==1.0.9
httpx==0.28.1
idna==3.16
ipykernel==7.2.0
ipython==9.13.0
ipython_pygments_lexers==1.1.1
isoduration==20.11.0
jedi==0.20.0
Jinja2==3.1.6
json5==0.14.0
jsonpointer==3.1.1
jsonschema==4.26.0
jsonschema-specifications==2025.9.1
jupyter-events==0.12.1
jupyter-lsp==2.3.1
jupyter_client==8.8.0
jupyter_core==5.9.1
jupyter_server==2.18.2
jupyter_server_terminals==0.5.4
jupyterlab==4.5.7
jupyterlab_pygments==0.3.0
jupyterlab_server==2.28.0
kiwisolver==1.5.0
lark==1.3.1
MarkupSafe==3.0.3
matplotlib==3.10.9
matplotlib-inline==0.2.2
mistune==3.2.1
nbclient==0.10.4
nbconvert==7.17.1
nbformat==5.10.4
nest-asyncio==1.6.0
notebook_shim==0.2.4
numpy==2.4.6
packaging==26.2
pandas==3.0.3
pandocfilters==1.5.1
parso==0.8.7
pillow==12.2.0
platformdirs==4.9.6
prometheus_client==0.25.0
prompt_toolkit==3.0.52
psutil==7.2.2
pure_eval==0.2.3
pycparser==3.0
Pygments==2.20.0
pyparsing==3.3.2
python-dateutil==2.9.0.post0
python-json-logger==4.1.0
pywinpty==3.0.3; sys_platform == "win32"
PyYAML==6.0.3
pyzmq==27.1.0
referencing==0.37.0
requests==2.34.2
rfc3339-validator==0.1.4
rfc3986-validator==0.1.1
rfc3987-syntax==1.1.0
rpds-py==0.30.0
seaborn==0.13.2
Send2Trash==2.1.0
setuptools==82.0.1
six==1.17.0
soupsieve==2.8.4
stack-data==0.6.3
terminado==0.18.1
tinycss2==1.4.0
tornado==6.5.6
tqdm==4.67.3
traitlets==5.15.0
typing_extensions==4.15.0
tzdata==2026.2
uri-template==1.3.0
urllib3==2.7.0
wcwidth==0.7.0
webcolors==25.10.0
webencodings==0.5.1
websocket-client==1.9.0

156
week2_analysis.ipynb Normal file
View File

@ -0,0 +1,156 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "e7297b19-5736-4a17-911d-645849a628a6",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"from tqdm import tqdm\n",
"import time"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "711ef140-61f6-4b64-8fcf-598247617d65",
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv(\"movies.csv\")\n",
"\n",
"print(\"Размер таблицы:\", df.shape)\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8d269a73-b428-421b-a9e7-e1158368d099",
"metadata": {},
"outputs": [],
"source": [
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "965d5583-2cc3-409c-8db4-a7a00494a6ed",
"metadata": {},
"outputs": [],
"source": [
"df.describe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c74ac668-6c80-49c8-8b72-eea13fdf5ded",
"metadata": {},
"outputs": [],
"source": [
"df.isnull().sum()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4940a598-773c-4f6e-8bce-e841404eebc3",
"metadata": {},
"outputs": [],
"source": [
"plt.figure(figsize=(8, 5))\n",
"sns.histplot(df[\"rating\"], bins=8, kde=True)\n",
"plt.title(\"Распределение рейтингов фильмов\")\n",
"plt.xlabel(\"Рейтинг\")\n",
"plt.ylabel(\"Количество фильмов\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "906ffd6f-e7a2-402a-9a33-40a075cc5685",
"metadata": {},
"outputs": [],
"source": [
"plt.figure(figsize=(8, 5))\n",
"sns.scatterplot(data=df, x=\"year\", y=\"rating\", hue=\"genre\")\n",
"plt.title(\"Связь года выпуска и рейтинга фильма\")\n",
"plt.xlabel(\"Год выпуска\")\n",
"plt.ylabel(\"Рейтинг\")\n",
"plt.legend(title=\"Жанр\", bbox_to_anchor=(1.05, 1), loc=\"upper left\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a05e677a-74e7-4a3a-adce-02f8dc04153e",
"metadata": {},
"outputs": [],
"source": [
"plt.figure(figsize=(10, 5))\n",
"sns.boxplot(data=df, x=\"genre\", y=\"rating\")\n",
"plt.title(\"Распределение рейтингов по жанрам\")\n",
"plt.xlabel(\"Жанр\")\n",
"plt.ylabel(\"Рейтинг\")\n",
"plt.xticks(rotation=45)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "55e5ea86-f072-419c-b937-d752f8fec4e3",
"metadata": {},
"outputs": [],
"source": [
"ratings = df[\"rating\"].to_numpy()\n",
"\n",
"print(\"Средний рейтинг:\", np.mean(ratings))\n",
"print(\"Минимальный рейтинг:\", np.min(ratings))\n",
"print(\"Максимальный рейтинг:\", np.max(ratings))\n",
"print(\"Стандартное отклонение:\", np.std(ratings))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "86546195-d108-4c55-9401-9d7017d96277",
"metadata": {},
"outputs": [],
"source": [
"for i in tqdm(range(100)):\n",
" time.sleep(0.01)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.14.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}