practice_git_python/week2_analysis.ipynb

157 lines
3.8 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "e7297b19-5736-4a17-911d-645849a628a6",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"from tqdm import tqdm\n",
"import time"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "711ef140-61f6-4b64-8fcf-598247617d65",
"metadata": {},
"outputs": [],
"source": [
"# Read movies.csv into a DataFrame; the path is relative to the kernel's working directory.\n",
"df = pd.read_csv(\"movies.csv\")\n",
"\n",
"# Print the (rows, columns) shape, then show the first five rows as the cell output.\n",
"print(\"Размер таблицы:\", df.shape)\n",
"df.head(5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8d269a73-b428-421b-a9e7-e1158368d099",
"metadata": {},
"outputs": [],
"source": [
"# Print the frame summary: column names, non-null counts, dtypes and memory usage.\n",
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "965d5583-2cc3-409c-8db4-a7a00494a6ed",
"metadata": {},
"outputs": [],
"source": [
"# Descriptive statistics (count, mean, std, min, quartiles, max) with pandas' default column selection.\n",
"df.describe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c74ac668-6c80-49c8-8b72-eea13fdf5ded",
"metadata": {},
"outputs": [],
"source": [
"# Number of missing values in each column.\n",
"df.isna().sum()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4940a598-773c-4f6e-8bce-e841404eebc3",
"metadata": {},
"outputs": [],
"source": [
"# Histogram of the rating column (8 bins) with a KDE curve, drawn on an explicit Axes.\n",
"fig, ax = plt.subplots(figsize=(8, 5))\n",
"sns.histplot(data=df, x=\"rating\", bins=8, kde=True, ax=ax)\n",
"ax.set(\n",
"    title=\"Распределение рейтингов фильмов\",\n",
"    xlabel=\"Рейтинг\",\n",
"    ylabel=\"Количество фильмов\",\n",
")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "906ffd6f-e7a2-402a-9a33-40a075cc5685",
"metadata": {},
"outputs": [],
"source": [
"# Scatter plot of rating against year, coloured by the genre column.\n",
"fig, ax = plt.subplots(figsize=(8, 5))\n",
"sns.scatterplot(data=df, x=\"year\", y=\"rating\", hue=\"genre\", ax=ax)\n",
"ax.set(\n",
"    title=\"Связь года выпуска и рейтинга фильма\",\n",
"    xlabel=\"Год выпуска\",\n",
"    ylabel=\"Рейтинг\",\n",
")\n",
"# Pin the legend's upper-left corner just past the right edge of the axes.\n",
"ax.legend(title=\"Жанр\", bbox_to_anchor=(1.05, 1), loc=\"upper left\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a05e677a-74e7-4a3a-adce-02f8dc04153e",
"metadata": {},
"outputs": [],
"source": [
"# Box plot of the rating column, one box per value of the genre column.\n",
"fig, ax = plt.subplots(figsize=(10, 5))\n",
"sns.boxplot(data=df, x=\"genre\", y=\"rating\", ax=ax)\n",
"ax.set(\n",
"    title=\"Распределение рейтингов по жанрам\",\n",
"    xlabel=\"Жанр\",\n",
"    ylabel=\"Рейтинг\",\n",
")\n",
"# Rotate the x tick labels by 45 degrees.\n",
"ax.tick_params(axis=\"x\", labelrotation=45)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "55e5ea86-f072-419c-b937-d752f8fec4e3",
"metadata": {},
"outputs": [],
"source": [
"# Convert the rating column to a NumPy array for the NumPy reducers below.\n",
"ratings = df[\"rating\"].to_numpy()\n",
"\n",
"# Use the NaN-aware reducers: with the plain ones a single missing rating turns\n",
"# every statistic into nan, while df.describe() skips missing values.\n",
"# For data without missing values the results are unchanged.\n",
"print(\"Средний рейтинг:\", np.nanmean(ratings))\n",
"print(\"Минимальный рейтинг:\", np.nanmin(ratings))\n",
"print(\"Максимальный рейтинг:\", np.nanmax(ratings))\n",
"# NumPy's default is the population standard deviation (ddof=0), so this value\n",
"# is slightly smaller than the sample std (ddof=1) reported by df.describe().\n",
"print(\"Стандартное отклонение:\", np.nanstd(ratings))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "86546195-d108-4c55-9401-9d7017d96277",
"metadata": {},
"outputs": [],
"source": [
"# Progress-bar demo: 100 iterations, each sleeping 0.01 s, wrapped in tqdm.\n",
"for i in tqdm(range(100)):\n",
"    time.sleep(0.01)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.14.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}