Add JupyterLab analysis notebook

This commit is contained in:
Тимур Рочев 2026-05-30 11:56:33 +03:00
parent edc59e10c7
commit 63bfdd0759

View File

@@ -0,0 +1,144 @@
{
"cells": [
{
 "cell_type": "code",
 "execution_count": null,
 "id": "e33cfdf4",
 "metadata": {
  "vscode": {
   "languageId": "python"
  }
 },
 "outputs": [],
 "source": [
  "# Cell 1: imports (standard library first, then third-party)\n",
  "import time\n",
  "\n",
  "import matplotlib.pyplot as plt\n",
  "import pandas as pd\n",
  "import seaborn as sns\n",
  "from tqdm import tqdm"
 ]
},
{
 "cell_type": "code",
 "execution_count": null,
 "id": "bc6236ff",
 "metadata": {
  "vscode": {
   "languageId": "python"
  }
 },
 "outputs": [],
 "source": [
  "# Cell 2: load the dataset\n",
  "# index_col=0 makes the first CSV column the row index instead of a data column.\n",
  "df = pd.read_csv(\"kinopoisk-top250.csv\", index_col=0)\n",
  "# Print the (rows, columns) shape of the loaded table.\n",
  "print(f'Размер таблицы: {df.shape}')"
 ]
},
{
 "cell_type": "code",
 "execution_count": null,
 "id": "e616e786",
 "metadata": {
  "vscode": {
   "languageId": "python"
  }
 },
 "outputs": [],
 "source": [
  "# Cell 3: quick overview of the loaded table\n",
  "# info() prints its summary itself, so it is called as a plain statement.\n",
  "df.info()\n",
  "# describe() is kept as the last expression so the notebook renders its result as the cell output.\n",
  "df.describe()"
 ]
},
{
 "cell_type": "code",
 "execution_count": null,
 "id": "d9df3843",
 "metadata": {
  "vscode": {
   "languageId": "python"
  }
 },
 "outputs": [],
 "source": [
  "# Cell 4: histogram of the 'year' column with a KDE curve overlaid\n",
  "# Draw on an explicit Axes instead of relying on pyplot's implicit current figure.\n",
  "fig, ax = plt.subplots(figsize=(8, 5))\n",
  "sns.histplot(df['year'], kde=True, bins=20, color='blue', ax=ax)\n",
  "ax.set_title('Распределение фильмов по годам')\n",
  "ax.set_xlabel('Год')\n",
  "ax.set_ylabel('Количество фильмов')\n",
  "plt.show()"
 ]
},
{
 "cell_type": "code",
 "execution_count": null,
 "id": "e82506e0",
 "metadata": {
  "vscode": {
   "languageId": "python"
  }
 },
 "outputs": [],
 "source": [
  "# Cell 5: scatter plot of 'director' (x) against 'year' (y), colored by 'rating_ball'\n",
  "# Work on an independent copy of the first 30 rows of df.\n",
  "df_top30 = df.head(30).copy()\n",
  "# Draw on an explicit Axes instead of relying on pyplot's implicit current figure.\n",
  "fig, ax = plt.subplots(figsize=(10, 6))\n",
  "sns.scatterplot(\n",
  "    x='director',\n",
  "    y='year',\n",
  "    hue='rating_ball',\n",
  "    data=df_top30,\n",
  "    palette='viridis',\n",
  "    ax=ax,\n",
  ")\n",
  "ax.set_title('Режиссеры, год выпуска и рейтинг')\n",
  "ax.set_xlabel('Режиссер')\n",
  "ax.set_ylabel('Год выпуска')\n",
  "# Rotate the x tick labels by 90 degrees.\n",
  "ax.tick_params(axis='x', labelrotation=90)\n",
  "plt.show()"
 ]
},
{
 "cell_type": "code",
 "execution_count": null,
 "id": "f51c0053",
 "metadata": {
  "vscode": {
   "languageId": "python"
  }
 },
 "outputs": [],
 "source": [
  "# Cell 6: box plot of 'rating_ball' per 'year' for rows with year > 2000\n",
  "# Keep only rows whose year is strictly greater than 2000; copy so the subset is independent of df.\n",
  "filtered_df = df[df['year'] > 2000].copy()\n",
  "# Draw on an explicit Axes instead of relying on pyplot's implicit current figure.\n",
  "fig, ax = plt.subplots(figsize=(10, 6))\n",
  "# seaborn 0.13 deprecated passing `palette` without `hue` (FutureWarning; removal announced for 0.14).\n",
  "# As that warning recommends, map hue to the x variable and hide the redundant legend;\n",
  "# this keeps one pastel color per year. NOTE: the `legend` argument needs seaborn >= 0.13.\n",
  "sns.boxplot(\n",
  "    x='year',\n",
  "    y='rating_ball',\n",
  "    hue='year',\n",
  "    data=filtered_df,\n",
  "    palette='pastel',\n",
  "    legend=False,\n",
  "    ax=ax,\n",
  ")\n",
  "ax.set_title('Распределение рейтингов (>2000)')\n",
  "ax.set_xlabel('Год')\n",
  "ax.set_ylabel('Рейтинг')\n",
  "# Rotate the x tick labels by 90 degrees.\n",
  "ax.tick_params(axis='x', labelrotation=90)\n",
  "plt.show()"
 ]
},
{
 "cell_type": "code",
 "execution_count": null,
 "id": "bc5173ad",
 "metadata": {
  "vscode": {
   "languageId": "python"
  }
 },
 "outputs": [],
 "source": [
  "# Cell 7: tqdm progress-bar demo\n",
  "# 100 iterations, each sleeping 0.01 s, wrapped in tqdm so a progress bar is shown.\n",
  "for i in tqdm(range(100), desc=\"Обработка\"):\n",
  "    time.sleep(0.01)\n",
  "print(\"Готово\")"
 ]
}
],
"metadata": {
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 5
}