Найден датасет, начат анализ задачи
This commit is contained in:
parent
9b100fd5e0
commit
bd2af689de
567
.ipynb_checkpoints/week2_analysis-checkpoint.ipynb
Normal file
567
.ipynb_checkpoints/week2_analysis-checkpoint.ipynb
Normal file
@ -0,0 +1,567 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"id": "fa1e3762-fa47-4329-94a6-a0ba89929225",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Датасет скопирован в рабочую директорию: ./datasets\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import kagglehub\n",
|
||||
"import shutil\n",
|
||||
"import os\n",
|
||||
"import tqdm as notebook_tqdm\n",
|
||||
"\n",
|
||||
"# Скачиваем датасет в кеш\n",
|
||||
"cache_path = kagglehub.dataset_download(\"podsyp/sales-in-craft-beer-bar\")\n",
|
||||
"\n",
|
||||
"# Укажите целевую рабочую директорию\n",
|
||||
"target_directory = \"./datasets\"\n",
|
||||
"\n",
|
||||
"# Создайте целевую директорию, если она не существует\n",
|
||||
"os.makedirs(target_directory, exist_ok=True)\n",
|
||||
"\n",
|
||||
"# Копируем файлы из кеша в рабочую директорию\n",
|
||||
"shutil.copytree(cache_path, target_directory, dirs_exist_ok=True)\n",
|
||||
"\n",
|
||||
"print(f\"Датасет скопирован в рабочую директорию: {target_directory}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "18c8b086-9293-43af-bb52-3452bbc69f9b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"df_pr = pd.read_csv(\"./datasets/Product_range.csv\")\n",
|
||||
"df_tr = pd.read_csv(\"./datasets/Transactions.csv\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"id": "aa213ec0-625f-4811-9c6e-072ca2a61e52",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||||
"RangeIndex: 5314 entries, 0 to 5313\n",
|
||||
"Data columns (total 8 columns):\n",
|
||||
" # Column Non-Null Count Dtype \n",
|
||||
"--- ------ -------------- ----- \n",
|
||||
" 0 Product_code 5314 non-null int64 \n",
|
||||
" 1 Vendor_code 4288 non-null object \n",
|
||||
" 2 Name 5314 non-null object \n",
|
||||
" 3 Retail_price 4878 non-null float64\n",
|
||||
" 4 Base_unit 4910 non-null object \n",
|
||||
" 5 Country_of_Origin 4205 non-null object \n",
|
||||
" 6 Size 4626 non-null float64\n",
|
||||
" 7 ABV 4622 non-null float64\n",
|
||||
"dtypes: float64(3), int64(1), object(4)\n",
|
||||
"memory usage: 332.3+ KB\n",
|
||||
"None\n",
|
||||
" Product_code Retail_price Size ABV\n",
|
||||
"count 5314.000000 4878.000000 4626.000000 4622.000000\n",
|
||||
"mean 2690.844750 637.839502 0.626917 7.074273\n",
|
||||
"std 1543.217814 504.895006 0.953290 2.457970\n",
|
||||
"min 2.000000 1.000000 0.150000 0.500000\n",
|
||||
"25% 1357.250000 350.000000 0.330000 5.300000\n",
|
||||
"50% 2690.500000 520.000000 0.500000 6.500000\n",
|
||||
"75% 4027.750000 730.000000 1.000000 8.200000\n",
|
||||
"max 5358.000000 8484.850000 30.000000 21.000000\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>Product_code</th>\n",
|
||||
" <th>Vendor_code</th>\n",
|
||||
" <th>Name</th>\n",
|
||||
" <th>Retail_price</th>\n",
|
||||
" <th>Base_unit</th>\n",
|
||||
" <th>Country_of_Origin</th>\n",
|
||||
" <th>Size</th>\n",
|
||||
" <th>ABV</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>5028</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>1 Symbiotica Apple 0.375</td>\n",
|
||||
" <td>300.0</td>\n",
|
||||
" <td>Pieces</td>\n",
|
||||
" <td>Russia</td>\n",
|
||||
" <td>0.375</td>\n",
|
||||
" <td>4.5</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>4846</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>1 Symbiotica Hard Kombucha Renegade Aronia 0.33</td>\n",
|
||||
" <td>200.0</td>\n",
|
||||
" <td>Pieces</td>\n",
|
||||
" <td>Russia</td>\n",
|
||||
" <td>0.330</td>\n",
|
||||
" <td>3.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>1340</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>1000 IBU Imperial IPA Barrel l</td>\n",
|
||||
" <td>960.0</td>\n",
|
||||
" <td>Liters</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>1.000</td>\n",
|
||||
" <td>9.6</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>4372</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>18th Street Brewery</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>USA</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>4302</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>18th Street Brewery Deal With The Devil 0.473</td>\n",
|
||||
" <td>630.0</td>\n",
|
||||
" <td>Pieces</td>\n",
|
||||
" <td>USA</td>\n",
|
||||
" <td>0.473</td>\n",
|
||||
" <td>8.5</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>5309</th>\n",
|
||||
" <td>868</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>Spagetti Vestern</td>\n",
|
||||
" <td>880.0</td>\n",
|
||||
" <td>Liters</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>1.000</td>\n",
|
||||
" <td>8.7</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>5310</th>\n",
|
||||
" <td>1861</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>Stoun Imperial Rashn Stout Barrel l</td>\n",
|
||||
" <td>1200.0</td>\n",
|
||||
" <td>Liters</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>1.000</td>\n",
|
||||
" <td>10.8</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>5311</th>\n",
|
||||
" <td>4724</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>Semjuel Adams Boston Lager Barrel</td>\n",
|
||||
" <td>720.0</td>\n",
|
||||
" <td>Liters</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>1.000</td>\n",
|
||||
" <td>4.8</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>5312</th>\n",
|
||||
" <td>822</td>\n",
|
||||
" <td>Bakunin</td>\n",
|
||||
" <td>Bakunin Urban Juice</td>\n",
|
||||
" <td>600.0</td>\n",
|
||||
" <td>Liters</td>\n",
|
||||
" <td>Russia</td>\n",
|
||||
" <td>1.000</td>\n",
|
||||
" <td>7.2</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>5313</th>\n",
|
||||
" <td>1682</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>Hazeljuteli Shoktabulous Barrel l temnoe fil't...</td>\n",
|
||||
" <td>880.0</td>\n",
|
||||
" <td>Liters</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>1.000</td>\n",
|
||||
" <td>5.7</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>5314 rows × 8 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" Product_code Vendor_code \\\n",
|
||||
"0 5028 NaN \n",
|
||||
"1 4846 NaN \n",
|
||||
"2 1340 NaN \n",
|
||||
"3 4372 NaN \n",
|
||||
"4 4302 NaN \n",
|
||||
"... ... ... \n",
|
||||
"5309 868 NaN \n",
|
||||
"5310 1861 NaN \n",
|
||||
"5311 4724 NaN \n",
|
||||
"5312 822 Bakunin \n",
|
||||
"5313 1682 NaN \n",
|
||||
"\n",
|
||||
" Name Retail_price \\\n",
|
||||
"0 1 Symbiotica Apple 0.375 300.0 \n",
|
||||
"1 1 Symbiotica Hard Kombucha Renegade Aronia 0.33 200.0 \n",
|
||||
"2 1000 IBU Imperial IPA Barrel l 960.0 \n",
|
||||
"3 18th Street Brewery NaN \n",
|
||||
"4 18th Street Brewery Deal With The Devil 0.473 630.0 \n",
|
||||
"... ... ... \n",
|
||||
"5309 Spagetti Vestern 880.0 \n",
|
||||
"5310 Stoun Imperial Rashn Stout Barrel l 1200.0 \n",
|
||||
"5311 Semjuel Adams Boston Lager Barrel 720.0 \n",
|
||||
"5312 Bakunin Urban Juice 600.0 \n",
|
||||
"5313 Hazeljuteli Shoktabulous Barrel l temnoe fil't... 880.0 \n",
|
||||
"\n",
|
||||
" Base_unit Country_of_Origin Size ABV \n",
|
||||
"0 Pieces Russia 0.375 4.5 \n",
|
||||
"1 Pieces Russia 0.330 3.0 \n",
|
||||
"2 Liters NaN 1.000 9.6 \n",
|
||||
"3 NaN USA NaN NaN \n",
|
||||
"4 Pieces USA 0.473 8.5 \n",
|
||||
"... ... ... ... ... \n",
|
||||
"5309 Liters NaN 1.000 8.7 \n",
|
||||
"5310 Liters NaN 1.000 10.8 \n",
|
||||
"5311 Liters NaN 1.000 4.8 \n",
|
||||
"5312 Liters Russia 1.000 7.2 \n",
|
||||
"5313 Liters NaN 1.000 5.7 \n",
|
||||
"\n",
|
||||
"[5314 rows x 8 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 24,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(df_pr.info())\n",
|
||||
"print(df_pr.describe())\n",
|
||||
"df_pr"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"id": "13968604-1997-4b8b-a250-906fc834a2ab",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||||
"RangeIndex: 50084 entries, 0 to 50083\n",
|
||||
"Data columns (total 8 columns):\n",
|
||||
" # Column Non-Null Count Dtype \n",
|
||||
"--- ------ -------------- ----- \n",
|
||||
" 0 Date_and_time_of_unloading 50084 non-null object \n",
|
||||
" 1 Product_code 50084 non-null int64 \n",
|
||||
" 2 Amount 50084 non-null float64\n",
|
||||
" 3 Sale_amount 50033 non-null float64\n",
|
||||
" 4 Discount_amount 39882 non-null float64\n",
|
||||
" 5 Profit 50070 non-null float64\n",
|
||||
" 6 Percentage_markup 48145 non-null float64\n",
|
||||
" 7 Discount_percentage 39882 non-null float64\n",
|
||||
"dtypes: float64(6), int64(1), object(1)\n",
|
||||
"memory usage: 3.1+ MB\n",
|
||||
"None\n",
|
||||
" Product_code Amount Sale_amount Discount_amount \\\n",
|
||||
"count 50084.000000 50084.000000 50033.000000 39882.000000 \n",
|
||||
"mean 2153.160031 3.553146 1429.786677 243.761323 \n",
|
||||
"std 1367.357705 5.485805 2419.585455 405.902060 \n",
|
||||
"min 99.000000 0.033000 0.410000 0.010000 \n",
|
||||
"25% 899.000000 1.000000 330.000000 60.000000 \n",
|
||||
"50% 2098.000000 2.000000 630.000000 121.760000 \n",
|
||||
"75% 3059.000000 4.000000 1436.130000 270.615000 \n",
|
||||
"max 5322.000000 248.000000 58184.070000 20440.630000 \n",
|
||||
"\n",
|
||||
" Profit Percentage_markup Discount_percentage \n",
|
||||
"count 50070.000000 48145.000000 39882.000000 \n",
|
||||
"mean 705.901987 109.184511 17.251313 \n",
|
||||
"std 1352.628611 1182.538753 16.608075 \n",
|
||||
"min -9300.630000 -100.000000 0.000000 \n",
|
||||
"25% 143.070000 59.850000 7.930000 \n",
|
||||
"50% 283.000000 84.210000 12.000000 \n",
|
||||
"75% 687.015000 107.790000 20.210000 \n",
|
||||
"max 33352.460000 79900.000000 100.000000 \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>Date_and_time_of_unloading</th>\n",
|
||||
" <th>Product_code</th>\n",
|
||||
" <th>Amount</th>\n",
|
||||
" <th>Sale_amount</th>\n",
|
||||
" <th>Discount_amount</th>\n",
|
||||
" <th>Profit</th>\n",
|
||||
" <th>Percentage_markup</th>\n",
|
||||
" <th>Discount_percentage</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>2020-01-01 23:00:00</td>\n",
|
||||
" <td>144</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>280.00</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>155.00</td>\n",
|
||||
" <td>124.00</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>2020-01-01 23:00:00</td>\n",
|
||||
" <td>209</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>545.73</td>\n",
|
||||
" <td>294.27</td>\n",
|
||||
" <td>75.73</td>\n",
|
||||
" <td>16.11</td>\n",
|
||||
" <td>35.03</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>2020-01-01 23:00:00</td>\n",
|
||||
" <td>213</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>1265.05</td>\n",
|
||||
" <td>34.95</td>\n",
|
||||
" <td>653.05</td>\n",
|
||||
" <td>106.71</td>\n",
|
||||
" <td>2.69</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>2020-01-01 23:00:00</td>\n",
|
||||
" <td>217</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>630.00</td>\n",
|
||||
" <td>70.00</td>\n",
|
||||
" <td>220.50</td>\n",
|
||||
" <td>53.85</td>\n",
|
||||
" <td>10.00</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>2020-01-01 23:00:00</td>\n",
|
||||
" <td>222</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>1104.75</td>\n",
|
||||
" <td>195.25</td>\n",
|
||||
" <td>393.75</td>\n",
|
||||
" <td>55.38</td>\n",
|
||||
" <td>15.02</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>50079</th>\n",
|
||||
" <td>2022-09-18 15:00:00</td>\n",
|
||||
" <td>5316</td>\n",
|
||||
" <td>6.0</td>\n",
|
||||
" <td>1875.95</td>\n",
|
||||
" <td>104.05</td>\n",
|
||||
" <td>1095.95</td>\n",
|
||||
" <td>140.51</td>\n",
|
||||
" <td>5.26</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>50080</th>\n",
|
||||
" <td>2022-09-18 15:00:00</td>\n",
|
||||
" <td>5317</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>555.95</td>\n",
|
||||
" <td>104.05</td>\n",
|
||||
" <td>315.95</td>\n",
|
||||
" <td>131.65</td>\n",
|
||||
" <td>15.77</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>50081</th>\n",
|
||||
" <td>2022-09-18 15:00:00</td>\n",
|
||||
" <td>5318</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>572.50</td>\n",
|
||||
" <td>87.50</td>\n",
|
||||
" <td>312.50</td>\n",
|
||||
" <td>120.19</td>\n",
|
||||
" <td>13.26</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>50082</th>\n",
|
||||
" <td>2022-09-18 15:00:00</td>\n",
|
||||
" <td>5321</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>300.00</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>180.00</td>\n",
|
||||
" <td>150.00</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>50083</th>\n",
|
||||
" <td>2022-09-18 15:00:00</td>\n",
|
||||
" <td>5322</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>600.00</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>340.00</td>\n",
|
||||
" <td>130.77</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>50084 rows × 8 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" Date_and_time_of_unloading Product_code Amount Sale_amount \\\n",
|
||||
"0 2020-01-01 23:00:00 144 1.0 280.00 \n",
|
||||
"1 2020-01-01 23:00:00 209 2.0 545.73 \n",
|
||||
"2 2020-01-01 23:00:00 213 2.0 1265.05 \n",
|
||||
"3 2020-01-01 23:00:00 217 1.0 630.00 \n",
|
||||
"4 2020-01-01 23:00:00 222 2.0 1104.75 \n",
|
||||
"... ... ... ... ... \n",
|
||||
"50079 2022-09-18 15:00:00 5316 6.0 1875.95 \n",
|
||||
"50080 2022-09-18 15:00:00 5317 2.0 555.95 \n",
|
||||
"50081 2022-09-18 15:00:00 5318 2.0 572.50 \n",
|
||||
"50082 2022-09-18 15:00:00 5321 1.0 300.00 \n",
|
||||
"50083 2022-09-18 15:00:00 5322 2.0 600.00 \n",
|
||||
"\n",
|
||||
" Discount_amount Profit Percentage_markup Discount_percentage \n",
|
||||
"0 NaN 155.00 124.00 NaN \n",
|
||||
"1 294.27 75.73 16.11 35.03 \n",
|
||||
"2 34.95 653.05 106.71 2.69 \n",
|
||||
"3 70.00 220.50 53.85 10.00 \n",
|
||||
"4 195.25 393.75 55.38 15.02 \n",
|
||||
"... ... ... ... ... \n",
|
||||
"50079 104.05 1095.95 140.51 5.26 \n",
|
||||
"50080 104.05 315.95 131.65 15.77 \n",
|
||||
"50081 87.50 312.50 120.19 13.26 \n",
|
||||
"50082 NaN 180.00 150.00 NaN \n",
|
||||
"50083 NaN 340.00 130.77 NaN \n",
|
||||
"\n",
|
||||
"[50084 rows x 8 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 25,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(df_tr.info())\n",
|
||||
"print(df_tr.describe())\n",
|
||||
"df_tr"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
1
Pipfile
1
Pipfile
@ -10,6 +10,7 @@ pandas = "*"
|
||||
matplotlib = "*"
|
||||
seaborn = "*"
|
||||
tqdm = "*"
|
||||
kagglehub = "*"
|
||||
|
||||
[dev-packages]
|
||||
|
||||
|
11
Pipfile.lock
generated
11
Pipfile.lock
generated
@ -1,7 +1,7 @@
|
||||
{
|
||||
"_meta": {
|
||||
"hash": {
|
||||
"sha256": "1d557e5cfc407c7091eb306b7694852c98248c8df7ea9e2b7f3a7e484c1bbdbf"
|
||||
"sha256": "c54c5c40dce5ec80b0605b62020eea74e559b5454438d53a87e8f814e4b498e5"
|
||||
},
|
||||
"pipfile-spec": 6,
|
||||
"requires": {
|
||||
@ -696,6 +696,15 @@
|
||||
"markers": "python_version >= '3.8'",
|
||||
"version": "==2.27.3"
|
||||
},
|
||||
"kagglehub": {
|
||||
"hashes": [
|
||||
"sha256:7df4238eea20817bce13bfacbe79ff4c0a583a9e876bfaf16d7ad6179611fb7c",
|
||||
"sha256:82a204e77919da54021038971ed735450c91ea479c5c003d140a22503bc20981"
|
||||
],
|
||||
"index": "pypi",
|
||||
"markers": "python_version >= '3.9'",
|
||||
"version": "==0.3.10"
|
||||
},
|
||||
"kiwisolver": {
|
||||
"hashes": [
|
||||
"sha256:01c3d31902c7db5fb6182832713d3b4122ad9317c2c5877d0539227d96bb2e50",
|
||||
|
5315
datasets/.ipynb_checkpoints/Product_range-checkpoint.csv
Normal file
5315
datasets/.ipynb_checkpoints/Product_range-checkpoint.csv
Normal file
File diff suppressed because it is too large
Load Diff
50085
datasets/.ipynb_checkpoints/Transactions-checkpoint.csv
Normal file
50085
datasets/.ipynb_checkpoints/Transactions-checkpoint.csv
Normal file
File diff suppressed because it is too large
Load Diff
5315
datasets/Product_range.csv
Normal file
5315
datasets/Product_range.csv
Normal file
File diff suppressed because it is too large
Load Diff
50085
datasets/Transactions.csv
Normal file
50085
datasets/Transactions.csv
Normal file
File diff suppressed because it is too large
Load Diff
567
week2_analysis.ipynb
Normal file
567
week2_analysis.ipynb
Normal file
@ -0,0 +1,567 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"id": "fa1e3762-fa47-4329-94a6-a0ba89929225",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Датасет скопирован в рабочую директорию: ./datasets\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import kagglehub\n",
|
||||
"import shutil\n",
|
||||
"import os\n",
|
||||
"import tqdm as notebook_tqdm\n",
|
||||
"\n",
|
||||
"# Скачиваем датасет в кеш\n",
|
||||
"cache_path = kagglehub.dataset_download(\"podsyp/sales-in-craft-beer-bar\")\n",
|
||||
"\n",
|
||||
"# Укажите целевую рабочую директорию\n",
|
||||
"target_directory = \"./datasets\"\n",
|
||||
"\n",
|
||||
"# Создайте целевую директорию, если она не существует\n",
|
||||
"os.makedirs(target_directory, exist_ok=True)\n",
|
||||
"\n",
|
||||
"# Копируем файлы из кеша в рабочую директорию\n",
|
||||
"shutil.copytree(cache_path, target_directory, dirs_exist_ok=True)\n",
|
||||
"\n",
|
||||
"print(f\"Датасет скопирован в рабочую директорию: {target_directory}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "18c8b086-9293-43af-bb52-3452bbc69f9b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"df_pr = pd.read_csv(\"./datasets/Product_range.csv\")\n",
|
||||
"df_tr = pd.read_csv(\"./datasets/Transactions.csv\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"id": "aa213ec0-625f-4811-9c6e-072ca2a61e52",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||||
"RangeIndex: 5314 entries, 0 to 5313\n",
|
||||
"Data columns (total 8 columns):\n",
|
||||
" # Column Non-Null Count Dtype \n",
|
||||
"--- ------ -------------- ----- \n",
|
||||
" 0 Product_code 5314 non-null int64 \n",
|
||||
" 1 Vendor_code 4288 non-null object \n",
|
||||
" 2 Name 5314 non-null object \n",
|
||||
" 3 Retail_price 4878 non-null float64\n",
|
||||
" 4 Base_unit 4910 non-null object \n",
|
||||
" 5 Country_of_Origin 4205 non-null object \n",
|
||||
" 6 Size 4626 non-null float64\n",
|
||||
" 7 ABV 4622 non-null float64\n",
|
||||
"dtypes: float64(3), int64(1), object(4)\n",
|
||||
"memory usage: 332.3+ KB\n",
|
||||
"None\n",
|
||||
" Product_code Retail_price Size ABV\n",
|
||||
"count 5314.000000 4878.000000 4626.000000 4622.000000\n",
|
||||
"mean 2690.844750 637.839502 0.626917 7.074273\n",
|
||||
"std 1543.217814 504.895006 0.953290 2.457970\n",
|
||||
"min 2.000000 1.000000 0.150000 0.500000\n",
|
||||
"25% 1357.250000 350.000000 0.330000 5.300000\n",
|
||||
"50% 2690.500000 520.000000 0.500000 6.500000\n",
|
||||
"75% 4027.750000 730.000000 1.000000 8.200000\n",
|
||||
"max 5358.000000 8484.850000 30.000000 21.000000\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>Product_code</th>\n",
|
||||
" <th>Vendor_code</th>\n",
|
||||
" <th>Name</th>\n",
|
||||
" <th>Retail_price</th>\n",
|
||||
" <th>Base_unit</th>\n",
|
||||
" <th>Country_of_Origin</th>\n",
|
||||
" <th>Size</th>\n",
|
||||
" <th>ABV</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>5028</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>1 Symbiotica Apple 0.375</td>\n",
|
||||
" <td>300.0</td>\n",
|
||||
" <td>Pieces</td>\n",
|
||||
" <td>Russia</td>\n",
|
||||
" <td>0.375</td>\n",
|
||||
" <td>4.5</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>4846</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>1 Symbiotica Hard Kombucha Renegade Aronia 0.33</td>\n",
|
||||
" <td>200.0</td>\n",
|
||||
" <td>Pieces</td>\n",
|
||||
" <td>Russia</td>\n",
|
||||
" <td>0.330</td>\n",
|
||||
" <td>3.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>1340</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>1000 IBU Imperial IPA Barrel l</td>\n",
|
||||
" <td>960.0</td>\n",
|
||||
" <td>Liters</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>1.000</td>\n",
|
||||
" <td>9.6</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>4372</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>18th Street Brewery</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>USA</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>4302</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>18th Street Brewery Deal With The Devil 0.473</td>\n",
|
||||
" <td>630.0</td>\n",
|
||||
" <td>Pieces</td>\n",
|
||||
" <td>USA</td>\n",
|
||||
" <td>0.473</td>\n",
|
||||
" <td>8.5</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>5309</th>\n",
|
||||
" <td>868</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>Spagetti Vestern</td>\n",
|
||||
" <td>880.0</td>\n",
|
||||
" <td>Liters</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>1.000</td>\n",
|
||||
" <td>8.7</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>5310</th>\n",
|
||||
" <td>1861</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>Stoun Imperial Rashn Stout Barrel l</td>\n",
|
||||
" <td>1200.0</td>\n",
|
||||
" <td>Liters</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>1.000</td>\n",
|
||||
" <td>10.8</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>5311</th>\n",
|
||||
" <td>4724</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>Semjuel Adams Boston Lager Barrel</td>\n",
|
||||
" <td>720.0</td>\n",
|
||||
" <td>Liters</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>1.000</td>\n",
|
||||
" <td>4.8</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>5312</th>\n",
|
||||
" <td>822</td>\n",
|
||||
" <td>Bakunin</td>\n",
|
||||
" <td>Bakunin Urban Juice</td>\n",
|
||||
" <td>600.0</td>\n",
|
||||
" <td>Liters</td>\n",
|
||||
" <td>Russia</td>\n",
|
||||
" <td>1.000</td>\n",
|
||||
" <td>7.2</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>5313</th>\n",
|
||||
" <td>1682</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>Hazeljuteli Shoktabulous Barrel l temnoe fil't...</td>\n",
|
||||
" <td>880.0</td>\n",
|
||||
" <td>Liters</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>1.000</td>\n",
|
||||
" <td>5.7</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>5314 rows × 8 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" Product_code Vendor_code \\\n",
|
||||
"0 5028 NaN \n",
|
||||
"1 4846 NaN \n",
|
||||
"2 1340 NaN \n",
|
||||
"3 4372 NaN \n",
|
||||
"4 4302 NaN \n",
|
||||
"... ... ... \n",
|
||||
"5309 868 NaN \n",
|
||||
"5310 1861 NaN \n",
|
||||
"5311 4724 NaN \n",
|
||||
"5312 822 Bakunin \n",
|
||||
"5313 1682 NaN \n",
|
||||
"\n",
|
||||
" Name Retail_price \\\n",
|
||||
"0 1 Symbiotica Apple 0.375 300.0 \n",
|
||||
"1 1 Symbiotica Hard Kombucha Renegade Aronia 0.33 200.0 \n",
|
||||
"2 1000 IBU Imperial IPA Barrel l 960.0 \n",
|
||||
"3 18th Street Brewery NaN \n",
|
||||
"4 18th Street Brewery Deal With The Devil 0.473 630.0 \n",
|
||||
"... ... ... \n",
|
||||
"5309 Spagetti Vestern 880.0 \n",
|
||||
"5310 Stoun Imperial Rashn Stout Barrel l 1200.0 \n",
|
||||
"5311 Semjuel Adams Boston Lager Barrel 720.0 \n",
|
||||
"5312 Bakunin Urban Juice 600.0 \n",
|
||||
"5313 Hazeljuteli Shoktabulous Barrel l temnoe fil't... 880.0 \n",
|
||||
"\n",
|
||||
" Base_unit Country_of_Origin Size ABV \n",
|
||||
"0 Pieces Russia 0.375 4.5 \n",
|
||||
"1 Pieces Russia 0.330 3.0 \n",
|
||||
"2 Liters NaN 1.000 9.6 \n",
|
||||
"3 NaN USA NaN NaN \n",
|
||||
"4 Pieces USA 0.473 8.5 \n",
|
||||
"... ... ... ... ... \n",
|
||||
"5309 Liters NaN 1.000 8.7 \n",
|
||||
"5310 Liters NaN 1.000 10.8 \n",
|
||||
"5311 Liters NaN 1.000 4.8 \n",
|
||||
"5312 Liters Russia 1.000 7.2 \n",
|
||||
"5313 Liters NaN 1.000 5.7 \n",
|
||||
"\n",
|
||||
"[5314 rows x 8 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 24,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(df_pr.info())\n",
|
||||
"print(df_pr.describe())\n",
|
||||
"df_pr"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"id": "13968604-1997-4b8b-a250-906fc834a2ab",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||||
"RangeIndex: 50084 entries, 0 to 50083\n",
|
||||
"Data columns (total 8 columns):\n",
|
||||
" # Column Non-Null Count Dtype \n",
|
||||
"--- ------ -------------- ----- \n",
|
||||
" 0 Date_and_time_of_unloading 50084 non-null object \n",
|
||||
" 1 Product_code 50084 non-null int64 \n",
|
||||
" 2 Amount 50084 non-null float64\n",
|
||||
" 3 Sale_amount 50033 non-null float64\n",
|
||||
" 4 Discount_amount 39882 non-null float64\n",
|
||||
" 5 Profit 50070 non-null float64\n",
|
||||
" 6 Percentage_markup 48145 non-null float64\n",
|
||||
" 7 Discount_percentage 39882 non-null float64\n",
|
||||
"dtypes: float64(6), int64(1), object(1)\n",
|
||||
"memory usage: 3.1+ MB\n",
|
||||
"None\n",
|
||||
" Product_code Amount Sale_amount Discount_amount \\\n",
|
||||
"count 50084.000000 50084.000000 50033.000000 39882.000000 \n",
|
||||
"mean 2153.160031 3.553146 1429.786677 243.761323 \n",
|
||||
"std 1367.357705 5.485805 2419.585455 405.902060 \n",
|
||||
"min 99.000000 0.033000 0.410000 0.010000 \n",
|
||||
"25% 899.000000 1.000000 330.000000 60.000000 \n",
|
||||
"50% 2098.000000 2.000000 630.000000 121.760000 \n",
|
||||
"75% 3059.000000 4.000000 1436.130000 270.615000 \n",
|
||||
"max 5322.000000 248.000000 58184.070000 20440.630000 \n",
|
||||
"\n",
|
||||
" Profit Percentage_markup Discount_percentage \n",
|
||||
"count 50070.000000 48145.000000 39882.000000 \n",
|
||||
"mean 705.901987 109.184511 17.251313 \n",
|
||||
"std 1352.628611 1182.538753 16.608075 \n",
|
||||
"min -9300.630000 -100.000000 0.000000 \n",
|
||||
"25% 143.070000 59.850000 7.930000 \n",
|
||||
"50% 283.000000 84.210000 12.000000 \n",
|
||||
"75% 687.015000 107.790000 20.210000 \n",
|
||||
"max 33352.460000 79900.000000 100.000000 \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>Date_and_time_of_unloading</th>\n",
|
||||
" <th>Product_code</th>\n",
|
||||
" <th>Amount</th>\n",
|
||||
" <th>Sale_amount</th>\n",
|
||||
" <th>Discount_amount</th>\n",
|
||||
" <th>Profit</th>\n",
|
||||
" <th>Percentage_markup</th>\n",
|
||||
" <th>Discount_percentage</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>2020-01-01 23:00:00</td>\n",
|
||||
" <td>144</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>280.00</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>155.00</td>\n",
|
||||
" <td>124.00</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>2020-01-01 23:00:00</td>\n",
|
||||
" <td>209</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>545.73</td>\n",
|
||||
" <td>294.27</td>\n",
|
||||
" <td>75.73</td>\n",
|
||||
" <td>16.11</td>\n",
|
||||
" <td>35.03</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>2020-01-01 23:00:00</td>\n",
|
||||
" <td>213</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>1265.05</td>\n",
|
||||
" <td>34.95</td>\n",
|
||||
" <td>653.05</td>\n",
|
||||
" <td>106.71</td>\n",
|
||||
" <td>2.69</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>2020-01-01 23:00:00</td>\n",
|
||||
" <td>217</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>630.00</td>\n",
|
||||
" <td>70.00</td>\n",
|
||||
" <td>220.50</td>\n",
|
||||
" <td>53.85</td>\n",
|
||||
" <td>10.00</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>2020-01-01 23:00:00</td>\n",
|
||||
" <td>222</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>1104.75</td>\n",
|
||||
" <td>195.25</td>\n",
|
||||
" <td>393.75</td>\n",
|
||||
" <td>55.38</td>\n",
|
||||
" <td>15.02</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>50079</th>\n",
|
||||
" <td>2022-09-18 15:00:00</td>\n",
|
||||
" <td>5316</td>\n",
|
||||
" <td>6.0</td>\n",
|
||||
" <td>1875.95</td>\n",
|
||||
" <td>104.05</td>\n",
|
||||
" <td>1095.95</td>\n",
|
||||
" <td>140.51</td>\n",
|
||||
" <td>5.26</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>50080</th>\n",
|
||||
" <td>2022-09-18 15:00:00</td>\n",
|
||||
" <td>5317</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>555.95</td>\n",
|
||||
" <td>104.05</td>\n",
|
||||
" <td>315.95</td>\n",
|
||||
" <td>131.65</td>\n",
|
||||
" <td>15.77</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>50081</th>\n",
|
||||
" <td>2022-09-18 15:00:00</td>\n",
|
||||
" <td>5318</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>572.50</td>\n",
|
||||
" <td>87.50</td>\n",
|
||||
" <td>312.50</td>\n",
|
||||
" <td>120.19</td>\n",
|
||||
" <td>13.26</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>50082</th>\n",
|
||||
" <td>2022-09-18 15:00:00</td>\n",
|
||||
" <td>5321</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>300.00</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>180.00</td>\n",
|
||||
" <td>150.00</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>50083</th>\n",
|
||||
" <td>2022-09-18 15:00:00</td>\n",
|
||||
" <td>5322</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>600.00</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>340.00</td>\n",
|
||||
" <td>130.77</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>50084 rows × 8 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" Date_and_time_of_unloading Product_code Amount Sale_amount \\\n",
|
||||
"0 2020-01-01 23:00:00 144 1.0 280.00 \n",
|
||||
"1 2020-01-01 23:00:00 209 2.0 545.73 \n",
|
||||
"2 2020-01-01 23:00:00 213 2.0 1265.05 \n",
|
||||
"3 2020-01-01 23:00:00 217 1.0 630.00 \n",
|
||||
"4 2020-01-01 23:00:00 222 2.0 1104.75 \n",
|
||||
"... ... ... ... ... \n",
|
||||
"50079 2022-09-18 15:00:00 5316 6.0 1875.95 \n",
|
||||
"50080 2022-09-18 15:00:00 5317 2.0 555.95 \n",
|
||||
"50081 2022-09-18 15:00:00 5318 2.0 572.50 \n",
|
||||
"50082 2022-09-18 15:00:00 5321 1.0 300.00 \n",
|
||||
"50083 2022-09-18 15:00:00 5322 2.0 600.00 \n",
|
||||
"\n",
|
||||
" Discount_amount Profit Percentage_markup Discount_percentage \n",
|
||||
"0 NaN 155.00 124.00 NaN \n",
|
||||
"1 294.27 75.73 16.11 35.03 \n",
|
||||
"2 34.95 653.05 106.71 2.69 \n",
|
||||
"3 70.00 220.50 53.85 10.00 \n",
|
||||
"4 195.25 393.75 55.38 15.02 \n",
|
||||
"... ... ... ... ... \n",
|
||||
"50079 104.05 1095.95 140.51 5.26 \n",
|
||||
"50080 104.05 315.95 131.65 15.77 \n",
|
||||
"50081 87.50 312.50 120.19 13.26 \n",
|
||||
"50082 NaN 180.00 150.00 NaN \n",
|
||||
"50083 NaN 340.00 130.77 NaN \n",
|
||||
"\n",
|
||||
"[50084 rows x 8 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 25,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(df_tr.info())\n",
|
||||
"print(df_tr.describe())\n",
|
||||
"df_tr"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
Loading…
Reference in New Issue
Block a user