{
"cells": [
{
"cell_type": "markdown",
"id": "7d018438",
"metadata": {},
"source": [
"## Auto Feature Engineering Sample Code"
]
},
{
"cell_type": "code",
"execution_count": 149,
"id": "8d9d4ab7",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import featuretools as ft"
]
},
{
"cell_type": "code",
"execution_count": 150,
"id": "228497a3",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>CID</th>\n",
" <th>Customer_Name</th>\n",
" <th>Age</th>\n",
" <th>Salary</th>\n",
" <th>Married</th>\n",
" <th>Kids</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>Anurag</td>\n",
" <td>28</td>\n",
" <td>10000</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>Brajesh</td>\n",
" <td>24</td>\n",
" <td>15000</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>Chatira</td>\n",
" <td>25</td>\n",
" <td>5000</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>Debasis</td>\n",
" <td>26</td>\n",
" <td>20000</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>Faran</td>\n",
" <td>27</td>\n",
" <td>30000</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" CID Customer_Name Age Salary Married Kids\n",
"0 1 Anurag 28 10000 1 1\n",
"1 2 Brajesh 24 15000 0 0\n",
"2 3 Chatira 25 5000 1 0\n",
"3 4 Debasis 26 20000 1 1\n",
"4 5 Faran 27 30000 0 0"
]
},
"execution_count": 150,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"customer = {'CID':[1,2,3,4,5],\n",
" 'Customer_Name':['Anurag','Brajesh','Chatira','Debasis','Faran'],\n",
" 'Age':[28,24,25,26,27],\n",
" 'Salary':[10000,15000,5000,20000,30000],\n",
" 'Married':[1,0,1,1,0],\n",
" 'Kids':[1,0,0,1,0]}\n",
"\n",
"customer = pd.DataFrame(customer) \n",
"\n",
"customer"
]
},
{
"cell_type": "code",
"execution_count": 151,
"id": "afc8faa4",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>PID</th>\n",
" <th>PCID</th>\n",
" <th>Product_Name</th>\n",
" <th>Price</th>\n",
" <th>Purchase_Date</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Mobile</td>\n",
" <td>35</td>\n",
" <td>03/09/2022</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>Laptop</td>\n",
" <td>76</td>\n",
" <td>07/09/2022</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>Camera</td>\n",
" <td>29</td>\n",
" <td>12/09/2022</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>TV</td>\n",
" <td>64</td>\n",
" <td>15/09/2022</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>Recorder</td>\n",
" <td>71</td>\n",
" <td>20/09/2022</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" PID PCID Product_Name Price Purchase_Date\n",
"0 1 1 Mobile 35 03/09/2022\n",
"1 2 2 Laptop 76 07/09/2022\n",
"2 3 3 Camera 29 12/09/2022\n",
"3 4 4 TV 64 15/09/2022\n",
"4 5 5 Recorder 71 20/09/2022"
]
},
"execution_count": 151,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"product = {'PID':[1,2,3,4,5],\n",
" 'PCID':[1,2,3,4,5],\n",
" 'Product_Name':['Mobile','Laptop','Camera','TV','Recorder'],\n",
" 'Price':[35,76,29,64,71],\n",
" 'Purchase_Date':['03/09/2022','07/09/2022','12/09/2022','15/09/2022','20/09/2022']\n",
" }\n",
"product = pd.DataFrame(product)\n",
"\n",
"product"
]
},
{
"cell_type": "code",
"execution_count": 152,
"id": "e266e4b9",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Entityset: shopping\n",
" Entities:\n",
" Relationships:\n",
" No relationships"
]
},
"execution_count": 152,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Creating Entity Set\n",
"es = ft.EntitySet(id = 'shopping')\n",
"es"
]
},
{
"cell_type": "code",
"execution_count": 153,
"id": "f5a26fbd",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\H515738\\Anaconda3\\lib\\site-packages\\featuretools\\utils\\entity_utils.py:217: UserWarning: Parsing '15/09/2022' in DD/MM/YYYY format. Provide format or specify infer_datetime_format=True for consistent parsing.\n",
" pd.to_datetime(dropped_na, errors='raise')\n",
"C:\\Users\\H515738\\Anaconda3\\lib\\site-packages\\featuretools\\utils\\entity_utils.py:217: UserWarning: Parsing '20/09/2022' in DD/MM/YYYY format. Provide format or specify infer_datetime_format=True for consistent parsing.\n",
" pd.to_datetime(dropped_na, errors='raise')\n",
"C:\\Users\\H515738\\Anaconda3\\lib\\site-packages\\featuretools\\utils\\entity_utils.py:175: UserWarning: Parsing '15/09/2022' in DD/MM/YYYY format. Provide format or specify infer_datetime_format=True for consistent parsing.\n",
" df[column_id] = pd.to_datetime(df[column_id], format=format,\n",
"C:\\Users\\H515738\\Anaconda3\\lib\\site-packages\\featuretools\\utils\\entity_utils.py:175: UserWarning: Parsing '20/09/2022' in DD/MM/YYYY format. Provide format or specify infer_datetime_format=True for consistent parsing.\n",
" df[column_id] = pd.to_datetime(df[column_id], format=format,\n"
]
},
{
"data": {
"text/plain": [
"Entityset: shopping\n",
" Entities:\n",
" cust [Rows: 5, Columns: 6]\n",
" prod [Rows: 5, Columns: 5]\n",
" Relationships:\n",
" No relationships"
]
},
"execution_count": 153,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Adding Tables To The Entity Set\n",
"es = es.entity_from_dataframe(entity_id = 'cust', dataframe = customer, index = 'CID')\n",
"es = es.entity_from_dataframe(entity_id = 'prod', dataframe = product, index = 'PID')\n",
"es"
]
},
{
"cell_type": "code",
"execution_count": 154,
"id": "db1367e4",
"metadata": {},
"outputs": [],
"source": [
"# Relationship Customer and Product\n",
"r_cust_prod = ft.Relationship(es['cust']['CID'], es['prod']['PCID'])"
]
},
{
"cell_type": "code",
"execution_count": 155,
"id": "8b12ad41",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Entityset: shopping\n",
" Entities:\n",
" cust [Rows: 5, Columns: 6]\n",
" prod [Rows: 5, Columns: 5]\n",
" Relationships:\n",
" prod.PCID -> cust.CID"
]
},
"execution_count": 155,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Adding The Defined Relationships\n",
"es = es.add_relationships([r_cust_prod])\n",
"es"
]
},
{
"cell_type": "code",
"execution_count": 156,
"id": "324405bf",
"metadata": {},
"outputs": [],
"source": [
"# Default primitives from featuretools\n",
"default_agg_primitives = [\"sum\", \"std\", \"max\", \"skew\", \"min\", \"mean\", \"count\", \"percent_true\", \"num_unique\", \"mode\"]\n",
"default_trans_primitives = [\"day\", \"year\", \"month\", \"weekday\", \"haversine\"]\n"
]
},
{
"cell_type": "code",
"execution_count": 157,
"id": "3d1a2c92",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\H515738\\Anaconda3\\lib\\site-packages\\featuretools\\synthesis\\dfs.py:307: UnusedPrimitiveWarning: Some specified primitives were not used during DFS:\n",
" trans_primitives: ['haversine']\n",
" agg_primitives: ['percent_true']\n",
"This may be caused by a using a value of max_depth that is too small, not setting interesting values, or it may indicate no compatible variable types for the primitive were found in the data.\n",
" warnings.warn(warning_msg, UnusedPrimitiveWarning)\n"
]
}
],
"source": [
"# DFS with specified primitives\n",
"feature_matrix, feature_names = ft.dfs(entityset = es, target_entity = 'cust',\n",
" trans_primitives = default_trans_primitives,\n",
" agg_primitives=default_agg_primitives, \n",
" max_depth = 2, features_only=False)"
]
},
{
"cell_type": "code",
"execution_count": 158,
"id": "ed8f567b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(5, 22)"
]
},
"execution_count": 158,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"feature_matrix.shape"
]
},
{
"cell_type": "code",
"execution_count": 159,
"id": "0ad97377",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[<Feature: Customer_Name>,\n",
" <Feature: Age>,\n",
" <Feature: Salary>,\n",
" <Feature: Married>,\n",
" <Feature: Kids>,\n",
" <Feature: COUNT(prod)>,\n",
" <Feature: MAX(prod.Price)>,\n",
" <Feature: MEAN(prod.Price)>,\n",
" <Feature: MIN(prod.Price)>,\n",
" <Feature: MODE(prod.Product_Name)>,\n",
" <Feature: NUM_UNIQUE(prod.Product_Name)>,\n",
" <Feature: SKEW(prod.Price)>,\n",
" <Feature: STD(prod.Price)>,\n",
" <Feature: SUM(prod.Price)>,\n",
" <Feature: MODE(prod.DAY(Purchase_Date))>,\n",
" <Feature: MODE(prod.MONTH(Purchase_Date))>,\n",
" <Feature: MODE(prod.WEEKDAY(Purchase_Date))>,\n",
" <Feature: MODE(prod.YEAR(Purchase_Date))>,\n",
" <Feature: NUM_UNIQUE(prod.DAY(Purchase_Date))>,\n",
" <Feature: NUM_UNIQUE(prod.MONTH(Purchase_Date))>,\n",
" <Feature: NUM_UNIQUE(prod.WEEKDAY(Purchase_Date))>,\n",
" <Feature: NUM_UNIQUE(prod.YEAR(Purchase_Date))>]"
]
},
"execution_count": 159,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"feature_names"
]
},
{
"cell_type": "code",
"execution_count": 160,
"id": "c0104f83",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Customer_Name</th>\n",
" <th>Age</th>\n",
" <th>Salary</th>\n",
" <th>Married</th>\n",
" <th>Kids</th>\n",
" <th>COUNT(prod)</th>\n",
" <th>MAX(prod.Price)</th>\n",
" <th>MEAN(prod.Price)</th>\n",
" <th>MIN(prod.Price)</th>\n",
" <th>MODE(prod.Product_Name)</th>\n",
" <th>...</th>\n",
" <th>STD(prod.Price)</th>\n",
" <th>SUM(prod.Price)</th>\n",
" <th>MODE(prod.DAY(Purchase_Date))</th>\n",
" <th>MODE(prod.MONTH(Purchase_Date))</th>\n",
" <th>MODE(prod.WEEKDAY(Purchase_Date))</th>\n",
" <th>MODE(prod.YEAR(Purchase_Date))</th>\n",
" <th>NUM_UNIQUE(prod.DAY(Purchase_Date))</th>\n",
" <th>NUM_UNIQUE(prod.MONTH(Purchase_Date))</th>\n",
" <th>NUM_UNIQUE(prod.WEEKDAY(Purchase_Date))</th>\n",
" <th>NUM_UNIQUE(prod.YEAR(Purchase_Date))</th>\n",
" </tr>\n",
" <tr>\n",
" <th>CID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Anurag</td>\n",
" <td>28</td>\n",
" <td>10000</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>35</td>\n",
" <td>35.0</td>\n",
" <td>35</td>\n",
" <td>Mobile</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>35</td>\n",
" <td>9</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>2022</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Brajesh</td>\n",
" <td>24</td>\n",
" <td>15000</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>76</td>\n",
" <td>76.0</td>\n",
" <td>76</td>\n",
" <td>Laptop</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>76</td>\n",
" <td>9</td>\n",
" <td>7</td>\n",
" <td>5</td>\n",
" <td>2022</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Chatira</td>\n",
" <td>25</td>\n",
" <td>5000</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>29</td>\n",
" <td>29.0</td>\n",
" <td>29</td>\n",
" <td>Camera</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>29</td>\n",
" <td>9</td>\n",
" <td>12</td>\n",
" <td>4</td>\n",
" <td>2022</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Debasis</td>\n",
" <td>26</td>\n",
" <td>20000</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>64</td>\n",
" <td>64.0</td>\n",
" <td>64</td>\n",
" <td>TV</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>64</td>\n",
" <td>15</td>\n",
" <td>9</td>\n",
" <td>3</td>\n",
" <td>2022</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Faran</td>\n",
" <td>27</td>\n",
" <td>30000</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>71</td>\n",
" <td>71.0</td>\n",
" <td>71</td>\n",
" <td>Recorder</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>71</td>\n",
" <td>20</td>\n",
" <td>9</td>\n",
" <td>1</td>\n",
" <td>2022</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 22 columns</p>\n",
"</div>"
],
"text/plain": [
" Customer_Name Age Salary Married Kids COUNT(prod) MAX(prod.Price) \\\n",
"CID \n",
"1 Anurag 28 10000 1 1 1 35 \n",
"2 Brajesh 24 15000 0 0 1 76 \n",
"3 Chatira 25 5000 1 0 1 29 \n",
"4 Debasis 26 20000 1 1 1 64 \n",
"5 Faran 27 30000 0 0 1 71 \n",
"\n",
" MEAN(prod.Price) MIN(prod.Price) MODE(prod.Product_Name) ... \\\n",
"CID ... \n",
"1 35.0 35 Mobile ... \n",
"2 76.0 76 Laptop ... \n",
"3 29.0 29 Camera ... \n",
"4 64.0 64 TV ... \n",
"5 71.0 71 Recorder ... \n",
"\n",
" STD(prod.Price) SUM(prod.Price) MODE(prod.DAY(Purchase_Date)) \\\n",
"CID \n",
"1 NaN 35 9 \n",
"2 NaN 76 9 \n",
"3 NaN 29 9 \n",
"4 NaN 64 15 \n",
"5 NaN 71 20 \n",
"\n",
" MODE(prod.MONTH(Purchase_Date)) MODE(prod.WEEKDAY(Purchase_Date)) \\\n",
"CID \n",
"1 3 2 \n",
"2 7 5 \n",
"3 12 4 \n",
"4 9 3 \n",
"5 9 1 \n",
"\n",
" MODE(prod.YEAR(Purchase_Date)) NUM_UNIQUE(prod.DAY(Purchase_Date)) \\\n",
"CID \n",
"1 2022 1 \n",
"2 2022 1 \n",
"3 2022 1 \n",
"4 2022 1 \n",
"5 2022 1 \n",
"\n",
" NUM_UNIQUE(prod.MONTH(Purchase_Date)) \\\n",
"CID \n",
"1 1 \n",
"2 1 \n",
"3 1 \n",
"4 1 \n",
"5 1 \n",
"\n",
" NUM_UNIQUE(prod.WEEKDAY(Purchase_Date)) \\\n",
"CID \n",
"1 1 \n",
"2 1 \n",
"3 1 \n",
"4 1 \n",
"5 1 \n",
"\n",
" NUM_UNIQUE(prod.YEAR(Purchase_Date)) \n",
"CID \n",
"1 1 \n",
"2 1 \n",
"3 1 \n",
"4 1 \n",
"5 1 \n",
"\n",
"[5 rows x 22 columns]"
]
},
"execution_count": 160,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"feature_matrix"
]
},
{
"cell_type": "code",
"execution_count": 161,
"id": "5084dac9",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['Customer_Name',\n",
" 'Age',\n",
" 'Salary',\n",
" 'Married',\n",
" 'Kids',\n",
" 'COUNT(prod)',\n",
" 'MAX(prod.Price)',\n",
" 'MEAN(prod.Price)',\n",
" 'MIN(prod.Price)',\n",
" 'MODE(prod.Product_Name)',\n",
" 'NUM_UNIQUE(prod.Product_Name)',\n",
" 'SKEW(prod.Price)',\n",
" 'STD(prod.Price)',\n",
" 'SUM(prod.Price)',\n",
" 'MODE(prod.DAY(Purchase_Date))',\n",
" 'MODE(prod.MONTH(Purchase_Date))',\n",
" 'MODE(prod.WEEKDAY(Purchase_Date))',\n",
" 'MODE(prod.YEAR(Purchase_Date))',\n",
" 'NUM_UNIQUE(prod.DAY(Purchase_Date))',\n",
" 'NUM_UNIQUE(prod.MONTH(Purchase_Date))',\n",
" 'NUM_UNIQUE(prod.WEEKDAY(Purchase_Date))',\n",
" 'NUM_UNIQUE(prod.YEAR(Purchase_Date))',\n",
" 'PID',\n",
" 'PCID',\n",
" 'Product_Name',\n",
" 'Price',\n",
" 'Purchase_Date']"
]
},
"execution_count": 161,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"all_dat = feature_matrix.merge(product,left_on='CID',right_on='PCID')\n",
"all_dat.columns.tolist()"
]
},
{
"cell_type": "code",
"execution_count": 162,
"id": "bc0a4ebd",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Customer_Name</th>\n",
" <th>Age</th>\n",
" <th>Salary</th>\n",
" <th>Married</th>\n",
" <th>Kids</th>\n",
" <th>COUNT(prod)</th>\n",
" <th>MAX(prod.Price)</th>\n",
" <th>MEAN(prod.Price)</th>\n",
" <th>MIN(prod.Price)</th>\n",
" <th>MODE(prod.Product_Name)</th>\n",
" <th>...</th>\n",
" <th>MODE(prod.YEAR(Purchase_Date))</th>\n",
" <th>NUM_UNIQUE(prod.DAY(Purchase_Date))</th>\n",
" <th>NUM_UNIQUE(prod.MONTH(Purchase_Date))</th>\n",
" <th>NUM_UNIQUE(prod.WEEKDAY(Purchase_Date))</th>\n",
" <th>NUM_UNIQUE(prod.YEAR(Purchase_Date))</th>\n",
" <th>PID</th>\n",
" <th>PCID</th>\n",
" <th>Product_Name</th>\n",
" <th>Price</th>\n",
" <th>Purchase_Date</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Anurag</td>\n",
" <td>28</td>\n",
" <td>10000</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>35</td>\n",
" <td>35.0</td>\n",
" <td>35</td>\n",
" <td>Mobile</td>\n",
" <td>...</td>\n",
" <td>2022</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Mobile</td>\n",
" <td>35</td>\n",
" <td>2022-03-09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Brajesh</td>\n",
" <td>24</td>\n",
" <td>15000</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>76</td>\n",
" <td>76.0</td>\n",
" <td>76</td>\n",
" <td>Laptop</td>\n",
" <td>...</td>\n",
" <td>2022</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>Laptop</td>\n",
" <td>76</td>\n",
" <td>2022-07-09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Chatira</td>\n",
" <td>25</td>\n",
" <td>5000</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>29</td>\n",
" <td>29.0</td>\n",
" <td>29</td>\n",
" <td>Camera</td>\n",
" <td>...</td>\n",
" <td>2022</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>Camera</td>\n",
" <td>29</td>\n",
" <td>2022-12-09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Debasis</td>\n",
" <td>26</td>\n",
" <td>20000</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>64</td>\n",
" <td>64.0</td>\n",
" <td>64</td>\n",
" <td>TV</td>\n",
" <td>...</td>\n",
" <td>2022</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>TV</td>\n",
" <td>64</td>\n",
" <td>2022-09-15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Faran</td>\n",
" <td>27</td>\n",
" <td>30000</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>71</td>\n",
" <td>71.0</td>\n",
" <td>71</td>\n",
" <td>Recorder</td>\n",
" <td>...</td>\n",
" <td>2022</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>Recorder</td>\n",
" <td>71</td>\n",
" <td>2022-09-20</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 27 columns</p>\n",
"</div>"
],
"text/plain": [
" Customer_Name Age Salary Married Kids COUNT(prod) MAX(prod.Price) \\\n",
"0 Anurag 28 10000 1 1 1 35 \n",
"1 Brajesh 24 15000 0 0 1 76 \n",
"2 Chatira 25 5000 1 0 1 29 \n",
"3 Debasis 26 20000 1 1 1 64 \n",
"4 Faran 27 30000 0 0 1 71 \n",
"\n",
" MEAN(prod.Price) MIN(prod.Price) MODE(prod.Product_Name) ... \\\n",
"0 35.0 35 Mobile ... \n",
"1 76.0 76 Laptop ... \n",
"2 29.0 29 Camera ... \n",
"3 64.0 64 TV ... \n",
"4 71.0 71 Recorder ... \n",
"\n",
" MODE(prod.YEAR(Purchase_Date)) NUM_UNIQUE(prod.DAY(Purchase_Date)) \\\n",
"0 2022 1 \n",
"1 2022 1 \n",
"2 2022 1 \n",
"3 2022 1 \n",
"4 2022 1 \n",
"\n",
" NUM_UNIQUE(prod.MONTH(Purchase_Date)) \\\n",
"0 1 \n",
"1 1 \n",
"2 1 \n",
"3 1 \n",
"4 1 \n",
"\n",
" NUM_UNIQUE(prod.WEEKDAY(Purchase_Date)) \\\n",
"0 1 \n",
"1 1 \n",
"2 1 \n",
"3 1 \n",
"4 1 \n",
"\n",
" NUM_UNIQUE(prod.YEAR(Purchase_Date)) PID PCID Product_Name Price \\\n",
"0 1 1 1 Mobile 35 \n",
"1 1 2 2 Laptop 76 \n",
"2 1 3 3 Camera 29 \n",
"3 1 4 4 TV 64 \n",
"4 1 5 5 Recorder 71 \n",
"\n",
" Purchase_Date \n",
"0 2022-03-09 \n",
"1 2022-07-09 \n",
"2 2022-12-09 \n",
"3 2022-09-15 \n",
"4 2022-09-20 \n",
"\n",
"[5 rows x 27 columns]"
]
},
"execution_count": 162,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"all_dat.head()"
]
},
{
"cell_type": "markdown",
"id": "3f736424",
"metadata": {},
"source": [
"### Feature Importance"
]
},
{
"cell_type": "code",
"execution_count": 163,
"id": "e2b87d4e",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.datasets import make_regression\n",
"from sklearn.ensemble import RandomForestRegressor\n",
"from matplotlib import pyplot"
]
},
{
"cell_type": "code",
"execution_count": 164,
"id": "3db3aa7a",
"metadata": {},
"outputs": [],
"source": [
"# Defining Dataset\n",
"all_dat.dropna(inplace=True,axis=1)\n",
"X = all_dat.drop(columns=['Customer_Name','Product_Name','PID','PCID','MODE(prod.Product_Name)','Purchase_Date','Price'],axis=1)\n",
"y = all_dat[['Price']]"
]
},
{
"cell_type": "code",
"execution_count": 165,
"id": "4e0bedec",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(5, 18)"
]
},
"execution_count": 165,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X.shape"
]
},
{
"cell_type": "code",
"execution_count": 166,
"id": "a636de72",
"metadata": {},
"outputs": [],
"source": [
"# Define The Model\n",
"model = RandomForestRegressor()"
]
},
{
"cell_type": "code",
"execution_count": 167,
"id": "c32e5075",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\H515738\\AppData\\Local\\Temp\\ipykernel_40348\\426620408.py:2: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
" model.fit(X, y)\n"
]
},
{
"data": {
"text/plain": [
"RandomForestRegressor()"
]
},
"execution_count": 167,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fit The Model\n",
"model.fit(X, y)"
]
},
{
"cell_type": "code",
"execution_count": 168,
"id": "8fee06ea",
"metadata": {},
"outputs": [],
"source": [
"# Get Feature Importance\n",
"importance = model.feature_importances_"
]
},
{
"cell_type": "code",
"execution_count": 169,
"id": "a6c4e849",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Feature: 0, Score: 0.05173\n",
"Feature: 1, Score: 0.15903\n",
"Feature: 2, Score: 0.04232\n",
"Feature: 3, Score: 0.03185\n",
"Feature: 4, Score: 0.00000\n",
"Feature: 5, Score: 0.16503\n",
"Feature: 6, Score: 0.12647\n",
"Feature: 7, Score: 0.11905\n",
"Feature: 8, Score: 0.00000\n",
"Feature: 9, Score: 0.11312\n",
"Feature: 10, Score: 0.10897\n",
"Feature: 11, Score: 0.05045\n",
"Feature: 12, Score: 0.03198\n",
"Feature: 13, Score: 0.00000\n",
"Feature: 14, Score: 0.00000\n",
"Feature: 15, Score: 0.00000\n",
"Feature: 16, Score: 0.00000\n",
"Feature: 17, Score: 0.00000\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD4CAYAAADiry33AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAATzUlEQVR4nO3dfYwc933f8fenJxOtH1Ql1bVSSKKkC8IqWzQ1cWDkuDWCOC5IyTCdIigk1JahtiAIiJZUREiZBE3yp9O6RqxCEMFaTKNGiILKTkPYl8hBYqMIYAk8PVg2TTM+s0p5IWVdKkRyK8AUq2//2FGwXe1xh/fMn94v4HAzv4fZ7w6XHw5nZ2dTVUiS2vVXNroASdLaMuglqXEGvSQ1zqCXpMYZ9JLUuGs2uoBxrr/++tqxY8dGlyFJV42nnnrqz6tqelzfpgz6HTt2MDc3t9FlSNJVI8mfLtXnqRtJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWrcpvxkrDaXHUe+tKx5z3/q1lWuRNJyeEQvSY0z6CWpcb2CPsm+JGeSzCc5Mqb/piRfS/KDJPeN9F2X5LEk305yOsn7Vqt4SdJkE8/RJ5kCHgA+BCwAJ5OcqKpvDQ17Cbgb+OiYTXwW+P2q+pkkW4C3r7hqSVJvfY7o9wLzVXW2qi4CjwIHhgdU1YtVdRJ4bbg9ybXAB4CHunEXq+ovVqNwSVI/fYJ+K3BuaH2ha+vj3cAi8OtJnknyuSTvGDcwycEkc0nmFhcXe25ekjRJn8srM6atrmD7e4BPVtWTST4LHAH+7Zs2WHUMOAYwMzPTd/trwssJJbWkzxH9ArB9aH0bcL7n9heAhap6slt/jEHwS5LWSZ+gPwnsSrKzezP1NuBEn41X1QvAuSTv6Zo+CHzrMlMkSats4qmbqrqU5DDwODAFHK+qU0kOdf1Hk9wAzAHXAq8nuRfYXVWvAJ8EHun+kTgL3Lk2T0WSNE6vWyBU1SwwO9J2dGj5BQandMbNfRaYWX6JkqSV8JOxktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TG9Qr6JPuSnEkyn+TImP6bknwtyQ+S3DemfyrJM0m+uBpFS5L6mxj0SaaAB4D9wG7g9iS7R4a9BNwNfHqJzdwDnF5BnZKkZepzRL8XmK+qs1V1EXgUODA8oKperKqTwGujk5NsA24FPrcK9UqSrlCfoN8KnBtaX+ja+vo14OeA1y83KMnBJHNJ5hYXF69g85Kky+kT9BnTVn02nuTDwItV9dSksVV1rKpmqmpmenq6z+YlST30CfoFYPvQ+jbgfM/tvx/4SJLnGZzy+ckkv3lFFUqSVqRP0J8EdiXZmWQLcBtwos/Gq+rnq2pbVe3o5v1RVX1s2dVKkq7YNZMGVNWlJIeBx4Ep4HhVnUpyqOs/muQGYA64Fng9yb3A7qp6Ze1KlyT1MTHoAapqFpgdaTs6tPwCg1M6l9vGV4GvXnGFkqQV8ZOxktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS43rd60ZaqR1HvrSsec9/6tZVrkR66/GIXpIaZ9BLUuMMeklqnEEvSY3rFfRJ9iU5k2Q+yZEx/Tcl+VqSHyS5b6h9e5KvJDmd5FSSe1azeEnSZBOvukkyBTwAfIjBF4WfTHKiqr41NOwl4G7goyPTLwE/W1VPJ3kX8FSSPxiZK0laQ32O6PcC81V1tqouAo8CB4YHVNWLVXUSeG2k/UJVPd0tfx84DWxdlcolSb30CfqtwLmh9QWWEdZJdgDvBZ5cov9gkrkkc4uLi1e6eUnSEvp8YCpj2upKHiTJO4HPA/dW1SvjxlTVMeAYwMzMzBVtX28NfuhKWp4+R/QLwPah9W3A+b4PkORtDEL+kar6wpWVJ0laqT5BfxLYlWRnki3AbcCJPhtPEuAh4HRVfWb5ZUqSlmviqZuqupTkMPA4MAUcr6pTSQ51/UeT3ADMAdcCrye5F9gN/APg48A3kjzbbfIXqmp21Z+JJGmsXjc164J5dqTt6NDyCwxO6Yz6Y8af45ckrRM/GStJjTPoJalxBr0kNc4vHpGWYTnX9Hs9vzaKR/SS1DiDXpIaZ9BLUuMMeklqnG/GShvEN3S1Xjyil6TGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcb2CPsm+JGeSzCc5Mqb/piRfS/KDJPddyVxJ0tqaGPRJpoAHgP0Mvh7w9iS7R4a9BNwNfHoZcyVJa6jPEf1eYL6qzlbVReBR4MDwgKp6sapOAq9d6VxJ0trqE/RbgXND6wtdWx+95yY5mGQuydzi4mLPzUuSJukT9OO+3Lt6br/33Ko6VlUzVTUzPT3dc/OSpEn6BP0CsH1ofRtwvuf2VzJXkrQK+gT9SWBXkp1JtgC3ASd6bn8lcyVJq2DibYqr6lKSw8DjwBRwvKpOJTnU9R9NcgMwB1wLvJ7kXmB3Vb0ybu4aPRdJ0hi97kdfVbPA7Ejb0aHlFxicluk1V5K0fvxkrCQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcb2CPsm+JGeSzCc5MqY/Se7v+p9Lsmeo718nOZXkm0l+K8lfXc0nIEm6vIlBn2QKeADYD+wGbk+ye2TYfmBX93MQeLCbuxW4G5ipqr/P4OsEb1u16iVJE/U5ot8LzFfV2aq6CDwKHBgZcwB4uAaeAK5LcmPXdw3w15JcA7wdOL9KtUuSeugT9FuBc0PrC13bxDFV9WfAp4H/CVwAXq6qL497kCQHk8wlmVtcXOxbvyRpgj5BnzFt1WdMkh9icLS/E/gR4B1JPjbuQarqWFXNVNXM9PR0j7IkSX30CfoFYPvQ+jbefPplqTE/BfyPqlqsqteALwA/vvxyJUlXqk/QnwR2JdmZZAuDN1NPjIw5AdzRXX1zM4NTNBcYnLK5OcnbkwT4IHB6FeuXJE1wzaQBVXUpyWHgcQZXzRyvqlNJDnX9R4FZ4BZgHngVuLPrezLJY8DTwCXgGeDYWjwRSdJ4E4MeoKpmGYT5cNvRoeUC7lpi7i8Dv7yCGiVJK+AnYyWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjesV9En2JTmTZD7JkTH9SXJ/1/9ckj1DfdcleSzJt5OcTvK+1XwCkqTLmxj0SaaAB4D9wG7g9iS7R4btB3Z1PweBB4f6Pgv8flXdBPwofmesJK2rPkf0e4H5qjpbVReBR4EDI2MOAA/XwBPAdUluTHIt8AHgIYCqulhVf7F65UuSJukT9FuBc0PrC11bnzHvBhaBX0/yTJLPJXnHCuqVJF2hPkGfMW3Vc8w1wB7gwap6L/B/gDed4wdIcjDJXJK5xcXFHmVJkvroE/QLwPah9W3A+Z5jFoCFqnqya3+MQfC/SVUdq6qZqpqZnp7uU7skqYc+QX8S2JVkZ5ItwG3AiZExJ4A7uqtvbgZerqoLVfUCcC7Je7pxHwS+tVrFS5Imu2bSgKq6lOQw8DgwBRyvqlNJDnX9R4FZ4BZgHngVuHNoE58EHun+kTg70idJWmMTgx6gqmYZhPlw29Gh5QLuWmLus8DM8kuUJK2En4yVpMYZ9JLUOINekhpn0EtS4wx6SWpcr6turiY7jnzpiuc8/6lb16ASSdocmgt66a1kOQc24MHNW42nbiSpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXG9gj7JviRnkswnedOXe3dfIXh/1/9ckj0j/VNJnknyxdUqXJLUz8RbICSZAh4APsTgy75PJjlRVcPf/bof2NX9/BjwYPf7DfcAp4FrV6nuTc+PpkvaLPoc0e8F5qvqbFVdBB4FDoyMOQA8XANPANcluREgyTbgVuBzq1i3JKmnPkG/FTg3tL7QtfUd82vAzwGvL69ESdJK9An6jGmrPmOSfBh4saqemvggycEkc0nmFhcXe5QlSeqjT9AvANuH1rcB53uOeT/wkSTPMzjl85NJfnPcg1TVsaqaqaqZ6enpnuVLkibpE/QngV1JdibZAtwGnBgZcwK4o7v65mbg5aq6UFU/X1XbqmpHN++Pqupjq/kEJEmXN/Gqm6q6lOQw8DgwBRyvqlNJDnX9R4FZ4BZgHngVuHPtSpYkXYle3zBVVbMMwny47ejQcgF3TdjGV4GvXnGFkqQV8ZOxktQ4g16SGmfQS1LjDHpJapxBL0mN63XVjTaGN0bTevB11j6P6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMb1Cvok+5KcSTKf5MiY/iS5v+t/Lsmern17kq8kOZ3kVJJ7VvsJSJIub2LQJ5kCHgD2A7uB25PsHhm2H9jV/RwEHuzaLwE/W1V/F7gZuGvMXEnSGupzRL8XmK+qs1V1EXgUODAy5gDwcA08AVyX5MaqulBVTwNU1feB08DWVaxfkjRBn6DfCpwbWl/gzWE9cUySHcB7gSfHPUiSg0nmkswtLi72KEuS1EefoM+YtrqSMUneCXweuLeqXhn3IFV1rKpmqmpmenq6R1mSpD76BP0CsH1ofRtwvu+YJG9jEPKPVNUXll+qJGk5+gT9SWBXkp1JtgC3ASdGxpwA7uiuvrkZeLmqLiQJ8BBwuqo+s6qVS5J6mfhVglV1Kclh4HFgCjheVaeSHOr6jwKzwC3APPAqcGc3/f3Ax4FvJHm2a/uFqppd1WchSVpSr++M7YJ5dqTt6NByAXeNmffHjD9/L0laJ34yVpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUuF5Bn2RfkjNJ5pMcGdOfJPd3/c8l2dN3riRpbU0M+iRTwAPAfmA3cHuS3SPD9gO7up+DwINXMFeStIb6HNHvBear6mxVXQQeBQ6MjDkAPFwDTwDXJbmx51xJ0hrK4OteLzMg+RlgX1X9q27948CPVdXhoTFfBD7VfUcsSf4Q+DfAjklzh7ZxkMH/BgDeA5xZ2VMb63rgz9dgu6vtaqkTrHWtXC21Xi11Qvu1/u2qmh7X0efLwcd9uffovw5Ljekzd9BYdQw41qOeZUsyV1Uza/kYq+FqqROsda1cLbVeLXXCW7vWPkG/AGwfWt8GnO85ZkuPuZKkNdTnHP1JYFeSnUm2ALcBJ0bGnADu6K6+uRl4uaou9JwrSVpDE4/oq+pSksPA48AUcLyqTiU51PUfBWaBW4B54FXgzsvNXZNn0s+anhpaRVdLnWCta+VqqfVqqRPewrVOfDNWknR185OxktQ4g16SGtdc0K/kdg3rXOf2JF9JcjrJqST3jBnzE0leTvJs9/NLG1FrV8vzSb7R1TE3pn+z7Nf3DO2vZ5O8kuTekTEbtl+THE/yYpJvDrX9cJI/SPKd7vcPLTF33W4nskSd/z7Jt7s/399Jct0Scy/7WlmnWn8lyZ8N/RnfssTcdb1FyxK1/vZQnc8neXaJucvfr1XVzA+DN3y/C7ybwaWdXwd2j4y5Bfg9Btf43ww8uUG13gjs6ZbfBfzJmFp/AvjiRu/Xrpbngesv078p9uuY18MLDD5Isin2K/ABYA/wzaG2fwcc6ZaPAL+6xHO57Gt7Her8J8A13fKvjquzz2tlnWr9FeC+Hq+PddunS9U60v8fgF9a7f3a2hH9Sm7XsK6q6kJVPd0tfx84DWxd7zpW0abYryM+CHy3qv50g+v4S1X134GXRpoPAL/RLf8G8NExU9f1diLj6qyqL1fVpW71CQafi9lwS+zTPtb9Fi2XqzVJgH8G/NZqP25rQb8VODe0vsCbw7PPmHWVZAfwXuDJMd3vS/L1JL+X5O+tb2X/nwK+nOSp7nYVozbdfmXwuY2l/tJslv0K8Ldq8LkTut9/c8yYzbZ//wWD/8GNM+m1sl4Od6eZji9xOmyz7dN/DHyvqr6zRP+y92trQb+S2zVsiCTvBD4P3FtVr4x0P83gtMOPAv8R+G/rXN6w91fVHgZ3Ir0ryQdG+jfbft0CfAT4r2O6N9N+7WvT7N8kvwhcAh5ZYsik18p6eBD4O8A/BC4wOCUyatPs087tXP5oftn7tbWgX8ntGtZdkrcxCPlHquoLo/1V9UpV/e9ueRZ4W5Lr17nMN2o53/1+EfgdBv/tHbZp9mtnP/B0VX1vtGMz7dfO9944zdX9fnHMmE2xf5N8Avgw8M+rO3E8qsdrZc1V1feq6v9W1evAf1qihk2xTwGSXAP8U+C3lxqzkv3aWtCv5HYN66o7H/cQcLqqPrPEmBu6cSTZy+DP63+tX5V/Wcc7krzrjWUGb8p9c2TYptivQ5Y8Otos+3XICeAT3fIngN8dM2bDbyeSZB+Du9J+pKpeXWJMn9fKmht5f+inl6hhw/fpkJ8Cvl1VC+M6V7xf1/Id5o34YXD1x58weDf9F7u2Q8ChbjkMvgzlu8A3gJkNqvMfMfhv4nPAs93PLSO1HgZOMbga4Angxzeo1nd3NXy9q2fT7teulrczCO6/PtS2KfYrg398LgCvMTii/JfA3wD+EPhO9/uHu7E/Asxe7rW9znXOMzin/cbr9ehonUu9Vjag1v/SvQ6fYxDeN270Pl2q1q79P7/x+hwau2r71VsgSFLjWjt1I0kaYdBLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxv0/7Sxa4uZqGaMAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# summarize feature importance\n",
"for i,v in enumerate(importance):\n",
"\tprint('Feature: %0d, Score: %.5f' % (i,v))\n",
"# plot feature importance\n",
"pyplot.bar([x for x in range(len(importance))], importance)\n",
"pyplot.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4c94cfbe",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}