{
"cells": [
{
"cell_type": "markdown",
"id": "f70cba96",
"metadata": {},
"source": [
"### Configuration Variables."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "84c02166",
"metadata": {},
"outputs": [],
"source": [
"# We Need To Provide All These Variables Before Proceding.\n",
"# All Column Names Of Your DataSet.\n",
"all_columns = [\"Meter\",\"dt\",\"Global_reactive_power\",\"Voltage\",\"Global_intensity\",\"Sub_metering_1\",\"Sub_metering_2\",\"Sub_metering_3\",\"Power_Consumption\"] \n",
"# Columns You Want To Drop From Your DataSet.\n",
"dropped_columns = [\"dt\"]\n",
"# Columns You Want To Train Your Model.\n",
"training_columns = [\"Global_reactive_power\",\"Voltage\",\"Sub_metering_1\",\"Sub_metering_2\",\"Sub_metering_3\"]\n",
"# Target Column You Are Predicting.\n",
"target_column = \"Power_Consumption\"\n",
"# Name Of The Column You Want To Do Iteration.\n",
"iter_column = 'Meter'\n",
"# Input File Location\n",
"file_location= \"C:/Subrat Documents/Honeywell Project Work/1- Multiple Model Performance/datasets/Power_Consumption_Meters.csv\"\n",
"# All Model Performance File\n",
"all_model_performance_location = \"C:/Subrat Documents/Honeywell Project Work/1- Multiple Model Performance/datasets/Model_Performance.csv\"\n",
"# Model Coefficients File\n",
"coeff_file_location = \"C:/Subrat Documents/Honeywell Project Work/1- Multiple Model Performance/datasets/Model_Coefficients.csv\"\n",
"#Final Output File\n",
"final_output_file_location = \"C:/Subrat Documents/Honeywell Project Work/1- Multiple Model Performance/datasets/Final_Output.csv\""
]
},
{
"cell_type": "markdown",
"id": "00a65583",
"metadata": {},
"source": [
"### Required Libraries"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "565e1479",
"metadata": {},
"outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'xgboost'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"Input \u001b[1;32mIn [2]\u001b[0m, in \u001b[0;36m<cell line: 14>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 12\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msklearn\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mlinear_model\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m LinearRegression, Lasso,Ridge,ElasticNet\n\u001b[0;32m 13\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msklearn\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtree\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m DecisionTreeRegressor\n\u001b[1;32m---> 14\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mxgboost\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m XGBRegressor\n\u001b[0;32m 15\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msklearn\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mensemble\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m GradientBoostingRegressor\n\u001b[0;32m 16\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msklearn\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpreprocessing\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m LabelEncoder\n",
"\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'xgboost'"
]
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"from sklearn.pipeline import make_pipeline\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.impute import SimpleImputer\n",
"from sklearn.tree import DecisionTreeRegressor, plot_tree\n",
"from sklearn.ensemble import RandomForestRegressor\n",
"from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split # Hyperparameter tuning\n",
"from category_encoders import OneHotEncoder\n",
"from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score\n",
"from sklearn.linear_model import LinearRegression, Lasso,Ridge,ElasticNet\n",
"from sklearn.tree import DecisionTreeRegressor\n",
"from xgboost import XGBRegressor\n",
"from sklearn.ensemble import GradientBoostingRegressor\n",
"from sklearn.preprocessing import LabelEncoder\n",
"from sklearn import preprocessing\n",
"import warnings\n",
"warnings.filterwarnings('ignore')"
]
},
{
"cell_type": "markdown",
"id": "a933bdc9",
"metadata": {},
"source": [
"### Input Data Reading"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9d8769e9",
"metadata": {},
"outputs": [],
"source": [
"data = pd.read_csv(file_location)"
]
},
{
"cell_type": "markdown",
"id": "20b92ba2",
"metadata": {},
"source": [
"### Dropping The Not Required Columns"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2b9747df",
"metadata": {},
"outputs": [],
"source": [
"required_data = data.drop(dropped_columns , axis = 1, inplace=False)"
]
},
{
"cell_type": "markdown",
"id": "7ef08092",
"metadata": {},
"source": [
"### Auto Encodes Any Dataframe Column Of Type Category Or Object."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cc8d2d12",
"metadata": {},
"outputs": [],
"source": [
"def dummyEncode(data):\n",
" columnsToEncode = list(data.select_dtypes(include=['category','object']))\n",
" le = LabelEncoder()\n",
" for feature in columnsToEncode:\n",
" try:\n",
" data[feature] = le.fit_transform(data[feature])\n",
" except:\n",
" print('Error encoding '+feature)\n",
" return data"
]
},
{
"cell_type": "markdown",
"id": "1d59ce20",
"metadata": {},
"source": [
"### Training & Testing The Model & Predicting Performance"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "403bf37f",
"metadata": {},
"outputs": [],
"source": [
"def train_test_splitting(data):\n",
" # Do The Label Encoding Of Your DataSet.\n",
" encoded_data = dummyEncode(data[training_columns])\n",
" # Add Target Column To The \"Encoded Data\" Dataset.\n",
" encoded_data[target_column]=data[target_column]\n",
" # Assign X To The Training Column Values.\n",
" X = np.array(encoded_data.drop([target_column],1))\n",
" # Assign \"y\" to the target Column Values.\n",
" y = np.array(encoded_data[target_column])\n",
" # skale X – normalized -1 to 1.\n",
" X = preprocessing.scale(X)\n",
" # Split The Encoded Data Into Train & Test Split\n",
" X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.30,random_state=48)\n",
" # Return The Train & Test Split Values As A Tuple\n",
" return (X_train, X_test, y_train, y_test)\n",
" "
]
},
{
"cell_type": "markdown",
"id": "5acf269b",
"metadata": {},
"source": [
"### Creating Base Model Pipeline."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4cb031a9",
"metadata": {},
"outputs": [],
"source": [
"def model_pipeline(data):\n",
" \n",
" # This If Statement Is A Hard Coded Value, Only Works For One DataSet. \n",
" # Ignore This If Statement.\n",
" if \"Global_intensity\" in all_columns:\n",
" if \"Global_intensity\" in training_columns:\n",
" training_columns.remove(\"Global_intensity\")\n",
" \n",
" # Call The \"train_test_splitting\" Method\n",
" X_train, X_test, y_train, y_test = train_test_splitting(data)\n",
" \n",
" # Linear Regression Model\n",
" model_lr = make_pipeline(\n",
" LinearRegression()\n",
" \n",
" )\n",
" model_lr.fit(X_train,y_train)\n",
" \n",
" \n",
" # Lasso Model\n",
" model_ls = make_pipeline(\n",
" \n",
" Lasso(alpha=1)\n",
" )\n",
" model_ls.fit(X_train,y_train)\n",
" \n",
" \n",
" #XGB Regressor Model\n",
" model_xgb = make_pipeline(\n",
" \n",
" XGBRegressor(booster='gblinear') # learning_rate=0.1, 0.01, 02\n",
" )\n",
" \n",
" model_xgb.fit(X_train,y_train)\n",
" \n",
" # Ridge Model\n",
" model_R = make_pipeline(Ridge()\n",
" \n",
" )\n",
" model_R.fit(X_train,y_train)\n",
" \n",
" # Elastic-net Model\n",
" model_Er = make_pipeline(\n",
" \n",
" ElasticNet()\n",
" )\n",
" model_Er.fit(X_train,y_train)\n",
" \n",
" # Return All The Trained Model\n",
" return (model_lr,model_ls,model_xgb,model_R,model_Er)\n",
" "
]
},
{
"cell_type": "markdown",
"id": "b7320dc4",
"metadata": {},
"source": [
"### Evaluating Base Model Performance"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dfb91a52",
"metadata": {},
"outputs": [],
"source": [
"def base_model_evaluation(data,trained_model_list, model_names):\n",
" # \"trained_model_list\" = Will Contain All The Trained Model Objects\n",
" # \"model_names\" = Will Contain The Algorithms Names Only\n",
" \n",
" # This If Statement Is A Hard Coded Value, Only Works For One DataSet. \n",
" # Ignore This If Statement.\n",
" if \"Global_intensity\" in all_columns:\n",
" if \"Global_intensity\" in training_columns:\n",
" training_columns.remove(\"Global_intensity\")\n",
" \n",
" # Call The \"train_test_splitting\" Method\n",
" X_train, X_test, y_train, y_test = train_test_splitting(data)\n",
" \n",
"\n",
" col_names = ['Algorithm','Accuracy_Before','RMSE_Before','Intercept_Before']\n",
" \n",
" # \"model_list\" = It Will Contain All Model Names and The 'Accuracy','RMSE','Intercept' Details\n",
" model_list=[] \n",
" \n",
" #Loop through models in trained_model_list and obtain metrics and add to model_list\n",
" for i, model_i in enumerate(trained_model_list):\n",
" # \"row_list\" = It Will Contain 'Algorithm','Accuracy','RMSE','Intercept' Values\n",
" row_list = [model_names[i]]\n",
" row_list.extend([\n",
" str(round(r2_score(y_test,model_i.predict(X_test))*100)) + '%',\n",
" mean_squared_error(y_test,model_i.predict(X_test))**0.5,\n",
" model_i._final_estimator.intercept_\n",
" ]);\n",
" model_list.append(row_list)\n",
" \n",
" # \"base_model\" = Data Frame Will Store All Model Performance Details \n",
" base_model = pd.DataFrame(model_list, columns=col_names)\n",
" \n",
" # Return Base Model DataFrame\n",
" # Model Name,Accuracy RMSE and Intercept\n",
" return base_model\n",
" \n",
" "
]
},
{
"cell_type": "markdown",
"id": "ee51ee0d",
"metadata": {},
"source": [
"### Calling Base Model Evaluation Method"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "08e8769f",
"metadata": {},
"outputs": [],
"source": [
"# Declare A Method That Will Call The \"base_model_evaluation\" Method And Get The Scores\n",
"def evaluate_base_model(data):\n",
" # Calling \"model_pipeline\" To Get The Model Objects\n",
" model_lr, model_ls, model_xgb,model_R,model_Er = model_pipeline(data)\n",
" \n",
" # Store It Inside A List\n",
" models = [model_lr, model_ls, model_xgb,model_R,model_Er]\n",
" \n",
" # List Of Algorithms You Are Using\n",
" modelnams = ['linear_regression','lasso', 'xgb_boost_regression','Ridge regression','Elastic net']\n",
" \n",
" # Calling \"base_model_evaluation\" Method\n",
" base_model = base_model_evaluation(data,models,modelnams)\n",
" \n",
" return base_model"
]
},
{
"cell_type": "markdown",
"id": "8ddcd17a",
"metadata": {},
"source": [
"### Tuning Our Base Model"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0384ad4d",
"metadata": {},
"outputs": [],
"source": [
"# Declare A Method That Will Tune The Base Model\n",
"def tuning_model(data):\n",
" \n",
" # This If Statement Is A Hard Coded Value, Only Works For One DataSet. \n",
" # Ignore This If Statement.\n",
" if \"Global_intensity\" in all_columns:\n",
" if \"Global_intensity\" not in training_columns:\n",
" training_columns.append(\"Global_intensity\")\n",
" \n",
" # Call The \"train_test_splitting\" Method\n",
" X_train, X_test, y_train, y_test = train_test_splitting(data)\n",
" \n",
" # Calling \"model_pipeline\" To Get The Model Objects\n",
" model_lr, model_ls, model_xgb,model_R,model_Er = model_pipeline(data)\n",
" \n",
" # Store It Inside A List\n",
" models = [model_lr, model_ls, model_xgb,model_R,model_Er]\n",
" \n",
" #(1) Linear Regression Tuning\n",
" param_grid = {'n_jobs':[0,1,2,3,4,5],\n",
" 'positive':[True,False]}\n",
" \n",
" model_lr_t = GridSearchCV(\n",
" estimator = model_lr[0],\n",
" param_grid = param_grid,\n",
" n_jobs=-1,\n",
" cv=5,\n",
" verbose=0\n",
" )\n",
" model_lr_t.fit(X_train,np.ravel(y_train))\n",
" model_lr_t_params = model_lr_t.best_params_\n",
" print('best params:- ',model_lr_t_params) \n",
" print('R2 Score:', str(round(r2_score(y_train,model_lr_t.predict(X_train))*100)) + '%')\n",
" \n",
" \n",
" #(2) Lasso Tuning\n",
" param_grid = {'alpha': [0.1, 0.3, 0.5, 0.7, 0.9, 1, 1.5, 2, 2.5]}\n",
" # Lasso Model\n",
" model_ls_t = GridSearchCV(\n",
" estimator = model_ls[0],\n",
" param_grid = param_grid,\n",
" n_jobs=-1,\n",
" cv=5,\n",
" verbose=0\n",
" )\n",
" model_ls_t.fit(X_train,np.ravel(y_train))\n",
" model_ls_t_params = model_ls_t.best_params_\n",
" print('best params:- ',model_ls_t_params) \n",
" print('R2 Score:', str(round(r2_score(y_train,model_ls_t.predict(X_train))*100)) + '%')\n",
" \n",
" \n",
" #(3) XGB Boost tuning\n",
" param_grid = {\n",
" 'learning_rate':[0.1, 0.3, 0.5, 0.7, 0.9, 1, 1]\n",
" }\n",
" # Boost Model\n",
" model_xgb_t = GridSearchCV(\n",
" estimator = model_xgb[0],\n",
" param_grid = param_grid,\n",
" n_jobs=-1,\n",
" cv=5,\n",
" verbose=0,\n",
"\n",
" )\n",
" model_xgb_t.fit(X_train,np.ravel(y_train))\n",
" model_xgb_t_params = model_xgb_t.best_params_\n",
" print('best params:- ',model_xgb_t_params)\n",
" print('R2 Score:', str(round(r2_score(y_train,model_xgb_t.predict(X_train))*100)) + '%')\n",
" \n",
" #(4) Ridge Tuning\n",
" param_grid = {'alpha': np.logspace(-3,3,10)}\n",
" # Ridge Model\n",
" model_R_t = GridSearchCV(\n",
" estimator=model_R[0],\n",
" param_grid = param_grid,\n",
"\n",
" cv=5 \n",
" )\n",
" model_R_t.fit(X_train,y_train)\n",
" model_R_t_params = model_R_t.best_params_\n",
" \n",
" print('best params:- ',model_R_t_params)\n",
" print('R2 Score:', str(round(r2_score(y_train,model_R_t.predict(X_train))*100)) + '%')\n",
" \n",
" \n",
" #(5) Elastic net Tuning\n",
" param_grid = {'alpha': np.logspace(-0.5,2,3)}\n",
" # Ridge Model\n",
" model_E_t = GridSearchCV(\n",
" estimator=model_Er[0],\n",
" param_grid = param_grid,\n",
"\n",
" cv=5\n",
"\n",
" )\n",
" model_E_t.fit(X_train,y_train)\n",
" model_E_t_params = model_E_t.best_params_\n",
" \n",
" print('best params:- ',model_E_t_params)\n",
" print('R2 Score:', str(round(r2_score(y_train,model_E_t.predict(X_train))*100)) + '%')\n",
" \n",
" # Return All The Tuned Model Objects\n",
" return (model_lr_t,model_ls_t,model_xgb_t,model_R_t,model_E_t)"
]
},
{
"cell_type": "markdown",
"id": "e9e2604d",
"metadata": {},
"source": [
"### Evaluating Tuned Model"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b4feb2ed",
"metadata": {},
"outputs": [],
"source": [
"# Define A Method To Evaluate The Tuned Model\n",
"def evaluate_tuned_model(data):\n",
" \n",
" # This If Statement Is A Hard Coded Value, Only Works For One DataSet. \n",
" # Ignore This If Statement.\n",
" if \"Global_intensity\" in all_columns:\n",
" if \"Global_intensity\" not in training_columns:\n",
" training_columns.append(\"Global_intensity\")\n",
" \n",
" # Call The \"train_test_splitting\" Method\n",
" X_train, X_test, y_train, y_test = train_test_splitting(data)\n",
" \n",
" # Call Tuned Model\n",
" model_lr_t,model_ls_t, model_xgb_t, model_R_t,model_E_t = tuning_model(data)\n",
" \n",
" # Store It Inside List\n",
" tuned_model_list = [model_lr_t,model_ls_t, model_xgb_t, model_R_t,model_E_t]\n",
" \n",
" #Model Names\n",
" model_names = ['linear_regression','lasso', 'xgb_boost_regression','Ridge regression','Elastic net']\n",
" \n",
" col_names = ['Algorithm','Accuracy_After','RMSE_After','Intercept_After']\n",
" \n",
" model_list=[] \n",
" #Loop through models in model_list and obtain metrics and add to data_list\n",
" for i, model_i in enumerate(tuned_model_list):\n",
" row_list = [model_names[i]]\n",
" row_list.extend([\n",
"\n",
" str(round(r2_score(y_test,model_i.predict(X_test))*100)) + ' %',\n",
" mean_squared_error(y_test,model_i.predict(X_test))**0.5,\n",
" model_i.best_estimator_.intercept_\n",
"\n",
" ]);\n",
" model_list.append(row_list)\n",
" tuned_model = pd.DataFrame(model_list, columns=col_names)\n",
" \n",
" # Return Tuned Model Performance Details\n",
" return tuned_model\n"
]
},
{
"cell_type": "markdown",
"id": "1efd8649",
"metadata": {},
"source": [
"### Combining Base & Tuned Model Performance"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9340eaba",
"metadata": {},
"outputs": [],
"source": [
"# This Method Will Combine The BASE Model $ TUNED Model Results.\n",
"def combine_base_tuned_model(data):\n",
" \n",
" # Call Base Model Performance\n",
" base_model = evaluate_base_model(data)\n",
" \n",
" # Call Tuned Model Performance\n",
" tuned_model = evaluate_tuned_model(data)\n",
" \n",
" # Merge Base and Tuned Model Performance\n",
" base_tuned_result = pd.merge(base_model,tuned_model).sort_values(by=['Accuracy_Before','Accuracy_After'],ascending=False)\n",
" \n",
" # Return The Combined Result\n",
" return base_tuned_result\n",
" "
]
},
{
"cell_type": "markdown",
"id": "6d08a8de",
"metadata": {},
"source": [
"### Flatterning List Function"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c58f8e82",
"metadata": {},
"outputs": [],
"source": [
"# Flatten Method To Flatten The List Into Indivisual Values\n",
"def flatten(test_list):\n",
" if isinstance(test_list, list):\n",
" temp = []\n",
" for ele in test_list:\n",
" temp.extend(flatten(ele))\n",
" return temp\n",
" else:\n",
" return [test_list]"
]
},
{
"cell_type": "markdown",
"id": "3ac9031b",
"metadata": {},
"source": [
"### Generating Model Coefficients Values"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5569200b",
"metadata": {},
"outputs": [],
"source": [
"def model_coefficients(data):\n",
" \n",
" # Call Tuned Model\n",
" model_lr_t,model_ls_t, model_xgb_t, model_R_t,model_E_t = tuning_model(data)\n",
" \n",
" #Calling \"combine_base_tuned_model\" Method.\n",
" base_tuned_result = combine_base_tuned_model(data)\n",
" \n",
" coef1=model_lr_t.best_estimator_.coef_\n",
" coef2=model_ls_t.best_estimator_.coef_\n",
" coef3=model_xgb_t.best_estimator_.coef_\n",
" coef4=model_R_t.best_estimator_.coef_\n",
" coef5=model_E_t.best_estimator_.coef_\n",
" \n",
" coeffs=[flatten(['linear_regression',coef1.tolist()]),\n",
" flatten(['lasso',coef2.tolist()]),\n",
" flatten(['xgb_boost_regression',coef3.tolist()]),\n",
" flatten(['Ridge regression',coef4.tolist()]),\n",
" flatten(['Elastic net',coef5.tolist()])]\n",
" \n",
" # This If Statement Is A Hard Coded Value, Only Works For One DataSet. \n",
" # Ignore This If Statement.\n",
" if \"Global_intensity\" in all_columns:\n",
" if \"Global_intensity\" not in training_columns:\n",
" training_columns.append(\"Global_intensity\")\n",
" \n",
" needed_columns = ['Algorithm']\n",
" \n",
" needed_columns.extend(training_columns)\n",
" \n",
" coeffs_df=pd.DataFrame(coeffs,columns=needed_columns) # Contents All The Intercept Values\n",
" \n",
" # Contents Accuracy Score and The Intercept Values\n",
" model_coefficient=pd.merge(base_tuned_result,coeffs_df) \n",
" \n",
" required_columns = ['Algorithm','Intercept_After']\n",
" \n",
" required_columns.extend(training_columns)\n",
" \n",
" required_columns.extend(['Accuracy_Before','Accuracy_After','RMSE_Before','RMSE_After'])\n",
" \n",
" model_coefficient = model_coefficient[required_columns] # Will contain algo,intercepts,coefficients, accuracy\n",
" \n",
" # Return The Coefficient Data Frame Contains All Coefficients And Intercepts\n",
" return model_coefficient\n"
]
},
{
"cell_type": "markdown",
"id": "544c2741",
"metadata": {},
"source": [
"### Main Function Defination "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f415c67d",
"metadata": {},
"outputs": [],
"source": [
"def main():\n",
" \n",
" # Global Variables\n",
" # i = Meter1, Meter2, Meter3 Values\n",
" # j = Number Of Meters [0,1,2] ,It will iterate over model_coeff dataset.\n",
" # k = Training Column Names\n",
" j = 0\n",
" \n",
" meter_data = {} # Meter Dictionary That Will Contain All The Distinct Meter DataSet.\n",
" \n",
" insert_loc = len(all_columns) - 1\n",
" \n",
" pred_column_name = 'Pred_'+target_column\n",
" \n",
" meter_dict = {}\n",
"\n",
" meter_list = []\n",
"\n",
" frames = []\n",
"\n",
" final_frames = []\n",
"\n",
" model_coeff = pd.DataFrame()\n",
"\n",
" final_table = pd.DataFrame()\n",
" \n",
"\n",
" # Creating Different DataFrames For Different Values Of Iter_Columns.\n",
" # This Will Store All The Coefficient Values.\n",
" for i in required_data[iter_column].unique():\n",
"\n",
" meter_data_values = required_data[required_data[iter_column]==i]\n",
"\n",
" meter_data_values = meter_data_values.reset_index(drop=True)\n",
"\n",
" meter_list.append(model_coefficients(meter_data_values))\n",
"\n",
" if(os.path.exists(all_model_performance_location) and os.path.isfile(all_model_performance_location)):\n",
" os.remove(all_model_performance_location)\n",
" \n",
" # Inserting New Meter Columns In All The Meter Coefficient Dataset\n",
" for i,j in zip(required_data[iter_column].unique(),meter_list):\n",
"\n",
" meter_dict[i] = j\n",
"\n",
" meter_dict[i].insert(loc=0, column=\"MeterDetails\", value=i)\n",
" \n",
" meter_dict[i].to_csv(all_model_performance_location,mode='a', index=False)\n",
"\n",
" \n",
" \n",
" #Creating A Final Table Containing All the Meters Coefficient Values Of Efficent Model.\n",
" for i in required_data[iter_column].unique():\n",
" \n",
" frames.append(meter_dict[i].iloc[:1])\n",
" \n",
" model_coeff = pd.concat(frames,ignore_index=True)\n",
" \n",
" # Storing The Coefficient Values In A CSV File \n",
" model_coeff.to_csv(coeff_file_location,index=False)\n",
" \n",
" # In This For Loop We Will Calculate The Predicted Power Consumption. \n",
" # First For Loop Will Iterate Over All The Unique Meters Present. \n",
" j = 0\n",
" for i in required_data[iter_column].unique(): \n",
" meter_data[i] = data[data[iter_column]== i]\n",
" sum_coeff = 0\n",
" # Second For Loop Will Iterate Over All The Training Columns \n",
" # And Multiply It With The Coefficient Values And Add It.\n",
" for k in training_columns:\n",
" x = model_coeff.loc[j,k] # Coefficient Value\n",
" y = meter_data[i][k] # Actual Value It Will Return A Column\n",
" dataType = meter_data[i].dtypes\n",
" if dataType[k] == 'O':\n",
" y = 1\n",
" else:\n",
" y = preprocessing.scale(y)\n",
" \n",
" sum_coeff = sum_coeff + (x * y)\n",
"\n",
" meter_data[i][pred_column_name] = model_coeff.loc[j,'Intercept_After'] + sum_coeff\n",
" meter_data[i]['Error_Difference'] = meter_data[i][target_column] - meter_data[i][pred_column_name]\n",
" meter_data[i].insert(loc=insert_loc, column='Algorithm', value=model_coeff.loc[j,'Algorithm'])\n",
" meter_data[i].insert(loc=insert_loc+1, column='Accuracy_Before', value=model_coeff.loc[j,'Accuracy_Before'])\n",
" meter_data[i].insert(loc=insert_loc+2, column='Accuracy_After', value=model_coeff.loc[j,'Accuracy_After'])\n",
" meter_data[i].insert(loc=insert_loc+3, column='RMSE_Before', value=model_coeff.loc[j,'RMSE_Before'])\n",
" meter_data[i].insert(loc=insert_loc+4, column='RMSE_After', value=model_coeff.loc[j,'RMSE_After'])\n",
" j = j + 1\n",
"\n",
"\n",
" #Creating A Final Table Containing All the Meters Coefficient Values Of Efficent Model.¶\n",
"\n",
" for i in required_data[iter_column].unique():\n",
" \n",
" final_frames.append(meter_data[i])\n",
"\n",
" final_table = pd.concat(final_frames,ignore_index=True)\n",
" \n",
" final_table.to_csv(final_output_file_location,index=False)\n",
" "
]
},
{
"cell_type": "markdown",
"id": "ff57190e",
"metadata": {},
"source": [
"### Calling Main Function"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "79e8fdd0",
"metadata": {},
"outputs": [],
"source": [
"main()"
]
},
{
"cell_type": "markdown",
"id": "b5742ec4",
"metadata": {},
"source": [
"### All Model Performance Metrix"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b1e518a9",
"metadata": {},
"outputs": [],
"source": [
"all_model_perform = pd.read_csv(all_model_performance_location)\n",
"all_model_perform"
]
},
{
"cell_type": "markdown",
"id": "d24099b8",
"metadata": {},
"source": [
"### Model Coefficients Matrix"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3a63a8e4",
"metadata": {},
"outputs": [],
"source": [
"model_coeff = pd.read_csv(coeff_file_location)\n",
"model_coeff"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f7395402",
"metadata": {},
"outputs": [],
"source": [
"model_coeff[[\"MeterDetails\",\"Algorithm\",\"Accuracy_Before\",\"Accuracy_After\",\"RMSE_Before\",\"RMSE_After\"]]"
]
},
{
"cell_type": "markdown",
"id": "54661746",
"metadata": {},
"source": [
"### Final Prediction Table"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f378fa0d",
"metadata": {},
"outputs": [],
"source": [
"final_output = pd.read_csv(final_output_file_location)\n",
"final_output.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2d137427",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "d69cae07",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "b47fa548",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "2f8c8a61",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "03b29293",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}