#!/usr/bin/env python
# coding: utf-8
# # <span style='color:DarkBlue'> Day21 - PyCaret for </span> <span style='color:Red'> Regression </span>
#
# - ### An open source automated library for Machine Learning.
# - ### <span style='color:Red'> Three Step Process</span> to build machine learning models for:
# - Classification
# - Regression
# - Clustering
#
# ### Self Learning Resource
# 1. Explore the PyCaret manual on Regression: <a href="https://pycaret.org/regression/"> Click Here </a>
# 2. Tutorial on Pycaret <a href="https://pycaret.readthedocs.io/en/latest/tutorials.html"> Click Here</a>
#
#
#
# ### <span style='color:DarkBlue'> Method 1</span>: To install `pycaret`
# - Installing PyCaret in Local Jupyter Notebook, Google Colab or Azure Notebooks
# - Using conda: `!conda install pycaret`
# - Using pip: `!pip install pycaret`
# - Installing PyCaret in Anaconda
# - Using conda: `conda install pycaret`
# - Using pip: `pip install pycaret`
#
#
# ### <span style='color:DarkBlue'> Method 2</span>: To install `pycaret` | Online manual to install pycaret <a href="https://pycaret.org/install/"> Click Here</a>
# - <span style='color:DarkRed'> Step 1</span>: To Install pycaret (One Time)
# - Open Anaconda prompt
# - Create a conda environment: `conda create --name myenv python=3.6`
# - Activate environment: `conda activate myenv`
# - To install pycaret: `pip install pycaret`
#
# - <span style='color:DarkRed'> Step 2</span>: To use pycaret environment through Jupyter notebook (Always)
# - Open Anaconda prompt
# - Activate environment: `conda activate myenv`
# - Start Jupyter Notebook: `jupyter notebook`
#
#
#
# ### In this tutorial we will learn:
#
# - Getting Data: How to import data from PyCaret repository
# - Setting up Environment: How to setup an experiment in PyCaret and get started with building regression models
# - Create Model: How to create a model, perform cross validation and evaluate regression metrics
# - Tune Model: How to automatically tune the hyperparameters of a regression model
# - Plot Model: How to analyze model performance using various plots
# - Finalize Model: How to finalize the best model at the end of the experiment
# - Predict Model: How to make prediction on new / unseen data
# - Save / Load Model: How to save / load a model for future use
#
# # <span style='color:Red'> 1. Regression: Basics </span>
# ### <span style='color:DarkBlue'>1.1 Data loading</span>
# #### Get the version of the pycaret
# In[1]:
# Import the helper that reports the installed PyCaret version.
from pycaret.utils import version
# The call's result is not captured; a notebook would echo any returned value
# as the cell output, while a plain script run discards it.
version()
# #### Loading dataset from pycaret
# In[2]:
from pycaret.datasets import get_data
# #### Get the list of datasets available in pycaret
# In[3]:
# Internet connection is required
# 'index' is requested instead of a specific dataset name; per the heading
# above, this yields the list of datasets available in PyCaret.
dataSets = get_data('index')
# Bare expression: a notebook echoes the value as cell output; it has no
# effect when the file is run as a plain script.
dataSets
# #### Get traffic dataset
# In[19]:
# Internet connection is required
# NOTE: the variable keeps the name `boston_df`, but it holds the "traffic" dataset.
boston_df = get_data("traffic")
# This is a regression dataset. The target column used below, traffic_volume, holds continuous values
# #### Get the dimension of dataset
# In[20]:
# Print the shape of the loaded dataset.
print(boston_df.shape)
# #### Remove duplicates
# In[21]:
# Shape before de-duplication.
print(boston_df.shape)
# drop_duplicates() returns a new DataFrame and leaves the original untouched,
# so the result must be assigned back for the duplicate rows to actually be
# removed from the data used by the cells below.
boston_df = boston_df.drop_duplicates()
# Shape after de-duplication (differs from the first print only if duplicate
# rows were present).
print(boston_df.shape)
# ### <span style='color:DarkBlue'>1.2 Parameter setting for all regression models</span>
# - Train/Test division
# - Sampling
# - Normalization
# - Transformation
# - PCA (Dimension Reduction)
# - Handling of Outliers
# - Feature Selection
# #### Setup parameters for regression models (defaults)
# In[22]:
from pycaret.regression import *

# Baseline experiment: traffic_volume is the target, fold=15, rows are not
# shuffled when the data is split, and no extra preprocessing is requested.
reg = setup(
    data=boston_df,
    target='traffic_volume',
    fold=15,
    data_split_shuffle=False,
)
# ### <span style='color:DarkBlue'>1.3 Run and compare the Model Performance</span>
# #### Comparing models
# In[23]:
# Compare the candidate models for the experiment configured by setup() above.
# The return value is not stored.
compare_models()
# Explore more parameters
# ### <span style='color:DarkBlue'>1.4 Compare Models with Different Preprocessing Settings</span>
# ##### Normalization (z-score)
# In[24]:
from pycaret.regression import *

# Same experiment as the baseline call, with normalization switched on
# using the 'zscore' method.
reg = setup(
    data=boston_df,
    target='traffic_volume',
    fold=15,
    data_split_shuffle=False,
    normalize=True,
    normalize_method='zscore',
)

# Re-run the model comparison under this configuration; result not stored.
compare_models()
# ##### Normalization + Transformation (Yeo-Johnson)
# In[ ]:
from pycaret.regression import *

# z-score normalization plus the 'yeo-johnson' transformation.
reg = setup(
    data=boston_df,
    target='traffic_volume',
    fold=15,
    data_split_shuffle=False,
    normalize=True,
    normalize_method='zscore',
    transformation=True,
    transformation_method='yeo-johnson',
)

# Re-run the model comparison under this configuration; result not stored.
compare_models()
# ##### Normalization + Outlier Removal (threshold 0.1)
# In[26]:
from pycaret.regression import *

# z-score normalization plus outlier removal with outliers_threshold=0.1.
reg = setup(
    data=boston_df,
    target='traffic_volume',
    fold=15,
    data_split_shuffle=False,
    normalize=True,
    normalize_method='zscore',
    remove_outliers=True,
    outliers_threshold=0.1,
)

# Re-run the model comparison under this configuration; result not stored.
compare_models()
# ##### Normalization + PCA (linear) + Outlier Removal (threshold 0.15)
# In[27]:
from pycaret.regression import *

# Normalization (no method given, so the library default applies), PCA with
# the 'linear' method, and outlier removal with outliers_threshold=0.15.
reg = setup(
    data=boston_df,
    target='traffic_volume',
    fold=15,
    data_split_shuffle=False,
    normalize=True,
    pca=True,
    pca_method='linear',
    remove_outliers=True,
    outliers_threshold=0.15,
)

# Re-run the model comparison under this configuration; result not stored.
compare_models()
# Add a code snippet to your website: www.paste.org