27
Oct
# In this notebook, we describe how to deal with outliers.

# Importing the dataset
import warnings

import numpy as n
import pandas as p


def outliers(data):
    """Return *data* with the outliers trimmed using the 1.5 * IQR rule.

    A value counts as an outlier when it lies below ``Q1 - 1.5 * IQR`` or
    above ``Q3 + 1.5 * IQR``, where ``Q1``/``Q3`` are the 0.25/0.75
    quantiles of *data* and ``IQR = Q3 - Q1``.

    Args:
        data: A pandas Series (it must support ``.quantile``, element-wise
            comparison and ``.loc`` boolean indexing).

    Returns:
        The entries of *data* that lie inside the two fences, with their
        original index labels. Entries that compare False against both
        fences (e.g. NaN) are kept.
    """
    q1 = data.quantile(0.25)
    # The upper fence must start from the 0.75 quantile, the same quartile
    # used for the IQR; starting from 0.70 makes the fence too tight.
    q3 = data.quantile(0.75)
    iqr = q3 - q1
    lr = q1 - (1.5 * iqr)  # lower range
    hr = q3 + (1.5 * iqr)  # higher range
    # Negate the "is an outlier" mask rather than testing "is inside", so
    # that values which fail both comparisons are not dropped.
    is_outlier = (data < lr) | (data > hr)
    return data.loc[~is_outlier]


if __name__ == "__main__":
    # The plotting and dataset packages are needed only for this walkthrough,
    # so they are imported here; `outliers` stays importable without them.
    import matplotlib.pyplot as plt
    import seaborn as sns
    from sklearn.datasets import load_boston

    warnings.filterwarnings('ignore')

    # NOTE(review): load_boston and sns.distplot are reported as deprecated or
    # removed in recent scikit-learn / seaborn releases - confirm against the
    # installed versions.
    boston = load_boston()  # it is stored as dictionary
    df = p.DataFrame(boston['data'], columns=boston['feature_names'])
    df.head()

    sns.distplot(df['RM'])

    # As we can see, there are outliers
    sns.boxplot(df['RM'])

    # Trimming outliers from the dataset
    outliers(df['RM'])

    # As we can see, there are no outliers
    sns.boxplot(outliers(df['RM']))

# We can also find outliers using the mean and standard deviation instead of
# the IQR.
# NOTE: the source text is cut off in the middle of the next definition, so it
# is kept verbatim below rather than completed:
# def outliers(data,k): lr=data.mean()-(data.std()*k) #where n is number hr=data.mean()+(data.std()*k)…