File tree Expand file tree Collapse file tree 1 file changed +30
-0
lines changed Expand file tree Collapse file tree 1 file changed +30
-0
lines changed Original file line number Diff line number Diff line change 1+ import pandas as pd
2+ import numpy as np
3+
4+
def handle_outliers(data: pd.DataFrame, method: str = 'z-score', threshold: float = 3) -> pd.DataFrame:
    """Return *data* without the rows that hold an outlier in any numeric column.

    Outliers are only ever removed (whole rows are dropped); values are never
    replaced. Columns that are not selected by
    ``select_dtypes(include='number')`` are ignored during detection but are
    carried through unchanged in the result.

    Parameters:
        data: DataFrame to filter. It is not modified.
        method: str, default 'z-score'. Detection method, either 'z-score'
            or 'IQR' (Interquartile Range).
        threshold: float, default 3. Used by the 'z-score' method only: a row
            is kept when every absolute z-score in it is strictly below this
            value. The 'IQR' method always uses the fixed 1.5 * IQR fences
            and does not read this argument.

    Returns:
        DataFrame: the rows of ``data`` that contain no outlier.

    Raises:
        ValueError: if ``method`` is neither 'z-score' nor 'IQR'.
    """
    # Validate up front so a bad method name fails before any computation.
    if method not in ('z-score', 'IQR'):
        raise ValueError("Invalid method. Use z-score or IQR")

    # Only numeric columns can be scored; text/categorical columns would make
    # the arithmetic below fail.
    numeric = data.select_dtypes(include='number')
    if numeric.shape[1] == 0:
        # Nothing to test, so no row can be an outlier.
        return data.copy()

    if method == 'z-score':
        z_scores = ((numeric - numeric.mean()) / numeric.std()).abs()
        # A NaN z-score (missing value, constant column with std == 0, or a
        # single-row frame) carries no evidence of an outlier. Without the
        # isna() term `NaN < threshold` is False and such rows -- or, for a
        # constant column, every row -- would be dropped.
        keep = ((z_scores < threshold) | z_scores.isna()).all(axis=1)
    else:
        q1 = numeric.quantile(0.25)
        q3 = numeric.quantile(0.75)
        iqr = q3 - q1
        lower_fence = q1 - 1.5 * iqr
        upper_fence = q3 + 1.5 * iqr
        # Comparisons with NaN are False, so missing values are not flagged.
        is_outlier = (numeric < lower_fence) | (numeric > upper_fence)
        keep = ~is_outlier.any(axis=1)

    return data[keep]
You can’t perform that action at this time.
0 commit comments