Source code for RADAR.pos_process_module

import numpy as np


def process_scores(d_scores, contamination):
    """
    Calculate binary labels based on a contamination threshold.

    The ``int(contamination * len(d_scores))`` highest scores are flagged as
    anomalies. Scores that tie with the threshold value are flagged as well,
    so the number of flagged samples can exceed that count.

    Args:
        d_scores (array-like): Decision scores.
        contamination (float): Proportion of outliers in the dataset (0.0 to 0.5).

    Returns:
        np.ndarray: Binary labels (0 for normal, 1 for anomaly).
    """
    scores = np.asarray(d_scores)
    num_anomalies = int(contamination * len(scores))

    # With zero expected anomalies, ``-num_anomalies`` is 0 and the partition
    # index would select the *minimum* score as threshold, labelling every
    # sample as anomalous (and raising on empty input). Nothing is flagged.
    if num_anomalies < 1:
        return np.zeros(scores.shape, dtype=int)

    # np.partition places the num_anomalies-th largest value at index
    # -num_anomalies without fully sorting the array.
    threshold = np.partition(scores, -num_anomalies)[-num_anomalies]
    return (scores >= threshold).astype(int)
def process_scores_with_percentile(d_scores, contamination):
    """
    Derive a decision threshold from a percentile of the scores.

    The threshold is the score value at the ``100 * (1 - contamination)``
    percentile, as computed by ``np.percentile``.

    Args:
        d_scores (array-like): Decision scores.
        contamination (float): Proportion of outliers in the dataset (0.0 to 0.5).

    Returns:
        float: Threshold value.
    """
    inlier_fraction = 1 - contamination
    percentile_rank = 100 * inlier_fraction
    cutoff = np.percentile(d_scores, percentile_rank)
    return cutoff
def process_scores_with_threshold(d_scores, n_std=2):
    """
    Compute the threshold using mean and standard deviation.

    The threshold is ``mean(d_scores) + n_std * std(d_scores)``, where the
    standard deviation is the one returned by ``np.std`` with its default
    arguments.

    Args:
        d_scores (array-like): Decision scores.
        n_std (float): Number of standard deviations above the mean at which
            the threshold is placed. Defaults to 2, the previously fixed
            value.

    Returns:
        float: Threshold value.
    """
    return np.mean(d_scores) + n_std * np.std(d_scores)
def compute_anomaly_proportion(labels):
    """
    Return the fraction of samples that are labelled as anomalies.

    Args:
        labels: Binary anomaly labels (0: normal, 1: anomalous).

    Returns:
        Proportion of anomalies in the data, i.e. the sum of the labels
        divided by the number of labels.
    """
    anomaly_count = np.sum(labels)
    sample_count = len(labels)
    return anomaly_count / sample_count
def remove_low_confidence_anomalies(d_scores, anomalies, confidence_threshold=0.8):
    """
    Keep only the anomalies whose decision score reaches a confidence threshold.

    Args:
        d_scores: Decision scores, indexable by the entries of ``anomalies``.
        anomalies: Indices of detected anomalies.
        confidence_threshold: Minimum decision score an anomaly must have to
            be kept (compared with ``>=``). Defaults to 0.8.

    Returns:
        List of the anomaly indices whose score is at least
        ``confidence_threshold``, in their original order.
    """
    confident = []
    for idx in anomalies:
        score = d_scores[idx]
        if score >= confidence_threshold:
            confident.append(idx)
    return confident