Source code for RADAR.pos_process_module

import numpy as np



[docs]
def process_scores(d_scores, contamination):
    """
    Calculate binary labels based on a contamination threshold.

    The threshold is the ``k``-th largest score, where
    ``k = int(contamination * len(d_scores))``. Every score greater than or
    equal to that threshold is labelled as an anomaly, so ties at the
    threshold may flag more than ``k`` points. When ``k`` is zero (for
    example ``contamination == 0.0`` or a very small dataset) no point is
    labelled as an anomaly.

    Args:
        d_scores (array-like): Decision scores.
        contamination (float): Proportion of outliers in the dataset (0.0 to 0.5).

    Returns:
        np.ndarray: Binary labels (0 for normal, 1 for anomaly).
    """
    scores = np.asarray(d_scores)
    num_anomalies = int(contamination * len(scores))
    if num_anomalies <= 0:
        # Indexing with ``-0`` is the same as ``0``: it would select the
        # minimum score as threshold and flag *every* point. No anomalies
        # requested means an all-normal labelling.
        return np.zeros(scores.shape, dtype=int)
    # Partial sort: only the k-th largest element needs to be in place.
    threshold = np.partition(scores, -num_anomalies)[-num_anomalies]
    return (scores >= threshold).astype(int)




[docs]
def process_scores_with_percentile(d_scores, contamination):
    """
    Derive a score threshold from the percentile matching the contamination.

    The percentile used is ``100 * (1 - contamination)``, so the expected
    fraction of scores above the returned value is roughly ``contamination``.

    Args:
        d_scores (array-like): Decision scores.
        contamination (float): Proportion of outliers in the dataset (0.0 to 0.5).

    Returns:
        float: Threshold value.
    """
    cutoff_percentile = 100 * (1 - contamination)
    cutoff = np.percentile(d_scores, cutoff_percentile)
    return cutoff




[docs]
def process_scores_with_threshold(d_scores):
    """
    Derive a score threshold as the mean plus two standard deviations.

    Args:
        d_scores (array-like): Decision scores.

    Returns:
        float: Threshold value.
    """
    center = np.mean(d_scores)
    spread = np.std(d_scores)
    # Two standard deviations above the mean.
    return center + 2 * spread




[docs]
def compute_anomaly_proportion(labels):
    """
    Return the fraction of entries flagged as anomalous.

    Args:
    -----
    labels: Binary anomaly labels (0: normal, 1: anomalous).

    Returns:
    --------
    proportion: Sum of the labels divided by the number of labels.
    """
    anomaly_count = np.sum(labels)
    total_count = len(labels)
    return anomaly_count / total_count




[docs]
def remove_low_confidence_anomalies(d_scores, anomalies, confidence_threshold=0.8):
    """
    Keep only the anomalies whose decision score reaches the threshold.

    Args:
    -----
    d_scores: Decision scores, indexable by the entries of ``anomalies``.
    anomalies: Indices of detected anomalies.
    confidence_threshold: Minimum score an anomaly must have to be kept.

    Returns:
    --------
    filtered_anomalies: List of the indices from ``anomalies`` (in their
        original order) whose score is >= ``confidence_threshold``.
    """
    kept = []
    for idx in anomalies:
        if d_scores[idx] < confidence_threshold:
            # Score too low to be trusted as an anomaly.
            continue
        kept.append(idx)
    return kept