Skip to content

Duo Guard

any_guardrail.guardrails.duo_guard.duo_guard

DuoGuard

Bases: ThreeStageGuardrail[AnyDict, AnyDict, bool, dict[str, bool], float]

Guardrail that classifies text based on the categories in DUOGUARD_CATEGORIES.

For more information, please see the model card: [DuoGuard](https://huggingface.co/collections/DuoGuard/duoguard-models-67a29ad8bd579a404e504d21).

Source code in src/any_guardrail/guardrails/duo_guard/duo_guard.py
class DuoGuard(ThreeStageGuardrail[AnyDict, AnyDict, bool, dict[str, bool], float]):
    """Multi-label guardrail that scores text against every entry in DUOGUARD_CATEGORIES.

    Each category receives a sigmoid probability computed from the model logits. A
    category is flagged when its probability is strictly greater than ``threshold``,
    and the input is reported as valid only when no category is flagged.

    For more information, please see the model card:

    - [DuoGuard](https://huggingface.co/collections/DuoGuard/duoguard-models-67a29ad8bd579a404e504d21).
    """

    # Model checkpoints this guardrail accepts as ``model_id``.
    SUPPORTED_MODELS: ClassVar = [
        "DuoGuard/DuoGuard-0.5B",
        "DuoGuard/DuoGuard-1B-Llama-3.2-transfer",
        "DuoGuard/DuoGuard-1.5B-transfer",
    ]

    # Tokenizer repository used for each supported checkpoint when the default
    # HuggingFace provider is constructed.
    MODELS_TO_TOKENIZER: ClassVar = {
        "DuoGuard/DuoGuard-0.5B": "Qwen/Qwen2.5-0.5B",
        "DuoGuard/DuoGuard-1B-Llama-3.2-transfer": "meta-llama/Llama-3.2-1B",
        "DuoGuard/DuoGuard-1.5B-transfer": "Qwen/Qwen2.5-1.5B",
    }

    def __init__(
        self,
        model_id: str | None = None,
        threshold: float = DUOGUARD_DEFAULT_THRESHOLD,
        provider: StandardProvider | None = None,
    ) -> None:
        """Set up the guardrail and load the requested DuoGuard checkpoint.

        Args:
            model_id: Checkpoint identifier, resolved through ``default`` together
                with ``SUPPORTED_MODELS`` (presumably falls back to a supported
                model when ``None`` -- confirm against ``default``).
            threshold: Probability cutoff; a category is flagged when its
                probability is strictly greater than this value.
            provider: Inference backend. When not supplied (or falsy), a
                ``HuggingFaceProvider`` is created with the tokenizer mapped to
                the resolved model in ``MODELS_TO_TOKENIZER``.

        """
        self.model_id = default(model_id, self.SUPPORTED_MODELS)
        self.threshold = threshold

        if provider:
            self.provider = provider
        else:
            # The tokenizer lookup only happens when we build the provider ourselves.
            tokenizer_id = self.MODELS_TO_TOKENIZER[self.model_id]
            self.provider = HuggingFaceProvider(tokenizer_id=tokenizer_id)

        self.provider.load_model(self.model_id)

        # Reuse the end-of-sequence token for padding
        # (presumably these tokenizers ship without a pad token -- TODO confirm).
        tokenizer = self.provider.tokenizer  # type: ignore[attr-defined]
        tokenizer.pad_token = tokenizer.eos_token

    def _pre_processing(self, input_text: str) -> StandardPreprocessOutput:
        """Delegate preparation of ``input_text`` to the provider."""
        prepared = self.provider.pre_process(input_text)
        return prepared

    def _inference(self, model_inputs: StandardPreprocessOutput) -> StandardInferenceOutput:
        """Run the provider's model on the already pre-processed inputs."""
        raw_outputs = self.provider.infer(model_inputs)
        return raw_outputs

    def _post_processing(self, model_outputs: StandardInferenceOutput) -> GuardrailOutput[bool, dict[str, bool], float]:
        """Convert logits into per-category flags and an overall verdict.

        Returns:
            A ``GuardrailOutput`` where ``valid`` is ``True`` only if no category
            exceeds the threshold, ``explanation`` maps each category to its
            flag, and ``score`` is the highest category probability.

        """
        # Only the first row of the logits is scored.
        first_row_logits = model_outputs.data["logits"][0]
        probabilities = sigmoid(first_row_logits).tolist()

        # strict=True makes a category/logit count mismatch raise instead of truncating.
        flagged_by_category = {
            name: probability > self.threshold
            for name, probability in zip(DUOGUARD_CATEGORIES, probabilities, strict=True)
        }

        nothing_flagged = not any(flagged_by_category.values())
        return GuardrailOutput(
            valid=nothing_flagged,
            explanation=flagged_by_category,
            score=max(probabilities),
        )
__init__(model_id=None, threshold=DUOGUARD_DEFAULT_THRESHOLD, provider=None)

Initialize the DuoGuard model.

Source code in src/any_guardrail/guardrails/duo_guard/duo_guard.py
def __init__(
    self,
    model_id: str | None = None,
    threshold: float = DUOGUARD_DEFAULT_THRESHOLD,
    provider: StandardProvider | None = None,
) -> None:
    """Set up the guardrail and load the requested DuoGuard checkpoint.

    Args:
        model_id: Checkpoint identifier, resolved through ``default`` together
            with ``SUPPORTED_MODELS`` (presumably falls back to a supported
            model when ``None`` -- confirm against ``default``).
        threshold: Cutoff value stored on the instance as ``self.threshold``.
        provider: Inference backend. When not supplied (or falsy), a
            ``HuggingFaceProvider`` is created with the tokenizer mapped to
            the resolved model in ``MODELS_TO_TOKENIZER``.

    """
    self.model_id = default(model_id, self.SUPPORTED_MODELS)
    self.threshold = threshold

    if provider:
        self.provider = provider
    else:
        # The tokenizer lookup only happens when we build the provider ourselves.
        tokenizer_id = self.MODELS_TO_TOKENIZER[self.model_id]
        self.provider = HuggingFaceProvider(tokenizer_id=tokenizer_id)

    self.provider.load_model(self.model_id)

    # Reuse the end-of-sequence token for padding
    # (presumably these tokenizers ship without a pad token -- TODO confirm).
    tokenizer = self.provider.tokenizer  # type: ignore[attr-defined]
    tokenizer.pad_token = tokenizer.eos_token