Skip to main content
guardrails hub install hub://guardrails/similar_to_previous_values --quiet
    Installing hub://guardrails/similar_to_previous_values...
/Users/calebcourier/Projects/gr-mono/guardrails/docs/examples/.venv/lib/python3.12/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
warnings.warn(
✅Successfully installed guardrails/similar_to_previous_values!


Check whether a value is similar to a set of other values

Using the SimilarToPreviousValues validator

This validator validates whether a new value is similar to a set of previously known values. It is useful for checking whether a new value is within a distribution of known values. It supports both integer and string values.

For integer values, this validator checks whether the value lies within the specified number of standard deviations of the mean of the previous values. (This assumes that the previous values are normally distributed.)

For string values, this validator checks whether the average semantic similarity between the generated value and the previous values is less than a threshold.

# Create the Guard with the SimilarToPreviousValues validator
from typing import Union
from pydantic import BaseModel, Field
from guardrails import Guard
from guardrails.hub import SimilarToPreviousValues


# Schema for the LLM output: a single integer field named ``value``.
# (Kept as a comment rather than a docstring so the model's generated schema
# is not altered.)
class MyModel(BaseModel):
    # ``value`` is checked by SimilarToPreviousValues against the
    # ``prev_values`` list supplied through the guard's ``metadata`` at
    # parse time.
    value: int = Field(
        validators=[
            SimilarToPreviousValues(
                standard_deviations=2,
                threshold=0.8,
                on_fail="fix",
            )
        ]
    )


# Build the Guard from the Pydantic model.
guard = Guard.from_pydantic(MyModel)
    /Users/calebcourier/Projects/gr-mono/guardrails/docs/examples/.venv/lib/python3.12/site-packages/sentence_transformers/cross_encoder/CrossEncoder.py:11: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
from tqdm.autonotebook import tqdm, trange

Test with integer values

# In-distribution case: 3 is compared against the previous values 1..10.
output = guard.parse(
    llm_output='{ "value": 3 }',
    metadata={"prev_values": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]},
)

# Show the validated result.
print(output.validated_output)
    {'value': 3}

As 3 is within the distribution of the given prev_values, the validator returns 3 as it is.

# Out-of-distribution case: 300 is compared against the previous values 1..10.
output = guard.parse(
    llm_output='{ "value": 300 }',
    metadata={"prev_values": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]},
)

# Show the validated result.
print(output.validated_output)
    None

As 300 is not within the distribution of the given prev_values, the validator returns None.

Test with string values

# Define embed function
# Create an embedding function that uses OpenAI's Ada model to embed the text.
import numpy as np
import openai

# Define the embed function
def embed_function(text: str) -> np.ndarray:
    """Embed ``text`` with OpenAI's ``text-embedding-ada-002`` model.

    Args:
        text: The string to embed.

    Returns:
        The embedding of ``text`` as a NumPy array.
    """
    response = openai.embeddings.create(
        model="text-embedding-ada-002",
        input=text,
    )
    # A single input was sent, so the embedding is read from the first
    # entry of ``response.data``.
    return np.array(response.data[0].embedding)

# Guard for plain string output, using the same validator settings as before.
guard = Guard().use(
    SimilarToPreviousValues(
        standard_deviations=2,
        threshold=0.8,
        on_fail="fix",
    )
)

# Similar case: "cisco" is compared against the previous company names.
# The value is expected to pass validation and come back unchanged.
output = guard.parse(
    llm_output="cisco",
    metadata={
        "prev_values": ["broadcom", "paypal"],
        "embed_function": embed_function,
    },
)

# Show the validated result.
print(output.validated_output)
    cisco
# Dissimilar case: "taj mahal" is compared against the previous company names.
# The value is expected to fail validation, so the validated output is None.
output = guard.parse(
    llm_output="taj mahal",
    metadata={
        "prev_values": ["broadcom", "paypal"],
        "embed_function": embed_function,
    },
)

# Show the validated result.
print(output.validated_output)
    None