Skip to content

Rapids manager

RapidsManager #

RapidsManager()

Can be used to build different types of rapids, which can then be added to validation sets.

Source code in src/rapidata/rapidata_client/validation/rapids/rapids_manager.py
def __init__(self):
    """Initialise the manager; no setup work is performed."""
    return None

classification_rapid #

classification_rapid(
    instruction: str,
    answer_options: list[str],
    datapoint: str,
    truths: list[str],
    data_type: str = MEDIA,
    metadata: Sequence[Metadata] = [],
    explanation: str | None = None,
) -> Rapid

Build a classification rapid

Parameters:

Name Type Description Default
instruction str

The instruction/question to be shown to the labeler.

required
answer_options list[str]

The options that the labeler can choose from to answer the question.

required
datapoint str

The datapoint that the labeler will be labeling.

required
truths list[str]

The correct answers to the question.

required
data_type str

The type of the datapoint. Defaults to RapidataDataTypes.MEDIA.

MEDIA
metadata Sequence[Metadata]

The metadata that is attached to the rapid. Defaults to [].

[]
Source code in src/rapidata/rapidata_client/validation/rapids/rapids_manager.py
def classification_rapid(self,
        instruction: str,
        answer_options: list[str],
        datapoint: str,
        truths: list[str],
        data_type: str = RapidataDataTypes.MEDIA,
        metadata: Sequence[Metadata] = [],
        explanation: str | None = None,
) -> Rapid:
    """Build a classification rapid

    Args:
        instruction (str): The instruction/question to be shown to the labeler.
        answer_options (list[str]): The options that the labeler can choose from to answer the question. Must not be empty.
        datapoint (str): The datapoint that the labeler will be labeling.
        truths (list[str]): The correct answers to the question. Each one has to be part of answer_options.
        data_type (str, optional): The type of the datapoint. Defaults to RapidataDataTypes.MEDIA.
        metadata (Sequence[Metadata], optional): The metadata that is attached to the rapid. Defaults to [].
        explanation (str | None, optional): Forwarded unchanged to the rapid. Defaults to None.

    Returns:
        Rapid: The rapid wrapping the asset, the classification payload and its truth.

    Raises:
        ValueError: If data_type is neither MEDIA nor TEXT, if answer_options is empty,
            or if any truth is not part of answer_options.
    """

    if data_type == RapidataDataTypes.MEDIA:
        asset = MediaAsset(datapoint)
    elif data_type == RapidataDataTypes.TEXT:
        asset = TextAsset(datapoint)
    else:
        raise ValueError(f"Unsupported data type: {data_type}")

    # With no options the chance-level ratio below would divide by zero,
    # so reject the input with a descriptive error instead.
    if not answer_options:
        raise ValueError("At least one answer option is required")

    if not all(truth in answer_options for truth in truths):
        raise ValueError("Truths must be part of the answer options")

    payload = ClassifyPayload(
        _t="ClassifyPayload", possibleCategories=answer_options, title=instruction
    )
    model_truth = AttachCategoryTruth(
        correctCategories=truths, _t="AttachCategoryTruth"
    )

    return Rapid(
            asset=asset,
            metadata=metadata,
            explanation=explanation,
            payload=payload,
            truth=model_truth,
            # Share of the options that count as correct.
            randomCorrectProbability=len(truths) / len(answer_options)
         )

compare_rapid #

compare_rapid(
    instruction: str,
    truth: str,
    datapoint: list[str],
    data_type: str = MEDIA,
    metadata: Sequence[Metadata] = [],
    explanation: str | None = None,
) -> Rapid

Build a compare rapid

Parameters:

Name Type Description Default
instruction str

The instruction that the labeler will be comparing the assets on.

required
truth str

The correct answer to the comparison. (has to be one of the assets)

required
datapoint list[str]

The two assets that the labeler will be comparing.

required
data_type str

The type of the datapoint. Defaults to RapidataDataTypes.MEDIA.

MEDIA
metadata Sequence[Metadata]

The metadata that is attached to the rapid. Defaults to [].

[]
Source code in src/rapidata/rapidata_client/validation/rapids/rapids_manager.py
def compare_rapid(self,
        instruction: str,
        truth: str,
        datapoint: list[str],
        data_type: str = RapidataDataTypes.MEDIA,
        metadata: Sequence[Metadata] = [],
        explanation: str | None = None,
) -> Rapid:
    """Build a compare rapid

    Args:
        instruction (str): The criteria the labeler compares the assets on.
        truth (str): The winning asset. Only its final path component is used as the winner id.
        datapoint (list[str]): The two assets that are compared against each other.
        data_type (str, optional): The type of the datapoint. Defaults to RapidataDataTypes.MEDIA.
        metadata (Sequence[Metadata], optional): The metadata that is attached to the rapid. Defaults to [].
        explanation (str | None, optional): Forwarded unchanged to the rapid. Defaults to None.

    Returns:
        Rapid: The rapid wrapping both assets, the compare payload and its truth.

    Raises:
        ValueError: If data_type is neither MEDIA nor TEXT, or if the combined asset
            does not contain exactly two entries.
    """

    # Pick the asset class first, then wrap every entry with it.
    if data_type == RapidataDataTypes.MEDIA:
        asset_cls = MediaAsset
    elif data_type == RapidataDataTypes.TEXT:
        asset_cls = TextAsset
    else:
        raise ValueError(f"Unsupported data type: {data_type}")

    wrapped = [asset_cls(entry) for entry in datapoint]
    multi_asset = MultiAsset(wrapped)

    payload = ComparePayload(_t="ComparePayload", criteria=instruction)

    # The winner is identified by the last path component only.
    winner_id = os.path.basename(truth)
    model_truth = CompareTruth(_t="CompareTruth", winnerId=winner_id)

    if len(multi_asset) != 2:
        raise ValueError("Compare rapid requires exactly two media paths")

    chance = 1 / len(multi_asset.assets)

    return Rapid(
        asset=multi_asset,
        truth=model_truth,
        metadata=metadata,
        payload=payload,
        explanation=explanation,
        randomCorrectProbability=chance,
    )

select_words_rapid #

select_words_rapid(
    instruction: str,
    truths: list[int],
    datapoint: str,
    sentence: str,
    required_precision: float = 1,
    required_completeness: float = 1,
    metadata: Sequence[Metadata] = [],
    explanation: str | None = None,
) -> Rapid

Build a select words rapid

Parameters:

Name Type Description Default
instruction str

The instruction for the labeler.

required
truths list[int]

The indices of the words that are the correct answers.

required
datapoint str

The asset that the labeler will be selecting words from.

required
sentence str

The sentence that the labeler will be selecting words from. (split up by spaces)

required
required_precision float

The required precision for the labeler to get the rapid correct (minimum ratio of the words selected that need to be correct). defaults to 1. (no wrong words can be selected)

1
required_completeness float

The required completeness for the labeler to get the rapid correct (minimum ratio of total correct words selected). Defaults to 1. (all correct words need to be selected)

1
metadata Sequence[Metadata]

The metadata that is attached to the rapid. Defaults to [].

[]
Source code in src/rapidata/rapidata_client/validation/rapids/rapids_manager.py
def select_words_rapid(self,
        instruction: str,
        truths: list[int],
        datapoint: str,
        sentence: str,
        required_precision: float = 1,
        required_completeness: float = 1,
        metadata: Sequence[Metadata] = [],
        explanation: str | None = None,
) -> Rapid:
    """Build a select words rapid

    Args:
        instruction (str): The instruction for the labeler.
        truths (list[int]): The zero-based indices of the words that are the correct answers.
        datapoint (str): The asset that the labeler will be selecting words from.
        sentence (str): The sentence that the labeler will be selecting words from. (split up by spaces)
        required_precision (float): The required precision for the labeler to get the rapid correct (minimum ratio of the words selected that need to be correct). Defaults to 1. (no wrong words can be selected)
        required_completeness (float): The required completeness for the labeler to get the rapid correct (minimum ratio of total correct words selected). Defaults to 1. (all correct words need to be selected)
        metadata (Sequence[Metadata], optional): The metadata that is attached to the rapid. Defaults to [].
        explanation (str | None, optional): Forwarded unchanged to the rapid. Defaults to None.

    Returns:
        Rapid: The rapid wrapping the asset, the transcription payload and its truth.

    Raises:
        ValueError: If any index in truths does not refer to a word of the sentence.
    """

    asset = MediaAsset(datapoint)
    transcription_words = [
        TranscriptionWord(word=word, wordIndex=i)
        for i, word in enumerate(sentence.split(" "))
    ]

    # Plain list indexing would let negative indices wrap around silently and
    # produce a truth whose wordIndex matches no word of the transcription,
    # so every index is checked against the sentence length up front.
    word_count = len(transcription_words)
    invalid_indices = [index for index in truths if not 0 <= index < word_count]
    if invalid_indices:
        raise ValueError(
            f"Truth indices {invalid_indices} are out of range for a sentence of {word_count} words"
        )

    correct_transcription_words: list[TranscriptionWord] = [
        TranscriptionWord(word=transcription_words[index].word, wordIndex=index)
        for index in truths
    ]

    payload = TranscriptionPayload(
        _t="TranscriptionPayload", title=instruction, transcription=transcription_words
    )

    model_truth = TranscriptionTruth(
        _t="TranscriptionTruth",
        correctWords=correct_transcription_words,
        requiredPrecision=required_precision,
        requiredCompleteness=required_completeness,
    )

    return Rapid(
            payload=payload,
            truth=model_truth,
            asset=asset,
            metadata=metadata,
            explanation=explanation,
            # Share of the words that count as correct.
            randomCorrectProbability=len(correct_transcription_words) / len(transcription_words)
        )

locate_rapid #

locate_rapid(
    instruction: str,
    truths: list[Box],
    datapoint: str,
    metadata: Sequence[Metadata] = [],
    explanation: str | None = None,
) -> Rapid

Build a locate rapid

Parameters:

Name Type Description Default
instruction str

The instruction on what the labeler should do.

required
truths list[Box]

The bounding boxes of the object that the labeler ought to be locating.

required
datapoint str

The asset that the labeler will be locating the object in.

required
metadata Sequence[Metadata]

The metadata that is attached to the rapid. Defaults to [].

[]
Source code in src/rapidata/rapidata_client/validation/rapids/rapids_manager.py
def locate_rapid(self,
        instruction: str,
        truths: list[Box],
        datapoint: str,
        metadata: Sequence[Metadata] = [],
        explanation: str | None = None,
) -> Rapid:
    """Build a locate rapid

    Args:
        instruction (str): What the labeler is asked to locate.
        truths (list[Box]): The bounding boxes of the object that the labeler ought to be locating.
        datapoint (str): The asset in which the object is located.
        metadata (Sequence[Metadata], optional): The metadata that is attached to the rapid. Defaults to [].
        explanation (str | None, optional): Forwarded unchanged to the rapid. Defaults to None.

    Returns:
        Rapid: The rapid wrapping the asset, the locate payload and its truth.

    Raises:
        ValueError: If the image dimensions of the asset cannot be determined.
    """

    asset = MediaAsset(datapoint)
    payload = LocatePayload(_t="LocatePayload", target=instruction)

    img_dimensions = asset.get_image_dimension()
    if not img_dimensions:
        raise ValueError("Failed to get image dimensions")

    # x coordinates are scaled by the first dimension, y coordinates by the
    # second (presumably width and height -- confirm against MediaAsset).
    x_extent = img_dimensions[0]
    y_extent = img_dimensions[1]

    # Each box is stored relative to the image, expressed on a 0-100 scale.
    relative_boxes = []
    for box in truths:
        relative_boxes.append(
            BoxShape(
                _t="BoxShape",
                xMin=box.x_min / x_extent * 100,
                xMax=box.x_max / x_extent * 100,
                yMax=box.y_max / y_extent * 100,
                yMin=box.y_min / y_extent * 100,
            )
        )

    model_truth = LocateBoxTruth(_t="LocateBoxTruth", boundingBoxes=relative_boxes)

    coverage = self._calculate_boxes_coverage(truths, x_extent, y_extent)

    return Rapid(
        payload=payload,
        truth=model_truth,
        asset=asset,
        metadata=metadata,
        explanation=explanation,
        randomCorrectProbability=coverage,
    )

draw_rapid #

draw_rapid(
    instruction: str,
    truths: list[Box],
    datapoint: str,
    metadata: Sequence[Metadata] = [],
    explanation: str | None = None,
) -> Rapid

Build a draw rapid

Parameters:

Name Type Description Default
instruction str

The instruction telling the labeler what to draw.

required
truths list[Box]

The bounding boxes of the object that the labeler ought to be drawing.

required
datapoint str

The asset that the labeler will be drawing the object in.

required
metadata Sequence[Metadata]

The metadata that is attached to the rapid. Defaults to [].

[]
Source code in src/rapidata/rapidata_client/validation/rapids/rapids_manager.py
def draw_rapid(self,
        instruction: str,
        truths: list[Box],
        datapoint: str,
        metadata: Sequence[Metadata] = [],
        explanation: str | None = None
) -> Rapid:
    """Build a draw rapid

    Args:
        instruction (str): The instruction telling the labeler what to draw.
        truths (list[Box]): The bounding boxes of the object that the labeler ought to be drawing.
            Must contain at least one box; only the first box is encoded in the truth,
            while all boxes are passed to the coverage calculation.
        datapoint (str): The asset that the labeler will be drawing the object in.
        metadata (Sequence[Metadata], optional): The metadata that is attached to the rapid. Defaults to [].
        explanation (str | None, optional): Forwarded unchanged to the rapid. Defaults to None.

    Returns:
        Rapid: The rapid wrapping the asset, the line payload and its bounding-box truth.

    Raises:
        ValueError: If truths is empty or the image dimensions of the asset cannot be determined.
    """

    # The truth is built from truths[0]; fail with a clear message instead of
    # an IndexError when no box was supplied.
    if not truths:
        raise ValueError("At least one truth box is required")

    asset = MediaAsset(datapoint)

    payload = LinePayload(
        _t="LinePayload", target=instruction
    )

    img_dimensions = asset.get_image_dimension()

    if not img_dimensions:
        raise ValueError("Failed to get image dimensions")

    # Coordinates are stored as fractions of the respective image dimension.
    first_box = truths[0]
    model_truth = BoundingBoxTruth(
        _t="BoundingBoxTruth", 
        xMax=first_box.x_max / img_dimensions[0],
        xMin=first_box.x_min / img_dimensions[0],
        yMax=first_box.y_max / img_dimensions[1],
        yMin=first_box.y_min / img_dimensions[1],
    )

    coverage = self._calculate_boxes_coverage(truths, img_dimensions[0], img_dimensions[1])

    return Rapid(
        payload=payload,
        truth=model_truth,
        asset=asset,
        metadata=metadata,
        explanation=explanation,
        randomCorrectProbability=coverage
    )

timestamp_rapid #

timestamp_rapid(
    instruction: str,
    truths: list[tuple[int, int]],
    datapoint: str,
    metadata: Sequence[Metadata] = [],
    explanation: str | None = None,
) -> Rapid

Build a timestamp rapid

Parameters:

Name Type Description Default
instruction str

The instruction for the labeler.

required
truths list[tuple[int, int]]

The accepted timestamp intervals for the labeler (in milliseconds). The first element of the tuple is the start of the interval and the second element is the end of the interval.

required
datapoint str

The asset that the labeler will be timestamping.

required
metadata Sequence[Metadata]

The metadata that is attached to the rapid. Defaults to [].

[]
Source code in src/rapidata/rapidata_client/validation/rapids/rapids_manager.py
def timestamp_rapid(self,
        instruction: str,
        truths: list[tuple[int, int]],
        datapoint: str,
        metadata: Sequence[Metadata] = [],
        explanation: str | None = None
) -> Rapid:
    """Build a timestamp rapid

    Args:
        instruction (str): The instruction for the labeler.
        truths (list[tuple[int, int]]): The accepted timestamp intervals (in milliseconds).
            Each tuple holds the start of the interval followed by its end.
        datapoint (str): The asset that the labeler will be timestamping.
        metadata (Sequence[Metadata], optional): The metadata that is attached to the rapid. Defaults to [].
        explanation (str | None, optional): Forwarded unchanged to the rapid. Defaults to None.

    Returns:
        Rapid: The rapid wrapping the asset, the scrub payload and its truth.

    Raises:
        ValueError: If an interval does not have exactly two elements or its start
            is greater than its end.
    """

    asset = MediaAsset(datapoint)

    # Validate interval by interval so the first offending entry decides the error.
    for interval in truths:
        if len(interval) != 2:
            raise ValueError("The truths per datapoint must be a tuple of exactly two integers.")
        start, end = interval[0], interval[1]
        if start > end:
            raise ValueError("The start of the interval must be smaller than the end of the interval.")

    payload = ScrubPayload(_t="ScrubPayload", target=instruction)

    valid_ranges = [
        ScrubRange(start=interval[0], end=interval[1])
        for interval in truths
    ]
    model_truth = ScrubTruth(_t="ScrubTruth", validRanges=valid_ranges)

    # Chance level is derived from the asset duration and the accepted intervals.
    chance = self._calculate_coverage_ratio(asset.get_duration(), truths)

    return Rapid(
        payload=payload,
        truth=model_truth,
        asset=asset,
        metadata=metadata,
        explanation=explanation,
        randomCorrectProbability=chance,
    )