Rapidata order manager

RapidataOrderManager #

RapidataOrderManager(openapi_service: OpenAPIService)

Handles everything regarding the orders from creation to retrieval.

Attributes:

Name	Type	Description
`filters`	`RapidataFilters`	The RapidataFilters instance.
`settings`	`RapidataSettings`	The RapidataSettings instance.
`selections`	`RapidataSelections`	The RapidataSelections instance.

Source code in src/rapidata/rapidata_client/order/rapidata_order_manager.py

def __init__(self, openapi_service: OpenAPIService):
    """Set up the order manager around the given API service.

    Args:
        openapi_service (OpenAPIService): The service kept on the manager and also handed to the AssetUploader.
    """
    self.__openapi_service = openapi_service

    # The three helper objects are assigned as-is (they are not called here),
    # so callers reach them through the manager instance.
    self.filters, self.settings, self.selections = (
        RapidataFilters,
        RapidataSettings,
        RapidataSelections,
    )

    # Optional order-wide overrides; both start out unset.
    self.__priority: int | None = None
    self.__sticky_state: StickyStateLiteral | None = None

    # The uploader is built on the same service instance as the manager.
    self.__asset_uploader = AssetUploader(openapi_service)

    logger.debug("RapidataOrderManager initialized")

create_classification_order #

create_classification_order(
    name: str,
    instruction: str,
    answer_options: list[str],
    datapoints: list[str],
    data_type: Literal["media", "text"] = "media",
    responses_per_datapoint: int = 10,
    contexts: list[str] | None = None,
    media_contexts: list[str] | None = None,
    validation_set_id: str | None = None,
    confidence_threshold: float | None = None,
    filters: Sequence[RapidataFilter] = [],
    settings: Sequence[RapidataSetting] = [],
    selections: Sequence[RapidataSelection] = [],
    private_notes: list[str] | None = None,
) -> RapidataOrder

Create a classification order.

With this order you can have a datapoint (image, text, video, audio) be classified into one of the answer options. Each response will be exactly one of the answer options.

Parameters:

Name	Type	Description	Default
`name`	`str`	The name of the order. (Will not be shown to the labeler)	required
`instruction`	`str`	The instruction for how the data should be classified.	required
`answer_options`	`list[str]`	The list of options for the classification.	required
`datapoints`	`list[str]`	The list of datapoints for the classification - each datapoint will be labeled.	required
`data_type`	`str`	The data type of the datapoints. Defaults to "media" (any form of image, video or audio). Other option: "text".	`'media'`
`responses_per_datapoint`	`int`	The number of responses that will be collected per datapoint. Defaults to 10.	`10`
`contexts`	`list[str]`	The list of contexts for the classification. Defaults to None. If provided has to be the same length as datapoints and will be shown in addition to the instruction and options. (Therefore will be different for each datapoint) Will be matched up with the datapoints using the list index.	`None`
`media_contexts`	`list[str]`	The list of media contexts for the classification, i.e. links to the images / videos. Defaults to None. If provided has to be the same length as datapoints and will be shown in addition to the instruction and options. (Therefore will be different for each datapoint)	`None`
`validation_set_id`	`str`	The ID of the validation set. Defaults to None. If provided, one validation task will be shown in front of the datapoints that will be labeled.	`None`
`confidence_threshold`	`float`	The probability threshold for the classification. Defaults to None. If provided, the classification datapoint will stop after the threshold is reached or at the number of responses, whatever happens first.	`None`
`filters`	`Sequence[RapidataFilter]`	The list of filters for the classification. Defaults to []. Decides who the tasks should be shown to.	`[]`
`settings`	`Sequence[RapidataSetting]`	The list of settings for the classification. Defaults to []. Decides how the tasks should be shown.	`[]`
`selections`	`Sequence[RapidataSelection]`	The list of selections for the classification. Defaults to []. Decides in what order the tasks should be shown.	`[]`
`private_notes`	`list[str]`	The list of private notes for the classification. Defaults to None. If provided has to be the same length as datapoints. This will NOT be shown to the labelers but will be included in the result purely for your own reference.	`None`

Source code in src/rapidata/rapidata_client/order/rapidata_order_manager.py

def create_classification_order(
    self,
    name: str,
    instruction: str,
    answer_options: list[str],
    datapoints: list[str],
    data_type: Literal["media", "text"] = "media",
    responses_per_datapoint: int = 10,
    contexts: list[str] | None = None,
    media_contexts: list[str] | None = None,
    validation_set_id: str | None = None,
    confidence_threshold: float | None = None,
    filters: Sequence[RapidataFilter] = [],
    settings: Sequence[RapidataSetting] = [],
    selections: Sequence[RapidataSelection] = [],
    private_notes: list[str] | None = None,
) -> RapidataOrder:
    """Create a classification order.

    Every datapoint (image, text, video, audio) gets sorted into one of the answer options.
    Each individual response is exactly one of those options.

    Args:
        name (str): The name of the order. (Will not be shown to the labeler)
        instruction (str): The instruction telling the labeler how to classify the data.
        answer_options (list[str]): The options the labeler can choose from.
        datapoints (list[str]): The datapoints to classify - each one will be labeled.
        data_type (str, optional): The data type of the datapoints. Defaults to "media" (any form of image, video or audio). \n
            Other option: "text".
        responses_per_datapoint (int, optional): How many responses are collected per datapoint. Defaults to 10.
        contexts (list[str], optional): Per-datapoint text contexts. Defaults to None.\n
            If provided has to be the same length as datapoints and is shown in addition to the instruction and options. (Therefore it differs per datapoint)
            Matched up with the datapoints by list index.
        media_contexts (list[str], optional): Per-datapoint media contexts, i.e. links to images / videos. Defaults to None.\n
            If provided has to be the same length as datapoints and is shown in addition to the instruction and options. (Therefore it differs per datapoint)
        validation_set_id (str, optional): The ID of the validation set. Defaults to None.\n
            If provided, one validation task is shown in front of the datapoints that will be labeled.
        confidence_threshold (float, optional): The probability threshold for the classification. Defaults to None.\n
            If provided, a datapoint stops collecting responses once the threshold or the number of responses is reached, whichever happens first.
        filters (Sequence[RapidataFilter], optional): Decides who the tasks should be shown to. Defaults to [].
        settings (Sequence[RapidataSetting], optional): Decides how the tasks should be shown. Defaults to [].
        selections (Sequence[RapidataSelection], optional): Decides in what order the tasks should be shown. Defaults to [].
        private_notes (list[str], optional): Private notes, one per datapoint. Defaults to None.
            If provided has to be the same length as datapoints.\n
            These are NOT shown to the labelers but are included in the result purely for your own reference.

    Returns:
        RapidataOrder: The order produced by the shared order-creation routine.

    Raises:
        ValueError: If datapoints is not a list or holds a non-string element.
    """
    with tracer.start_as_current_span(
        "RapidataOrderManager.create_classification_order"
    ):
        # Only a plain list made up entirely of strings is accepted.
        is_string_list = isinstance(datapoints, list) and all(
            isinstance(entry, str) for entry in datapoints
        )
        if not is_string_list:
            raise ValueError("Datapoints must be a list of strings")

        classify_workflow = ClassifyWorkflow(
            instruction=instruction, answer_options=answer_options
        )

        # Everything else is forwarded unchanged to the shared routine.
        return self._create_general_order(
            name=name,
            workflow=classify_workflow,
            assets=datapoints,
            data_type=data_type,
            responses_per_datapoint=responses_per_datapoint,
            contexts=contexts,
            media_contexts=media_contexts,
            private_notes=private_notes,
            validation_set_id=validation_set_id,
            confidence_threshold=confidence_threshold,
            filters=filters,
            settings=settings,
            selections=selections,
        )

create_compare_order #

create_compare_order(
    name: str,
    instruction: str,
    datapoints: list[list[str]],
    data_type: Literal["media", "text"] = "media",
    responses_per_datapoint: int = 10,
    contexts: list[str] | None = None,
    media_contexts: list[str] | None = None,
    a_b_names: list[str] | None = None,
    validation_set_id: str | None = None,
    confidence_threshold: float | None = None,
    filters: Sequence[RapidataFilter] = [],
    settings: Sequence[RapidataSetting] = [],
    selections: Sequence[RapidataSelection] = [],
    private_notes: list[str] | None = None,
) -> RapidataOrder

Create a compare order.

With this order you compare two datapoints (image, text, video, audio) and the annotators will choose one of the two based on the instruction.

Parameters:

Name	Type	Description	Default
`name`	`str`	The name of the order. (Will not be shown to the labeler)	required
`instruction`	`str`	The instruction for the comparison. Will be shown alongside each datapoint.	required
`datapoints`	`list[list[str]]`	Outer list is the datapoints, inner list is the options for the comparison - each datapoint will be labeled.	required
`data_type`	`str`	The data type of the datapoints. Defaults to "media" (any form of image, video or audio). Other option: "text".	`'media'`
`responses_per_datapoint`	`int`	The number of responses that will be collected per datapoint. Defaults to 10.	`10`
`contexts`	`list[str]`	The list of contexts for the comparison. Defaults to None. If provided has to be the same length as datapoints and will be shown in addition to the instruction. (Therefore will be different for each datapoint) Will be matched up with the datapoints using the list index.	`None`
`media_contexts`	`list[str]`	The list of media contexts i.e. links to the images / videos for the comparison. Defaults to None. If provided has to be the same length as datapoints and will be shown in addition to the instruction. (Therefore will be different for each datapoint) Will be matched up with the datapoints using the list index.	`None`
`a_b_names`	`list[str]`	Custom naming for the two opposing models defined by the index in the datapoints list. Defaults to None. If provided has to be a list of exactly two strings. Example: `datapoints = [["path_to_image_A", "path_to_image_B"], ["path_to_text_A", "path_to_text_B"]] a_b_names = ["Model A", "Model B"]` The results will then correctly show "Model A" and "Model B". If not provided, the results will be shown as "A" and "B".	`None`
`validation_set_id`	`str`	The ID of the validation set. Defaults to None. If provided, one validation task will be shown in front of the datapoints that will be labeled.	`None`
`confidence_threshold`	`float`	The probability threshold for the comparison. Defaults to None. If provided, the comparison datapoint will stop after the threshold is reached or at the number of responses, whatever happens first.	`None`
`filters`	`Sequence[RapidataFilter]`	The list of filters for the comparison. Defaults to []. Decides who the tasks should be shown to.	`[]`
`settings`	`Sequence[RapidataSetting]`	The list of settings for the comparison. Defaults to []. Decides how the tasks should be shown.	`[]`
`selections`	`Sequence[RapidataSelection]`	The list of selections for the comparison. Defaults to []. Decides in what order the tasks should be shown.	`[]`
`private_notes`	`list[str]`	The list of private notes for the comparison. Defaults to None. If provided has to be the same length as datapoints. This will NOT be shown to the labelers but will be included in the result purely for your own reference.	`None`

Source code in src/rapidata/rapidata_client/order/rapidata_order_manager.py

def create_compare_order(
    self,
    name: str,
    instruction: str,
    datapoints: list[list[str]],
    data_type: Literal["media", "text"] = "media",
    responses_per_datapoint: int = 10,
    contexts: list[str] | None = None,
    media_contexts: list[str] | None = None,
    a_b_names: list[str] | None = None,
    validation_set_id: str | None = None,
    confidence_threshold: float | None = None,
    filters: Sequence[RapidataFilter] = [],
    settings: Sequence[RapidataSetting] = [],
    selections: Sequence[RapidataSelection] = [],
    private_notes: list[str] | None = None,
) -> RapidataOrder:
    """Create a compare order.

    With this order you compare two datapoints (image, text, video, audio) and the annotators will choose one of the two based on the instruction.

    Args:
        name (str): The name of the order. (Will not be shown to the labeler)
        instruction (str): The instruction for the comparison. Will be shown alongside each datapoint.
        datapoints (list[list[str]]): Outer list is the datapoints, inner list is the options for the comparison - each datapoint will be labeled.
        data_type (str, optional): The data type of the datapoints. Defaults to "media" (any form of image, video or audio). \n
            Other option: "text".
        responses_per_datapoint (int, optional): The number of responses that will be collected per datapoint. Defaults to 10.
        contexts (list[str], optional): The list of contexts for the comparison. Defaults to None.\n
            If provided has to be the same length as datapoints and will be shown in addition to the instruction. (Therefore will be different for each datapoint)
            Will be matched up with the datapoints using the list index.
        media_contexts (list[str], optional): The list of media contexts i.e. links to the images / videos for the comparison. Defaults to None.\n
            If provided has to be the same length as datapoints and will be shown in addition to the instruction. (Therefore will be different for each datapoint)
            Will be matched up with the datapoints using the list index.
        a_b_names (list[str], optional): Custom naming for the two opposing models defined by the index in the datapoints list. Defaults to None.\n
            If provided has to be a list of exactly two strings.
            example:
            ```python
            datapoints = [["path_to_image_A", "path_to_image_B"], ["path_to_text_A", "path_to_text_B"]]
            a_b_names = ["Model A", "Model B"]
            ```
            The results will then correctly show "Model A" and "Model B".
            If not provided, the results will be shown as "A" and "B".
        validation_set_id (str, optional): The ID of the validation set. Defaults to None.\n
            If provided, one validation task will be shown in front of the datapoints that will be labeled.
        confidence_threshold (float, optional): The probability threshold for the comparison. Defaults to None.\n
            If provided, the comparison datapoint will stop after the threshold is reached or at the number of responses, whichever happens first.
        filters (Sequence[RapidataFilter], optional): The list of filters for the comparison. Defaults to []. Decides who the tasks should be shown to.
        settings (Sequence[RapidataSetting], optional): The list of settings for the comparison. Defaults to []. Decides how the tasks should be shown.
        selections (Sequence[RapidataSelection], optional): The list of selections for the comparison. Defaults to []. Decides in what order the tasks should be shown.
        private_notes (list[str], optional): The list of private notes for the comparison. Defaults to None.\n
            If provided has to be the same length as datapoints.\n
            This will NOT be shown to the labelers but will be included in the result purely for your own reference.

    Returns:
        RapidataOrder: The order produced by the shared order-creation routine.

    Raises:
        ValueError: If any datapoint is not a list, if any datapoint does not contain
            exactly two unique options, or if a_b_names is given with a length other than two.
    """
    with tracer.start_as_current_span("RapidataOrderManager.create_compare_order"):
        if any(not isinstance(datapoint, list) for datapoint in datapoints):
            raise ValueError("Each datapoint must be a list of 2 paths/texts")

        # Going through a set both enforces the pair size and rejects a pair
        # made of the same option twice.
        if any(len(set(datapoint)) != 2 for datapoint in datapoints):
            raise ValueError(
                "Each datapoint must contain exactly two unique options"
            )

        # The message names the actual keyword argument so callers can map
        # the error back to what they passed.
        if a_b_names is not None and len(a_b_names) != 2:
            raise ValueError(
                "a_b_names must be a list of exactly two strings or None"
            )

        return self._create_general_order(
            name=name,
            workflow=CompareWorkflow(instruction=instruction, a_b_names=a_b_names),
            assets=datapoints,
            data_type=data_type,
            responses_per_datapoint=responses_per_datapoint,
            contexts=contexts,
            media_contexts=media_contexts,
            validation_set_id=validation_set_id,
            confidence_threshold=confidence_threshold,
            filters=filters,
            selections=selections,
            settings=settings,
            private_notes=private_notes,
        )

create_ranking_order #

create_ranking_order(
    name: str,
    instruction: str,
    datapoints: list[str],
    total_comparison_budget: int,
    responses_per_comparison: int = 1,
    data_type: Literal["media", "text"] = "media",
    random_comparisons_ratio: float = 0.5,
    context: Optional[str] = None,
    media_context: Optional[str] = None,
    validation_set_id: Optional[str] = None,
    filters: Sequence[RapidataFilter] = [],
    settings: Sequence[RapidataSetting] = [],
    selections: Sequence[RapidataSelection] = [],
) -> RapidataOrder

Create a ranking order.

With this order you can rank a list of datapoints (image, text, video, audio) based on the instruction. The annotators will be shown two datapoints at a time. The ranking happens in terms of an elo system based on the matchup results.

Parameters:

Name	Type	Description	Default
`name`	`str`	The name of the order.	required
`instruction`	`str`	The question the annotators are asked when they see two datapoints.	required
`datapoints`	`list[str]`	A list of datapoints that will participate in the ranking.	required
`total_comparison_budget`	`int`	The total number of (pairwise-)comparisons that can be made.	required
`responses_per_comparison`	`int`	The number of responses collected per comparison. Defaults to 1.	`1`
`data_type`	`str`	The data type of the datapoints. Defaults to "media" (any form of image, video or audio). Other option: "text".	`'media'`
`random_comparisons_ratio`	`float`	The fraction of random comparisons in the ranking process. The rest will focus on pairing similarly ranked datapoints. Defaults to 0.5 and can be left untouched.	`0.5`
`context`	`str`	The context for all comparisons. Defaults to None. If provided will be shown in addition to the instruction for all the matchups.	`None`
`media_context`	`str`	The media context for all comparisons. Defaults to None. If provided will be shown in addition to the instruction for all the matchups.	`None`
`validation_set_id`	`str`	The ID of the validation set. Defaults to None. If provided, one validation task will be shown in front of the datapoints that will be labeled.	`None`
`filters`	`Sequence[RapidataFilter]`	The list of filters for the order. Defaults to []. Decides who the tasks should be shown to.	`[]`
`settings`	`Sequence[RapidataSetting]`	The list of settings for the order. Defaults to []. Decides how the tasks should be shown.	`[]`
`selections`	`Sequence[RapidataSelection]`	The list of selections for the order. Defaults to []. Decides in what order the tasks should be shown.	`[]`

Source code in src/rapidata/rapidata_client/order/rapidata_order_manager.py

def create_ranking_order(
    self,
    name: str,
    instruction: str,
    datapoints: list[str],
    total_comparison_budget: int,
    responses_per_comparison: int = 1,
    data_type: Literal["media", "text"] = "media",
    random_comparisons_ratio: float = 0.5,
    context: Optional[str] = None,
    media_context: Optional[str] = None,
    validation_set_id: Optional[str] = None,
    filters: Sequence[RapidataFilter] = [],
    settings: Sequence[RapidataSetting] = [],
    selections: Sequence[RapidataSelection] = [],
) -> RapidataOrder:
    """
    Create a ranking order.

    Ranks a list of datapoints (image, text, video, audio) according to the instruction.
    Annotators see two datapoints at a time; the ranking is derived from the matchup results using an elo system.

    Args:
        name (str): The name of the order.
        instruction (str): The question the annotators are asked when they see two datapoints.
        datapoints (list[str]): The datapoints that take part in the ranking.
        total_comparison_budget (int): The total number of (pairwise-)comparisons that can be made.
        responses_per_comparison (int, optional): The number of responses collected per comparison. Defaults to 1.
        data_type (str, optional): The data type of the datapoints. Defaults to "media" (any form of image, video or audio). \n
            Other option: "text".
        random_comparisons_ratio (float, optional): The fraction of random comparisons in the ranking process.
            The rest focuses on pairing similarly ranked datapoints. Defaults to 0.5 and can be left untouched.
        context (str, optional): The context for all comparisons. Defaults to None.\n
            If provided it is shown in addition to the instruction for all the matchups.
        media_context (str, optional): The media context for all comparisons. Defaults to None.\n
            If provided it is shown in addition to the instruction for all the matchups.
        validation_set_id (str, optional): The ID of the validation set. Defaults to None.\n
            If provided, one validation task is shown in front of the datapoints that will be labeled.
        filters (Sequence[RapidataFilter], optional): Decides who the tasks should be shown to. Defaults to [].
        settings (Sequence[RapidataSetting], optional): Decides how the tasks should be shown. Defaults to [].
        selections (Sequence[RapidataSelection], optional): Decides in what order the tasks should be shown. Defaults to [].

    Returns:
        RapidataOrder: The order produced by the shared order-creation routine.

    Raises:
        ValueError: If fewer than two datapoints are given or the datapoints are not unique.
    """

    with tracer.start_as_current_span("RapidataOrderManager.create_ranking_order"):
        datapoint_count = len(datapoints)

        if datapoint_count < 2:
            raise ValueError("At least two datapoints are required")

        # Collapsing into a set shrinks the collection whenever a duplicate exists.
        if len(set(datapoints)) != datapoint_count:
            raise ValueError("Datapoints must be unique")

        # The instruction, the shared contexts and the uploader belong to the
        # workflow rather than to the general order arguments.
        ranking_workflow = RankingWorkflow(
            criteria=instruction,
            total_comparison_budget=total_comparison_budget,
            random_comparisons_ratio=random_comparisons_ratio,
            context=context,
            media_context=media_context,
            file_uploader=self.__asset_uploader,
        )

        # responses_per_comparison is forwarded under the general routine's
        # responses_per_datapoint keyword.
        return self._create_general_order(
            name=name,
            workflow=ranking_workflow,
            assets=datapoints,
            data_type=data_type,
            responses_per_datapoint=responses_per_comparison,
            validation_set_id=validation_set_id,
            filters=filters,
            settings=settings,
            selections=selections,
        )

create_free_text_order #

create_free_text_order(
    name: str,
    instruction: str,
    datapoints: list[str],
    data_type: Literal["media", "text"] = "media",
    responses_per_datapoint: int = 10,
    contexts: list[str] | None = None,
    media_contexts: list[str] | None = None,
    filters: Sequence[RapidataFilter] = [],
    settings: Sequence[RapidataSetting] = [],
    selections: Sequence[RapidataSelection] = [],
    private_notes: list[str] | None = None,
) -> RapidataOrder

Create a free text order.

With this order you can have a datapoint (image, text, video, audio) be labeled with free text. The annotators will be shown a datapoint and will be asked to answer a question with free text.

Parameters:

Name	Type	Description	Default
`name`	`str`	The name of the order.	required
`instruction`	`str`	The instruction to answer with free text. Will be shown along side each datapoint.	required
`datapoints`	`list[str]`	The list of datapoints for the free text - each datapoint will be labeled.	required
`data_type`	`str`	The data type of the datapoints. Defaults to "media" (any form of image, video or audio). Other option: "text".	`'media'`
`responses_per_datapoint`	`int`	The number of responses that will be collected per datapoint. Defaults to 10.	`10`
`contexts`	`list[str]`	The list of contexts for the free text. Defaults to None. If provided has to be the same length as datapoints and will be shown in addition to the instruction. (Therefore will be different for each datapoint) Will be matched up with the datapoints using the list index.	`None`
`media_contexts`	`list[str]`	The list of media contexts for the free text i.e links to the images / videos. Defaults to None. If provided has to be the same length as datapoints and will be shown in addition to the instruction. (Therefore will be different for each datapoint) Will be matched up with the datapoints using the list index.	`None`
`filters`	`Sequence[RapidataFilter]`	The list of filters for the free text. Defaults to []. Decides who the tasks should be shown to.	`[]`
`settings`	`Sequence[RapidataSetting]`	The list of settings for the free text. Defaults to []. Decides how the tasks should be shown.	`[]`
`selections`	`Sequence[RapidataSelection]`	The list of selections for the free text. Defaults to []. Decides in what order the tasks should be shown.	`[]`
`private_notes`	`list[str]`	The list of private notes for the free text. Defaults to None. If provided has to be the same length as datapoints. This will NOT be shown to the labelers but will be included in the result purely for your own reference.	`None`

Source code in src/rapidata/rapidata_client/order/rapidata_order_manager.py

def create_free_text_order(
    self,
    name: str,
    instruction: str,
    datapoints: list[str],
    data_type: Literal["media", "text"] = "media",
    responses_per_datapoint: int = 10,
    contexts: list[str] | None = None,
    media_contexts: list[str] | None = None,
    filters: Sequence[RapidataFilter] = [],
    settings: Sequence[RapidataSetting] = [],
    selections: Sequence[RapidataSelection] = [],
    private_notes: list[str] | None = None,
) -> RapidataOrder:
    """Create a free text order.

    Each datapoint (image, text, video, audio) is labeled with free text.
    Annotators are shown a datapoint and asked to answer a question in their own words.

    Args:
        name (str): The name of the order.
        instruction (str): The question to answer with free text. Shown alongside each datapoint.
        datapoints (list[str]): The datapoints for the free text - each one will be labeled.
        data_type (str, optional): The data type of the datapoints. Defaults to "media" (any form of image, video or audio). \n
            Other option: "text".
        responses_per_datapoint (int, optional): How many responses are collected per datapoint. Defaults to 10.
        contexts (list[str], optional): Per-datapoint text contexts. Defaults to None.\n
            If provided has to be the same length as datapoints and is shown in addition to the instruction. (Therefore it differs per datapoint)
            Matched up with the datapoints by list index.
        media_contexts (list[str], optional): Per-datapoint media contexts, i.e. links to images / videos. Defaults to None.\n
            If provided has to be the same length as datapoints and is shown in addition to the instruction. (Therefore it differs per datapoint)
            Matched up with the datapoints by list index.
        filters (Sequence[RapidataFilter], optional): Decides who the tasks should be shown to. Defaults to [].
        settings (Sequence[RapidataSetting], optional): Decides how the tasks should be shown. Defaults to [].
        selections (Sequence[RapidataSelection], optional): Decides in what order the tasks should be shown. Defaults to [].
        private_notes (list[str], optional): Private notes, one per datapoint. Defaults to None.\n
            If provided has to be the same length as datapoints.\n
            These are NOT shown to the labelers but are included in the result purely for your own reference.

    Returns:
        RapidataOrder: The order produced by the shared order-creation routine.
    """
    with tracer.start_as_current_span(
        "RapidataOrderManager.create_free_text_order"
    ):
        free_text_workflow = FreeTextWorkflow(instruction=instruction)

        # No validation happens here; every argument is passed straight
        # through to the shared routine.
        return self._create_general_order(
            name=name,
            workflow=free_text_workflow,
            assets=datapoints,
            data_type=data_type,
            responses_per_datapoint=responses_per_datapoint,
            contexts=contexts,
            media_contexts=media_contexts,
            private_notes=private_notes,
            filters=filters,
            settings=settings,
            selections=selections,
        )

create_select_words_order #

create_select_words_order(
    name: str,
    instruction: str,
    datapoints: list[str],
    sentences: list[str],
    responses_per_datapoint: int = 10,
    validation_set_id: str | None = None,
    filters: Sequence[RapidataFilter] = [],
    settings: Sequence[RapidataSetting] = [],
    selections: Sequence[RapidataSelection] = [],
    private_notes: list[str] | None = None,
) -> RapidataOrder

Create a select words order.

With this order you can have a datapoint (image, text, video, audio) be labeled with a list of words. The annotators will be shown a datapoint as well as a list of sentences split up by spaces. They will then select specific words based on the instruction.

Parameters:

Name	Type	Description	Default
`name`	`str`	The name of the order.	required
`instruction`	`str`	The instruction for how the words should be selected. Will be shown along side each datapoint.	required
`datapoints`	`list[str]`	The list of datapoints for the select words - each datapoint will be labeled.	required
`sentences`	`list[str]`	The list of sentences for the select words - Will be split up by spaces and shown along side each datapoint. Must be the same length as datapoints.	required
`responses_per_datapoint`	`int`	The number of responses that will be collected per datapoint. Defaults to 10.	`10`
`validation_set_id`	`str`	The ID of the validation set. Defaults to None. If provided, one validation task will be shown in front of the datapoints that will be labeled.	`None`
`filters`	`Sequence[RapidataFilter]`	The list of filters for the select words. Defaults to []. Decides who the tasks should be shown to.	`[]`
`settings`	`Sequence[RapidataSetting]`	The list of settings for the select words. Defaults to []. Decides how the tasks should be shown.	`[]`
`selections`	`Sequence[RapidataSelection]`	The list of selections for the select words. Defaults to []. Decides in what order the tasks should be shown.	`[]`
`private_notes`	`list[str]`	The list of private notes for the select words. Defaults to None. If provided has to be the same length as datapoints. This will NOT be shown to the labelers but will be included in the result purely for your own reference.	`None`

Source code in src/rapidata/rapidata_client/order/rapidata_order_manager.py

def create_select_words_order(
    self,
    name: str,
    instruction: str,
    datapoints: list[str],
    sentences: list[str],
    responses_per_datapoint: int = 10,
    validation_set_id: str | None = None,
    filters: Sequence[RapidataFilter] = [],
    settings: Sequence[RapidataSetting] = [],
    selections: Sequence[RapidataSelection] = [],
    private_notes: list[str] | None = None,
) -> RapidataOrder:
    """Create a select words order.

    A datapoint (image, text, video, audio) gets labeled with a list of words:
    annotators see the datapoint next to a sentence that has been split up by spaces
    and pick the words that fit the instruction.

    Args:
        name (str): The name of the order.
        instruction (str): How the words should be selected. Will be shown alongside each datapoint.
        datapoints (list[str]): The datapoints for the select words - each datapoint will be labeled.
        sentences (list[str]): The sentences for the select words - split up by spaces and shown alongside each datapoint.\n
            Must be the same length as datapoints.
        responses_per_datapoint (int, optional): The number of responses collected per datapoint. Defaults to 10.
        validation_set_id (str, optional): The ID of the validation set. Defaults to None.\n
            If provided, one validation task will be shown in front of the datapoints that will be labeled.
        filters (Sequence[RapidataFilter], optional): Decides who the tasks should be shown to. Defaults to [].
        settings (Sequence[RapidataSetting], optional): Decides how the tasks should be shown. Defaults to [].
        selections (Sequence[RapidataSelection], optional): Decides in what order the tasks should be shown. Defaults to [].
        private_notes (list[str], optional): Private notes for the select words. Defaults to None.\n
            If provided has to be the same length as datapoints.\n
            This will NOT be shown to the labelers but will be included in the result purely for your own reference.

    Returns:
        RapidataOrder: The order produced by ``_create_general_order`` for a ``SelectWordsWorkflow``.
    """
    span_name = "RapidataOrderManager.create_select_words_order"
    with tracer.start_as_current_span(span_name):
        # The workflow is built inside the span, exactly where the call needs it.
        select_words_workflow = SelectWordsWorkflow(instruction=instruction)
        return self._create_general_order(
            name=name,
            workflow=select_words_workflow,
            assets=datapoints,
            sentences=sentences,
            responses_per_datapoint=responses_per_datapoint,
            validation_set_id=validation_set_id,
            filters=filters,
            settings=settings,
            selections=selections,
            private_notes=private_notes,
        )

create_locate_order #

create_locate_order(
    name: str,
    instruction: str,
    datapoints: list[str],
    responses_per_datapoint: int = 10,
    contexts: list[str] | None = None,
    media_contexts: list[str] | None = None,
    validation_set_id: str | None = None,
    filters: Sequence[RapidataFilter] = [],
    settings: Sequence[RapidataSetting] = [],
    selections: Sequence[RapidataSelection] = [],
    private_notes: list[str] | None = None,
) -> RapidataOrder

Create a locate order.

With this order you can have people locate specific objects in a datapoint (image, text, video, audio). The annotators will be shown a datapoint and will be asked to select locations based on the instruction.

Parameters:

Name	Type	Description	Default
`name`	`str`	The name of the order.	required
`instruction`	`str`	The instruction for what should be located. Will be shown alongside each datapoint.	required
`datapoints`	`list[str]`	The list of datapoints for the locate - each datapoint will be labeled.	required
`responses_per_datapoint`	`int`	The number of responses that will be collected per datapoint. Defaults to 10.	`10`
`contexts`	`list[str]`	The list of contexts for the locate. Defaults to None. If provided has to be the same length as datapoints and will be shown in addition to the instruction. (Therefore will be different for each datapoint) Will be matched up with the datapoints using the list index.	`None`
`media_contexts`	`list[str]`	The list of media contexts for the locate i.e links to the images / videos. Defaults to None. If provided has to be the same length as datapoints and will be shown in addition to the instruction. (Therefore will be different for each datapoint)	`None`
`validation_set_id`	`str`	The ID of the validation set. Defaults to None. If provided, one validation task will be shown in front of the datapoints that will be labeled.	`None`
`filters`	`Sequence[RapidataFilter]`	The list of filters for the locate. Defaults to []. Decides who the tasks should be shown to.	`[]`
`settings`	`Sequence[RapidataSetting]`	The list of settings for the locate. Defaults to []. Decides how the tasks should be shown.	`[]`
`selections`	`Sequence[RapidataSelection]`	The list of selections for the locate. Defaults to []. Decides in what order the tasks should be shown.	`[]`
`private_notes`	`list[str]`	The list of private notes for the locate. Defaults to None. If provided has to be the same length as datapoints. This will NOT be shown to the labelers but will be included in the result purely for your own reference.	`None`

Source code in src/rapidata/rapidata_client/order/rapidata_order_manager.py

def create_locate_order(
    self,
    name: str,
    instruction: str,
    datapoints: list[str],
    responses_per_datapoint: int = 10,
    contexts: list[str] | None = None,
    media_contexts: list[str] | None = None,
    validation_set_id: str | None = None,
    filters: Sequence[RapidataFilter] = [],
    settings: Sequence[RapidataSetting] = [],
    selections: Sequence[RapidataSelection] = [],
    private_notes: list[str] | None = None,
) -> RapidataOrder:
    """Create a locate order.

    People locate specific objects in a datapoint (image, text, video, audio):
    annotators see the datapoint and select locations based on the instruction.

    Args:
        name (str): The name of the order.
        instruction (str): What should be located. Will be shown alongside each datapoint.
        datapoints (list[str]): The datapoints for the locate - each datapoint will be labeled.
        responses_per_datapoint (int, optional): The number of responses collected per datapoint. Defaults to 10.
        contexts (list[str], optional): The contexts for the locate. Defaults to None.\n
            If provided has to be the same length as datapoints and will be shown in addition to the instruction. (Therefore will be different for each datapoint)
            Will be matched up with the datapoints using the list index.
        media_contexts (list[str], optional): The media contexts for the locate i.e links to the images / videos. Defaults to None.\n
            If provided has to be the same length as datapoints and will be shown in addition to the instruction. (Therefore will be different for each datapoint)
        validation_set_id (str, optional): The ID of the validation set. Defaults to None.\n
            If provided, one validation task will be shown in front of the datapoints that will be labeled.
        filters (Sequence[RapidataFilter], optional): Decides who the tasks should be shown to. Defaults to [].
        settings (Sequence[RapidataSetting], optional): Decides how the tasks should be shown. Defaults to [].
        selections (Sequence[RapidataSelection], optional): Decides in what order the tasks should be shown. Defaults to [].
        private_notes (list[str], optional): Private notes for the locate. Defaults to None.\n
            If provided has to be the same length as datapoints.\n
            This will NOT be shown to the labelers but will be included in the result purely for your own reference.

    Returns:
        RapidataOrder: The order produced by ``_create_general_order`` for a ``LocateWorkflow``.
    """
    span_name = "RapidataOrderManager.create_locate_order"
    with tracer.start_as_current_span(span_name):
        # The user-facing instruction is passed to the workflow as its target.
        locate_workflow = LocateWorkflow(target=instruction)
        return self._create_general_order(
            name=name,
            workflow=locate_workflow,
            assets=datapoints,
            responses_per_datapoint=responses_per_datapoint,
            contexts=contexts,
            media_contexts=media_contexts,
            validation_set_id=validation_set_id,
            filters=filters,
            settings=settings,
            selections=selections,
            private_notes=private_notes,
        )

create_draw_order #

create_draw_order(
    name: str,
    instruction: str,
    datapoints: list[str],
    responses_per_datapoint: int = 10,
    contexts: list[str] | None = None,
    media_contexts: list[str] | None = None,
    validation_set_id: str | None = None,
    filters: Sequence[RapidataFilter] = [],
    settings: Sequence[RapidataSetting] = [],
    selections: Sequence[RapidataSelection] = [],
    private_notes: list[str] | None = None,
) -> RapidataOrder

Create a draw order.

With this order you can have people draw lines on a datapoint (image, text, video, audio). The annotators will be shown a datapoint and will be asked to draw lines based on the instruction.

Parameters:

Name	Type	Description	Default
`name`	`str`	The name of the order.	required
`instruction`	`str`	The instruction for how the lines should be drawn. Will be shown alongside each datapoint.	required
`datapoints`	`list[str]`	The list of datapoints for the draw lines - each datapoint will be labeled.	required
`responses_per_datapoint`	`int`	The number of responses that will be collected per datapoint. Defaults to 10.	`10`
`contexts`	`list[str]`	The list of contexts for the draw lines. Defaults to None. If provided has to be the same length as datapoints and will be shown in addition to the instruction. (Therefore will be different for each datapoint) Will be matched up with the datapoints using the list index.	`None`
`media_contexts`	`list[str]`	The list of media contexts for the draw lines i.e links to the images / videos. Defaults to None. If provided has to be the same length as datapoints and will be shown in addition to the instruction. (Therefore will be different for each datapoint)	`None`
`validation_set_id`	`str`	The ID of the validation set. Defaults to None. If provided, one validation task will be shown in front of the datapoints that will be labeled.	`None`
`filters`	`Sequence[RapidataFilter]`	The list of filters for the draw lines. Defaults to []. Decides who the tasks should be shown to.	`[]`
`settings`	`Sequence[RapidataSetting]`	The list of settings for the draw lines. Defaults to []. Decides how the tasks should be shown.	`[]`
`selections`	`Sequence[RapidataSelection]`	The list of selections for the draw lines. Defaults to []. Decides in what order the tasks should be shown.	`[]`
`private_notes`	`list[str]`	The list of private notes for the draw lines. Defaults to None. If provided has to be the same length as datapoints. This will NOT be shown to the labelers but will be included in the result purely for your own reference.	`None`

Source code in src/rapidata/rapidata_client/order/rapidata_order_manager.py

def create_draw_order(
    self,
    name: str,
    instruction: str,
    datapoints: list[str],
    responses_per_datapoint: int = 10,
    contexts: list[str] | None = None,
    media_contexts: list[str] | None = None,
    validation_set_id: str | None = None,
    filters: Sequence[RapidataFilter] = [],
    settings: Sequence[RapidataSetting] = [],
    selections: Sequence[RapidataSelection] = [],
    private_notes: list[str] | None = None,
) -> RapidataOrder:
    """Create a draw order.

    People draw lines on a datapoint (image, text, video, audio):
    annotators see the datapoint and draw lines based on the instruction.

    Args:
        name (str): The name of the order.
        instruction (str): How the lines should be drawn. Will be shown alongside each datapoint.
        datapoints (list[str]): The datapoints for the draw lines - each datapoint will be labeled.
        responses_per_datapoint (int, optional): The number of responses collected per datapoint. Defaults to 10.
        contexts (list[str], optional): The contexts for the draw lines. Defaults to None.\n
            If provided has to be the same length as datapoints and will be shown in addition to the instruction. (Therefore will be different for each datapoint)
            Will be matched up with the datapoints using the list index.
        media_contexts (list[str], optional): The media contexts for the draw lines i.e links to the images / videos. Defaults to None.\n
            If provided has to be the same length as datapoints and will be shown in addition to the instruction. (Therefore will be different for each datapoint)
        validation_set_id (str, optional): The ID of the validation set. Defaults to None.\n
            If provided, one validation task will be shown in front of the datapoints that will be labeled.
        filters (Sequence[RapidataFilter], optional): Decides who the tasks should be shown to. Defaults to [].
        settings (Sequence[RapidataSetting], optional): Decides how the tasks should be shown. Defaults to [].
        selections (Sequence[RapidataSelection], optional): Decides in what order the tasks should be shown. Defaults to [].
        private_notes (list[str], optional): Private notes for the draw lines. Defaults to None.\n
            If provided has to be the same length as datapoints.\n
            This will NOT be shown to the labelers but will be included in the result purely for your own reference.

    Returns:
        RapidataOrder: The order produced by ``_create_general_order`` for a ``DrawWorkflow``.
    """
    span_name = "RapidataOrderManager.create_draw_order"
    with tracer.start_as_current_span(span_name):
        # The user-facing instruction is passed to the workflow as its target.
        draw_workflow = DrawWorkflow(target=instruction)
        return self._create_general_order(
            name=name,
            workflow=draw_workflow,
            assets=datapoints,
            responses_per_datapoint=responses_per_datapoint,
            contexts=contexts,
            media_contexts=media_contexts,
            validation_set_id=validation_set_id,
            filters=filters,
            settings=settings,
            selections=selections,
            private_notes=private_notes,
        )

create_timestamp_order #

create_timestamp_order(
    name: str,
    instruction: str,
    datapoints: list[str],
    responses_per_datapoint: int = 10,
    contexts: list[str] | None = None,
    media_contexts: list[str] | None = None,
    validation_set_id: str | None = None,
    filters: Sequence[RapidataFilter] = [],
    settings: Sequence[RapidataSetting] = [],
    selections: Sequence[RapidataSelection] = [],
    private_notes: list[str] | None = None,
) -> RapidataOrder

Create a timestamp order.

Warning

This order is currently not fully supported and may give unexpected results.

With this order you can have people mark specific timestamps in a datapoint (video, audio). The annotators will be shown a datapoint and will be asked to select a timestamp based on the instruction.

Parameters:

Name	Type	Description	Default
`name`	`str`	The name of the order.	required
`instruction`	`str`	The instruction for the timestamp task. Will be shown alongside each datapoint.	required
`datapoints`	`list[str]`	The list of datapoints for the timestamp - each datapoint will be labeled.	required
`responses_per_datapoint`	`int`	The number of responses that will be collected per datapoint. Defaults to 10.	`10`
`contexts`	`list[str]`	The list of contexts for the timestamp. Defaults to None. If provided has to be the same length as datapoints and will be shown in addition to the instruction. (Therefore will be different for each datapoint) Will be matched up with the datapoints using the list index.	`None`
`media_contexts`	`list[str]`	The list of media contexts for the timestamp i.e links to the images / videos. Defaults to None. If provided has to be the same length as datapoints and will be shown in addition to the instruction. (Therefore will be different for each datapoint)	`None`
`validation_set_id`	`str`	The ID of the validation set. Defaults to None. If provided, one validation task will be shown in front of the datapoints that will be labeled.	`None`
`filters`	`Sequence[RapidataFilter]`	The list of filters for the timestamp. Defaults to []. Decides who the tasks should be shown to.	`[]`
`settings`	`Sequence[RapidataSetting]`	The list of settings for the timestamp. Defaults to []. Decides how the tasks should be shown.	`[]`
`selections`	`Sequence[RapidataSelection]`	The list of selections for the timestamp. Defaults to []. Decides in what order the tasks should be shown.	`[]`
`private_notes`	`list[str]`	The list of private notes for the timestamp. Defaults to None. If provided has to be the same length as datapoints. This will NOT be shown to the labelers but will be included in the result purely for your own reference.	`None`

Source code in src/rapidata/rapidata_client/order/rapidata_order_manager.py

def create_timestamp_order(
    self,
    name: str,
    instruction: str,
    datapoints: list[str],
    responses_per_datapoint: int = 10,
    contexts: list[str] | None = None,
    media_contexts: list[str] | None = None,
    validation_set_id: str | None = None,
    filters: Sequence[RapidataFilter] = [],
    settings: Sequence[RapidataSetting] = [],
    selections: Sequence[RapidataSelection] = [],
    private_notes: list[str] | None = None,
) -> RapidataOrder:
    """Create a timestamp order.

    Warning:
        This order is currently not fully supported and may give unexpected results.

    People mark specific timestamps in a datapoint (video, audio):
    annotators see the datapoint and select a timestamp based on the instruction.

    Args:
        name (str): The name of the order.
        instruction (str): The instruction for the timestamp task. Will be shown alongside each datapoint.
        datapoints (list[str]): The datapoints for the timestamp - each datapoint will be labeled.
        responses_per_datapoint (int, optional): The number of responses collected per datapoint. Defaults to 10.
        contexts (list[str], optional): The contexts for the timestamp. Defaults to None.\n
            If provided has to be the same length as datapoints and will be shown in addition to the instruction. (Therefore will be different for each datapoint)
            Will be matched up with the datapoints using the list index.
        media_contexts (list[str], optional): The media contexts for the timestamp i.e links to the images / videos. Defaults to None.\n
            If provided has to be the same length as datapoints and will be shown in addition to the instruction. (Therefore will be different for each datapoint)
        validation_set_id (str, optional): The ID of the validation set. Defaults to None.\n
            If provided, one validation task will be shown in front of the datapoints that will be labeled.
        filters (Sequence[RapidataFilter], optional): Decides who the tasks should be shown to. Defaults to [].
        settings (Sequence[RapidataSetting], optional): Decides how the tasks should be shown. Defaults to [].
        selections (Sequence[RapidataSelection], optional): Decides in what order the tasks should be shown. Defaults to [].
        private_notes (list[str], optional): Private notes for the timestamp. Defaults to None.\n
            If provided has to be the same length as datapoints.\n
            This will NOT be shown to the labelers but will be included in the result purely for your own reference.

    Returns:
        RapidataOrder: The order produced by ``_create_general_order`` for a ``TimestampWorkflow``.
    """
    span_name = "RapidataOrderManager.create_timestamp_order"
    with tracer.start_as_current_span(span_name):
        timestamp_workflow = TimestampWorkflow(instruction=instruction)
        return self._create_general_order(
            name=name,
            workflow=timestamp_workflow,
            assets=datapoints,
            responses_per_datapoint=responses_per_datapoint,
            contexts=contexts,
            media_contexts=media_contexts,
            validation_set_id=validation_set_id,
            filters=filters,
            settings=settings,
            selections=selections,
            private_notes=private_notes,
        )

get_order_by_id #

get_order_by_id(order_id: str) -> RapidataOrder

Get an order by ID.

Parameters:

Name	Type	Description	Default
`order_id`	`str`	The ID of the order.	required

Returns:

Name	Type	Description
`RapidataOrder`	`RapidataOrder`	The Order instance.

Source code in src/rapidata/rapidata_client/order/rapidata_order_manager.py

def get_order_by_id(self, order_id: str) -> RapidataOrder:
    """Get an order by ID.

    Fetches the order through the order API and wraps it in a ``RapidataOrder``
    that carries the requested ID and the order name reported by the API.

    Args:
        order_id (str): The ID of the order.

    Returns:
        RapidataOrder: The Order instance.
    """
    with tracer.start_as_current_span("RapidataOrderManager.get_order_by_id"):
        service = self.__openapi_service
        # Only the name is taken from the API response; the ID is the one passed in.
        fetched = service.order_api.order_order_id_get(order_id)
        order_name = fetched.order_name
        return RapidataOrder(
            order_id=order_id,
            name=order_name,
            openapi_service=service,
        )

find_orders #

find_orders(
    name: str = "", amount: int = 10
) -> list[RapidataOrder]

Find your recent orders given criteria. If nothing is provided, it will return the most recent orders.

Parameters:

Name	Type	Description	Default
`name`	`str`	The name of the order - matching order will contain the name. Defaults to "" for any order.	`''`
`amount`	`int`	The amount of orders to return. Defaults to 10.	`10`

Returns:

Type	Description
`list[RapidataOrder]`	list[RapidataOrder]: A list of RapidataOrder instances.

Source code in src/rapidata/rapidata_client/order/rapidata_order_manager.py

def find_orders(self, name: str = "", amount: int = 10) -> list[RapidataOrder]:
    """Find your recent orders given criteria. If nothing is provided, it will return the most recent orders.

    Queries the first page of orders whose name contains ``name``, sorted by order date
    descending, and then loads each hit individually via ``get_order_by_id``.

    Args:
        name (str, optional): The name of the order - matching order will contain the name. Defaults to "" for any order.
        amount (int, optional): The amount of orders to return (used as the page size). Defaults to 10.

    Returns:
        list[RapidataOrder]: A list of RapidataOrder instances.
    """
    with tracer.start_as_current_span("RapidataOrderManager.find_orders"):
        orders_get = self.__openapi_service.order_api.orders_get

        # Query parts are built in the same order the original nested expression used.
        first_page = PageInfo(index=1, size=amount)
        name_contains = Filter(
            field="OrderName",
            operator=FilterOperator.CONTAINS,
            value=name,
        )
        root_filter = RootFilter(filters=[name_contains])
        newest_first = SortCriterion(
            direction=SortDirection.DESC, propertyName="OrderDate"
        )
        query = QueryModel(
            page=first_page,
            filter=root_filter,
            sortCriteria=[newest_first],
        )

        page_result = orders_get(query)

        # Each listed item is re-fetched by ID to build the full RapidataOrder.
        return [self.get_order_by_id(item.id) for item in page_result.items]