Rapidata audience

RapidataAudience #

RapidataAudience(
    id: str,
    name: str,
    filters: list[RapidataFilter],
    openapi_service: OpenAPIService,
)

Represents a Rapidata audience.

An audience is a group of annotators that is recruited based on example tasks and can then be assigned jobs.

Attributes:

Name	Type	Description
`id`	`str`	The unique identifier of the audience.
`name`	`str`	The name of the audience.
`filters`	`list[RapidataFilter]`	The list of filters applied to the audience.

Source code in src/rapidata/rapidata_client/audience/rapidata_audience.py

def __init__(
    self,
    id: str,
    name: str,
    filters: list[RapidataFilter],
    openapi_service: OpenAPIService,
):
    """Store the audience's identity and the service used for its API calls.

    Args:
        id (str): The unique identifier of the audience.
        name (str): The name of the audience.
        filters (list[RapidataFilter]): The list of filters applied to the audience.
        openapi_service (OpenAPIService): Service object through which this
            instance issues its API requests.
    """
    # Starts out False; presumably flipped once recruiting has been kicked
    # off (the code that sets it is not part of this method).
    self._recruiting_started = False

    # The id is public; name and filters are kept on private attributes.
    self.id, self._name, self._filters = id, name, filters
    self._openapi_service = openapi_service

    # Handler that receives the add-example calls for this audience id.
    self._example_handler = AudienceExampleHandler(openapi_service, id)
    self._recruiting_started = False

name `property` #

name: str

The name of the audience.

filters `property` #

filters: list[RapidataFilter]

The list of filters applied to the audience.

delete #

delete() -> None

Deletes the audience.

Source code in src/rapidata/rapidata_client/audience/rapidata_audience.py

def delete(self) -> None:
    """Delete this audience through the audience API.

    Issues the delete request for ``self.id``, logs before and after the call,
    and prints a confirmation via ``managed_print``.
    """
    with tracer.start_as_current_span("RapidataAudience.delete"):
        logger.info("Deleting audience '%s'", self)

        audience_api = self._openapi_service.audience.audience_api
        audience_api.audience_audience_id_delete(self.id)

        logger.debug("Audience '%s' has been deleted.", self)
        managed_print(f"Audience '{self}' has been deleted.")

update_filters #

update_filters(
    filters: list[RapidataFilter],
) -> RapidataAudience

Update the filters for this audience.

Parameters:

Name	Type	Description	Default
`filters`	`list[RapidataFilter]`	The new list of filters to apply to the audience.	required

Returns:

Name	Type	Description
`RapidataAudience`	`RapidataAudience`	The updated audience instance (self) for method chaining.

Source code in src/rapidata/rapidata_client/audience/rapidata_audience.py

def update_filters(self, filters: list[RapidataFilter]) -> RapidataAudience:
    """Replace the filters of this audience.

    The new filters are converted to their audience models, sent to the API in
    a patch request, and stored on this instance once the request has returned.

    Args:
        filters (list[RapidataFilter]): The new list of filters to apply to the audience.

    Returns:
        RapidataAudience: This same instance, so that calls can be chained.
    """
    with tracer.start_as_current_span("RapidataAudience.update_filters"):
        from rapidata.api_client.models.update_audience_endpoint_input import (
            UpdateAudienceEndpointInput,
        )

        logger.debug(f"Updating filters for audience: {self.id} to {filters}")

        # Convert each filter to the model the patch endpoint expects.
        filter_models = [
            audience_filter._to_audience_model() for audience_filter in filters
        ]
        patch_input = UpdateAudienceEndpointInput(filters=filter_models)

        self._openapi_service.audience.audience_api.audience_audience_id_patch(
            audience_id=self.id,
            update_audience_endpoint_input=patch_input,
        )

        # Only reached when the patch call did not raise.
        self._filters = filters
        return self

update_name #

update_name(name: str) -> RapidataAudience

Update the name of this audience.

Parameters:

Name	Type	Description	Default
`name`	`str`	The new name for the audience.	required

Returns:

Name	Type	Description
`RapidataAudience`	`RapidataAudience`	The updated audience instance (self) for method chaining.

Source code in src/rapidata/rapidata_client/audience/rapidata_audience.py

def update_name(self, name: str) -> RapidataAudience:
    """Rename this audience.

    The new name is sent to the API in a patch request and stored on this
    instance once the request has returned.

    Args:
        name (str): The new name for the audience.

    Returns:
        RapidataAudience: This same instance, so that calls can be chained.
    """
    with tracer.start_as_current_span("RapidataAudience.update_name"):
        from rapidata.api_client.models.update_audience_endpoint_input import (
            UpdateAudienceEndpointInput,
        )

        logger.debug(f"Updating name for audience: {self.id} to {name}")

        patch_input = UpdateAudienceEndpointInput(name=name)
        self._openapi_service.audience.audience_api.audience_audience_id_patch(
            audience_id=self.id,
            update_audience_endpoint_input=patch_input,
        )

        # Only reached when the patch call did not raise.
        self._name = name
        return self

assign_job #

assign_job(
    job_definition: RapidataJobDefinition,
) -> RapidataJob

Assign a job to this audience.

Creates a new job instance from the job definition and assigns it to this audience. The job will be executed by the annotators in this audience.

Parameters:

Name	Type	Description	Default
`job_definition`	`RapidataJobDefinition`	The job definition from which the job is created and assigned to the audience.	required

Returns:

Name	Type	Description
`RapidataJob`	`RapidataJob`	The created job instance.

Source code in src/rapidata/rapidata_client/audience/rapidata_audience.py

def assign_job(self, job_definition: RapidataJobDefinition) -> RapidataJob:
    """Create a job from a job definition and assign it to this audience.

    A create-job request carrying this audience's id and the definition's id
    is posted to the job API; the returned job id is wrapped in a
    ``RapidataJob``. The job will be executed by the annotators in this
    audience.

    Args:
        job_definition (RapidataJobDefinition): The job definition from which
            the job is created and assigned to the audience.

    Returns:
        RapidataJob: The created job instance.
    """
    with tracer.start_as_current_span("RapidataAudience.assign_job"):
        from rapidata.api_client.models.create_job_endpoint_input import (
            CreateJobEndpointInput,
        )
        from rapidata.rapidata_client.job.rapidata_job import RapidataJob
        from datetime import datetime

        logger.debug(f"Assigning job to audience: {self.id}")

        job_request = CreateJobEndpointInput(
            audienceId=self.id,
            jobDefinitionId=job_definition.id,
        )
        response = self._openapi_service.order.job_api.job_post(
            create_job_endpoint_input=job_request,
        )

        # created_at comes from the local clock as a naive datetime, not from
        # the API response.
        # NOTE(review): confirm whether a timezone-aware value is expected.
        assigned_job = RapidataJob(
            job_id=response.job_id,
            name=job_definition.name,
            audience_id=self.id,
            created_at=datetime.now(),
            definition_id=job_definition.id,
            openapi_service=self._openapi_service,
        )
        logger.info(f"Assigned job to audience: {self.id}")
        return assigned_job

add_classification_example #

add_classification_example(
    instruction: str,
    answer_options: list[str],
    datapoint: str,
    truth: list[str],
    data_type: Literal["media", "text"] = "media",
    context: str | None = None,
    media_context: str | None = None,
    explanation: str | None = None,
) -> RapidataAudience

Add a classification training example to this audience.

Training examples help annotators understand the task by showing them a sample datapoint with the correct answer before they start labeling.

Parameters:

Name	Type	Description	Default
`instruction`	`str`	The instruction for how the data should be classified.	required
`answer_options`	`list[str]`	The list of possible answer options for the classification.	required
`datapoint`	`str`	The datapoint (URL or path) to use as the training example.	required
`truth`	`list[str]`	The correct answer(s) for this training example.	required
`data_type`	`Literal['media', 'text']`	The data type of the datapoint. Defaults to "media".	`'media'`
`context`	`str | None`	Additional text context to display with the example. Defaults to None.	`None`
`media_context`	`str | None`	Additional media (URL or path) to display with the example. Defaults to None.	`None`
`explanation`	`str | None`	An explanation of why the truth is correct. Defaults to None.	`None`

Returns:

Name	Type	Description
`RapidataAudience`	`RapidataAudience`	The audience instance (self) for method chaining.

Source code in src/rapidata/rapidata_client/audience/rapidata_audience.py

def add_classification_example(
    self,
    instruction: str,
    answer_options: list[str],
    datapoint: str,
    truth: list[str],
    data_type: Literal["media", "text"] = "media",
    context: str | None = None,
    media_context: str | None = None,
    explanation: str | None = None,
) -> RapidataAudience:
    """Register a classification training example for this audience.

    Training examples help annotators understand the task by showing them
    a sample datapoint with the correct answer before they start labeling.
    The example is forwarded to the audience's example handler, after which
    ``_try_start_recruiting`` is invoked.

    Args:
        instruction (str): The instruction for how the data should be classified.
        answer_options (list[str]): The list of possible answer options for the classification.
        datapoint (str): The datapoint (URL or path) to use as the training example.
        truth (list[str]): The correct answer(s) for this training example.
        data_type (Literal["media", "text"], optional): The data type of the datapoint. Defaults to "media".
        context (str | None, optional): Additional text context to display with the example. Defaults to None.
        media_context (str | None, optional): Additional media (URL or path) to display with the example. Defaults to None.
        explanation (str | None, optional): An explanation of why the truth is correct. Defaults to None.

    Returns:
        RapidataAudience: This same instance, so that calls can be chained.
    """
    with tracer.start_as_current_span(
        "RapidataAudience.add_classification_example"
    ):
        debug_message = f"Adding classification example to audience: {self.id} with instruction: {instruction}, answer_options: {answer_options}, datapoint: {datapoint}, truths: {truth}, data_type: {data_type}, context: {context}, media_context: {media_context}, explanation: {explanation}"
        logger.debug(debug_message)

        # Forwarded positionally, in the order the handler is called with.
        example_args = (
            instruction,
            answer_options,
            datapoint,
            truth,
            data_type,
            context,
            media_context,
            explanation,
        )
        self._example_handler.add_classification_example(*example_args)

        self._try_start_recruiting()
        return self

add_compare_example #

add_compare_example(
    instruction: str,
    truth: str,
    datapoint: list[str],
    data_type: Literal["media", "text"] = "media",
    context: str | None = None,
    media_context: str | None = None,
    explanation: str | None = None,
) -> RapidataAudience

Add a comparison training example to this audience.

Training examples help annotators understand the task by showing them a sample comparison with the correct answer before they start labeling.

Parameters:

Name	Type	Description	Default
`instruction`	`str`	The instruction for the comparison task.	required
`truth`	`str`	The correct answer for this training example (which option should be selected).	required
`datapoint`	`list[str]`	A list of exactly two datapoints (URLs or paths) to compare.	required
`data_type`	`Literal['media', 'text']`	The data type of the datapoints. Defaults to "media".	`'media'`
`context`	`str | None`	Additional text context to display with the example. Defaults to None.	`None`
`media_context`	`str | None`	Additional media (URL or path) to display with the example. Defaults to None.	`None`
`explanation`	`str | None`	An explanation of why the truth is correct. Defaults to None.	`None`

Returns:

Name	Type	Description
`RapidataAudience`	`RapidataAudience`	The audience instance (self) for method chaining.

Source code in src/rapidata/rapidata_client/audience/rapidata_audience.py

def add_compare_example(
    self,
    instruction: str,
    truth: str,
    datapoint: list[str],
    data_type: Literal["media", "text"] = "media",
    context: str | None = None,
    media_context: str | None = None,
    explanation: str | None = None,
) -> RapidataAudience:
    """Register a comparison training example for this audience.

    Training examples help annotators understand the task by showing them
    a sample comparison with the correct answer before they start labeling.
    The example is forwarded to the audience's example handler, after which
    ``_try_start_recruiting`` is invoked.

    Args:
        instruction (str): The instruction for the comparison task.
        truth (str): The correct answer for this training example (which option should be selected).
        datapoint (list[str]): A list of exactly two datapoints (URLs or paths) to compare.
        data_type (Literal["media", "text"], optional): The data type of the datapoints. Defaults to "media".
        context (str | None, optional): Additional text context to display with the example. Defaults to None.
        media_context (str | None, optional): Additional media (URL or path) to display with the example. Defaults to None.
        explanation (str | None, optional): An explanation of why the truth is correct. Defaults to None.

    Returns:
        RapidataAudience: This same instance, so that calls can be chained.
    """
    with tracer.start_as_current_span("RapidataAudience.add_compare_example"):
        debug_message = f"Adding compare example to audience: {self.id} with instruction: {instruction}, truth: {truth}, datapoint: {datapoint}, data_type: {data_type}, context: {context}, media_context: {media_context}, explanation: {explanation}"
        logger.debug(debug_message)

        # Forwarded positionally, in the order the handler is called with.
        example_args = (
            instruction,
            truth,
            datapoint,
            data_type,
            context,
            media_context,
            explanation,
        )
        self._example_handler.add_compare_example(*example_args)

        self._try_start_recruiting()
        return self

find_jobs #

find_jobs(
    name: str = "", amount: int = 10, page: int = 1
) -> list[RapidataJob]

Find jobs assigned to this audience.

Parameters:

Name	Type	Description	Default
`name`	`str`	Filter jobs by name (matching jobs will contain this string). Defaults to "" for any job.	`''`
`amount`	`int`	The maximum number of jobs to return. Defaults to 10.	`10`
`page`	`int`	The page of jobs to return. Defaults to 1.	`1`

Returns:

Type	Description
`list[RapidataJob]`	A list of RapidataJob instances assigned to this audience.

Source code in src/rapidata/rapidata_client/audience/rapidata_audience.py

def find_jobs(
    self, name: str = "", amount: int = 10, page: int = 1
) -> list[RapidataJob]:
    """Look up jobs that are assigned to this audience.

    Queries the job API for jobs whose audience id equals ``self.id`` and
    whose name contains ``name``, sorted by creation time in descending order.

    Args:
        name (str, optional): Filter jobs by name (matching jobs will contain this string). Defaults to "" for any job.
        amount (int, optional): The maximum number of jobs to return. Defaults to 10.
        page (int, optional): The page of jobs to return. Defaults to 1.

    Returns:
        list[RapidataJob]: A list of RapidataJob instances assigned to this audience.
    """
    with tracer.start_as_current_span("RapidataAudience.find_jobs"):
        from rapidata.rapidata_client.job.rapidata_job import RapidataJob
        from rapidata.api_client.models.query_model import QueryModel
        from rapidata.api_client.models.root_filter import RootFilter
        from rapidata.api_client.models.filter import Filter
        from rapidata.api_client.models.filter_operator import FilterOperator
        from rapidata.api_client.models.logic_operator import LogicOperator
        from rapidata.api_client.models.pagination import Pagination
        from rapidata.api_client.models.sort_criterion import SortCriterion
        from rapidata.api_client.models.sort_direction import SortDirection

        pagination = Pagination(index=page, size=amount)

        # Both conditions must hold: the job belongs to this audience and its
        # name contains the requested substring.
        belongs_to_audience = Filter(
            field="AudienceId",
            operator=FilterOperator.EQ,
            value=self.id,
        )
        name_contains = Filter(
            field="Name",
            operator=FilterOperator.CONTAINS,
            value=name,
        )
        root_filter = RootFilter(
            filters=[belongs_to_audience, name_contains],
            logic=LogicOperator.AND,
        )

        by_created_at_desc = SortCriterion(
            direction=SortDirection.DESC, propertyName="CreatedAt"
        )

        query = QueryModel(
            page=pagination,
            filter=root_filter,
            sortCriteria=[by_created_at_desc],
        )
        response = self._openapi_service.order.job_api.jobs_get(request=query)

        # Wrap every returned item in a RapidataJob that shares this
        # instance's service object.
        found_jobs = []
        for item in response.items:
            found_jobs.append(
                RapidataJob(
                    job_id=item.job_id,
                    name=item.name,
                    audience_id=item.audience_id,
                    created_at=item.created_at,
                    definition_id=item.definition_id,
                    openapi_service=self._openapi_service,
                    pipeline_id=item.pipeline_id,
                )
            )
        return found_jobs

get_examples #

get_examples(amount: int = 10, page: int = 1) -> DataFrame

Get the examples for this audience as a DataFrame.

Returns a DataFrame with columns: asset, truth, context, contextAsset. Asset URLs are fully qualified with the environment's asset host.

Parameters:

Name	Type	Description	Default
`amount`	`int`	Number of examples per page.	`10`
`page`	`int`	Page number.	`1`

Returns:

Type	Description
`DataFrame`	A DataFrame containing the examples.

Source code in src/rapidata/rapidata_client/audience/rapidata_audience.py

def get_examples(
    self,
    amount: int = 10,
    page: int = 1,
) -> pd.DataFrame:
    """Fetch one page of this audience's examples as a DataFrame.

    Returns a DataFrame with columns: asset, truth, context, contextAsset.
    Asset URLs are fully qualified with the environment's asset host.

    Args:
        amount: Number of examples per page.
        page: Page number.

    Returns:
        A DataFrame containing the examples.
    """
    with tracer.start_as_current_span("RapidataAudience.get_examples"):
        import pandas as pd

        from rapidata.rapidata_client.audience.example_formatter import (
            ExampleFormatter,
        )

        examples_api = self._openapi_service.audience.examples_api
        response = examples_api.audience_audience_id_examples_get(
            audience_id=self.id,
            page=page,
            page_size=amount,
        )

        # Prefix handed to the formatter, built from the service's environment.
        asset_url_prefix = f"https://assets.{self._openapi_service.environment}/"
        example_rows = ExampleFormatter.format_to_csv_rows(
            response.items, asset_url_prefix
        )
        return pd.DataFrame(example_rows)

Rapidata audience