Skip to content

Rapidata audience

RapidataAudience #

RapidataAudience(
    id: str,
    name: str,
    filters: list[RapidataFilter],
    openapi_service: OpenAPIService,
)

Represents a Rapidata audience.

An audience is a group of annotators that can be recruited based on example tasks and assigned jobs.

Attributes:

Name Type Description
id str

The unique identifier of the audience.

name str

The name of the audience.

filters list[RapidataFilter]

The list of filters applied to the audience.

Source code in src/rapidata/rapidata_client/audience/rapidata_audience.py
def __init__(
    self,
    id: str,
    name: str,
    filters: list[RapidataFilter],
    openapi_service: OpenAPIService,
) -> None:
    """Create a client-side handle for an audience.

    Args:
        id (str): The unique identifier of the audience.
        name (str): The name of the audience.
        filters (list[RapidataFilter]): The list of filters applied to the audience.
        openapi_service (OpenAPIService): The service object used for all API calls
            made through this instance.
    """
    # Backend access used by every method of this class.
    self._openapi_service = openapi_service

    # Identity and descriptive state; the id is stored as a public attribute,
    # name and filters as private ones.
    self.id = id
    self._name = name
    self._filters = filters

    # NOTE(review): presumably maintained by _try_start_recruiting, which is
    # not visible here; it always starts out as False.
    self._recruiting_started = False

    # Helper that the add_*_example methods delegate to; bound to this audience id.
    self._example_handler = AudienceExampleHandler(openapi_service, id)

name property #

name: str

The name of the audience.

filters property #

filters: list[RapidataFilter]

The list of filters applied to the audience.

delete #

delete() -> None

Deletes the audience.

Source code in src/rapidata/rapidata_client/audience/rapidata_audience.py
def delete(self) -> None:
    """Delete this audience.

    Issues the delete call for ``self.id`` against the audience API, then
    reports the deletion through the logger and ``managed_print``.
    """
    with tracer.start_as_current_span("RapidataAudience.delete"):
        logger.info("Deleting audience '%s'", self)

        audience_api = self._openapi_service.audience.audience_api
        audience_api.audience_audience_id_delete(self.id)

        # Only reached when the API call above did not raise.
        logger.debug("Audience '%s' has been deleted.", self)
        managed_print(f"Audience '{self}' has been deleted.")

update_filters #

update_filters(
    filters: list[RapidataFilter],
) -> RapidataAudience

Update the filters for this audience.

Parameters:

Name Type Description Default
filters list[RapidataFilter]

The new list of filters to apply to the audience.

required

Returns:

Name Type Description
RapidataAudience RapidataAudience

The updated audience instance (self) for method chaining.

Source code in src/rapidata/rapidata_client/audience/rapidata_audience.py
def update_filters(self, filters: list[RapidataFilter]) -> RapidataAudience:
    """Replace the filters of this audience.

    The new filters are sent to the audience API first; the locally stored
    filter list is only replaced once that call has returned.

    Args:
        filters (list[RapidataFilter]): The new list of filters to apply to the audience.

    Returns:
        RapidataAudience: The updated audience instance (self) for method chaining.
    """
    with tracer.start_as_current_span("RapidataAudience.update_filters"):
        from rapidata.api_client.models.update_audience_endpoint_input import (
            UpdateAudienceEndpointInput,
        )

        logger.debug(f"Updating filters for audience: {self.id} to {filters}")

        # Convert every client-side filter into its API model representation.
        filter_models = [entry._to_audience_model() for entry in filters]
        payload = UpdateAudienceEndpointInput(filters=filter_models)

        audience_api = self._openapi_service.audience.audience_api
        audience_api.audience_audience_id_patch(
            audience_id=self.id,
            update_audience_endpoint_input=payload,
        )

        # Keep the caller's list object itself (no copy is made).
        self._filters = filters
        return self

update_name #

update_name(name: str) -> RapidataAudience

Update the name of this audience.

Parameters:

Name Type Description Default
name str

The new name for the audience.

required

Returns:

Name Type Description
RapidataAudience RapidataAudience

The updated audience instance (self) for method chaining.

Source code in src/rapidata/rapidata_client/audience/rapidata_audience.py
def update_name(self, name: str) -> RapidataAudience:
    """Rename this audience.

    The new name is sent to the audience API first; the locally stored name
    is only replaced once that call has returned.

    Args:
        name (str): The new name for the audience.

    Returns:
        RapidataAudience: The updated audience instance (self) for method chaining.
    """
    with tracer.start_as_current_span("RapidataAudience.update_name"):
        from rapidata.api_client.models.update_audience_endpoint_input import (
            UpdateAudienceEndpointInput,
        )

        logger.debug(f"Updating name for audience: {self.id} to {name}")

        # Only the name field is set on the patch payload.
        payload = UpdateAudienceEndpointInput(name=name)

        audience_api = self._openapi_service.audience.audience_api
        audience_api.audience_audience_id_patch(
            audience_id=self.id,
            update_audience_endpoint_input=payload,
        )

        self._name = name
        return self

assign_job #

assign_job(
    job_definition: RapidataJobDefinition,
) -> RapidataJob

Assign a job to this audience.

Creates a new job instance from the job definition and assigns it to this audience. The job will be executed by the annotators in this audience.

Parameters:

Name Type Description Default
job_definition RapidataJobDefinition

The job definition to create and assign to the audience.

required

Returns:

Name Type Description
RapidataJob RapidataJob

The created job instance.

Source code in src/rapidata/rapidata_client/audience/rapidata_audience.py
def assign_job(self, job_definition: RapidataJobDefinition) -> RapidataJob:
    """Assign a job to this audience.

    Creates a new job from the given job definition via the job API and
    wraps the result in a RapidataJob bound to this audience.

    Args:
        job_definition (RapidataJobDefinition): The job definition to create and assign to the audience.

    Returns:
        RapidataJob: The created job instance.
    """
    with tracer.start_as_current_span("RapidataAudience.assign_job"):
        from datetime import datetime

        from rapidata.api_client.models.create_job_endpoint_input import (
            CreateJobEndpointInput,
        )
        from rapidata.rapidata_client.job.rapidata_job import RapidataJob

        logger.debug(f"Assigning job to audience: {self.id}")

        request = CreateJobEndpointInput(
            audienceId=self.id,
            jobDefinitionId=job_definition.id,
        )
        job_api = self._openapi_service.order.job_api
        response = job_api.job_post(create_job_endpoint_input=request)

        # The creation time is taken from the local clock rather than from the
        # response.
        # NOTE(review): datetime.now() is timezone-naive; confirm this matches
        # the created_at values that find_jobs receives from the API.
        created_job = RapidataJob(
            job_id=response.job_id,
            name=job_definition.name,
            audience_id=self.id,
            created_at=datetime.now(),
            definition_id=job_definition.id,
            openapi_service=self._openapi_service,
        )

        logger.info(f"Assigned job to audience: {self.id}")
        return created_job

add_classification_example #

add_classification_example(
    instruction: str,
    answer_options: list[str],
    datapoint: str,
    truth: list[str],
    data_type: Literal["media", "text"] = "media",
    context: str | None = None,
    media_context: str | None = None,
    explanation: str | None = None,
) -> RapidataAudience

Add a classification training example to this audience.

Training examples help annotators understand the task by showing them a sample datapoint with the correct answer before they start labeling.

Parameters:

Name Type Description Default
instruction str

The instruction for how the data should be classified.

required
answer_options list[str]

The list of possible answer options for the classification.

required
datapoint str

The datapoint (URL or path) to use as the training example.

required
truth list[str]

The correct answer(s) for this training example.

required
data_type Literal['media', 'text']

The data type of the datapoint. Defaults to "media".

'media'
context str

Additional text context to display with the example. Defaults to None.

None
media_context str

Additional media (URL or path) to display with the example. Defaults to None.

None
explanation str

An explanation of why the truth is correct. Defaults to None.

None

Returns:

Name Type Description
RapidataAudience RapidataAudience

The audience instance (self) for method chaining.

Source code in src/rapidata/rapidata_client/audience/rapidata_audience.py
def add_classification_example(
    self,
    instruction: str,
    answer_options: list[str],
    datapoint: str,
    truth: list[str],
    data_type: Literal["media", "text"] = "media",
    context: str | None = None,
    media_context: str | None = None,
    explanation: str | None = None,
) -> RapidataAudience:
    """Add a classification training example to this audience.

    Training examples help annotators understand the task by showing them
    a sample datapoint with the correct answer before they start labeling.
    The example is handed to the audience's example handler, after which
    ``_try_start_recruiting`` is invoked.

    Args:
        instruction (str): The instruction for how the data should be classified.
        answer_options (list[str]): The list of possible answer options for the classification.
        datapoint (str): The datapoint (URL or path) to use as the training example.
        truth (list[str]): The correct answer(s) for this training example.
        data_type (Literal["media", "text"], optional): The data type of the datapoint. Defaults to "media".
        context (str, optional): Additional text context to display with the example. Defaults to None.
        media_context (str, optional): Additional media (URL or path) to display with the example. Defaults to None.
        explanation (str, optional): An explanation of why the truth is correct. Defaults to None.

    Returns:
        RapidataAudience: The audience instance (self) for method chaining.
    """
    span_name = "RapidataAudience.add_classification_example"
    with tracer.start_as_current_span(span_name):
        logger.debug(
            f"Adding classification example to audience: {self.id} with instruction: {instruction}, answer_options: {answer_options}, datapoint: {datapoint}, truths: {truth}, data_type: {data_type}, context: {context}, media_context: {media_context}, explanation: {explanation}"
        )

        # Forwarded positionally, in exactly this order, to the handler.
        handler_args = (
            instruction,
            answer_options,
            datapoint,
            truth,
            data_type,
            context,
            media_context,
            explanation,
        )
        self._example_handler.add_classification_example(*handler_args)

        self._try_start_recruiting()
        return self

add_compare_example #

add_compare_example(
    instruction: str,
    truth: str,
    datapoint: list[str],
    data_type: Literal["media", "text"] = "media",
    context: str | None = None,
    media_context: str | None = None,
    explanation: str | None = None,
) -> RapidataAudience

Add a comparison training example to this audience.

Training examples help annotators understand the task by showing them a sample comparison with the correct answer before they start labeling.

Parameters:

Name Type Description Default
instruction str

The instruction for the comparison task.

required
truth str

The correct answer for this training example (which option should be selected).

required
datapoint list[str]

A list of exactly two datapoints (URLs or paths) to compare.

required
data_type Literal['media', 'text']

The data type of the datapoints. Defaults to "media".

'media'
context str

Additional text context to display with the example. Defaults to None.

None
media_context str

Additional media (URL or path) to display with the example. Defaults to None.

None
explanation str

An explanation of why the truth is correct. Defaults to None.

None

Returns:

Name Type Description
RapidataAudience RapidataAudience

The audience instance (self) for method chaining.

Source code in src/rapidata/rapidata_client/audience/rapidata_audience.py
def add_compare_example(
    self,
    instruction: str,
    truth: str,
    datapoint: list[str],
    data_type: Literal["media", "text"] = "media",
    context: str | None = None,
    media_context: str | None = None,
    explanation: str | None = None,
) -> RapidataAudience:
    """Add a comparison training example to this audience.

    Training examples help annotators understand the task by showing them
    a sample comparison with the correct answer before they start labeling.
    The example is handed to the audience's example handler, after which
    ``_try_start_recruiting`` is invoked.

    Args:
        instruction (str): The instruction for the comparison task.
        truth (str): The correct answer for this training example (which option should be selected).
        datapoint (list[str]): A list of exactly two datapoints (URLs or paths) to compare.
        data_type (Literal["media", "text"], optional): The data type of the datapoints. Defaults to "media".
        context (str, optional): Additional text context to display with the example. Defaults to None.
        media_context (str, optional): Additional media (URL or path) to display with the example. Defaults to None.
        explanation (str, optional): An explanation of why the truth is correct. Defaults to None.

    Returns:
        RapidataAudience: The audience instance (self) for method chaining.
    """
    span_name = "RapidataAudience.add_compare_example"
    with tracer.start_as_current_span(span_name):
        logger.debug(
            f"Adding compare example to audience: {self.id} with instruction: {instruction}, truth: {truth}, datapoint: {datapoint}, data_type: {data_type}, context: {context}, media_context: {media_context}, explanation: {explanation}"
        )

        # Forwarded positionally, in exactly this order, to the handler.
        handler_args = (
            instruction,
            truth,
            datapoint,
            data_type,
            context,
            media_context,
            explanation,
        )
        self._example_handler.add_compare_example(*handler_args)

        self._try_start_recruiting()
        return self

find_jobs #

find_jobs(
    name: str = "", amount: int = 10, page: int = 1
) -> list[RapidataJob]

Find jobs assigned to this audience.

Parameters:

Name Type Description Default
name str

Filter jobs by name (matching jobs will contain this string). Defaults to "" for any job.

''
amount int

The maximum number of jobs to return. Defaults to 10.

10
page int

The page of jobs to return. Defaults to 1.

1

Returns:

Type Description
list[RapidataJob]

A list of RapidataJob instances assigned to this audience.

Source code in src/rapidata/rapidata_client/audience/rapidata_audience.py
def find_jobs(
    self, name: str = "", amount: int = 10, page: int = 1
) -> list[RapidataJob]:
    """Find jobs assigned to this audience.

    Queries the job API for jobs whose audience id equals this audience's id
    and whose name contains ``name``, sorted by creation time in descending
    order.

    Args:
        name (str, optional): Filter jobs by name (matching jobs will contain this string). Defaults to "" for any job.
        amount (int, optional): The maximum number of jobs to return. Defaults to 10.
        page (int, optional): The page of jobs to return. Defaults to 1.

    Returns:
        list[RapidataJob]: A list of RapidataJob instances assigned to this audience.
    """
    with tracer.start_as_current_span("RapidataAudience.find_jobs"):
        from rapidata.rapidata_client.job.rapidata_job import RapidataJob
        from rapidata.api_client.models.query_model import QueryModel
        from rapidata.api_client.models.root_filter import RootFilter
        from rapidata.api_client.models.filter import Filter
        from rapidata.api_client.models.filter_operator import FilterOperator
        from rapidata.api_client.models.logic_operator import LogicOperator
        from rapidata.api_client.models.pagination import Pagination
        from rapidata.api_client.models.sort_criterion import SortCriterion
        from rapidata.api_client.models.sort_direction import SortDirection

        # Both conditions must hold: same audience AND name contains the search string.
        belongs_to_audience = Filter(
            field="AudienceId",
            operator=FilterOperator.EQ,
            value=self.id,
        )
        name_matches = Filter(
            field="Name",
            operator=FilterOperator.CONTAINS,
            value=name,
        )
        combined_filter = RootFilter(
            filters=[belongs_to_audience, name_matches],
            logic=LogicOperator.AND,
        )

        newest_first = SortCriterion(
            direction=SortDirection.DESC, propertyName="CreatedAt"
        )
        query = QueryModel(
            page=Pagination(index=page, size=amount),
            filter=combined_filter,
            sortCriteria=[newest_first],
        )

        response = self._openapi_service.order.job_api.jobs_get(request=query)

        # Wrap every returned item in a client-side RapidataJob.
        return [
            RapidataJob(
                job_id=entry.job_id,
                name=entry.name,
                audience_id=entry.audience_id,
                created_at=entry.created_at,
                definition_id=entry.definition_id,
                openapi_service=self._openapi_service,
                pipeline_id=entry.pipeline_id,
            )
            for entry in response.items
        ]

get_examples #

get_examples(amount: int = 10, page: int = 1) -> DataFrame

Get the examples for this audience as a DataFrame.

Returns a DataFrame with columns: asset, truth, context, contextAsset. Asset URLs are fully qualified with the environment's asset host.

Parameters:

Name Type Description Default
amount int

Number of examples per page.

10
page int

Page number.

1

Returns:

Type Description
DataFrame

A DataFrame containing the examples.

Source code in src/rapidata/rapidata_client/audience/rapidata_audience.py
def get_examples(
    self,
    amount: int = 10,
    page: int = 1,
) -> pd.DataFrame:
    """Get the examples for this audience as a DataFrame.

    Fetches one page of examples from the examples API, converts the items
    to rows with ``ExampleFormatter.format_to_csv_rows`` and wraps those rows
    in a DataFrame.

    Returns a DataFrame with columns: asset, truth, context, contextAsset.
    Asset URLs are fully qualified with the environment's asset host.

    Args:
        amount (int, optional): Number of examples per page. Defaults to 10.
        page (int, optional): Page number. Defaults to 1.

    Returns:
        pd.DataFrame: A DataFrame containing the examples.
    """
    with tracer.start_as_current_span("RapidataAudience.get_examples"):
        import pandas as pd

        from rapidata.rapidata_client.audience.example_formatter import (
            ExampleFormatter,
        )

        examples_api = self._openapi_service.audience.examples_api
        response = examples_api.audience_audience_id_examples_get(
            audience_id=self.id,
            page=page,
            page_size=amount,
        )

        # Asset host is derived from the environment of the configured service.
        asset_url_prefix = f"https://assets.{self._openapi_service.environment}/"
        example_rows = ExampleFormatter.format_to_csv_rows(
            response.items, asset_url_prefix
        )
        return pd.DataFrame(example_rows)