Skip to content

Rapidata job definition

RapidataJobDefinition #

RapidataJobDefinition(
    id: str, name: str, openapi_service: OpenAPIService
)
Source code in src/rapidata/rapidata_client/job/rapidata_job_definition.py
def __init__(
    self,
    id: str,
    name: str,
    openapi_service: OpenAPIService,
):
    """Store the definition's identity and API service, and derive its page URL.

    Args:
        id: Identifier of the job definition; kept as ``self.id``.
        name: Name of the job definition; kept as ``self.name``.
        openapi_service: Service object kept as ``self._openapi_service``.
            Its ``environment`` attribute supplies the host part of the URL.
    """
    self.id = id
    self.name = name
    self._openapi_service = openapi_service

    # Address of this definition's page in the web app; used by ``preview``.
    url = f"https://app.{self._openapi_service.environment}/definitions/{self.id}"
    self._job_details_page = url

preview #

preview() -> RapidataJobDefinition

Opens the job definition page in your browser so you can preview the job definition before giving it to an audience.

Source code in src/rapidata/rapidata_client/job/rapidata_job_definition.py
def preview(self) -> RapidataJobDefinition:
    """Open the job definition page in the browser for preview.

    Lets you look at the job definition before giving it to an audience.
    If no browser could be opened, the URL is printed instead so that it
    can be opened manually.

    Returns:
        RapidataJobDefinition: This instance, so calls can be chained.
    """
    # The page shown belongs to a job definition, not an order.
    logger.info("Opening job definition details page in browser...")
    if not webbrowser.open(self._job_details_page):
        # A falsy result from webbrowser.open means no browser was launched;
        # fall back to printing a percent-encoded URL for the user to open.
        encoded_url = urllib.parse.quote(
            self._job_details_page, safe="%/:=&?~#+!$,;'@()*[]"
        )
        managed_print(
            Fore.RED
            + f"Please open this URL in your browser: '{encoded_url}'"
            + Fore.RESET
        )
    return self

delete #

delete() -> None

Deletes the job definition and all its revisions.

Source code in src/rapidata/rapidata_client/job/rapidata_job_definition.py
def delete(self) -> None:
    """Delete this job definition together with all of its revisions.

    Issues the delete request for ``self.id`` inside a tracing span, then
    logs and prints a confirmation message.
    """
    with tracer.start_as_current_span("RapidataJobDefinition.delete"):
        logger.info("Deleting job definition '%s'", self)

        # Resolve the API handle only after the log call above, then delete.
        job_api = self._openapi_service.order.job_api
        job_api.job_definition_definition_id_delete(self.id)

        logger.debug("Job definition '%s' has been deleted.", self)
        managed_print(f"Job definition '{self}' has been deleted.")

update_dataset #

update_dataset(
    datapoints: list[str] | list[list[str]],
    data_type: Literal["text", "media"] = "media",
    contexts: list[str] | None = None,
    media_contexts: list[str] | None = None,
    sentences: list[str] | None = None,
    private_metadata: list[dict[str, str]] | None = None,
) -> RapidataJobDefinition

Update the dataset of the job definition.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `datapoints` | `list[str] \| list[list[str]]` | Paths to the datapoints, or the strings themselves for text datapoints. | required |
| `data_type` | `Literal['text', 'media']` | The type of the datapoints. | `'media'` |

Source code in src/rapidata/rapidata_client/job/rapidata_job_definition.py
def update_dataset(
    self,
    datapoints: list[str] | list[list[str]],
    data_type: Literal["text", "media"] = "media",
    contexts: list[str] | None = None,
    media_contexts: list[str] | None = None,
    sentences: list[str] | None = None,
    private_metadata: list[dict[str, str]] | None = None,
) -> RapidataJobDefinition:
    """Update the dataset of the job definition.

    Creates a new dataset named ``<name>_dataset``, uploads the given datapoints
    to it and then creates a new revision of this job definition that references
    the new dataset.

    Args:
        datapoints (list[str] | list[list[str]]): paths to the datapoints or strings for text datapoints.
        data_type (Literal["text", "media"]): The type of the datapoints.
        contexts (list[str] | None): Optional contexts, passed to the datapoint
            validator together with the datapoints.
        media_contexts (list[str] | None): Optional media contexts, passed to the
            datapoint validator together with the datapoints.
        sentences (list[str] | None): Optional sentences, passed to the datapoint
            validator together with the datapoints.
        private_metadata (list[dict[str, str]] | None): Optional private metadata,
            passed to the datapoint validator together with the datapoints.

    Returns:
        RapidataJobDefinition: This instance, so calls can be chained.

    Raises:
        FailedUploadException: If any datapoint failed to upload. In that case
            no new revision is created.
    """
    # Span name uses the class-qualified form, matching the span in ``delete``.
    with tracer.start_as_current_span("RapidataJobDefinition.update_dataset"):
        # Imported inside the method, as in the original implementation.
        from rapidata.rapidata_client.datapoints._datapoints_validator import (
            DatapointsValidator,
        )
        from rapidata.api_client.models.create_dataset_endpoint_input import (
            CreateDatasetEndpointInput,
        )
        from rapidata.rapidata_client.dataset._rapidata_dataset import (
            RapidataDataset,
        )
        from rapidata.api_client.models.create_job_revision_endpoint_input import (
            CreateJobRevisionEndpointInput,
        )

        # Combine the raw inputs into the datapoint list that gets uploaded.
        datapoints_list = DatapointsValidator.map_datapoints(
            datapoints=datapoints,
            contexts=contexts,
            media_contexts=media_contexts,
            sentences=sentences,
            private_metadata=private_metadata,
            data_type=data_type,
        )

        # Every update uploads into a newly created dataset.
        dataset = self._openapi_service.dataset.dataset_api.dataset_post(
            create_dataset_endpoint_input=CreateDatasetEndpointInput(
                name=self.name + "_dataset"
            )
        )

        rapidata_dataset = RapidataDataset(
            dataset.dataset_id, self._openapi_service
        )

        with tracer.start_as_current_span("update_datapoints"):
            _, failed_uploads = rapidata_dataset.add_datapoints(datapoints_list)
            if failed_uploads:
                # Abort before creating a revision when any upload failed.
                raise FailedUploadException(
                    rapidata_dataset, failed_uploads, job_definition=self
                )

        # Create the revision that points this definition at the new dataset.
        self._openapi_service.order.job_api.job_definition_definition_id_revision_post(
            definition_id=self.id,
            create_job_revision_endpoint_input=CreateJobRevisionEndpointInput(
                datasetId=rapidata_dataset.id,
            ),
        )

        return self