Rapidata leaderboard

RapidataLeaderboard #

RapidataLeaderboard(
    name: str,
    instruction: str,
    show_prompt: bool,
    show_prompt_asset: bool,
    inverse_ranking: bool,
    response_budget: int,
    min_responses_per_matchup: int,
    benchmark_id: str,
    id: str,
    openapi_service: OpenAPIService,
)

An instance of a Rapidata leaderboard.

Used to interact with a specific leaderboard in the Rapidata system, such as retrieving prompts and evaluating models.

Parameters:

Name	Type	Description	Default
`name`	`str`	The name that will be used to identify the leaderboard on the overview.	required
`instruction`	`str`	The instruction that will determine how the models will be evaluated.	required
`show_prompt`	`bool`	Whether to show the prompt to the users.	required
`id`	`str`	The ID of the leaderboard.	required
`openapi_service`	`OpenAPIService`	The OpenAPIService instance for API interaction.	required

Source code in src/rapidata/rapidata_client/benchmark/leaderboard/rapidata_leaderboard.py

def __init__(
    self,
    name: str,
    instruction: str,
    show_prompt: bool,
    show_prompt_asset: bool,
    inverse_ranking: bool,
    response_budget: int,
    min_responses_per_matchup: int,
    benchmark_id: str,
    id: str,
    openapi_service: OpenAPIService,
):
    """
    Stores the leaderboard configuration and derives the URL of its web page.

    Args:
        name: The name that will be used to identify the leaderboard on the overview.
        instruction: The instruction that will determine how the models will be evaluated.
        show_prompt: Whether to show the prompt to the users.
        show_prompt_asset: Whether to show the prompt asset to the users.
        inverse_ranking: Whether the ranking is inverse.
        response_budget: The response budget of the leaderboard.
        min_responses_per_matchup: The minimum number of responses per matchup.
        benchmark_id: The ID of the benchmark; part of the leaderboard page URL.
        id: The ID of the leaderboard.
        openapi_service: The OpenAPIService instance for API interaction.
    """
    # Identity and API access come first: the page URL at the end is built from them.
    self.id = id
    self.__benchmark_id = benchmark_id
    self.__openapi_service = openapi_service

    # What is presented to the users.
    self.__name = name
    self.__instruction = instruction
    self.__show_prompt = show_prompt
    self.__show_prompt_asset = show_prompt_asset

    # How results are collected and ranked.
    self.__inverse_ranking = inverse_ranking
    self.__response_budget = response_budget
    self.__min_responses_per_matchup = min_responses_per_matchup

    # Page that view() opens in the browser.
    self.__leaderboard_page = f"https://app.{self.__openapi_service.environment}/mri/benchmarks/{self.__benchmark_id}/leaderboard/{self.id}"

level_of_detail `property` `writable` #

level_of_detail: LevelOfDetail

Returns the level of detail of the leaderboard.

min_responses_per_matchup `property` `writable` #

min_responses_per_matchup: int

Returns the minimum number of responses required to be considered for the leaderboard.

show_prompt_asset `property` #

show_prompt_asset: bool

Returns whether the prompt asset is shown to the users.

inverse_ranking `property` #

inverse_ranking: bool

Returns whether the ranking is inverse.

show_prompt `property` #

show_prompt: bool

Returns whether the prompt is shown to the users.

instruction `property` #

instruction: str

Returns the instruction of the leaderboard.

name `property` `writable` #

name: str

Returns the name of the leaderboard.

get_standings #

get_standings(
    tags: Optional[list[str]] = None,
) -> "pd.DataFrame"

Returns the standings of the leaderboard.

Parameters:

Name	Type	Description	Default
`tags`	`Optional[list[str]]`	The matchups with these tags should be used to create the standings. If tags are None, all matchups will be considered. If tags are empty, no matchups will be considered.	`None`

Returns:

Type	Description
`'pd.DataFrame'`	A pandas DataFrame containing the standings of the leaderboard.

Source code in src/rapidata/rapidata_client/benchmark/leaderboard/rapidata_leaderboard.py

def get_standings(self, tags: Optional[list[str]] = None) -> "pd.DataFrame":
    """
    Returns the standings of the leaderboard.

    Args:
        tags: The matchups with these tags should be used to create the standings.
            If tags are None, all matchups will be considered.
            If tags are empty, no matchups will be considered.

    Returns:
        A pandas DataFrame with one row per participant and the columns
        "name", "wins", "total_matches" and "score" (rounded to two decimals,
        or None when the participant has no score). The columns are present
        even when there are no participants.
    """
    with tracer.start_as_current_span("RapidataLeaderboard.get_standings"):
        participants = self.__openapi_service.leaderboard.leaderboard_api.leaderboard_leaderboard_id_standings_get(
            leaderboard_id=self.id, tags=tags
        )

        # Imported lazily so pandas is only loaded when standings are requested.
        import pandas as pd

        standings = [
            {
                "name": participant.name,
                "wins": participant.wins,
                "total_matches": participant.total_matches,
                "score": (
                    round(participant.score, 2)
                    if participant.score is not None
                    else None
                ),
            }
            for participant in participants.items
        ]

        # Pin the schema explicitly: a DataFrame built from an empty list has
        # no columns at all, so column access such as df["score"] would raise
        # KeyError for a leaderboard without participants.
        return pd.DataFrame(
            standings, columns=["name", "wins", "total_matches", "score"]
        )

get_win_loss_matrix #

get_win_loss_matrix(
    tags: Optional[list[str]] = None,
    use_weighted_scoring: Optional[bool] = None,
) -> DataFrame

Returns the win/loss matrix for all participants in this leaderboard.

The matrix shows pairwise comparison results where each cell [i, j] represents the number of wins participant i has against participant j.

Parameters:

Name	Type	Description	Default
`tags`	`Optional[list[str]]`	Filter matchups by these tags. If None, all matchups are considered.	`None`
`use_weighted_scoring`	`Optional[bool]`	Whether to use weighted scoring for the matrix calculation.	`None`

Returns:

Type	Description
`DataFrame`	A pandas DataFrame with participants as both index and columns, containing the pairwise win counts.

Source code in src/rapidata/rapidata_client/benchmark/leaderboard/rapidata_leaderboard.py

def get_win_loss_matrix(
    self,
    tags: Optional[list[str]] = None,
    use_weighted_scoring: Optional[bool] = None,
) -> pd.DataFrame:
    """
    Returns the pairwise win/loss matrix of this leaderboard's participants.

    Cell [i, j] of the matrix holds the number of wins participant i has
    against participant j.

    Args:
        tags: Filter matchups by these tags. If None, all matchups are considered.
        use_weighted_scoring: Whether to use weighted scoring for the matrix calculation.

    Returns:
        A pandas DataFrame with participants as both index and columns,
        containing the pairwise win counts.
    """
    with tracer.start_as_current_span("RapidataLeaderboard.get_win_loss_matrix"):
        leaderboard_api = self.__openapi_service.leaderboard.leaderboard_api
        matrix = leaderboard_api.leaderboard_leaderboard_id_matrix_get(
            leaderboard_id=self.id,
            tags=tags,
            use_weighted_scoring=use_weighted_scoring,
        )

        # Imported lazily so pandas is only loaded when the matrix is requested.
        import pandas as pd

        # The response already carries the cell values and both label axes.
        cells = matrix.data
        row_labels = pd.Index(matrix.index)
        column_labels = pd.Index(matrix.columns)

        return pd.DataFrame(data=cells, index=row_labels, columns=column_labels)

view #

view() -> None

Views the leaderboard.

Source code in src/rapidata/rapidata_client/benchmark/leaderboard/rapidata_leaderboard.py

def view(self) -> None:
    """
    Opens the leaderboard page in the web browser.

    If no browser could be opened, the percent-encoded URL is printed instead
    so that it can be opened manually.
    """
    logger.info("Opening leaderboard page in browser...")
    if webbrowser.open(self.__leaderboard_page):
        return

    # Fallback when no browser was launched: print the URL instead. Characters
    # listed in `safe` are left unescaped by quote().
    encoded_url = urllib.parse.quote(
        self.__leaderboard_page, safe="%/:=&?~#+!$,;'@()*[]"
    )
    hint = f"Please open this URL in your browser: '{encoded_url}'"
    managed_print(Fore.RED + hint + Fore.RESET)

Rapidata leaderboard