@@ -78,7 +78,7 @@ class EvaluationJobExitCode(Enum):
     SUCCESS = 0
     COMMON_ERROR = 1
 
-    # Configuration-related issues
+    # Configuration-related issues 10-19
     INVALID_EVALUATION_CONFIG = 10
     EVALUATION_CONFIG_NOT_PROVIDED = 11
     INVALID_OUTPUT_DIR = 12
@@ -87,7 +87,7 @@ class EvaluationJobExitCode(Enum):
     INVALID_TARGET_EVALUATION_ID = 15
     INVALID_EVALUATION_CONFIG_VALIDATION = 16
 
-    # Evaluation process issues
+    # Evaluation process issues 20-39
     OUTPUT_DIR_NOT_FOUND = 20
     INVALID_INPUT_DATASET = 21
     INPUT_DATA_NOT_FOUND = 22
@@ -100,6 +100,7 @@ class EvaluationJobExitCode(Enum):
     MODEL_INFERENCE_WRONG_RESPONSE_FORMAT = 29
     UNSUPPORTED_METRICS = 30
     METRIC_CALCULATION_FAILURE = 31
+    EVALUATION_MODEL_CATALOG_RECORD_CREATION_FAILED = 32
 
 
 EVALUATION_JOB_EXIT_CODE_MESSAGE = {
@@ -124,6 +125,11 @@ class EvaluationJobExitCode(Enum):
     EvaluationJobExitCode.MODEL_INFERENCE_WRONG_RESPONSE_FORMAT.value: "Evaluation encountered unsupported, or unexpected model output, verify the target evaluation model is compatible and produces the correct format.",
     EvaluationJobExitCode.UNSUPPORTED_METRICS.value: "None of the provided metrics are supported by the framework.",
     EvaluationJobExitCode.METRIC_CALCULATION_FAILURE.value: "All attempted metric calculations were unsuccessful. Please review the metric configurations and input data.",
+    EvaluationJobExitCode.EVALUATION_MODEL_CATALOG_RECORD_CREATION_FAILED.value: (
+        "Failed to create a Model Catalog record for the evaluation. "
+        "This could be due to missing required permissions. "
+        "Please check the log for more information."
+    ),
 }
 
 
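(Not part of the diff: a minimal usage sketch showing how the mapping above could be consumed once a job run finishes. It assumes EvaluationJobExitCode and EVALUATION_JOB_EXIT_CODE_MESSAGE are in scope from this module; the helper name and the fallback text are hypothetical.)

def describe_exit_code(exit_code: int) -> str:
    # Translate a job-run exit code into its user-facing message; fall back to a
    # generic message (an assumption, not defined in this module) for codes that
    # have no entry in EVALUATION_JOB_EXIT_CODE_MESSAGE.
    return EVALUATION_JOB_EXIT_CODE_MESSAGE.get(
        exit_code, f"Evaluation job failed with exit code {exit_code}."
    )

# The newly added code 32 resolves to the Model Catalog record-creation message:
describe_exit_code(
    EvaluationJobExitCode.EVALUATION_MODEL_CATALOG_RECORD_CREATION_FAILED.value
)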
@@ -849,13 +855,17 @@ def get(self, eval_id) -> AquaEvaluationDetail:
             loggroup_id = ""
 
         loggroup_url = get_log_links(region=self.region, log_group_id=loggroup_id)
-        log_url = get_log_links(
-            region=self.region,
-            log_group_id=loggroup_id,
-            log_id=log_id,
-            compartment_id=job_run_details.compartment_id,
-            source_id=jobrun_id
-        ) if job_run_details else ""
+        log_url = (
+            get_log_links(
+                region=self.region,
+                log_group_id=loggroup_id,
+                log_id=log_id,
+                compartment_id=job_run_details.compartment_id,
+                source_id=jobrun_id,
+            )
+            if job_run_details
+            else ""
+        )
 
         log_name = None
         loggroup_name = None
@@ -931,7 +941,6 @@ def list(
         evaluations = []
         async_tasks = []
         for model in models:
-
             if model.identifier in self._eval_cache.keys():
                 logger.debug(f"Retrieving evaluation {model.identifier} from cache.")
                 evaluations.append(self._eval_cache.get(model.identifier))
@@ -1049,13 +1058,17 @@ def get_status(self, eval_id: str) -> dict:
             loggroup_id = ""
 
         loggroup_url = get_log_links(region=self.region, log_group_id=loggroup_id)
-        log_url = get_log_links(
-            region=self.region,
-            log_group_id=loggroup_id,
-            log_id=log_id,
-            compartment_id=job_run_details.compartment_id,
-            source_id=jobrun_id
-        ) if job_run_details else ""
+        log_url = (
+            get_log_links(
+                region=self.region,
+                log_group_id=loggroup_id,
+                log_id=log_id,
+                compartment_id=job_run_details.compartment_id,
+                source_id=jobrun_id,
+            )
+            if job_run_details
+            else ""
+        )
 
         return dict(
             id=eval_id,
@@ -1100,6 +1113,19 @@ def get_supported_metrics(self) -> dict:
                 ),
                 "args": {},
             },
+            {
+                "use_case": ["text_generation"],
+                "key": "bleu",
+                "name": "bleu",
+                "description": (
+                    "BLEU (Bilingual Evaluation Understudy) is an algorithm for evaluating the "
+                    "quality of text which has been machine-translated from one natural language to another. "
+                    "Quality is considered to be the correspondence between a machine's output and that of a "
+                    "human: 'the closer a machine translation is to a professional human translation, "
+                    "the better it is'."
+                ),
+                "args": {},
+            },
         ]
 
     @telemetry(entry_point="plugin=evaluation&action=load_metrics", name="aqua")
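(Not part of the diff: an illustrative consumer of get_supported_metrics(), assuming `evaluation_app` is an instance of the evaluation class modified here; the variable names are hypothetical.)

supported = evaluation_app.get_supported_metrics()
# Each entry carries "use_case", "key", "name", "description", and "args" as in
# the list above; collect the metric keys that apply to text generation.
text_generation_metrics = [
    metric["key"]
    for metric in supported
    if "text_generation" in metric.get("use_case", [])
]
# With this change, "bleu" now appears alongside the previously supported metrics.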