Branch: refs/heads/master
Home: https://github.com/xwiki-contrib/ai-llm-benchmark
Commit: c01f55332ed4e8331713feedfa7f32bd0f4a94be
https://github.com/xwiki-contrib/ai-llm-benchmark/commit/c01f55332ed4e8331713feedfa7f32bd0f4a94be
Author: Paul Pantiru <paulp2501(a)gmail.com>
Date: 2024-08-06 (Tue, 06 Aug 2024)
Changed paths:
M archives/full_nomic_ai_llm_benchmark_archive_20240804_101628.zip
M config.json
R evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_Q4/qa_001_result.json
R evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_Q4/qa_002_result.json
R evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_Q4/qa_003_result.json
R evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_Q4/qa_004_result.json
R evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_Q4/qa_005_result.json
R evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_Q4/qa_006_result.json
R evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_Q4/qa_007_result.json
R evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_Q4/qa_008_result.json
R evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_Q4/qa_009_result.json
R evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_Q4/qa_010_result.json
R evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_Q4/qa_011_result.json
R evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_Q4/qa_012_result.json
R evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_Q4/qa_013_result.json
R evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_Q4/qa_014_result.json
R evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_Q4/qa_015_result.json
R evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_Q4/qa_016_result.json
R evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_Q4/qa_017_result.json
R evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_Q4/qa_018_result.json
R evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_Q4/qa_019_result.json
R evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_Q4/qa_020_result.json
R evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_Q4/qa_021_result.json
R evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_Q4/qa_022_result.json
R evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_Q4/qa_023_result.json
R evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_Q4/qa_024_result.json
R evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_Q4/qa_025_result.json
R evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_Q4/qa_026_result.json
R evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_Q4/qa_027_result.json
R evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_Q4/qa_028_result.json
R evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_Q4/qa_029_result.json
R evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_Q4/qa_030_result.json
R evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_Q4/qa_031_result.json
R evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_Q4/qa_032_result.json
R evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_Q4/qa_033_result.json
A evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_fp16/qa_001_result.json
A evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_fp16/qa_002_result.json
A evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_fp16/qa_003_result.json
A evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_fp16/qa_004_result.json
A evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_fp16/qa_005_result.json
A evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_fp16/qa_006_result.json
A evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_fp16/qa_007_result.json
A evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_fp16/qa_008_result.json
A evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_fp16/qa_009_result.json
A evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_fp16/qa_010_result.json
A evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_fp16/qa_011_result.json
A evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_fp16/qa_012_result.json
A evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_fp16/qa_013_result.json
A evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_fp16/qa_014_result.json
A evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_fp16/qa_015_result.json
A evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_fp16/qa_016_result.json
A evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_fp16/qa_017_result.json
A evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_fp16/qa_018_result.json
A evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_fp16/qa_019_result.json
A evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_fp16/qa_020_result.json
A evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_fp16/qa_021_result.json
A evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_fp16/qa_022_result.json
A evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_fp16/qa_023_result.json
A evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_fp16/qa_024_result.json
A evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_fp16/qa_025_result.json
A evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_fp16/qa_026_result.json
A evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_fp16/qa_027_result.json
A evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_fp16/qa_028_result.json
A evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_fp16/qa_029_result.json
A evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_fp16/qa_030_result.json
A evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_fp16/qa_031_result.json
A evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_fp16/qa_032_result.json
A evaluation_results/RAG-qa/AI.Models.eval_mixtral-8x7B_fp16/qa_033_result.json
M evaluation_results/average_power_consumption.json
R evaluation_results/summarization/AI.Models.mixtral-8x7B_Q4/summ_001_result.json
R evaluation_results/summarization/AI.Models.mixtral-8x7B_Q4/summ_002_result.json
R evaluation_results/summarization/AI.Models.mixtral-8x7B_Q4/summ_003_result.json
R evaluation_results/summarization/AI.Models.mixtral-8x7B_Q4/summ_004_result.json
R evaluation_results/summarization/AI.Models.mixtral-8x7B_Q4/summ_005_result.json
R evaluation_results/summarization/AI.Models.mixtral-8x7B_Q4/summ_006_result.json
R evaluation_results/summarization/AI.Models.mixtral-8x7B_Q4/summ_007_result.json
R evaluation_results/summarization/AI.Models.mixtral-8x7B_Q4/summ_008_result.json
R evaluation_results/summarization/AI.Models.mixtral-8x7B_Q4/summ_009_result.json
R evaluation_results/summarization/AI.Models.mixtral-8x7B_Q4/summ_010_result.json
A evaluation_results/summarization/AI.Models.mixtral-8x7B_fp16/summ_001_result.json
A evaluation_results/summarization/AI.Models.mixtral-8x7B_fp16/summ_002_result.json
A evaluation_results/summarization/AI.Models.mixtral-8x7B_fp16/summ_003_result.json
A evaluation_results/summarization/AI.Models.mixtral-8x7B_fp16/summ_004_result.json
A evaluation_results/summarization/AI.Models.mixtral-8x7B_fp16/summ_005_result.json
A evaluation_results/summarization/AI.Models.mixtral-8x7B_fp16/summ_006_result.json
A evaluation_results/summarization/AI.Models.mixtral-8x7B_fp16/summ_007_result.json
A evaluation_results/summarization/AI.Models.mixtral-8x7B_fp16/summ_008_result.json
A evaluation_results/summarization/AI.Models.mixtral-8x7B_fp16/summ_009_result.json
A evaluation_results/summarization/AI.Models.mixtral-8x7B_fp16/summ_010_result.json
R evaluation_results/text_generation/AI.Models.mixtral-8x7B_Q4/text_gen_001_result.json
R evaluation_results/text_generation/AI.Models.mixtral-8x7B_Q4/text_gen_002_result.json
R evaluation_results/text_generation/AI.Models.mixtral-8x7B_Q4/text_gen_003_result.json
R evaluation_results/text_generation/AI.Models.mixtral-8x7B_Q4/text_gen_004_result.json
R evaluation_results/text_generation/AI.Models.mixtral-8x7B_Q4/text_gen_005_result.json
R evaluation_results/text_generation/AI.Models.mixtral-8x7B_Q4/text_gen_006_result.json
R evaluation_results/text_generation/AI.Models.mixtral-8x7B_Q4/text_gen_007_result.json
R evaluation_results/text_generation/AI.Models.mixtral-8x7B_Q4/text_gen_008_result.json
R evaluation_results/text_generation/AI.Models.mixtral-8x7B_Q4/text_gen_009_result.json
R evaluation_results/text_generation/AI.Models.mixtral-8x7B_Q4/text_gen_010_result.json
A evaluation_results/text_generation/AI.Models.mixtral-8x7B_fp16/text_gen_001_result.json
A evaluation_results/text_generation/AI.Models.mixtral-8x7B_fp16/text_gen_002_result.json
A evaluation_results/text_generation/AI.Models.mixtral-8x7B_fp16/text_gen_003_result.json
A evaluation_results/text_generation/AI.Models.mixtral-8x7B_fp16/text_gen_004_result.json
A evaluation_results/text_generation/AI.Models.mixtral-8x7B_fp16/text_gen_005_result.json
A evaluation_results/text_generation/AI.Models.mixtral-8x7B_fp16/text_gen_006_result.json
A evaluation_results/text_generation/AI.Models.mixtral-8x7B_fp16/text_gen_007_result.json
A evaluation_results/text_generation/AI.Models.mixtral-8x7B_fp16/text_gen_008_result.json
A evaluation_results/text_generation/AI.Models.mixtral-8x7B_fp16/text_gen_009_result.json
A evaluation_results/text_generation/AI.Models.mixtral-8x7B_fp16/text_gen_010_result.json
M evaluation_results_graphics/RAG-qa_AnswerRelevancy_bar_chart.png
M evaluation_results_graphics/RAG-qa_AnswerRelevancy_box_plot.png
M evaluation_results_graphics/RAG-qa_ContextualPrecision_bar_chart.png
M evaluation_results_graphics/RAG-qa_ContextualPrecision_box_plot.png
M evaluation_results_graphics/RAG-qa_ContextualRecall_bar_chart.png
M evaluation_results_graphics/RAG-qa_ContextualRecall_box_plot.png
M evaluation_results_graphics/RAG-qa_Faithfulness_bar_chart.png
M evaluation_results_graphics/RAG-qa_Faithfulness_box_plot.png
M evaluation_results_graphics/RAG-qa_grouped_bar_chart.png
M evaluation_results_graphics/average_average_power_draw_grouped_chart.png
M evaluation_results_graphics/average_energy_consumption_grouped_chart.png
M evaluation_results_graphics/average_energy_per_input_token_grouped_chart.png
M evaluation_results_graphics/average_energy_per_output_token_grouped_chart.png
M evaluation_results_graphics/average_energy_per_total_token_grouped_chart.png
M evaluation_results_graphics/average_power_draw_chart.png
M evaluation_results_graphics/model_average_power_chart.png
M evaluation_results_graphics/summarization_Alignment_bar_chart.png
M evaluation_results_graphics/summarization_Alignment_box_plot.png
M evaluation_results_graphics/summarization_Coverage_bar_chart.png
M evaluation_results_graphics/summarization_Coverage_box_plot.png
M evaluation_results_graphics/summarization_grouped_bar_chart.png
M evaluation_results_graphics/text_generation_grouped_bar_chart.png
M evaluation_results_graphics/text_generation_score_bar_chart.png
M evaluation_results_graphics/text_generation_score_box_plot.png
R output/AI.Models.eval_mixtral-8x7B_Q4/tasks/RAG-qa/qa_001.json
R output/AI.Models.eval_mixtral-8x7B_Q4/tasks/RAG-qa/qa_002.json
R output/AI.Models.eval_mixtral-8x7B_Q4/tasks/RAG-qa/qa_003.json
R output/AI.Models.eval_mixtral-8x7B_Q4/tasks/RAG-qa/qa_004.json
R output/AI.Models.eval_mixtral-8x7B_Q4/tasks/RAG-qa/qa_005.json
R output/AI.Models.eval_mixtral-8x7B_Q4/tasks/RAG-qa/qa_006.json
R output/AI.Models.eval_mixtral-8x7B_Q4/tasks/RAG-qa/qa_007.json
R output/AI.Models.eval_mixtral-8x7B_Q4/tasks/RAG-qa/qa_008.json
R output/AI.Models.eval_mixtral-8x7B_Q4/tasks/RAG-qa/qa_009.json
R output/AI.Models.eval_mixtral-8x7B_Q4/tasks/RAG-qa/qa_010.json
R output/AI.Models.eval_mixtral-8x7B_Q4/tasks/RAG-qa/qa_011.json
R output/AI.Models.eval_mixtral-8x7B_Q4/tasks/RAG-qa/qa_012.json
R output/AI.Models.eval_mixtral-8x7B_Q4/tasks/RAG-qa/qa_013.json
R output/AI.Models.eval_mixtral-8x7B_Q4/tasks/RAG-qa/qa_014.json
R output/AI.Models.eval_mixtral-8x7B_Q4/tasks/RAG-qa/qa_015.json
R output/AI.Models.eval_mixtral-8x7B_Q4/tasks/RAG-qa/qa_016.json
R output/AI.Models.eval_mixtral-8x7B_Q4/tasks/RAG-qa/qa_017.json
R output/AI.Models.eval_mixtral-8x7B_Q4/tasks/RAG-qa/qa_018.json
R output/AI.Models.eval_mixtral-8x7B_Q4/tasks/RAG-qa/qa_019.json
R output/AI.Models.eval_mixtral-8x7B_Q4/tasks/RAG-qa/qa_020.json
R output/AI.Models.eval_mixtral-8x7B_Q4/tasks/RAG-qa/qa_021.json
R output/AI.Models.eval_mixtral-8x7B_Q4/tasks/RAG-qa/qa_022.json
R output/AI.Models.eval_mixtral-8x7B_Q4/tasks/RAG-qa/qa_023.json
R output/AI.Models.eval_mixtral-8x7B_Q4/tasks/RAG-qa/qa_024.json
R output/AI.Models.eval_mixtral-8x7B_Q4/tasks/RAG-qa/qa_025.json
R output/AI.Models.eval_mixtral-8x7B_Q4/tasks/RAG-qa/qa_026.json
R output/AI.Models.eval_mixtral-8x7B_Q4/tasks/RAG-qa/qa_027.json
R output/AI.Models.eval_mixtral-8x7B_Q4/tasks/RAG-qa/qa_028.json
R output/AI.Models.eval_mixtral-8x7B_Q4/tasks/RAG-qa/qa_029.json
R output/AI.Models.eval_mixtral-8x7B_Q4/tasks/RAG-qa/qa_030.json
R output/AI.Models.eval_mixtral-8x7B_Q4/tasks/RAG-qa/qa_031.json
R output/AI.Models.eval_mixtral-8x7B_Q4/tasks/RAG-qa/qa_032.json
R output/AI.Models.eval_mixtral-8x7B_Q4/tasks/RAG-qa/qa_033.json
A output/AI.Models.eval_mixtral-8x7B_fp16/tasks/RAG-qa/qa_001.json
A output/AI.Models.eval_mixtral-8x7B_fp16/tasks/RAG-qa/qa_002.json
A output/AI.Models.eval_mixtral-8x7B_fp16/tasks/RAG-qa/qa_003.json
A output/AI.Models.eval_mixtral-8x7B_fp16/tasks/RAG-qa/qa_004.json
A output/AI.Models.eval_mixtral-8x7B_fp16/tasks/RAG-qa/qa_005.json
A output/AI.Models.eval_mixtral-8x7B_fp16/tasks/RAG-qa/qa_006.json
A output/AI.Models.eval_mixtral-8x7B_fp16/tasks/RAG-qa/qa_007.json
A output/AI.Models.eval_mixtral-8x7B_fp16/tasks/RAG-qa/qa_008.json
A output/AI.Models.eval_mixtral-8x7B_fp16/tasks/RAG-qa/qa_009.json
A output/AI.Models.eval_mixtral-8x7B_fp16/tasks/RAG-qa/qa_010.json
A output/AI.Models.eval_mixtral-8x7B_fp16/tasks/RAG-qa/qa_011.json
A output/AI.Models.eval_mixtral-8x7B_fp16/tasks/RAG-qa/qa_012.json
A output/AI.Models.eval_mixtral-8x7B_fp16/tasks/RAG-qa/qa_013.json
A output/AI.Models.eval_mixtral-8x7B_fp16/tasks/RAG-qa/qa_014.json
A output/AI.Models.eval_mixtral-8x7B_fp16/tasks/RAG-qa/qa_015.json
A output/AI.Models.eval_mixtral-8x7B_fp16/tasks/RAG-qa/qa_016.json
A output/AI.Models.eval_mixtral-8x7B_fp16/tasks/RAG-qa/qa_017.json
A output/AI.Models.eval_mixtral-8x7B_fp16/tasks/RAG-qa/qa_018.json
A output/AI.Models.eval_mixtral-8x7B_fp16/tasks/RAG-qa/qa_019.json
A output/AI.Models.eval_mixtral-8x7B_fp16/tasks/RAG-qa/qa_020.json
A output/AI.Models.eval_mixtral-8x7B_fp16/tasks/RAG-qa/qa_021.json
A output/AI.Models.eval_mixtral-8x7B_fp16/tasks/RAG-qa/qa_022.json
A output/AI.Models.eval_mixtral-8x7B_fp16/tasks/RAG-qa/qa_023.json
A output/AI.Models.eval_mixtral-8x7B_fp16/tasks/RAG-qa/qa_024.json
A output/AI.Models.eval_mixtral-8x7B_fp16/tasks/RAG-qa/qa_025.json
A output/AI.Models.eval_mixtral-8x7B_fp16/tasks/RAG-qa/qa_026.json
A output/AI.Models.eval_mixtral-8x7B_fp16/tasks/RAG-qa/qa_027.json
A output/AI.Models.eval_mixtral-8x7B_fp16/tasks/RAG-qa/qa_028.json
A output/AI.Models.eval_mixtral-8x7B_fp16/tasks/RAG-qa/qa_029.json
A output/AI.Models.eval_mixtral-8x7B_fp16/tasks/RAG-qa/qa_030.json
A output/AI.Models.eval_mixtral-8x7B_fp16/tasks/RAG-qa/qa_031.json
A output/AI.Models.eval_mixtral-8x7B_fp16/tasks/RAG-qa/qa_032.json
A output/AI.Models.eval_mixtral-8x7B_fp16/tasks/RAG-qa/qa_033.json
R output/AI.Models.mixtral-8x7B_Q4/tasks/summarization/summ_001.json
R output/AI.Models.mixtral-8x7B_Q4/tasks/summarization/summ_002.json
R output/AI.Models.mixtral-8x7B_Q4/tasks/summarization/summ_003.json
R output/AI.Models.mixtral-8x7B_Q4/tasks/summarization/summ_004.json
R output/AI.Models.mixtral-8x7B_Q4/tasks/summarization/summ_005.json
R output/AI.Models.mixtral-8x7B_Q4/tasks/summarization/summ_006.json
R output/AI.Models.mixtral-8x7B_Q4/tasks/summarization/summ_007.json
R output/AI.Models.mixtral-8x7B_Q4/tasks/summarization/summ_008.json
R output/AI.Models.mixtral-8x7B_Q4/tasks/summarization/summ_009.json
R output/AI.Models.mixtral-8x7B_Q4/tasks/summarization/summ_010.json
R output/AI.Models.mixtral-8x7B_Q4/tasks/text_generation/text_gen_001.json
R output/AI.Models.mixtral-8x7B_Q4/tasks/text_generation/text_gen_002.json
R output/AI.Models.mixtral-8x7B_Q4/tasks/text_generation/text_gen_003.json
R output/AI.Models.mixtral-8x7B_Q4/tasks/text_generation/text_gen_004.json
R output/AI.Models.mixtral-8x7B_Q4/tasks/text_generation/text_gen_005.json
R output/AI.Models.mixtral-8x7B_Q4/tasks/text_generation/text_gen_006.json
R output/AI.Models.mixtral-8x7B_Q4/tasks/text_generation/text_gen_007.json
R output/AI.Models.mixtral-8x7B_Q4/tasks/text_generation/text_gen_008.json
R output/AI.Models.mixtral-8x7B_Q4/tasks/text_generation/text_gen_009.json
R output/AI.Models.mixtral-8x7B_Q4/tasks/text_generation/text_gen_010.json
A output/AI.Models.mixtral-8x7B_fp16/tasks/summarization/summ_001.json
A output/AI.Models.mixtral-8x7B_fp16/tasks/summarization/summ_002.json
A output/AI.Models.mixtral-8x7B_fp16/tasks/summarization/summ_003.json
A output/AI.Models.mixtral-8x7B_fp16/tasks/summarization/summ_004.json
A output/AI.Models.mixtral-8x7B_fp16/tasks/summarization/summ_005.json
A output/AI.Models.mixtral-8x7B_fp16/tasks/summarization/summ_006.json
A output/AI.Models.mixtral-8x7B_fp16/tasks/summarization/summ_007.json
A output/AI.Models.mixtral-8x7B_fp16/tasks/summarization/summ_008.json
A output/AI.Models.mixtral-8x7B_fp16/tasks/summarization/summ_009.json
A output/AI.Models.mixtral-8x7B_fp16/tasks/summarization/summ_010.json
A output/AI.Models.mixtral-8x7B_fp16/tasks/text_generation/text_gen_001.json
A output/AI.Models.mixtral-8x7B_fp16/tasks/text_generation/text_gen_002.json
A output/AI.Models.mixtral-8x7B_fp16/tasks/text_generation/text_gen_003.json
A output/AI.Models.mixtral-8x7B_fp16/tasks/text_generation/text_gen_004.json
A output/AI.Models.mixtral-8x7B_fp16/tasks/text_generation/text_gen_005.json
A output/AI.Models.mixtral-8x7B_fp16/tasks/text_generation/text_gen_006.json
A output/AI.Models.mixtral-8x7B_fp16/tasks/text_generation/text_gen_007.json
A output/AI.Models.mixtral-8x7B_fp16/tasks/text_generation/text_gen_008.json
A output/AI.Models.mixtral-8x7B_fp16/tasks/text_generation/text_gen_009.json
A output/AI.Models.mixtral-8x7B_fp16/tasks/text_generation/text_gen_010.json
A reports/evaluation_report_20240806_184454.pdf
M scripts/output_generation/collect_model_responses.py
Log Message:
-----------
LLMAI-87: Execute the benchmark and document results
* Re-evaluated mixtral_8x7b due to wrong initial configuration
To unsubscribe from these emails, change your notification settings at
https://github.com/xwiki-contrib/ai-llm-benchmark/settings/notifications