@@ -8,7 +8,7 @@ use sailfish::TemplateOnce;
// Holds a style selector string plus three fixed-size tables of `Component`
// rows: embedding models, instruct models and summarization models.
88pub struct ServerlessModels {
99 style_type : String ,
1010 embedding_models : [ Component ; 4 ] ,
// The array length is part of the type, so it has to equal the number of
// `Component` entries supplied where `instruct_models` is initialized
// (5 entries on the removed side of this change, 6 on the added side).
11- instruct_models : [ Component ; 5 ] ,
11+ instruct_models : [ Component ; 6 ] ,
1212 summarization_models : [ Component ; 1 ] ,
1313}
1414
@@ -48,46 +48,53 @@ impl ServerlessModels {
4848 ] ,
4949 instruct_models : [
5050 Component :: from ( Row :: new ( & [
51- "meta-llama/Meta-Llama-3.1-70B -Instruct" . into ( ) ,
52- "70,000 " . into ( ) ,
53- "70,000 " . into ( ) ,
54- "8,000 " . into ( ) ,
51+ "meta-llama/Meta-Llama-3.1-405B -Instruct" . into ( ) ,
52+ "405 " . into ( ) ,
53+ "405 " . into ( ) ,
54+ "128k " . into ( ) ,
5555 "Highest quality" . into ( ) ,
5656 ] ) ) ,
57+ Component :: from ( Row :: new ( & [
58+ "meta-llama/Meta-Llama-3.1-70B-Instruct" . into ( ) ,
59+ "70" . into ( ) ,
60+ "70" . into ( ) ,
61+ "128k" . into ( ) ,
62+ "High quality" . into ( ) ,
63+ ] ) ) ,
5764 Component :: from ( Row :: new ( & [
5865 "meta-llama/Meta-Llama-3.1-8B-Instruct" . into ( ) ,
59- "8,000 " . into ( ) ,
60- "8,000 " . into ( ) ,
61- "8,000 " . into ( ) ,
62- "High quality, low latency" . into ( ) ,
66+ "8" . into ( ) ,
67+ "8" . into ( ) ,
68+ "128k " . into ( ) ,
69+ "Low latency" . into ( ) ,
6370 ] ) ) ,
6471 Component :: from ( Row :: new ( & [
6572 "microsoft/Phi-3-mini-128k-instruct" . into ( ) ,
66- "3,820 " . into ( ) ,
67- "3,820 " . into ( ) ,
68- "128,000 " . into ( ) ,
73+ "3.8 " . into ( ) ,
74+ "3.8 " . into ( ) ,
75+ "128k " . into ( ) ,
6976 "Lowest latency" . into ( ) ,
7077 ] ) ) ,
7178 Component :: from ( Row :: new ( & [
7279 "mistralai/Mixtral-8x7B-Instruct-v0.1" . into ( ) ,
73- "56,000 " . into ( ) ,
74- "12,900 " . into ( ) ,
75- "32,768 " . into ( ) ,
80+ "56" . into ( ) ,
81+ "12.9 " . into ( ) ,
82+ "32k " . into ( ) ,
7683 "MOE high quality" . into ( ) ,
7784 ] ) ) ,
7885 Component :: from ( Row :: new ( & [
7986 "mistralai/Mistral-7B-Instruct-v0.2" . into ( ) ,
80- "7,000 " . into ( ) ,
81- "7,000 " . into ( ) ,
82- "32,768 " . into ( ) ,
83- "High quality, low latency" . into ( ) ,
87+ "7" . into ( ) ,
88+ "7" . into ( ) ,
89+ "32k " . into ( ) ,
90+ "Low latency" . into ( ) ,
8491 ] ) ) ,
8592 ] ,
8693 summarization_models : [ Component :: from ( Row :: new ( & [
8794 "google/pegasus-xsum" . into ( ) ,
8895 "568" . into ( ) ,
8996 "512" . into ( ) ,
90- "8,000 " . into ( ) ,
97+ "8k " . into ( ) ,
9198 ] ) ) ] ,
9299 }
93100 }
0 commit comments