yibum committed on
Commit
8671e72
1 Parent(s): 6584952

Update latency to better fit the use scenario

Browse files
crm-results/hf_leaderboard_latency_cost.csv CHANGED
@@ -27,11 +27,11 @@ LLaMA 3 70B,Long,llama-3-70b-instruct,Self-host (p4d.24xlarge),243.9,67.7,High,6
27
  LLaMA 3 70B,Short,llama-3-70b-instruct,Self-host (p4d.24xlarge),251.2,99.0,Medium,6.25
28
  Mixtral 8x7B,Long,mixtral-8x7b-instruct,Self-host (p4d.24xlarge),248.5,8.22,Medium,4.90
29
  Mixtral 8x7B,Short,mixtral-8x7b-instruct,Self-host (p4d.24xlarge),250.0,8.11,Low,4.54
30
- SF-TextBase 7B,Long,CRM-TextBase-7b-22k-g5 (endpoint),Self-host (g5.48xlarge),248.5,16.80,Low,8.99
31
- SF-TextBase 7B,Short,CRM-TextBase-7b-22k-g5 (endpoint),Self-host (g5.48xlarge),248.7,15.50,Low,8.29
32
- SF-TextBase 70B,Long,TextBase-70B-8K,Self-host (p4de.24xlarge),253.7,28.17,High,6.52
33
- SF-TextBase 70B,Short,TextBase-70B-8K,Self-host (p4de.24xlarge),249.7,26.96,Medium,6.24
34
  SF-TextSum,Long,CRM-TSUM-7b-22k-g5 (endpoint),Self-host (g5.48xlarge),244.0,16.55,Low,3.43
35
  SF-TextSum,Short,CRM-TSUM-7b-22k-g5 (endpoint),Self-host (g5.48xlarge),250.4,15.60,Low,3.38
36
  XGen 2,Long,EinsteinXgen2E4DSStreaming (endpoint),Self-host (p4de.24xlarge),250.0,16.03,Medium,5.04
37
- XGen 2,Short,EinsteinXgen2E4DSStreaming (endpoint),Self-host (p4de.24xlarge),250.0,11.40,Medium,4.66
 
27
  LLaMA 3 70B,Short,llama-3-70b-instruct,Self-host (p4d.24xlarge),251.2,99.0,Medium,6.25
28
  Mixtral 8x7B,Long,mixtral-8x7b-instruct,Self-host (p4d.24xlarge),248.5,8.22,Medium,4.90
29
  Mixtral 8x7B,Short,mixtral-8x7b-instruct,Self-host (p4d.24xlarge),250.0,8.11,Low,4.54
30
+ SF-TextBase 7B,Long,CRM-TextBase-7b-22k-g5 (endpoint),Self-host (g5.48xlarge),248.5,16.80,Low,3.50
31
+ SF-TextBase 7B,Short,CRM-TextBase-7b-22k-g5 (endpoint),Self-host (g5.48xlarge),248.7,15.50,Low,3.45
32
+ SF-TextBase 70B,Long,TextBase-70B-8K,Self-host (p4de.24xlarge),253.7,28.17,High,7.76
33
+ SF-TextBase 70B,Short,TextBase-70B-8K,Self-host (p4de.24xlarge),249.7,26.96,Medium,7.48
34
  SF-TextSum,Long,CRM-TSUM-7b-22k-g5 (endpoint),Self-host (g5.48xlarge),244.0,16.55,Low,3.43
35
  SF-TextSum,Short,CRM-TSUM-7b-22k-g5 (endpoint),Self-host (g5.48xlarge),250.4,15.60,Low,3.38
36
  XGen 2,Long,EinsteinXgen2E4DSStreaming (endpoint),Self-host (p4de.24xlarge),250.0,16.03,Medium,5.04
37
+ XGen 2,Short,EinsteinXgen2E4DSStreaming (endpoint),Self-host (p4de.24xlarge),250.0,11.40,Medium,4.66