tags:
base_model: sentence-transformers/all-MiniLM-L6-v2
library_name: sentence-transformers
model-index:
name: mxbai-embed-xsmall-v1
results:
- task:
type: Retrieval
dataset:
type: arguana
name: MTEB ArguAna
config: default
split: test
revision: None
metrics:
- type: ndcg
at 1
value: 25.18
- type: ndcg
at 3
value: 39.22
- type: ndcg
at 5
value: 43.93
- type: ndcg
at 10
value: 49.58
- type: ndcg
at 30
value: 53.41
- type: ndcg
at 100
value: 54.11
- type: map
at 1
value: 25.18
- type: map
at 3
value: 35.66
- type: map
at 5
value: 38.25
- type: map
at 10
value: 40.58
- type: map
at 30
value: 41.6
- type: map
at 100
value: 41.69
- type: recall
at 1
value: 25.18
- type: recall
at 3
value: 49.57
- type: recall
at 5
value: 61.09
- type: recall
at 10
value: 78.59
- type: recall
at 30
value: 94.03
- type: recall
at 100
value: 97.94
- type: precision
at 1
value: 25.18
- type: precision
at 3
value: 16.52
- type: precision
at 5
value: 12.22
- type: precision
at 10
value: 7.86
- type: precision
at 30
value: 3.13
- type: precision
at 100
value: 0.98
- type: accuracy
at 3
value: 49.57
- type: accuracy
at 5
value: 61.09
- type: accuracy
at 10
value: 78.59
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackAndroidRetrieval
config: default
split: test
revision: None
metrics:
- type: ndcg
at 1
value: 44.35
- type: ndcg
at 3
value: 49.64
- type: ndcg
at 5
value: 51.73
- type: ndcg
at 10
value: 54.82
- type: ndcg
at 30
value: 57.64
- type: ndcg
at 100
value: 59.77
- type: map
at 1
value: 36.26
- type: map
at 3
value: 44.35
- type: map
at 5
value: 46.26
- type: map
at 10
value: 48.24
- type: map
at 30
value: 49.34
- type: map
at 100
value: 49.75
- type: recall
at 1
value: 36.26
- type: recall
at 3
value: 51.46
- type: recall
at 5
value: 57.78
- type: recall
at 10
value: 66.5
- type: recall
at 30
value: 77.19
- type: recall
at 100
value: 87.53
- type: precision
at 1
value: 44.35
- type: precision
at 3
value: 23.65
- type: precision
at 5
value: 16.88
- type: precision
at 10
value: 10.7
- type: precision
at 30
value: 4.53
- type: precision
at 100
value: 1.65
- type: accuracy
at 3
value: 60.51
- type: accuracy
at 5
value: 67.67
- type: accuracy
at 10
value: 74.68
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackEnglishRetrieval
config: default
split: test
revision: None
metrics:
- type: ndcg
at 1
value: 39.43
- type: ndcg
at 3
value: 44.13
- type: ndcg
at 5
value: 46.06
- type: ndcg
at 10
value: 48.31
- type: ndcg
at 30
value: 51.06
- type: ndcg
at 100
value: 53.07
- type: map
at 1
value: 31.27
- type: map
at 3
value: 39.07
- type: map
at 5
value: 40.83
- type: map
at 10
value: 42.23
- type: map
at 30
value: 43.27
- type: map
at 100
value: 43.66
- type: recall
at 1
value: 31.27
- type: recall
at 3
value: 45.89
- type: recall
at 5
value: 51.44
- type: recall
at 10
value: 58.65
- type: recall
at 30
value: 69.12
- type: recall
at 100
value: 78.72
- type: precision
at 1
value: 39.43
- type: precision
at 3
value: 21.61
- type: precision
at 5
value: 15.34
- type: precision
at 10
value: 9.27
- type: precision
at 30
value: 4.01
- type: precision
at 100
value: 1.52
- type: accuracy
at 3
value: 55.48
- type: accuracy
at 5
value: 60.76
- type: accuracy
at 10
value: 67.45
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackGamingRetrieval
config: default
split: test
revision: None
metrics:
- type: ndcg
at 1
value: 45.58
- type: ndcg
at 3
value: 52.68
- type: ndcg
at 5
value: 55.28
- type: ndcg
at 10
value: 57.88
- type: ndcg
at 30
value: 60.6
- type: ndcg
at 100
value: 62.03
- type: map
at 1
value: 39.97
- type: map
at 3
value: 49.06
- type: map
at 5
value: 50.87
- type: map
at 10
value: 52.2
- type: map
at 30
value: 53.06
- type: map
at 100
value: 53.28
- type: recall
at 1
value: 39.97
- type: recall
at 3
value: 57.4
- type: recall
at 5
value: 63.83
- type: recall
at 10
value: 71.33
- type: recall
at 30
value: 81.81
- type: recall
at 100
value: 89.0
- type: precision
at 1
value: 45.58
- type: precision
at 3
value: 23.55
- type: precision
at 5
value: 16.01
- type: precision
at 10
value: 9.25
- type: precision
at 30
value: 3.67
- type: precision
at 100
value: 1.23
- type: accuracy
at 3
value: 62.76
- type: accuracy
at 5
value: 68.84
- type: accuracy
at 10
value: 75.8
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackGisRetrieval
config: default
split: test
revision: None
metrics:
- type: ndcg
at 1
value: 27.35
- type: ndcg
at 3
value: 34.23
- type: ndcg
at 5
value: 37.1
- type: ndcg
at 10
value: 40.26
- type: ndcg
at 30
value: 43.54
- type: ndcg
at 100
value: 45.9
- type: map
at 1
value: 25.28
- type: map
at 3
value: 31.68
- type: map
at 5
value: 33.38
- type: map
at 10
value: 34.79
- type: map
at 30
value: 35.67
- type: map
at 100
value: 35.96
- type: recall
at 1
value: 25.28
- type: recall
at 3
value: 38.95
- type: recall
at 5
value: 45.82
- type: recall
at 10
value: 55.11
- type: recall
at 30
value: 68.13
- type: recall
at 100
value: 80.88
- type: precision
at 1
value: 27.35
- type: precision
at 3
value: 14.65
- type: precision
at 5
value: 10.44
- type: precision
at 10
value: 6.37
- type: precision
at 30
value: 2.65
- type: precision
at 100
value: 0.97
- type: accuracy
at 3
value: 42.15
- type: accuracy
at 5
value: 49.15
- type: accuracy
at 10
value: 58.53
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackMathematicaRetrieval
config: default
split: test
revision: None
metrics:
- type: ndcg
at 1
value: 18.91
- type: ndcg
at 3
value: 24.37
- type: ndcg
at 5
value: 26.11
- type: ndcg
at 10
value: 29.37
- type: ndcg
at 30
value: 33.22
- type: ndcg
at 100
value: 35.73
- type: map
at 1
value: 15.23
- type: map
at 3
value: 21.25
- type: map
at 5
value: 22.38
- type: map
at 10
value: 23.86
- type: map
at 30
value: 24.91
- type: map
at 100
value: 25.24
- type: recall
at 1
value: 15.23
- type: recall
at 3
value: 28.28
- type: recall
at 5
value: 32.67
- type: recall
at 10
value: 42.23
- type: recall
at 30
value: 56.87
- type: recall
at 100
value: 69.44
- type: precision
at 1
value: 18.91
- type: precision
at 3
value: 11.9
- type: precision
at 5
value: 8.48
- type: precision
at 10
value: 5.63
- type: precision
at 30
value: 2.64
- type: precision
at 100
value: 1.02
- type: accuracy
at 3
value: 33.95
- type: accuracy
at 5
value: 38.81
- type: accuracy
at 10
value: 49.13
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackPhysicsRetrieval
config: default
split: test
revision: None
metrics:
- type: ndcg
at 1
value: 36.96
- type: ndcg
at 3
value: 42.48
- type: ndcg
at 5
value: 44.57
- type: ndcg
at 10
value: 47.13
- type: ndcg
at 30
value: 50.65
- type: ndcg
at 100
value: 53.14
- type: map
at 1
value: 30.1
- type: map
at 3
value: 37.97
- type: map
at 5
value: 39.62
- type: map
at 10
value: 41.06
- type: map
at 30
value: 42.13
- type: map
at 100
value: 42.53
- type: recall
at 1
value: 30.1
- type: recall
at 3
value: 45.98
- type: recall
at 5
value: 51.58
- type: recall
at 10
value: 59.24
- type: recall
at 30
value: 72.47
- type: recall
at 100
value: 84.53
- type: precision
at 1
value: 36.96
- type: precision
at 3
value: 20.5
- type: precision
at 5
value: 14.4
- type: precision
at 10
value: 8.62
- type: precision
at 30
value: 3.67
- type: precision
at 100
value: 1.38
- type: accuracy
at 3
value: 54.09
- type: accuracy
at 5
value: 60.25
- type: accuracy
at 10
value: 67.37
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackProgrammersRetrieval
config: default
split: test
revision: None
metrics:
- type: ndcg
at 1
value: 28.65
- type: ndcg
at 3
value: 34.3
- type: ndcg
at 5
value: 36.8
- type: ndcg
at 10
value: 39.92
- type: ndcg
at 30
value: 42.97
- type: ndcg
at 100
value: 45.45
- type: map
at 1
value: 23.35
- type: map
at 3
value: 30.36
- type: map
at 5
value: 32.15
- type: map
at 10
value: 33.74
- type: map
at 30
value: 34.69
- type: map
at 100
value: 35.02
- type: recall
at 1
value: 23.35
- type: recall
at 3
value: 37.71
- type: recall
at 5
value: 44.23
- type: recall
at 10
value: 53.6
- type: recall
at 30
value: 64.69
- type: recall
at 100
value: 77.41
- type: precision
at 1
value: 28.65
- type: precision
at 3
value: 16.74
- type: precision
at 5
value: 12.21
- type: precision
at 10
value: 7.61
- type: precision
at 30
value: 3.29
- type: precision
at 100
value: 1.22
- type: accuracy
at 3
value: 44.86
- type: accuracy
at 5
value: 52.4
- type: accuracy
at 10
value: 61.07
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackStatsRetrieval
config: default
split: test
revision: None
metrics:
- type: ndcg
at 1
value: 26.07
- type: ndcg
at 3
value: 31.62
- type: ndcg
at 5
value: 33.23
- type: ndcg
at 10
value: 35.62
- type: ndcg
at 30
value: 38.41
- type: ndcg
at 100
value: 40.81
- type: map
at 1
value: 22.96
- type: map
at 3
value: 28.85
- type: map
at 5
value: 29.97
- type: map
at 10
value: 31.11
- type: map
at 30
value: 31.86
- type: map
at 100
value: 32.15
- type: recall
at 1
value: 22.96
- type: recall
at 3
value: 35.14
- type: recall
at 5
value: 39.22
- type: recall
at 10
value: 46.52
- type: recall
at 30
value: 57.58
- type: recall
at 100
value: 70.57
- type: precision
at 1
value: 26.07
- type: precision
at 3
value: 14.11
- type: precision
at 5
value: 9.69
- type: precision
at 10
value: 5.81
- type: precision
at 30
value: 2.45
- type: precision
at 100
value: 0.92
- type: accuracy
at 3
value: 39.42
- type: accuracy
at 5
value: 43.41
- type: accuracy
at 10
value: 50.92
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackTexRetrieval
config: default
split: test
revision: None
metrics:
- type: ndcg
at 1
value: 21.78
- type: ndcg
at 3
value: 25.74
- type: ndcg
at 5
value: 27.86
- type: ndcg
at 10
value: 30.3
- type: ndcg
at 30
value: 33.51
- type: ndcg
at 100
value: 36.12
- type: map
at 1
value: 17.63
- type: map
at 3
value: 22.7
- type: map
at 5
value: 24.14
- type: map
at 10
value: 25.31
- type: map
at 30
value: 26.22
- type: map
at 100
value: 26.56
- type: recall
at 1
value: 17.63
- type: recall
at 3
value: 28.37
- type: recall
at 5
value: 33.99
- type: recall
at 10
value: 41.23
- type: recall
at 30
value: 53.69
- type: recall
at 100
value: 67.27
- type: precision
at 1
value: 21.78
- type: precision
at 3
value: 12.41
- type: precision
at 5
value: 9.07
- type: precision
at 10
value: 5.69
- type: precision
at 30
value: 2.61
- type: precision
at 100
value: 1.03
- type: accuracy
at 3
value: 33.62
- type: accuracy
at 5
value: 39.81
- type: accuracy
at 10
value: 47.32
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackUnixRetrieval
config: default
split: test
revision: None
metrics:
- type: ndcg
at 1
value: 30.97
- type: ndcg
at 3
value: 36.13
- type: ndcg
at 5
value: 39.0
- type: ndcg
at 10
value: 41.78
- type: ndcg
at 30
value: 44.96
- type: ndcg
at 100
value: 47.52
- type: map
at 1
value: 26.05
- type: map
at 3
value: 32.77
- type: map
at 5
value: 34.6
- type: map
at 10
value: 35.93
- type: map
at 30
value: 36.88
- type: map
at 100
value: 37.22
- type: recall
at 1
value: 26.05
- type: recall
at 3
value: 40.0
- type: recall
at 5
value: 47.34
- type: recall
at 10
value: 55.34
- type: recall
at 30
value: 67.08
- type: recall
at 100
value: 80.2
- type: precision
at 1
value: 30.97
- type: precision
at 3
value: 16.6
- type: precision
at 5
value: 12.03
- type: precision
at 10
value: 7.3
- type: precision
at 30
value: 3.08
- type: precision
at 100
value: 1.15
- type: accuracy
at 3
value: 45.62
- type: accuracy
at 5
value: 53.64
- type: accuracy
at 10
value: 61.66
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackWebmastersRetrieval
config: default
split: test
revision: None
metrics:
- type: ndcg
at 1
value: 29.64
- type: ndcg
at 3
value: 35.49
- type: ndcg
at 5
value: 37.77
- type: ndcg
at 10
value: 40.78
- type: ndcg
at 30
value: 44.59
- type: ndcg
at 100
value: 46.97
- type: map
at 1
value: 24.77
- type: map
at 3
value: 31.33
- type: map
at 5
value: 32.95
- type: map
at 10
value: 34.47
- type: map
at 30
value: 35.7
- type: map
at 100
value: 36.17
- type: recall
at 1
value: 24.77
- type: recall
at 3
value: 38.16
- type: recall
at 5
value: 44.1
- type: recall
at 10
value: 53.31
- type: recall
at 30
value: 68.43
- type: recall
at 100
value: 80.24
- type: precision
at 1
value: 29.64
- type: precision
at 3
value: 16.8
- type: precision
at 5
value: 12.21
- type: precision
at 10
value: 7.83
- type: precision
at 30
value: 3.89
- type: precision
at 100
value: 1.63
- type: accuracy
at 3
value: 45.45
- type: accuracy
at 5
value: 51.58
- type: accuracy
at 10
value: 61.07
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackWordpressRetrieval
config: default
split: test
revision: None
metrics:
- type: ndcg
at 1
value: 23.47
- type: ndcg
at 3
value: 27.98
- type: ndcg
at 5
value: 30.16
- type: ndcg
at 10
value: 32.97
- type: ndcg
at 30
value: 36.3
- type: ndcg
at 100
value: 38.47
- type: map
at 1
value: 21.63
- type: map
at 3
value: 26.02
- type: map
at 5
value: 27.32
- type: map
at 10
value: 28.51
- type: map
at 30
value: 29.39
- type: map
at 100
value: 29.66
- type: recall
at 1
value: 21.63
- type: recall
at 3
value: 31.47
- type: recall
at 5
value: 36.69
- type: recall
at 10
value: 44.95
- type: recall
at 30
value: 58.2
- type: recall
at 100
value: 69.83
- type: precision
at 1
value: 23.47
- type: precision
at 3
value: 11.71
- type: precision
at 5
value: 8.32
- type: precision
at 10
value: 5.23
- type: precision
at 30
value: 2.29
- type: precision
at 100
value: 0.86
- type: accuracy
at 3
value: 34.01
- type: accuracy
at 5
value: 39.37
- type: accuracy
at 10
value: 48.24
- task:
type: Retrieval
dataset:
type: climate-fever
name: MTEB ClimateFEVER
config: default
split: test
revision: None
metrics:
- type: ndcg
at 1
value: 19.8
- type: ndcg
at 3
value: 17.93
- type: ndcg
at 5
value: 19.39
- type: ndcg
at 10
value: 22.42
- type: ndcg
at 30
value: 26.79
- type: ndcg
at 100
value: 29.84
- type: map
at 1
value: 9.09
- type: map
at 3
value: 12.91
- type: map
at 5
value: 14.12
- type: map
at 10
value: 15.45
- type: map
at 30
value: 16.73
- type: map
at 100
value: 17.21
- type: recall
at 1
value: 9.09
- type: recall
at 3
value: 16.81
- type: recall
at 5
value: 20.9
- type: recall
at 10
value: 27.65
- type: recall
at 30
value: 41.23
- type: recall
at 100
value: 53.57
- type: precision
at 1
value: 19.8
- type: precision
at 3
value: 13.36
- type: precision
at 5
value: 10.33
- type: precision
at 10
value: 7.15
- type: precision
at 30
value: 3.66
- type: precision
at 100
value: 1.49
- type: accuracy
at 3
value: 36.22
- type: accuracy
at 5
value: 44.1
- type: accuracy
at 10
value: 55.11
- task:
type: Retrieval
dataset:
type: dbpedia-entity
name: MTEB DBPedia
config: default
split: test
revision: None
metrics:
- type: ndcg
at 1
value: 42.75
- type: ndcg
at 3
value: 35.67
- type: ndcg
at 5
value: 33.58
- type: ndcg
at 10
value: 32.19
- type: ndcg
at 30
value: 31.82
- type: ndcg
at 100
value: 35.87
- type: map
at 1
value: 7.05
- type: map
at 3
value: 10.5
- type: map
at 5
value: 12.06
- type: map
at 10
value: 14.29
- type: map
at 30
value: 17.38
- type: map
at 100
value: 19.58
- type: recall
at 1
value: 7.05
- type: recall
at 3
value: 11.89
- type: recall
at 5
value: 14.7
- type: recall
at 10
value: 19.78
- type: recall
at 30
value: 29.88
- type: recall
at 100
value: 42.4
- type: precision
at 1
value: 54.25
- type: precision
at 3
value: 39.42
- type: precision
at 5
value: 33.15
- type: precision
at 10
value: 25.95
- type: precision
at 30
value: 15.51
- type: precision
at 100
value: 7.9
- type: accuracy
at 3
value: 72.0
- type: accuracy
at 5
value: 77.75
- type: accuracy
at 10
value: 83.5
- task:
type: Retrieval
dataset:
type: fever
name: MTEB FEVER
config: default
split: test
revision: None
metrics:
- type: ndcg
at 1
value: 40.19
- type: ndcg
at 3
value: 50.51
- type: ndcg
at 5
value: 53.51
- type: ndcg
at 10
value: 56.45
- type: ndcg
at 30
value: 58.74
- type: ndcg
at 100
value: 59.72
- type: map
at 1
value: 37.56
- type: map
at 3
value: 46.74
- type: map
at 5
value: 48.46
- type: map
at 10
value: 49.7
- type: map
at 30
value: 50.31
- type: map
at 100
value: 50.43
- type: recall
at 1
value: 37.56
- type: recall
at 3
value: 58.28
- type: recall
at 5
value: 65.45
- type: recall
at 10
value: 74.28
- type: recall
at 30
value: 83.42
- type: recall
at 100
value: 88.76
- type: precision
at 1
value: 40.19
- type: precision
at 3
value: 20.99
- type: precision
at 5
value: 14.24
- type: precision
at 10
value: 8.12
- type: precision
at 30
value: 3.06
- type: precision
at 100
value: 0.98
- type: accuracy
at 3
value: 62.3
- type: accuracy
at 5
value: 69.94
- type: accuracy
at 10
value: 79.13
- task:
type: Retrieval
dataset:
type: fiqa
name: MTEB FiQA2018
config: default
split: test
revision: None
metrics:
- type: ndcg
at 1
value: 34.41
- type: ndcg
at 3
value: 33.2
- type: ndcg
at 5
value: 34.71
- type: ndcg
at 10
value: 37.1
- type: ndcg
at 30
value: 40.88
- type: ndcg
at 100
value: 44.12
- type: map
at 1
value: 17.27
- type: map
at 3
value: 25.36
- type: map
at 5
value: 27.76
- type: map
at 10
value: 29.46
- type: map
at 30
value: 30.74
- type: map
at 100
value: 31.29
- type: recall
at 1
value: 17.27
- type: recall
at 3
value: 30.46
- type: recall
at 5
value: 36.91
- type: recall
at 10
value: 44.47
- type: recall
at 30
value: 56.71
- type: recall
at 100
value: 70.72
- type: precision
at 1
value: 34.41
- type: precision
at 3
value: 22.32
- type: precision
at 5
value: 16.91
- type: precision
at 10
value: 10.53
- type: precision
at 30
value: 4.62
- type: precision
at 100
value: 1.79
- type: accuracy
at 3
value: 50.77
- type: accuracy
at 5
value: 57.56
- type: accuracy
at 10
value: 65.12
- task:
type: Retrieval
dataset:
type: hotpotqa
name: MTEB HotpotQA
config: default
split: test
revision: None
metrics:
- type: ndcg
at 1
value: 57.93
- type: ndcg
at 3
value: 44.21
- type: ndcg
at 5
value: 46.4
- type: ndcg
at 10
value: 48.37
- type: ndcg
at 30
value: 50.44
- type: ndcg
at 100
value: 51.86
- type: map
at 1
value: 28.97
- type: map
at 3
value: 36.79
- type: map
at 5
value: 38.31
- type: map
at 10
value: 39.32
- type: map
at 30
value: 39.99
- type: map
at 100
value: 40.2
- type: recall
at 1
value: 28.97
- type: recall
at 3
value: 41.01
- type: recall
at 5
value: 45.36
- type: recall
at 10
value: 50.32
- type: recall
at 30
value: 57.38
- type: recall
at 100
value: 64.06
- type: precision
at 1
value: 57.93
- type: precision
at 3
value: 27.34
- type: precision
at 5
value: 18.14
- type: precision
at 10
value: 10.06
- type: precision
at 30
value: 3.82
- type: precision
at 100
value: 1.28
- type: accuracy
at 3
value: 71.03
- type: accuracy
at 5
value: 75.14
- type: accuracy
at 10
value: 79.84
- task:
type: Retrieval
dataset:
type: msmarco
name: MTEB MSMARCO
config: default
split: dev
revision: None
metrics:
- type: ndcg
at 1
value: 19.74
- type: ndcg
at 3
value: 29.47
- type: ndcg
at 5
value: 32.99
- type: ndcg
at 10
value: 36.76
- type: ndcg
at 30
value: 40.52
- type: ndcg
at 100
value: 42.78
- type: map
at 1
value: 19.2
- type: map
at 3
value: 26.81
- type: map
at 5
value: 28.78
- type: map
at 10
value: 30.35
- type: map
at 30
value: 31.3
- type: map
at 100
value: 31.57
- type: recall
at 1
value: 19.2
- type: recall
at 3
value: 36.59
- type: recall
at 5
value: 45.08
- type: recall
at 10
value: 56.54
- type: recall
at 30
value: 72.05
- type: recall
at 100
value: 84.73
- type: precision
at 1
value: 19.74
- type: precision
at 3
value: 12.61
- type: precision
at 5
value: 9.37
- type: precision
at 10
value: 5.89
- type: precision
at 30
value: 2.52
- type: precision
at 100
value: 0.89
- type: accuracy
at 3
value: 37.38
- type: accuracy
at 5
value: 46.06
- type: accuracy
at 10
value: 57.62
- task:
type: Retrieval
dataset:
type: nq
name: MTEB NQ
config: default
split: test
revision: None
metrics:
- type: ndcg
at 1
value: 25.9
- type: ndcg
at 3
value: 35.97
- type: ndcg
at 5
value: 40.27
- type: ndcg
at 10
value: 44.44
- type: ndcg
at 30
value: 48.31
- type: ndcg
at 100
value: 50.14
- type: map
at 1
value: 23.03
- type: map
at 3
value: 32.45
- type: map
at 5
value: 34.99
- type: map
at 10
value: 36.84
- type: map
at 30
value: 37.92
- type: map
at 100
value: 38.16
- type: recall
at 1
value: 23.03
- type: recall
at 3
value: 43.49
- type: recall
at 5
value: 53.41
- type: recall
at 10
value: 65.65
- type: recall
at 30
value: 80.79
- type: recall
at 100
value: 90.59
- type: precision
at 1
value: 25.9
- type: precision
at 3
value: 16.76
- type: precision
at 5
value: 12.54
- type: precision
at 10
value: 7.78
- type: precision
at 30
value: 3.23
- type: precision
at 100
value: 1.1
- type: accuracy
at 3
value: 47.31
- type: accuracy
at 5
value: 57.16
- type: accuracy
at 10
value: 69.09
- task:
type: Retrieval
dataset:
type: nfcorpus
name: MTEB NFCorpus
config: default
split: test
revision: None
metrics:
- type: ndcg
at 1
value: 40.87
- type: ndcg
at 3
value: 36.79
- type: ndcg
at 5
value: 34.47
- type: ndcg
at 10
value: 32.05
- type: ndcg
at 30
value: 29.23
- type: ndcg
at 100
value: 29.84
- type: map
at 1
value: 5.05
- type: map
at 3
value: 8.5
- type: map
at 5
value: 9.87
- type: map
at 10
value: 11.71
- type: map
at 30
value: 13.48
- type: map
at 100
value: 14.86
- type: recall
at 1
value: 5.05
- type: recall
at 3
value: 9.55
- type: recall
at 5
value: 11.91
- type: recall
at 10
value: 16.07
- type: recall
at 30
value: 22.13
- type: recall
at 100
value: 30.7
- type: precision
at 1
value: 42.72
- type: precision
at 3
value: 34.78
- type: precision
at 5
value: 30.03
- type: precision
at 10
value: 23.93
- type: precision
at 30
value: 14.61
- type: precision
at 100
value: 7.85
- type: accuracy
at 3
value: 58.2
- type: accuracy
at 5
value: 64.09
- type: accuracy
at 10
value: 69.35
- task:
type: Retrieval
dataset:
type: quora
name: MTEB QuoraRetrieval
config: default
split: test
revision: None
metrics:
- type: ndcg
at 1
value: 80.62
- type: ndcg
at 3
value: 84.62
- type: ndcg
at 5
value: 86.25
- type: ndcg
at 10
value: 87.7
- type: ndcg
at 30
value: 88.63
- type: ndcg
at 100
value: 88.95
- type: map
at 1
value: 69.91
- type: map
at 3
value: 80.7
- type: map
at 5
value: 82.57
- type: map
at 10
value: 83.78
- type: map
at 30
value: 84.33
- type: map
at 100
value: 84.44
- type: recall
at 1
value: 69.91
- type: recall
at 3
value: 86.36
- type: recall
at 5
value: 90.99
- type: recall
at 10
value: 95.19
- type: recall
at 30
value: 98.25
- type: recall
at 100
value: 99.47
- type: precision
at 1
value: 80.62
- type: precision
at 3
value: 37.03
- type: precision
at 5
value: 24.36
- type: precision
at 10
value: 13.4
- type: precision
at 30
value: 4.87
- type: precision
at 100
value: 1.53
- type: accuracy
at 3
value: 92.25
- type: accuracy
at 5
value: 95.29
- type: accuracy
at 10
value: 97.74
- task:
type: Retrieval
dataset:
type: scidocs
name: MTEB SCIDOCS
config: default
split: test
revision: None
metrics:
- type: ndcg
at 1
value: 24.1
- type: ndcg
at 3
value: 20.18
- type: ndcg
at 5
value: 17.72
- type: ndcg
at 10
value: 21.5
- type: ndcg
at 30
value: 26.66
- type: ndcg
at 100
value: 30.95
- type: map
at 1
value: 4.88
- type: map
at 3
value: 9.09
- type: map
at 5
value: 10.99
- type: map
at 10
value: 12.93
- type: map
at 30
value: 14.71
- type: map
at 100
value: 15.49
- type: recall
at 1
value: 4.88
- type: recall
at 3
value: 11.55
- type: recall
at 5
value: 15.91
- type: recall
at 10
value: 22.82
- type: recall
at 30
value: 35.7
- type: recall
at 100
value: 50.41
- type: precision
at 1
value: 24.1
- type: precision
at 3
value: 19.0
- type: precision
at 5
value: 15.72
- type: precision
at 10
value: 11.27
- type: precision
at 30
value: 5.87
- type: precision
at 100
value: 2.49
- type: accuracy
at 3
value: 43.0
- type: accuracy
at 5
value: 51.6
- type: accuracy
at 10
value: 62.7
- task:
type: Retrieval
dataset:
type: scifact
name: MTEB SciFact
config: default
split: test
revision: None
metrics:
- type: ndcg
at 1
value: 52.33
- type: ndcg
at 3
value: 61.47
- type: ndcg
at 5
value: 63.82
- type: ndcg
at 10
value: 65.81
- type: ndcg
at 30
value: 67.75
- type: ndcg
at 100
value: 68.96
- type: map
at 1
value: 50.46
- type: map
at 3
value: 58.51
- type: map
at 5
value: 60.12
- type: map
at 10
value: 61.07
- type: map
at 30
value: 61.64
- type: map
at 100
value: 61.8
- type: recall
at 1
value: 50.46
- type: recall
at 3
value: 67.81
- type: recall
at 5
value: 73.6
- type: recall
at 10
value: 79.31
- type: recall
at 30
value: 86.8
- type: recall
at 100
value: 93.5
- type: precision
at 1
value: 52.33
- type: precision
at 3
value: 24.56
- type: precision
at 5
value: 16.27
- type: precision
at 10
value: 8.9
- type: precision
at 30
value: 3.28
- type: precision
at 100
value: 1.06
- type: accuracy
at 3
value: 69.67
- type: accuracy
at 5
value: 75.0
- type: accuracy
at 10
value: 80.67
- task:
type: Retrieval
dataset:
type: trec-covid
name: MTEB TRECCOVID
config: default
split: test
revision: None
metrics:
- type: ndcg
at 1
value: 57.0
- type: ndcg
at 3
value: 53.78
- type: ndcg
at 5
value: 52.62
- type: ndcg
at 10
value: 48.9
- type: ndcg
at 30
value: 44.2
- type: ndcg
at 100
value: 36.53
- type: map
at 1
value: 0.16
- type: map
at 3
value: 0.41
- type: map
at 5
value: 0.62
- type: map
at 10
value: 1.07
- type: map
at 30
value: 2.46
- type: map
at 100
value: 5.52
- type: recall
at 1
value: 0.16
- type: recall
at 3
value: 0.45
- type: recall
at 5
value: 0.72
- type: recall
at 10
value: 1.33
- type: recall
at 30
value: 3.46
- type: recall
at 100
value: 8.73
- type: precision
at 1
value: 62.0
- type: precision
at 3
value: 57.33
- type: precision
at 5
value: 56.0
- type: precision
at 10
value: 52.0
- type: precision
at 30
value: 46.2
- type: precision
at 100
value: 37.22
- type: accuracy
at 3
value: 82.0
- type: accuracy
at 5
value: 90.0
- type: accuracy
at 10
value: 92.0
- task:
type: Retrieval
dataset:
type: webis-touche2020
name: MTEB Touche2020
config: default
split: test
revision: None
metrics:
- type: ndcg
at 1
value: 20.41
- type: ndcg
at 3
value: 17.62
- type: ndcg
at 5
value: 17.16
- type: ndcg
at 10
value: 17.09
- type: ndcg
at 30
value: 20.1
- type: ndcg
at 100
value: 26.33
- type: map
at 1
value: 2.15
- type: map
at 3
value: 3.59
- type: map
at 5
value: 5.07
- type: map
at 10
value: 6.95
- type: map
at 30
value: 9.01
- type: map
at 100
value: 10.54
- type: recall
at 1
value: 2.15
- type: recall
at 3
value: 4.5
- type: recall
at 5
value: 7.54
- type: recall
at 10
value: 12.46
- type: recall
at 30
value: 21.9
- type: recall
at 100
value: 36.58
- type: precision
at 1
value: 22.45
- type: precision
at 3
value: 19.05
- type: precision
at 5
value: 17.55
- type: precision
at 10
value: 15.51
- type: precision
at 30
value: 10.07
- type: precision
at 100
value: 5.57
- type: accuracy
at 3
value: 42.86
- type: accuracy
at 5
value: 53.06
- type: accuracy
at 10
value: 69.39
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackRetrieval
config: default
split: test
revision: None
metrics:
- type: ndcg
at 10
value: 41.59
license: apache-2.0
language:
pipeline_tag: feature-extraction
The crispy sentence embedding family from Mixedbread.
🍞 Looking for a simple end-to-end retrieval solution? Meet Omni, our multimodal and multilingual model. Get in touch for access.
mixedbread-ai/mxbai-embed-xsmall-v1
This model is an open-source English embedding model developed by Mixedbread. It's built upon sentence-transformers/all-MiniLM-L6-v2 and trained with the AnglE loss and Espresso. Read more details in our blog post.
In a bread loaf:
Performance
Binary Quantization and Matryoshka
Our model supports both binary quantization and Matryoshka Representation Learning (MRL), allowing for significant efficiency gains:
- Binary quantization: Retains 93.9% of performance while increasing efficiency by a factor of 32.
- MRL: A 33% reduction in vector size still leaves 96.2% of model performance.
These optimizations can lead to substantial reductions in infrastructure costs for cloud computing and vector databases. Read more here.
Quickstart
Here are several ways to produce English sentence embeddings using our model.
angle-emb
pip install -U angle-emb
# Quickstart: embed a query and candidate documents with angle-emb and
# print the cosine similarity of each document to the query.
from angle_emb import AnglE
from angle_emb.utils import cosine_similarity

# 1. Specify preferred dimensions
dimensions = 384

# 2. Load model and set pooling strategy to avg
model = AnglE.from_pretrained(
    "mixedbread-ai/mxbai-embed-xsmall-v1",
    pooling_strategy='avg',
).cuda()

query = 'A man is eating a piece of bread'

# The query is placed first so embeddings[0] is the query embedding.
docs = [
    query,
    "A man is eating food.",
    "A man is eating pasta.",
    "The girl is carrying a baby.",
    "A man is riding a horse.",
]

# 3. Encode
embeddings = model.encode(docs, embedding_size=dimensions)

# Compare every document (skipping the query itself) against the query.
for doc, emb in zip(docs[1:], embeddings[1:]):
    print(f'{query} ||| {doc}', cosine_similarity(embeddings[0], emb))
Sentence Transformers
python -m pip install -U sentence-transformers
# Quickstart: embed a query and candidate documents with Sentence
# Transformers and print the cosine similarity of each document to the query.
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import cos_sim

# 1. Specify preferred dimensions
dimensions = 384

# 2. Load model
model = SentenceTransformer(
    "mixedbread-ai/mxbai-embed-xsmall-v1",
    truncate_dim=dimensions,
)

query = 'A man is eating a piece of bread'

# The query is placed first so embeddings[0] is the query embedding.
docs = [
    query,
    "A man is eating food.",
    "A man is eating pasta.",
    "The girl is carrying a baby.",
    "A man is riding a horse.",
]

# 3. Encode
embeddings = model.encode(docs)

# Similarity of the query (index 0) against every remaining document.
similarities = cos_sim(embeddings[0], embeddings[1:])
print('similarities:', similarities)
transformers
pip install -U transformers
# Imports for the plain `transformers` quickstart below.
from typing import Dict

import numpy as np
import torch
from sentence_transformers.util import cos_sim
from transformers import AutoModel, AutoTokenizer
def pooling(outputs: torch.Tensor, inputs: Dict) -> np.ndarray:
    """Mean-pool token embeddings over the non-masked positions.

    Args:
        outputs: Token-level hidden states; indexed here as a rank-3 tensor
            whose dim 1 is the token axis that gets summed away.
        inputs: Tokenizer output; only ``inputs["attention_mask"]`` is read
            (rank-2, 1 for positions to keep and 0 for positions to ignore).

    Returns:
        A NumPy array with one pooled vector per row of ``outputs``.
    """
    # Add a trailing axis so the mask broadcasts across the hidden dimension.
    mask = inputs["attention_mask"][:, :, None]
    summed = torch.sum(outputs * mask, dim=1)
    # Normalize each row by its OWN number of unmasked tokens. Dividing by the
    # mask sum of the whole batch would make the scale of every embedding
    # depend on what else happens to be in the batch. The clamp guards against
    # division by zero for a fully masked row.
    counts = torch.sum(mask, dim=1).clamp(min=1)
    return (summed / counts).detach().cpu().numpy()
# 1. Load model
model_id = 'mixedbread-ai/mxbai-embed-xsmall-v1'
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModel.from_pretrained(model_id).cuda()

query = 'A man is eating a piece of bread'

# The query is placed first so embeddings[0] is the query embedding.
docs = [
    query,
    "A man is eating food.",
    "A man is eating pasta.",
    "The girl is carrying a baby.",
    "A man is riding a horse.",
]

# 2. Encode
inputs = tokenizer(docs, padding=True, return_tensors='pt')
# Move every tokenizer tensor to the GPU to match the model's device.
for k, v in inputs.items():
    inputs[k] = v.cuda()
# The tokenizer output is unpacked into keyword arguments for the model.
outputs = model(**inputs).last_hidden_state
embeddings = pooling(outputs, inputs)

# 3. Compute similarity scores
similarities = cos_sim(embeddings[0], embeddings[1:])
print('similarities:', similarities)
Batched API
python -m pip install batched
# Minimal FastAPI server that exposes the model behind a POST /embeddings
# endpoint, with `model.encode` wrapped by the `batched` library.
import uvicorn
import batched
from fastapi import FastAPI
from fastapi.responses import ORJSONResponse
from sentence_transformers import SentenceTransformer
from pydantic import BaseModel

app = FastAPI()

model = SentenceTransformer('mixedbread-ai/mxbai-embed-xsmall-v1')
# Replace encode with batched's async wrapper; the handler below awaits it.
model.encode = batched.aio.dynamically(model.encode)


class EmbeddingsRequest(BaseModel):
    """Request body: a single string or a list of strings to embed."""

    input: str | list[str]


@app.post("/embeddings")
async def embeddings(request: EmbeddingsRequest):
    """Encode ``request.input`` and return it under the "embeddings" key."""
    return ORJSONResponse({"embeddings": await model.encode(request.input)})


# Only start the server when this file is run as a script, not when imported.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)
Community
Join our Discord community to share your feedback and thoughts. We're here to help and always happy to discuss the exciting field of machine learning!
License
Apache 2.0
Citation
@online{xsmall2024mxbai,
title={Every Byte Matters: Introducing mxbai-embed-xsmall-v1},
author={Sean Lee and Julius Lipp and Rui Huang and Darius Koenig},
year={2024},
url={https://www.mixedbread.ai/blog/mxbai-embed-xsmall-v1},
}