Sentence Transformers integration
#2
by
						
tomaarsen
	
							HF Staff
						- opened
							
					
- 1_Pooling/config.json +10 -0
- README.md +33 -0
- config_sentence_transformers.json +11 -0
- modules.json +20 -0
- sentence_bert_config.json +4 -0
    	
        1_Pooling/config.json
    ADDED
    
    | @@ -0,0 +1,10 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "word_embedding_dimension": 384,
         | 
| 3 | 
            +
              "pooling_mode_cls_token": true,
         | 
| 4 | 
            +
              "pooling_mode_mean_tokens": false,
         | 
| 5 | 
            +
              "pooling_mode_max_tokens": false,
         | 
| 6 | 
            +
              "pooling_mode_mean_sqrt_len_tokens": false,
         | 
| 7 | 
            +
              "pooling_mode_weightedmean_tokens": false,
         | 
| 8 | 
            +
              "pooling_mode_lasttoken": false,
         | 
| 9 | 
            +
              "include_prompt": true
         | 
| 10 | 
            +
            }
         | 
    	
        README.md
    CHANGED
    
    | @@ -2936,6 +2936,39 @@ Based on the [intfloat/e5-large-unsupervised](https://huggingface.co/intfloat/e5 | |
| 2936 | 
             
            ## Usage
         | 
| 2937 |  | 
| 2938 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 2939 | 
             
            ### Using Huggingface transformers
         | 
| 2940 |  | 
| 2941 |  | 
|  | |
| 2936 | 
             
            ## Usage
         | 
| 2937 |  | 
| 2938 |  | 
| 2939 | 
            +
            ### Using Sentence Transformers
         | 
| 2940 | 
            +
             | 
| 2941 | 
            +
            You can use the sentence-transformers package to load and run a snowflake-arctic-embed model, as shown below.
         | 
| 2942 | 
            +
             | 
| 2943 | 
            +
            ```python
         | 
| 2944 | 
            +
            from sentence_transformers import SentenceTransformer
         | 
| 2945 | 
            +
             | 
| 2946 | 
            +
            model = SentenceTransformer("Snowflake/snowflake-arctic-embed-s")
         | 
| 2947 | 
            +
             | 
| 2948 | 
            +
            queries = ['what is snowflake?', 'Where can I get the best tacos?']
         | 
| 2949 | 
            +
            documents = ['The Data Cloud!', 'Mexico City of Course!']
         | 
| 2950 | 
            +
             | 
| 2951 | 
            +
            query_embeddings = model.encode(queries, prompt_name="query")
         | 
| 2952 | 
            +
            document_embeddings = model.encode(documents)
         | 
| 2953 | 
            +
             | 
| 2954 | 
            +
            scores = query_embeddings @ document_embeddings.T
         | 
| 2955 | 
            +
            for query, query_scores in zip(queries, scores):
         | 
| 2956 | 
            +
                doc_score_pairs = list(zip(documents, query_scores))
         | 
| 2957 | 
            +
                doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
         | 
| 2958 | 
            +
                # Output passages & scores
         | 
| 2959 | 
            +
                print("Query:", query)
         | 
| 2960 | 
            +
                for document, score in doc_score_pairs:
         | 
| 2961 | 
            +
                    print(score, document)
         | 
| 2962 | 
            +
            ```
         | 
| 2963 | 
            +
            ```
         | 
| 2964 | 
            +
            Query: what is snowflake?
         | 
| 2965 | 
            +
            0.533809 The Data Cloud!
         | 
| 2966 | 
            +
            0.49207097 Mexico City of Course!
         | 
| 2967 | 
            +
            Query: Where can I get the best tacos?
         | 
| 2968 | 
            +
            0.56592476 Mexico City of Course!
         | 
| 2969 | 
            +
            0.48255116 The Data Cloud!
         | 
| 2970 | 
            +
            ```
         | 
| 2971 | 
            +
             | 
| 2972 | 
             
            ### Using Huggingface transformers
         | 
| 2973 |  | 
| 2974 |  | 
    	
        config_sentence_transformers.json
    ADDED
    
    | @@ -0,0 +1,11 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "__version__": {
         | 
| 3 | 
            +
                "sentence_transformers": "2.7.0.dev0",
         | 
| 4 | 
            +
                "transformers": "4.39.3",
         | 
| 5 | 
            +
                "pytorch": "2.1.0+cu121"
         | 
| 6 | 
            +
              },
         | 
| 7 | 
            +
              "prompts": {
         | 
| 8 | 
            +
                "query": "Represent this sentence for searching relevant passages: "
         | 
| 9 | 
            +
              },
         | 
| 10 | 
            +
              "default_prompt_name": null
         | 
| 11 | 
            +
            }
         | 
    	
        modules.json
    ADDED
    
    | @@ -0,0 +1,20 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            [
         | 
| 2 | 
            +
              {
         | 
| 3 | 
            +
                "idx": 0,
         | 
| 4 | 
            +
                "name": "0",
         | 
| 5 | 
            +
                "path": "",
         | 
| 6 | 
            +
                "type": "sentence_transformers.models.Transformer"
         | 
| 7 | 
            +
              },
         | 
| 8 | 
            +
              {
         | 
| 9 | 
            +
                "idx": 1,
         | 
| 10 | 
            +
                "name": "1",
         | 
| 11 | 
            +
                "path": "1_Pooling",
         | 
| 12 | 
            +
                "type": "sentence_transformers.models.Pooling"
         | 
| 13 | 
            +
              },
         | 
| 14 | 
            +
              {
         | 
| 15 | 
            +
                "idx": 2,
         | 
| 16 | 
            +
                "name": "2",
         | 
| 17 | 
            +
                "path": "2_Normalize",
         | 
| 18 | 
            +
                "type": "sentence_transformers.models.Normalize"
         | 
| 19 | 
            +
              }
         | 
| 20 | 
            +
            ]
         | 
    	
        sentence_bert_config.json
    ADDED
    
    | @@ -0,0 +1,4 @@ | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "max_seq_length": 512,
         | 
| 3 | 
            +
              "do_lower_case": false
         | 
| 4 | 
            +
            }
         | 
