Add new SentenceTransformer model
- 1_Pooling/config.json +10 -0
- README.md +789 -0
- config.json +25 -0
- config_sentence_transformers.json +10 -0
- model.safetensors +3 -0
- modules.json +20 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +65 -0
- vocab.txt +0 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
{
    "word_embedding_dimension": 384,
    "pooling_mode_cls_token": false,
    "pooling_mode_mean_tokens": true,
    "pooling_mode_max_tokens": false,
    "pooling_mode_mean_sqrt_len_tokens": false,
    "pooling_mode_weightedmean_tokens": false,
    "pooling_mode_lasttoken": false,
    "include_prompt": true
}
README.md
ADDED
@@ -0,0 +1,789 @@
---
tags:
- sentence-transformers
- sentence-similarity
- feature-extraction
- generated_from_trainer
- dataset_size:50000
- loss:CosineSimilarityLoss
base_model: sentence-transformers/all-MiniLM-L6-v2
widget:
- source_sentence: 'An article on behavioral reinforcement learning:
|
12 |
+
|
13 |
+
|
14 |
+
Title: Cell-type-specific responses to associative learning in the primary motor
|
15 |
+
cortex.
|
16 |
+
|
17 |
+
Abstract: The primary motor cortex (M1) is known to be a critical site for movement
|
18 |
+
initiation and motor learning. Surprisingly, it has also been shown to possess
|
19 |
+
reward-related activity, presumably to facilitate reward-based learning of new
|
20 |
+
movements. However, whether reward-related signals are represented among different
|
21 |
+
cell types in M1, and whether their response properties change after cue-reward
|
22 |
+
conditioning remains unclear. Here, we performed longitudinal in vivo two-photon
|
23 |
+
Ca2+ imaging to monitor the activity of different neuronal cell types in M1 while
|
24 |
+
mice engaged in a classical conditioning task. Our results demonstrate that most
|
25 |
+
of the major neuronal cell types in M1 showed robust but differential responses
|
26 |
+
to both the conditioned cue stimulus (CS) and reward, and their response properties
|
27 |
+
undergo cell-type-specific modifications after associative learning. PV-INs’ responses
|
28 |
+
became more reliable to the CS, while VIP-INs’ responses became more reliable
|
29 |
+
to reward. Pyramidal neurons only showed robust responses to novel reward, and
|
30 |
+
they habituated to it after associative learning. Lastly, SOM-INs’ responses emerged
|
31 |
+
and became more reliable to both the CS and reward after conditioning. These observations
|
32 |
+
suggest that cue- and reward-related signals are preferentially represented among
|
33 |
+
different neuronal cell types in M1, and the distinct modifications they undergo
|
34 |
+
during associative learning could be essential in triggering different aspects
|
35 |
+
of local circuit reorganization in M1 during reward-based motor skill learning.'
|
36 |
+
sentences:
|
37 |
+
- 'An article on behavioral reinforcement learning:
|
38 |
+
|
39 |
+
|
40 |
+
Title: Learning to construct sentences in Spanish: A replication of the Weird
|
41 |
+
Word Order technique.
|
42 |
+
|
43 |
+
Abstract: In the present study, children''s early ability to organise words into
|
44 |
+
sentences was investigated using the Weird Word Order procedure with Spanish-speaking
|
45 |
+
children. Spanish is a language that allows for more flexibility in the positions
|
46 |
+
of subjects and objects, with respect to verbs, than other previously studied
|
47 |
+
languages (English, French, and Japanese). As in prior studies (Abbot-Smith et
|
48 |
+
al., 2001; Chang et al., 2009; Franck et al., 2011; Matthews et al., 2005, 2007),
|
49 |
+
we manipulated the relative frequency of verbs in training sessions with two age
|
50 |
+
groups (three- and four-year-old children). Results supported earlier findings
|
51 |
+
with regard to frequency: Children produced atypical word orders significantly
|
52 |
+
more often with infrequent verbs than with frequent verbs. The findings from the
|
53 |
+
present study support probabilistic learning models which allow higher levels
|
54 |
+
of flexibility and, in turn, oppose hypotheses that defend early access to advanced
|
55 |
+
grammatical knowledge.'
|
56 |
+
- 'An article on behavioral reinforcement learning:
|
57 |
+
|
58 |
+
|
59 |
+
Title: What are the computations of the cerebellum, the basal ganglia and the
|
60 |
+
cerebral cortex?.
|
61 |
+
|
62 |
+
Abstract: The classical notion that the cerebellum and the basal ganglia are dedicated
|
63 |
+
to motor control is under dispute given increasing evidence of their involvement
|
64 |
+
in non-motor functions. Is it then impossible to characterize the functions of
|
65 |
+
the cerebellum, the basal ganglia and the cerebral cortex in a simplistic manner?
|
66 |
+
This paper presents a novel view that their computational roles can be characterized
|
67 |
+
not by asking what are the ''goals'' of their computation, such as motor or sensory,
|
68 |
+
but by asking what are the ''methods'' of their computation, specifically, their
|
69 |
+
learning algorithms. There is currently enough anatomical, physiological, and
|
70 |
+
theoretical evidence to support the hypotheses that the cerebellum is a specialized
|
71 |
+
organism for supervised learning, the basal ganglia are for reinforcement learning,
|
72 |
+
and the cerebral cortex is for unsupervised learning.This paper investigates how
|
73 |
+
the learning modules specialized for these three kinds of learning can be assembled
|
74 |
+
into goal-oriented behaving systems. In general, supervised learning modules in
|
75 |
+
the cerebellum can be utilized as ''internal models'' of the environment. Reinforcement
|
76 |
+
learning modules in the basal ganglia enable action selection by an ''evaluation''
|
77 |
+
of environmental states. Unsupervised learning modules in the cerebral cortex
|
78 |
+
can provide statistically efficient representation of the states of the environment
|
79 |
+
and the behaving system. Two basic action selection architectures are shown, namely,
|
80 |
+
reactive action selection and predictive action selection. They can be implemented
|
81 |
+
within the anatomical constraint of the network linking these structures. Furthermore,
|
82 |
+
the use of the cerebellar supervised learning modules for state estimation, behavioral
|
83 |
+
simulation, and encapsulation of learned skill is considered. Finally, the usefulness
|
84 |
+
of such theoretical frameworks in interpreting brain imaging data is demonstrated
|
85 |
+
in the paradigm of procedural learning.'
|
86 |
+
- 'An article on behavioral reinforcement learning:
|
87 |
+
|
88 |
+
|
89 |
+
Title: Repeated decisions and attitudes to risk.
|
90 |
+
|
91 |
+
Abstract: In contrast to the underpinnings of expected utility, the experimental
|
92 |
+
pilot study results reported here suggest that current decisions may be influenced
|
93 |
+
both by past decisions and by the possibility of making decisions in the future.'
|
94 |
+
- source_sentence: 'An article on behavioral reinforcement learning:
|
95 |
+
|
96 |
+
|
97 |
+
Title: Sensory Evidence Accumulation Using Optic Flow in a Naturalistic Navigation
|
98 |
+
Task.
|
99 |
+
|
100 |
+
Abstract: Sensory evidence accumulation is considered a hallmark of decision-making
|
101 |
+
in noisy environments. Integration of sensory inputs has been traditionally studied
|
102 |
+
using passive stimuli, segregating perception from action. Lessons learned from
|
103 |
+
this approach, however, may not generalize to ethological behaviors like navigation,
|
104 |
+
where there is an active interplay between perception and action. We designed
|
105 |
+
a sensory-based sequential decision task in virtual reality in which humans and
|
106 |
+
monkeys navigated to a memorized location by integrating optic flow generated
|
107 |
+
by their own joystick movements. A major challenge in such closed-loop tasks is
|
108 |
+
that subjects’ actions will determine future sensory input, causing ambiguity
|
109 |
+
about whether they rely on sensory input rather than expectations based solely
|
110 |
+
on a learned model of the dynamics. To test whether subjects integrated optic
|
111 |
+
flow over time, we used three independent experimental manipulations, unpredictable
|
112 |
+
optic flow perturbations, which pushed subjects off their trajectory; gain manipulation
|
113 |
+
of the joystick controller, which changed the consequences of actions; and manipulation
|
114 |
+
of the optic flow density, which changed the information borne by sensory evidence.
|
115 |
+
Our results suggest that both macaques (male) and humans (female/male) relied
|
116 |
+
heavily on optic flow, thereby demonstrating a critical role for sensory evidence
|
117 |
+
accumulation during naturalistic action-perception closed-loop tasks.'
|
118 |
+
sentences:
|
119 |
+
- 'An article on behavioral reinforcement learning:
|
120 |
+
|
121 |
+
|
122 |
+
Title: The importance of decision making in causal learning from interventions.
|
123 |
+
|
124 |
+
Abstract: Recent research has focused on how interventions benefit causal learning.
|
125 |
+
This research suggests that the main benefit of interventions is in the temporal
|
126 |
+
and conditional probability information that interventions provide a learner.
|
127 |
+
But when one generates interventions, one must also decide what interventions
|
128 |
+
to generate. In three experiments, we investigated the importance of these decision
|
129 |
+
demands to causal learning. Experiment 1 demonstrated that learners were better
|
130 |
+
at learning causal models when they observed intervention data that they had generated,
|
131 |
+
as opposed to observing data generated by another learner. Experiment 2 demonstrated
|
132 |
+
the same effect between self-generated interventions and interventions learners
|
133 |
+
were forced to make. Experiment 3 demonstrated that when learners observed a sequence
|
134 |
+
of interventions such that the decision-making process that generated those interventions
|
135 |
+
was more readily available, learning was less impaired. These data suggest that
|
136 |
+
decision making may be an important part of causal learning from interventions.'
|
137 |
+
- 'An article on behavioral reinforcement learning:
|
138 |
+
|
139 |
+
|
140 |
+
Title: Region-specific effects of acute haloperidol in the human midbrain, striatum
|
141 |
+
and cortex.
|
142 |
+
|
143 |
+
Abstract: D2 autoreceptors provide an important regulatory mechanism of dopaminergic
|
144 |
+
neurotransmission. However, D2 receptors are also expressed as heteroreceptors
|
145 |
+
at postsynaptic membranes. The expression and the functional characteristics of
|
146 |
+
both, D2 auto- and heteroreceptors, differ between brain regions. Therefore, one
|
147 |
+
would expect that also the net response to a D2 antagonist, i.e. whether and to
|
148 |
+
what degree overall neural activity increases or decreases, varies across brain
|
149 |
+
areas. In the current study we systematically tested this hypothesis by parametrically
|
150 |
+
increasing haloperidol levels (placebo, 2 and 3 mg) in healthy volunteers and
|
151 |
+
measuring brain activity in the three major dopaminergic pathways. In particular,
|
152 |
+
activity was assessed using fMRI while participants performed a working memory
|
153 |
+
and a reinforcement learning task. Consistent with the hypothesis, across brain
|
154 |
+
regions activity parametrically in- and decreased. Moreover, even within the same
|
155 |
+
area there were function-specific concurrent de- and increases of activity, likely
|
156 |
+
caused by input from upstream dopaminergic regions. In the ventral striatum, for
|
157 |
+
instance, activity during reinforcement learning decreased for outcome processing
|
158 |
+
while prediction error related activity increased. In conclusion, the current
|
159 |
+
study highlights the intricacy of D2 neurotransmission which makes it difficult
|
160 |
+
to predict the function-specific net response of a given area to pharmacological
|
161 |
+
manipulations.'
|
162 |
+
- 'An article on behavioral reinforcement learning:
|
163 |
+
|
164 |
+
|
165 |
+
Title: Modeling dopaminergic and other processes involved in learning from reward
|
166 |
+
prediction error: Contributions from an individual differences perspective.
|
167 |
+
|
168 |
+
Abstract: Phasic firing changes of midbrain dopamine neurons have been widely
|
169 |
+
characterized as reflecting a reward prediction error (RPE). Major personality
|
170 |
+
traits (e.g., extraversion) have been linked to inter-individual variations in
|
171 |
+
dopaminergic neurotransmission. Consistent with these two claims, recent research
|
172 |
+
(Smillie et al., 2011; Cooper et al., 2014) found that extraverts exhibited larger
|
173 |
+
RPEs than introverts, as reflected in feedback related negativity (FRN) effects
|
174 |
+
in EEG recordings. Using an established, biologically-localized RPE computational
|
175 |
+
model, we successfully simulated dopaminergic cell firing changes which are thought
|
176 |
+
to modulate the FRN. We introduced simulated individual differences into the model:
|
177 |
+
parameters were systematically varied, with stable values for each simulated individual.
|
178 |
+
We explored whether a model parameter might be responsible for the observed covariance
|
179 |
+
between extraversion and the FRN changes in real data, and argued that a parameter
|
180 |
+
is a plausible source of such covariance if parameter variance, across simulated
|
181 |
+
individuals, correlated almost perfectly with the size of the simulated dopaminergic
|
182 |
+
FRN modulation, and created as much variance as possible in this simulated output.
|
183 |
+
Several model parameters met these criteria, while others did not. In particular,
|
184 |
+
variations in the strength of connections carrying excitatory reward drive inputs
|
185 |
+
to midbrain dopaminergic cells were considered plausible candidates, along with
|
186 |
+
variations in a parameter which scales the effects of dopamine cell firing bursts
|
187 |
+
on synaptic modification in ventral striatum. We suggest possible neurotransmitter
|
188 |
+
mechanisms underpinning these model parameters. Finally, the limitations and possible
|
189 |
+
extensions of our general approach are discussed.'
|
190 |
+
- source_sentence: 'An article on behavioral reinforcement learning:
|
191 |
+
|
192 |
+
|
193 |
+
Title: Pigeons'' use of cues in a repeated five-trial-sequence, single-reversal
|
194 |
+
task.
|
195 |
+
|
196 |
+
Abstract: We studied behavioral flexibility, or the ability to modify one''s behavior
|
197 |
+
in accordance with the changing environment, in pigeons using a reversal-learning
|
198 |
+
paradigm. In two experiments, each session consisted of a series of five-trial
|
199 |
+
sequences involving a simple simultaneous color discrimination in which a reversal
|
200 |
+
could occur during each sequence. The ideal strategy would be to start each sequence
|
201 |
+
with a choice of S1 (the first correct stimulus) until it was no longer correct,
|
202 |
+
and then to switch to S2 (the second correct stimulus), thus utilizing cues provided
|
203 |
+
by local reinforcement (feedback from the preceding trial). In both experiments,
|
204 |
+
subjects showed little evidence of using local reinforcement cues, but instead
|
205 |
+
used the mean probabilities of reinforcement for S1 and S2 on each trial within
|
206 |
+
each sequence. That is, subjects showed remarkably similar behavior, regardless
|
207 |
+
of where (or, in Exp. 2, whether) a reversal occurred during a given sequence.
|
208 |
+
Therefore, subjects appeared to be relatively insensitive to the consequences
|
209 |
+
of responses (local feedback) and were not able to maximize reinforcement. The
|
210 |
+
fact that pigeons did not use the more optimal feedback afforded by recent reinforcement
|
211 |
+
contingencies to maximize their reinforcement has implications for their use of
|
212 |
+
flexible response strategies under reversal-learning conditions.'
|
213 |
+
sentences:
|
214 |
+
- 'An article on behavioral reinforcement learning:
|
215 |
+
|
216 |
+
|
217 |
+
Title: Behavioral and circuit basis of sucrose rejection by drosophila females
|
218 |
+
in a simple decision-making task.
|
219 |
+
|
220 |
+
Abstract: Drosophila melanogaster egg-laying site selection offers a genetic model
|
221 |
+
to study a simple form of value-based decision. We have previously shown that
|
222 |
+
Drosophila females consistently reject a sucrose-containing substrate and choose
|
223 |
+
a plain (sucrose-free) substrate for egg laying in our sucrose versus plain decision
|
224 |
+
assay. However, either substrate is accepted when it is the sole option. Here
|
225 |
+
we describe the neural mechanism that underlies females’ sucrose rejection in
|
226 |
+
our sucrose versus plain assay. First, we demonstrate that females explored the
|
227 |
+
sucrose substrate frequently before most egg-laying events, suggesting that they
|
228 |
+
actively suppress laying eggs on the sucrose substrate as opposed to avoiding
|
229 |
+
visits to it. Second, we show that activating a specific subset of DA neurons
|
230 |
+
triggered a preference for laying eggs on the sucrose substrate over the plain
|
231 |
+
one, suggesting that activating these DA neurons can increase the value of the
|
232 |
+
sucrose substrate for egg laying. Third, we demonstrate that neither ablating
|
233 |
+
nor inhibiting the mushroom body (MB), a known Drosophila learning and decision
|
234 |
+
center, affected females’ egg-laying preferences in our sucrose versus plain assay,
|
235 |
+
suggesting that MB does not mediate this specific decision-making task. We propose
|
236 |
+
that the value of a sucrose substrate— as an egg-laying option—can be adjusted
|
237 |
+
by the activities of a specific DA circuit. Once the sucrose substrate is determined
|
238 |
+
to be the lesser valued option, females execute their decision to reject this
|
239 |
+
inferior substrate not by stopping their visits to it, but by actively suppressing
|
240 |
+
their egg-laying motor program during their visits.'
|
241 |
+
- 'An article on behavioral reinforcement learning:
|
242 |
+
|
243 |
+
|
244 |
+
Title: Choice in experiential learning: True preferences or experimental artifacts?.
|
245 |
+
|
246 |
+
Abstract: The rate of selecting different options in the decisions-from-feedback
|
247 |
+
paradigm is commonly used to measure preferences resulting from experiential learning.
|
248 |
+
While convergence to a single option increases with experience, some variance
|
249 |
+
in choice remains even when options are static and offer fixed rewards. Employing
|
250 |
+
a decisions-from-feedback paradigm followed by a policy-setting task, we examined
|
251 |
+
whether the observed variance in choice is driven by factors related to the paradigm
|
252 |
+
itself: Continued exploration (e.g., believing options are non-stationary) or
|
253 |
+
exploitation of perceived outcome patterns (i.e., a belief that sequential choices
|
254 |
+
are not independent). Across two studies, participants showed variance in their
|
255 |
+
choices, which was related (i.e., proportional) to the policies they set. In addition,
|
256 |
+
in Study 2, participants'' reported under-confidence was associated with the amount
|
257 |
+
of choice variance in later choices and policies. These results suggest that variance
|
258 |
+
in choice is better explained by participants lacking confidence in knowing which
|
259 |
+
option is better, rather than methodological artifacts (i.e., exploration or failures
|
260 |
+
to recognize outcome independence). As such, the current studies provide evidence
|
261 |
+
for the decisions-from-feedback paradigm''s validity as a behavioral research
|
262 |
+
method for assessing learned preferences.'
|
263 |
+
- 'An article on behavioral reinforcement learning:
|
264 |
+
|
265 |
+
|
266 |
+
Title: Impaired savings despite intact initial learning of motor adaptation in
|
267 |
+
Parkinson''s disease.
|
268 |
+
|
269 |
+
Abstract: In motor adaptation, the occurrence of savings (faster relearning of
|
270 |
+
a previously learned motor adaptation task) has been explained in terms of operant
|
271 |
+
reinforcement learning (Huang et al. in Neuron 70(4):787-801, 2011), which is
|
272 |
+
thought to associate an adapted motor command with outcome success during repeated
|
273 |
+
execution of the adapted movement. There is some evidence for deficient savings
|
274 |
+
in Parkinson''s Disease (PD), which might result from deficient operant reinforcement
|
275 |
+
processes. However, this evidence is compromised by limited adaptation training
|
276 |
+
during initial learning and by multi-target adaptation, which reduces the number
|
277 |
+
of reinforced movement repetitions for each target. Here, we examined savings
|
278 |
+
in PD patients and controls following overlearning with a single target. PD patients
|
279 |
+
showed less savings than controls after successive adaptation and deadaptation
|
280 |
+
blocks within the same test session, as well as less savings across test sessions
|
281 |
+
separated by a 24-h delay. It is argued that impaired blunted dopaminergic signals
|
282 |
+
in PD impairs the modulation of dopaminergic signals to the motor cortex in response
|
283 |
+
to rewarding motor outcomes, thus impairing the association of the adapted motor
|
284 |
+
command with rewarding motor outcomes. Consequently, the previously adapted motor
|
285 |
+
command is not preferentially selected during relearning, and savings is impaired.'
|
286 |
+
- source_sentence: 'An article on behavioral reinforcement learning:
|
287 |
+
|
288 |
+
|
289 |
+
Title: Altered cingulate sub-region activation accounts for task-related dissociation
|
290 |
+
in ERN amplitude as a function of obsessive-compulsive symptoms.
|
291 |
+
|
292 |
+
Abstract: Larger error-related negativities (ERNs) have been consistently found
|
293 |
+
in obsessive-compulsive disorder (OCD) patients, and are thought to reflect the
|
294 |
+
activities of a hyperactive cortico-striatal circuit during action monitoring.
|
295 |
+
We previously observed that obsessive-compulsive (OC) symptomatic students (non-patients)
|
296 |
+
have larger ERNs during errors in a response competition task, yet smaller ERNs
|
297 |
+
in a reinforcement learning task. The finding of a task-specific dissociation
|
298 |
+
suggests that distinct yet partially overlapping medio-frontal systems underlie
|
299 |
+
the ERN in different tasks, and that OC symptoms are associated with functional
|
300 |
+
differences in these systems. Here, we used EEG source localization to identify
|
301 |
+
why OC symptoms are associated with hyperactive ERNs to errors yet hypoactive
|
302 |
+
ERNs when selecting maladaptive actions. At rest, OC symptomatology predicted
|
303 |
+
greater activity in rostral anterior cingulate cortex (rACC) and lower activity
|
304 |
+
in dorsal anterior cingulate cortex (dACC). When compared to a group with low
|
305 |
+
OC symptom scores, the high OC group had greater rACC reactivity during errors
|
306 |
+
in the response competition task and less deactivation of dACC activity during
|
307 |
+
errors in the reinforcement learning task. The degree of activation in these areas
|
308 |
+
correlated with ERN amplitudes during both tasks in the high OC group, but not
|
309 |
+
in the low group. Interactive anterior cingulate cortex (ACC) systems associated
|
310 |
+
avoidance of maladaptive actions were intact in the high OC group, but were related
|
311 |
+
to poorer performance on a third task: probabilistic reversal learning. These
|
312 |
+
novel findings link both tonic and phasic activities in the ACC to action monitoring
|
313 |
+
alterations, including dissociation in performance deficits, in OC symptomatic
|
314 |
+
participants.'
|
315 |
+
sentences:
|
316 |
+
- 'An article on behavioral reinforcement learning:
|
317 |
+
|
318 |
+
|
319 |
+
Title: The Stroop Effect: Why Proportion Congruent Has Nothing to Do With Congruency
|
320 |
+
and Everything to Do With Contingency.
|
321 |
+
|
322 |
+
Abstract: The item-specific proportion congruent (ISPC) effect refers to the observation
|
323 |
+
that the Stroop effect is larger for words that are presented mostly in congruent
|
324 |
+
colors (e.g., BLUE presented 75% of the time in blue) and smaller for words that
|
325 |
+
are presented mostly in a given incongruent color (e.g., YELLOW presented 75%
|
326 |
+
of the time in orange). One account of the ISPC effect, the modulation hypothesis,
|
327 |
+
is that participants modulate attention based on the identity of the word (i.e.,
|
328 |
+
participants allow the word to influence responding when it is presented mostly
|
329 |
+
in its congruent color). Another account, the contingency hypothesis, is that
|
330 |
+
participants use the word to predict the response that they will need to make
|
331 |
+
(e.g., if the word is YELLOW, then the response is probably "orange"). Reanalyses
|
332 |
+
of data from L. L. Jacoby, D. S. Lindsay, and S. Hessels (2003), along with results
|
333 |
+
from new experiments, are inconsistent with the modulation hypothesis but entirely
|
334 |
+
consistent with the contingency hypothesis. A response threshold mechanism that
|
335 |
+
uses contingency information provides a sufficient account of the data.'
|
336 |
+
- 'An article on behavioral reinforcement learning:
|
337 |
+
|
338 |
+
|
339 |
+
Title: D-cycloserine facilitates socially reinforced learning in an animal model
|
340 |
+
relevant to autism spectrum disorders.
|
341 |
+
|
342 |
+
Abstract: There are no drugs that specifically target the social deficits of autism
|
343 |
+
spectrum disorders (ASD). This may be due to a lack of behavioral paradigms in
|
344 |
+
animal models relevant to ASD. Partner preference formation in the prairie vole
|
345 |
+
represents a social cognitive process involving socially reinforced learning.
|
346 |
+
D-cycloserine (DCS) is a cognitive enhancer that acts at the N-methyl-D-aspartate
|
347 |
+
receptor to promote learning. If DCS enhances socially reinforced learning in
|
348 |
+
the partner preference paradigm, it may be useful in combination with behavioral
|
349 |
+
therapies for enhancing social functioning in ASD. Female prairie and meadow voles
|
350 |
+
were given DCS either peripherally or directly into one of three brain regions:
|
351 |
+
nucleus accumbens, amygdala, or caudate putamen. Subjects were then cohabited
|
352 |
+
with a male vole under conditions that do not typically yield a partner preference.
|
353 |
+
The development of a preference for that stimulus male vole over a novel male
|
354 |
+
vole was assessed using a partner preference test. A low dose of DCS administered
|
355 |
+
peripherally enhanced preference formation in prairie voles but not meadow voles
|
356 |
+
under conditions in which it would not otherwise occur. These effects were replicated
|
357 |
+
in prairie voles by microinfusions of DCS into the nucleus accumbens, which is
|
358 |
+
involved in reinforcement learning, and the amygdala, which is involved in social
|
359 |
+
information processing. Partner preference in the prairie vole may provide a behavioral
|
360 |
+
paradigm with face, construct, and predictive validity for identifying prosocial
|
361 |
+
pharmacotherapeutics. D-cycloserine may be a viable treatment strategy for social
|
362 |
+
deficits of ASD when paired with social behavioral therapy.'
|
363 |
+
- 'An article on behavioral reinforcement learning:
|
364 |
+
|
365 |
+
|
366 |
+
Title: Pseudodiagnosticity Revisited.
|
367 |
+
|
368 |
+
Abstract: In the psychology of reasoning and judgment, the pseudodiagnosticity
|
369 |
+
task has been a major tool for the empirical investigation of people''s ability
|
370 |
+
to search for diagnostic information. A novel normative analysis of this experimental
|
371 |
+
paradigm is presented, by which the participants'' prevailing responses turn out
|
372 |
+
not to support the generally accepted existence of a reasoning bias. The conclusions
|
373 |
+
drawn do not rest on pragmatic concerns suggesting alleged divergences between
|
374 |
+
the experimenter''s and participants'' reading of the task. They only rely, instead,
|
375 |
+
on the demonstration that observed behavior largely conforms to optimal utility
|
376 |
+
maximizing information search strategies for standard variants of the pseudodiagnosticity
|
377 |
+
paradigm that have been investigated so far. It is argued that the experimental
|
378 |
+
results obtained, contrary to what has recurrently been claimed, have failed to
|
379 |
+
discriminate between normative and nonnormative accounts of behavior. More general
|
380 |
+
implications of the analysis presented for past and future research on human information
|
381 |
+
search behavior and diagnostic reasoning are discussed.'
|
382 |
+
- source_sentence: 'An article on behavioral reinforcement learning:
|
383 |
+
|
384 |
+
|
385 |
+
Title: Confidence and the description–experience distinction.
|
386 |
+
|
387 |
+
Abstract: In this paper, we extend the literature on the description–experience
|
388 |
+
gap in risky choices by focusing on how the mode of learning—through description
|
389 |
+
or experience—affects confidence. Specifically, we explore how learning through
|
390 |
+
description or experience affects confidence in (1) the information gathered to
|
391 |
+
make a decision and (2) the resulting choice. In two preregistered experiments
|
392 |
+
we tested whether there was a description–experience gap in both dimensions of
|
393 |
+
confidence. Learning from description was associated with higher confidence—both
|
394 |
+
in the information gathered and in the choice made—than was learning from experience.
|
395 |
+
In a third preregistered experiment, we examined the effect of sample size on
|
396 |
+
confidence in decisions from experience. Contrary to the normative view that larger
|
397 |
+
samples foster confidence in statistical inference, we observed that more experience
|
398 |
+
led to less confidence. This observation is reminiscent of recent theories of
|
399 |
+
deliberate ignorance, which highlight the adaptive benefits of deliberately limiting
|
400 |
+
information search.'
|
401 |
+
sentences:
|
402 |
+
- 'An article on behavioral reinforcement learning:
|
403 |
+
|
404 |
+
|
405 |
+
Title: Episodic memories predict adaptive Value-Based Decision-Making.
|
406 |
+
|
407 |
+
Abstract: Prior research illustrates that memory can guide Value-Based Decision-Making.
|
408 |
+
For example, previous work has implicated both working memory and procedural memory
|
409 |
+
(i.e., reinforcement learning) in guiding choice. However, other types of memories,
|
410 |
+
such as episodic memory, may also influence Decision-Making. Here we test the
|
411 |
+
role for episodic Memory-Specifically item versus associative Memory-In supporting
|
412 |
+
Value-Based choice. Participants completed a task where they first learned the
|
413 |
+
value associated with trial unique lotteries. After a short delay, they completed
|
414 |
+
a Decision-Making task where they could choose to reengage with previously encountered
|
415 |
+
lotteries, or new never before seen lotteries. Finally, participants completed
|
416 |
+
a surprise memory test for the lotteries and their associated values. Results
|
417 |
+
indicate that participants chose to reengage more often with lotteries that resulted
|
418 |
+
in high versus low rewards. Critically, participants not only formed detailed,
|
419 |
+
associative memories for the reward values coupled with individual lotteries,
|
420 |
+
but also exhibited adaptive Decision-Making only when they had intact associative
|
421 |
+
memory. We further found that the relationship between adaptive choice and associative
|
422 |
+
memory generalized to more complex, ecologically valid choice behavior, such as
|
423 |
+
social decisionmaking. However, individuals more strongly encode experiences of
|
424 |
+
social Violations-Such as being treated unfairly, suggesting a bias for how individuals
|
425 |
+
form associative memories within social contexts. Together, these findings provide
|
426 |
+
an important integration of episodic memory and Decision-Making literatures to
|
427 |
+
better understand key mechanisms supporting adaptive behavior.'
|
428 |
+
- 'An article on behavioral reinforcement learning:
|
429 |
+
|
430 |
+
|
431 |
+
Title: How (in)variant are subjective representations of described and experienced
|
432 |
+
risk and rewards?.
|
433 |
+
|
434 |
+
Abstract: Decisions under risk have been shown to differ depending on whether
|
435 |
+
information on outcomes and probabilities is gleaned from symbolic descriptions
|
436 |
+
or gathered through experience. To some extent, this description–experience gap
|
437 |
+
is due to sampling error in experience-based choice. Analyses with cumulative
|
438 |
+
prospect theory (CPT), investigating to what extent the gap is also driven by
|
439 |
+
differences in people''s subjective representations of outcome and probability
|
440 |
+
information (taking into account sampling error), have produced mixed results.
|
441 |
+
We improve on previous analyses of description-based and experience-based choices
|
442 |
+
by taking advantage of both a within-subjects design and a hierarchical Bayesian
|
443 |
+
implementation of CPT. This approach allows us to capture both the differences
|
444 |
+
and the within-person stability of individuals’ subjective representations across
|
445 |
+
the two modes of learning about choice options. Relative to decisions from description,
|
446 |
+
decisions from experience showed reduced sensitivity to probabilities and increased
|
447 |
+
sensitivity to outcomes. For some CPT parameters, individual differences were
|
448 |
+
relatively stable across modes of learning. Our results suggest that outcome and
|
449 |
+
probability information translate into systematically different subjective representations
|
450 |
+
in description- versus experience-based choice. At the same time, both types of
|
451 |
+
decisions seem to tap into the same individual-level regularities.'
|
452 |
+
- 'An article on behavioral reinforcement learning:
|
453 |
+
|
454 |
+
|
455 |
+
Title: Do narcissists make better decisions? An investigation of narcissism and
|
456 |
+
dynamic decision-making performance.
|
457 |
+
|
458 |
+
Abstract: We investigated whether narcissism affected dynamic decision-making
|
459 |
+
performance in the presence and absence of misleading information. Performance
|
460 |
+
was examined in a two-choice dynamic decision-making task where the optimal strategy
|
461 |
+
was to forego an option providing larger immediate rewards in favor of an option
|
462 |
+
that led to larger delayed rewards. Information regarding foregone rewards from
|
463 |
+
the alternate option was presented or withheld to bias participants toward the
|
464 |
+
sub-optimal choice. The results demonstrated that individuals high in narcissistic
|
465 |
+
traits performed comparably to low narcissism individuals when foregone reward
|
466 |
+
information was absent, but high narcissism individuals outperformed individuals
|
467 |
+
low in narcissistic traits when misleading information was presented. The advantage
|
468 |
+
for participants high in narcissistic traits was strongest within males, and,
|
469 |
+
overall, males outperformed females when foregone rewards were present. While
|
470 |
+
prior research emphasizes narcissists'' decision-making deficits, our findings
|
471 |
+
provide evidence that individuals high in narcissistic traits excel at decision-making
|
472 |
+
tasks that involve disregarding ambiguous information and focusing on the long-term
|
473 |
+
utility of each option. Their superior ability at filtering out misleading information
|
474 |
+
may reflect an effort to maintain their self-view or avoid ego threat.'
|
pipeline_tag: sentence-similarity
library_name: sentence-transformers
---

# SentenceTransformer based on sentence-transformers/all-MiniLM-L6-v2

This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2). It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

## Model Details

### Model Description
- **Model Type:** Sentence Transformer
- **Base model:** [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) <!-- at revision c9745ed1d9f207416be6d2e6f8de32d1f16199bf -->
- **Maximum Sequence Length:** 256 tokens
- **Output Dimensionality:** 384 dimensions
- **Similarity Function:** Cosine Similarity
<!-- - **Training Dataset:** Unknown -->
<!-- - **Language:** Unknown -->
<!-- - **License:** Unknown -->

### Model Sources

- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)

### Full Model Architecture

```
SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)
```
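The module list above is the entire inference pipeline: the BertModel backbone produces token embeddings (truncated at 256 tokens), the Pooling module mean-averages them into one 384-dimensional vector, and Normalize rescales it to unit length, which is why cosine similarity and dot product coincide for this model. The sketch below is only an illustration of that pipeline with plain `transformers`; it is not part of this repository, the `embed` helper is made up, and it assumes the same checkpoint id used in the usage example further down.

```python
import torch
import torch.nn.functional as F
from transformers import AutoModel, AutoTokenizer

# Hypothetical manual re-implementation of the Transformer -> Pooling -> Normalize stack.
tokenizer = AutoTokenizer.from_pretrained("dwulff/minilm-brl")
backbone = AutoModel.from_pretrained("dwulff/minilm-brl")

def embed(texts):
    batch = tokenizer(texts, padding=True, truncation=True, max_length=256, return_tensors="pt")
    with torch.no_grad():
        token_embeddings = backbone(**batch).last_hidden_state       # (batch, seq_len, 384)
    mask = batch["attention_mask"].unsqueeze(-1).float()             # ignore padding tokens
    pooled = (token_embeddings * mask).sum(dim=1) / mask.sum(dim=1)  # mean pooling
    return F.normalize(pooled, p=2, dim=1)                           # unit-length vectors
```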

## Usage

### Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

```bash
pip install -U sentence-transformers
```

Then you can load this model and run inference.
```python
from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("dwulff/minilm-brl")
# Run inference
sentences = [
'An article on behavioral reinforcement learning:\n\nTitle: Confidence and the description–experience distinction.\nAbstract: In this paper, we extend the literature on the description–experience gap in risky choices by focusing on how the mode of learning—through description or experience—affects confidence. Specifically, we explore how learning through description or experience affects confidence in (1) the information gathered to make a decision and (2) the resulting choice. In two preregistered experiments we tested whether there was a description–experience gap in both dimensions of confidence. Learning from description was associated with higher confidence—both in the information gathered and in the choice made—than was learning from experience. In a third preregistered experiment, we examined the effect of sample size on confidence in decisions from experience. Contrary to the normative view that larger samples foster confidence in statistical inference, we observed that more experience led to less confidence. This observation is reminiscent of recent theories of deliberate ignorance, which highlight the adaptive benefits of deliberately limiting information search.',
"An article on behavioral reinforcement learning:\n\nTitle: How (in)variant are subjective representations of described and experienced risk and rewards?.\nAbstract: Decisions under risk have been shown to differ depending on whether information on outcomes and probabilities is gleaned from symbolic descriptions or gathered through experience. To some extent, this description–experience gap is due to sampling error in experience-based choice. Analyses with cumulative prospect theory (CPT), investigating to what extent the gap is also driven by differences in people's subjective representations of outcome and probability information (taking into account sampling error), have produced mixed results. We improve on previous analyses of description-based and experience-based choices by taking advantage of both a within-subjects design and a hierarchical Bayesian implementation of CPT. This approach allows us to capture both the differences and the within-person stability of individuals’ subjective representations across the two modes of learning about choice options. Relative to decisions from description, decisions from experience showed reduced sensitivity to probabilities and increased sensitivity to outcomes. For some CPT parameters, individual differences were relatively stable across modes of learning. Our results suggest that outcome and probability information translate into systematically different subjective representations in description- versus experience-based choice. At the same time, both types of decisions seem to tap into the same individual-level regularities.",
"An article on behavioral reinforcement learning:\n\nTitle: Do narcissists make better decisions? An investigation of narcissism and dynamic decision-making performance.\nAbstract: We investigated whether narcissism affected dynamic decision-making performance in the presence and absence of misleading information. Performance was examined in a two-choice dynamic decision-making task where the optimal strategy was to forego an option providing larger immediate rewards in favor of an option that led to larger delayed rewards. Information regarding foregone rewards from the alternate option was presented or withheld to bias participants toward the sub-optimal choice. The results demonstrated that individuals high in narcissistic traits performed comparably to low narcissism individuals when foregone reward information was absent, but high narcissism individuals outperformed individuals low in narcissistic traits when misleading information was presented. The advantage for participants high in narcissistic traits was strongest within males, and, overall, males outperformed females when foregone rewards were present. While prior research emphasizes narcissists' decision-making deficits, our findings provide evidence that individuals high in narcissistic traits excel at decision-making tasks that involve disregarding ambiguous information and focusing on the long-term utility of each option. Their superior ability at filtering out misleading information may reflect an effort to maintain their self-view or avoid ego threat.",
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 384]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]
```
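Because every training pair starts with the same instruction-style prefix, queries phrased the same way should land in the same region of the embedding space. A minimal sketch, reusing `model`, `sentences`, and `embeddings` from the block above (the query text is made up for illustration):

```python
# Hypothetical example: rank the three abstracts above against a free-text query.
query = "An article on behavioral reinforcement learning:\n\nTitle: Dopamine and reward prediction errors."
query_embedding = model.encode([query])
scores = model.similarity(query_embedding, embeddings)  # shape [1, 3], cosine similarities
best = scores.argmax().item()
print(best, sentences[best][:80])
```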

<!--
### Direct Usage (Transformers)

<details><summary>Click to see the direct usage in Transformers</summary>

</details>
-->

<!--
### Downstream Usage (Sentence Transformers)

You can finetune this model on your own dataset.

<details><summary>Click to expand</summary>

</details>
-->

<!--
### Out-of-Scope Use

*List how the model may foreseeably be misused and address what users ought not to do with the model.*
-->

<!--
## Bias, Risks and Limitations

*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
-->

<!--
### Recommendations

*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
-->

## Training Details

### Training Dataset

#### Unnamed Dataset

* Size: 50,000 training samples
* Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>label</code>
* Approximate statistics based on the first 1000 samples:

| | sentence_0 | sentence_1 | label |
|:--------|:-----------|:-----------|:------|
| type | string | string | float |
| details | <ul><li>min: 102 tokens</li><li>mean: 237.66 tokens</li><li>max: 256 tokens</li></ul> | <ul><li>min: 61 tokens</li><li>mean: 227.84 tokens</li><li>max: 256 tokens</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.17</li><li>max: 0.9</li></ul> |

* Samples:

| sentence_0 | sentence_1 | label |
|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------|
| <code>An article on behavioral reinforcement learning:<br><br>Title: Working memory and response selection: A computational account of interactions among cortico-basalganglio-thalamic loops.<br>Abstract: Cortico-basalganglio-thalamic loops are involved in both cognitive processes and motor control. We present a biologically meaningful computational model of how these loops contribute to the organization of working memory and the development of response behavior. Via reinforcement learning in basal ganglia, the model develops flexible control of working memory within prefrontal loops and achieves selection of appropriate responses based on working memory content and visual stimulation within a motor loop. We show that both working memory control and response selection can evolve within parallel and interacting cortico-basalganglio-thalamic loops by Hebbian and three-factor learning rules. Furthermore, the model gives a coherent explanation for how complex strategies of working memory control and respo...</code> | <code>An article on behavioral reinforcement learning:<br><br>Title: The role of basal ganglia in reinforcement learning and imprinting in domestic chicks.<br>Abstract: Effects of bilateral kainate lesions of telencephalic basal ganglia (lobus parolfactorius, LPO) were examined in domestic chicks. In the imprinting paradigm, where chicks learned to selectively approach a moving object without any explicitly associated reward, both the pre- and post-training lesions were without effects. On the other hand, in the water-reinforced pecking task, pre-training lesions of LPO severely impaired immediate reinforcement as well as formation of the association memory. However, post-training LPO lesions did not cause amnesia, and chicks selectively pecked at the reinforced color. The LPO could thus be involved specifically in the evaluation of present rewards and the instantaneous reinforcement of pecking, but not in the execution of selective behavior based on a memorized color cue.</code> | <code>0.5</code> |
| <code>An article on behavioral reinforcement learning:<br><br>Title: Exploration Disrupts Choice-Predictive Signals and Alters Dynamics in Prefrontal Cortex.<br>Abstract: In uncertain environments, decision-makers must balance two goals: they must “exploit” rewarding options but also “explore” in order to discover rewarding alternatives. Exploring and exploiting necessarily change how the brain responds to identical stimuli, but little is known about how these states, and transitions between them, change how the brain transforms sensory information into action. To address this question, we recorded neural activity in a prefrontal sensorimotor area while monkeys naturally switched between exploring and exploiting rewarding options. We found that exploration profoundly reduced spatially selective, choice-predictive activity in single neurons and delayed choice-predictive population dynamics. At the same time, reward learning was increased in brain and behavior. These results indicate that exploration i...</code> | <code>An article on behavioral reinforcement learning:<br><br>Title: Counterfactual choice and learning in a Neural Network centered on human lateral frontopolar cortex.<br>Abstract: Decision making and learning in a real-world context require organisms to track not only the choices they make and the outcomes that follow but also other untaken, or counterfactual, choices and their outcomes. Although the neural system responsible for tracking the value of choices actually taken is increasingly well understood, whether a neural system tracks counterfactual information is currently unclear. Using a three-alternative decision-making task, a Bayesian reinforcement-learning algorithm, and fMRI, we investigated the coding of counterfactual choices and prediction errors in the human brain. Rather than representing evidence favoring multiple counterfactual choices, lateral frontal polar cortex (lFPC), dorsomedial frontal cortex (DMFC), and posteromedial cortex (PMC) encode the reward-based evidence favoring t...</code> | <code>0.5</code> |
| <code>An article on behavioral reinforcement learning:<br><br>Title: Electrophysiological signatures of visual statistical learning in 3-month-old infants at familial and low risk for autism spectrum disorder.<br>Abstract: Visual statistical learning (VSL) refers to the ability to extract associations and conditional probabilities within the visual environment. It may serve as a precursor to cognitive and social communication development. Quantifying VSL in infants at familial risk (FR) for Autism Spectrum Disorder (ASD) provides opportunities to understand how genetic predisposition can influence early learning processes which may, in turn, lay a foundation for cognitive and social communication delays. We examined electroencephalography (EEG) signatures of VSL in 3-month-old infants, examining whether EEG correlates of VSL differentiated FR from low-risk (LR) infants. In an exploratory analysis, we then examined whether EEG correlates of VSL at 3 months relate to cognitive function and ASD symptoms...</code> | <code>An article on behavioral reinforcement learning:<br><br>Title: Reduced nucleus accumbens reactivity and adolescent depression following early-life stress.<br>Abstract: Depression is a common outcome for those having experienced early-life stress (ELS). For those individuals, depression typically increases during adolescence and appears to endure into adulthood, suggesting alterations in the development of brain systems involved in depression. Developmentally, the nucleus accumbens (NAcc), a limbic structure associated with reward learning and motivation, typically undergoes dramatic functional change during adolescence; therefore, age-related changes in NAcc function may underlie increases in depression in adolescence following ELS. The current study examined the effects of ELS in 38 previously institutionalized children and adolescents in comparison to a group of 31 youths without a history of ELS. Consistent with previous research, the findings showed that depression was higher in adolescents...</code> | <code>0.0</code> |
* Loss: [<code>CosineSimilarityLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosinesimilarityloss) with these parameters:
  ```json
  {
      "loss_fct": "torch.nn.modules.loss.MSELoss"
  }
  ```
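The snippet below is a minimal sketch, not the original training script, of how a loss with these parameters is typically constructed in Sentence Transformers; the base checkpoint name is a placeholder assumption.

```python
import torch
from sentence_transformers import SentenceTransformer, losses

# Placeholder base model; the actual starting checkpoint is not stated in this section.
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# CosineSimilarityLoss embeds each (sentence1, sentence2) pair, computes their cosine
# similarity, and regresses it against the float score label (e.g. the 0.0 / 0.5 scores
# in the example pairs above) using the configured loss_fct, here MSELoss.
loss = losses.CosineSimilarityLoss(model=model, loss_fct=torch.nn.MSELoss())
```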
### Training Hyperparameters
#### Non-Default Hyperparameters

- `per_device_train_batch_size`: 64
- `per_device_eval_batch_size`: 64
- `num_train_epochs`: 5
- `multi_dataset_batch_sampler`: round_robin
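As a hedged illustration of how the non-default values above could be passed to the trainer (the output directory is a placeholder and every other option keeps its default):

```python
from sentence_transformers import SentenceTransformerTrainingArguments
from sentence_transformers.training_args import MultiDatasetBatchSamplers

# Only the non-default hyperparameters listed above are set explicitly;
# "models/example-run" is a placeholder output directory, not the one used here.
args = SentenceTransformerTrainingArguments(
    output_dir="models/example-run",
    per_device_train_batch_size=64,
    per_device_eval_batch_size=64,
    num_train_epochs=5,
    multi_dataset_batch_sampler=MultiDatasetBatchSamplers.ROUND_ROBIN,
)
```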
#### All Hyperparameters
<details><summary>Click to expand</summary>

- `overwrite_output_dir`: False
- `do_predict`: False
- `eval_strategy`: no
- `prediction_loss_only`: True
- `per_device_train_batch_size`: 64
- `per_device_eval_batch_size`: 64
- `per_gpu_train_batch_size`: None
- `per_gpu_eval_batch_size`: None
- `gradient_accumulation_steps`: 1
- `eval_accumulation_steps`: None
- `torch_empty_cache_steps`: None
- `learning_rate`: 5e-05
- `weight_decay`: 0.0
- `adam_beta1`: 0.9
- `adam_beta2`: 0.999
- `adam_epsilon`: 1e-08
- `max_grad_norm`: 1
- `num_train_epochs`: 5
- `max_steps`: -1
- `lr_scheduler_type`: linear
- `lr_scheduler_kwargs`: {}
- `warmup_ratio`: 0.0
- `warmup_steps`: 0
- `log_level`: passive
- `log_level_replica`: warning
- `log_on_each_node`: True
- `logging_nan_inf_filter`: True
- `save_safetensors`: True
- `save_on_each_node`: False
- `save_only_model`: False
- `restore_callback_states_from_checkpoint`: False
- `no_cuda`: False
- `use_cpu`: False
- `use_mps_device`: False
- `seed`: 42
- `data_seed`: None
- `jit_mode_eval`: False
- `use_ipex`: False
- `bf16`: False
- `fp16`: False
- `fp16_opt_level`: O1
- `half_precision_backend`: auto
- `bf16_full_eval`: False
- `fp16_full_eval`: False
- `tf32`: None
- `local_rank`: 0
- `ddp_backend`: None
- `tpu_num_cores`: None
- `tpu_metrics_debug`: False
- `debug`: []
- `dataloader_drop_last`: False
- `dataloader_num_workers`: 0
- `dataloader_prefetch_factor`: None
- `past_index`: -1
- `disable_tqdm`: False
- `remove_unused_columns`: True
- `label_names`: None
- `load_best_model_at_end`: False
- `ignore_data_skip`: False
- `fsdp`: []
- `fsdp_min_num_params`: 0
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- `tp_size`: 0
- `fsdp_transformer_layer_cls_to_wrap`: None
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- `deepspeed`: None
- `label_smoothing_factor`: 0.0
- `optim`: adamw_torch
- `optim_args`: None
- `adafactor`: False
- `group_by_length`: False
- `length_column_name`: length
- `ddp_find_unused_parameters`: None
- `ddp_bucket_cap_mb`: None
- `ddp_broadcast_buffers`: False
- `dataloader_pin_memory`: True
- `dataloader_persistent_workers`: False
- `skip_memory_metrics`: True
- `use_legacy_prediction_loop`: False
- `push_to_hub`: False
- `resume_from_checkpoint`: None
- `hub_model_id`: None
- `hub_strategy`: every_save
- `hub_private_repo`: None
- `hub_always_push`: False
- `gradient_checkpointing`: False
- `gradient_checkpointing_kwargs`: None
- `include_inputs_for_metrics`: False
- `include_for_metrics`: []
- `eval_do_concat_batches`: True
- `fp16_backend`: auto
- `push_to_hub_model_id`: None
- `push_to_hub_organization`: None
- `mp_parameters`: 
- `auto_find_batch_size`: False
- `full_determinism`: False
- `torchdynamo`: None
- `ray_scope`: last
- `ddp_timeout`: 1800
- `torch_compile`: False
- `torch_compile_backend`: None
- `torch_compile_mode`: None
- `dispatch_batches`: None
- `split_batches`: None
- `include_tokens_per_second`: False
- `include_num_input_tokens_seen`: False
- `neftune_noise_alpha`: None
- `optim_target_modules`: None
- `batch_eval_metrics`: False
- `eval_on_start`: False
- `use_liger_kernel`: False
- `eval_use_gather_object`: False
- `average_tokens_across_devices`: False
- `prompts`: None
- `batch_sampler`: batch_sampler
- `multi_dataset_batch_sampler`: round_robin

</details>

### Training Logs
| Epoch  | Step | Training Loss |
|:------:|:----:|:-------------:|
| 0.6394 | 500  | 0.0179        |
| 1.2788 | 1000 | 0.0124        |
| 1.9182 | 1500 | 0.0107        |
| 2.5575 | 2000 | 0.0092        |
| 3.1969 | 2500 | 0.0086        |
| 3.8363 | 3000 | 0.0078        |
| 4.4757 | 3500 | 0.0073        |

### Framework Versions
- Python: 3.13.2
- Sentence Transformers: 4.0.2
- Transformers: 4.50.0.dev0
- PyTorch: 2.6.0
- Accelerate: 1.5.2
- Datasets: 3.5.0
- Tokenizers: 0.21.1

## Citation
### BibTeX

#### Sentence Transformers
```bibtex
@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}
```

<!--
## Glossary

*Clearly define terms in order to be accessible across audiences.*
-->

<!--
## Model Card Authors

*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
-->

<!--
## Model Card Contact

*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
-->
config.json
ADDED
@@ -0,0 +1,25 @@
{
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 384,
  "initializer_range": 0.02,
  "intermediate_size": 1536,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.50.0.dev0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}
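As a small sanity-check sketch (not part of the added file), the backbone described by this config can be inspected with transformers; the "." path is a placeholder for a local download of this repository.

```python
from transformers import AutoConfig

# "." is a placeholder for a local clone of this repository.
config = AutoConfig.from_pretrained(".")

# The values mirror config.json: a 6-layer BERT encoder with 384-dimensional
# hidden states, matching the pooling module's word_embedding_dimension.
assert config.model_type == "bert"
assert config.hidden_size == 384
assert config.num_hidden_layers == 6
```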
config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
{
  "__version__": {
    "sentence_transformers": "4.0.2",
    "transformers": "4.50.0.dev0",
    "pytorch": "2.6.0"
  },
  "prompts": {},
  "default_prompt_name": null,
  "similarity_fn_name": "cosine"
}
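Because `similarity_fn_name` is set to `cosine`, the convenience `similarity` method compares embeddings with cosine similarity. A minimal sketch, again with "." as a placeholder local path and made-up example sentences:

```python
from sentence_transformers import SentenceTransformer

# "." is a placeholder for a local copy of this repository.
model = SentenceTransformer(".")
embeddings = model.encode([
    "An article on behavioral reinforcement learning.",
    "A paper about prefrontal cortex dynamics.",
])

# similarity() follows similarity_fn_name from this config, i.e. cosine similarity.
print(model.similarity(embeddings, embeddings))
```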
model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:18a8507cd33f1a7f86c4f9d6179c8694bde6dda77d02482ab21ef0515465fc4b
size 90864192
modules.json
ADDED
@@ -0,0 +1,20 @@
[
  {
    "idx": 0,
    "name": "0",
    "path": "",
    "type": "sentence_transformers.models.Transformer"
  },
  {
    "idx": 1,
    "name": "1",
    "path": "1_Pooling",
    "type": "sentence_transformers.models.Pooling"
  },
  {
    "idx": 2,
    "name": "2",
    "path": "2_Normalize",
    "type": "sentence_transformers.models.Normalize"
  }
]
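modules.json describes a three-stage pipeline (Transformer, then Pooling, then Normalize). The sketch below rebuilds an equivalent pipeline by hand from a placeholder base transformer; in practice SentenceTransformer simply loads these modules from the paths listed above.

```python
from sentence_transformers import SentenceTransformer, models

# Placeholder base checkpoint; the repository itself ships the trained weights.
word_embeddings = models.Transformer(
    "sentence-transformers/all-MiniLM-L6-v2", max_seq_length=256
)
pooling = models.Pooling(
    word_embeddings.get_word_embedding_dimension(), pooling_mode="mean"
)
normalize = models.Normalize()

# Same module order as modules.json: 0 Transformer, 1_Pooling, 2_Normalize.
model = SentenceTransformer(modules=[word_embeddings, pooling, normalize])
```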
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
{
  "max_seq_length": 256,
  "do_lower_case": false
}
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
{
  "cls_token": {
    "content": "[CLS]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "mask_token": {
    "content": "[MASK]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "[PAD]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "sep_token": {
    "content": "[SEP]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "[UNK]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
tokenizer_config.json
ADDED
@@ -0,0 +1,65 @@
{
  "added_tokens_decoder": {
    "0": {
      "content": "[PAD]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "100": {
      "content": "[UNK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "101": {
      "content": "[CLS]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "102": {
      "content": "[SEP]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "103": {
      "content": "[MASK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "clean_up_tokenization_spaces": false,
  "cls_token": "[CLS]",
  "do_basic_tokenize": true,
  "do_lower_case": true,
  "extra_special_tokens": {},
  "mask_token": "[MASK]",
  "max_length": 128,
  "model_max_length": 256,
  "never_split": null,
  "pad_to_multiple_of": null,
  "pad_token": "[PAD]",
  "pad_token_type_id": 0,
  "padding_side": "right",
  "sep_token": "[SEP]",
  "stride": 0,
  "strip_accents": null,
  "tokenize_chinese_chars": true,
  "tokenizer_class": "BertTokenizer",
  "truncation_side": "right",
  "truncation_strategy": "longest_first",
  "unk_token": "[UNK]"
}
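A brief sketch of the tokenizer behaviour implied by this configuration (lower-casing and truncation at 256 tokens); the "." path is again a placeholder for a local copy of the repository, and the example sentence is invented.

```python
from transformers import AutoTokenizer

# "." is a placeholder for a local copy of this repository.
tokenizer = AutoTokenizer.from_pretrained(".")

# do_lower_case is true and model_max_length is 256, so inputs are lower-cased
# and can be truncated to at most 256 tokens.
encoded = tokenizer(
    "An Example Sentence About Reinforcement Learning",
    truncation=True,
    max_length=256,
)
print(tokenizer.convert_ids_to_tokens(encoded["input_ids"]))
```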
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff