Upload fine-tuned model
Browse files- 1_Pooling/config.json +10 -0
- README.md +665 -0
- config.json +25 -0
- config_sentence_transformers.json +10 -0
- model.safetensors +3 -0
- modules.json +20 -0
- optimizer.pt +3 -0
- rng_state.pth +3 -0
- scheduler.pt +3 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +65 -0
- trainer_state.json +433 -0
- training_args.bin +3 -0
- vocab.txt +0 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 384,
|
3 |
+
"pooling_mode_cls_token": false,
|
4 |
+
"pooling_mode_mean_tokens": true,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": false,
|
9 |
+
"include_prompt": true
|
10 |
+
}
|
README.md
ADDED
@@ -0,0 +1,665 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
tags:
|
3 |
+
- sentence-transformers
|
4 |
+
- sentence-similarity
|
5 |
+
- feature-extraction
|
6 |
+
- generated_from_trainer
|
7 |
+
- dataset_size:2752
|
8 |
+
- loss:TripletLoss
|
9 |
+
base_model: sentence-transformers/all-MiniLM-L12-v2
|
10 |
+
widget:
|
11 |
+
- source_sentence: The First Trust Financials AlphaDEX ETF (FXO) employs a strategic
|
12 |
+
management approach aimed at delivering investment results that align with the
|
13 |
+
StrataQuant® Financials Index. The ETF focuses primarily on large- and mid-cap
|
14 |
+
U.S. financial stocks, investing at least 90% of its net assets in securities
|
15 |
+
derived from the Russell 1000® Index. Utilizing the AlphaDEX® selection methodology,
|
16 |
+
FXO identifies and targets stocks poised to generate positive alpha by applying
|
17 |
+
a multi-factor, quantitative model. This model assesses potential outperformers
|
18 |
+
on a risk-adjusted basis, which facilitates the selection of securities that are
|
19 |
+
then tiered and equal-weighted, leading to a mid-cap bias and occasional tilts
|
20 |
+
toward non-financial sectors. The ETF undergoes a reconstitution and rebalancing
|
21 |
+
process on a quarterly basis, with the objective of outperforming traditional
|
22 |
+
passive indices, thereby enhancing returns for investors while maintaining a focus
|
23 |
+
on the financial sector.
|
24 |
+
sentences:
|
25 |
+
- The SPDR S&P Software & Services ETF (XSW) employs a strategic management approach
|
26 |
+
aimed at closely tracking the performance of the S&P Software & Services Select
|
27 |
+
Industry Index. By utilizing a sampling strategy, XSW invests a minimum of 80%
|
28 |
+
of its total assets in securities that fall within this index, which represents
|
29 |
+
a focused segment of the broader S&P Total Market Index, specifically targeting
|
30 |
+
the software and services sectors. To address the concentration risks often associated
|
31 |
+
with large-cap companies in the software industry, XSW adopts an equal-weighted
|
32 |
+
methodology. This approach mitigates the influence of larger firms and allows
|
33 |
+
for greater exposure to smaller, growth-oriented companies. Consequently, the
|
34 |
+
ETF encompasses a diverse array of software and services firms, with a particular
|
35 |
+
emphasis on the services sector. The index undergoes quarterly rebalancing, ensuring
|
36 |
+
that the portfolio remains diversified and aligned with its investment objectives,
|
37 |
+
thereby providing investors with a balanced exposure to this dynamic industry.
|
38 |
+
- The Direxion Energy Bull 2X Shares (ERX) ETF is strategically designed to provide
|
39 |
+
investors with 200% of the daily performance of the S&P Energy Select Sector Index.
|
40 |
+
This index encompasses large-cap U.S. energy companies, focusing on sectors such
|
41 |
+
as oil, gas, consumable fuels, and energy equipment and services. To achieve its
|
42 |
+
leveraged exposure, the fund allocates at least 80% of its net assets into financial
|
43 |
+
instruments like swap agreements and securities that directly track the performance
|
44 |
+
of the index. As a non-diversified and market-cap-weighted fund, ERX is concentrated
|
45 |
+
in a limited number of dominant firms within the energy sector. The ETF is primarily
|
46 |
+
intended for short-term trading, as it rebalances daily to maintain its leverage.
|
47 |
+
Investors should be aware that the returns of ERX can be volatile and unpredictable
|
48 |
+
over longer time frames due to factors like compounding and path dependency, making
|
49 |
+
it suitable for those with a high risk tolerance and a short investment horizon.
|
50 |
+
- The Direxion Financial Bull 3X Shares ETF (FAS) is strategically designed to deliver
|
51 |
+
300% of the daily performance of the Financials Select Sector Index, utilizing
|
52 |
+
a 3x leveraged exposure framework. This ETF is managed with a focus on short-term
|
53 |
+
tactical opportunities, employing daily rebalancing to align with the index's
|
54 |
+
movements. FAS allocates at least 80% of its net assets in a range of financial
|
55 |
+
instruments, including swap agreements, direct securities of the index, and ETFs
|
56 |
+
that mirror the index's composition. The targeted sectors encompass a broad spectrum
|
57 |
+
of the financial industry, such as financial services, insurance, banking, capital
|
58 |
+
markets, mortgage real estate investment trusts (REITs), and consumer finance.
|
59 |
+
Given its non-diversified nature and reliance on leverage, FAS is primarily suitable
|
60 |
+
for investors seeking short-term gains and is not recommended for long-term holding
|
61 |
+
due to the potential compounding effects and path dependency associated with leveraged
|
62 |
+
investments.
|
63 |
+
- source_sentence: The ProShares Big Data Refiners ETF (DAT) aims to track the performance
|
64 |
+
of the FactSet Big Data Refiners Index, focusing on global companies involved
|
65 |
+
in managing, storing, using, and analyzing large structured and unstructured datasets.
|
66 |
+
The fund invests at least 80% of its assets in index components or similar instruments,
|
67 |
+
targeting companies that derive at least 75% of their revenue from big data activities,
|
68 |
+
with adjustments if fewer than 25 companies meet this threshold. It employs a
|
69 |
+
market-cap-weighted approach, capping individual securities at 4.5%, and includes
|
70 |
+
firms from developed and emerging markets with a minimum market cap of $500 million
|
71 |
+
and a three-month average daily trading value of at least $1 million. The index
|
72 |
+
is reconstituted and rebalanced semiannually in June and December, and the fund
|
73 |
+
is non-diversified.
|
74 |
+
sentences:
|
75 |
+
- The Invesco S&P SmallCap Information Technology ETF (PSCT) is designed to replicate
|
76 |
+
the investment performance of the S&P SmallCap 600 Capped Information Technology
|
77 |
+
Index, allocating a minimum of 90% of its total assets to the securities within
|
78 |
+
this index. This index, curated by S&P Dow Jones Indices, evaluates the performance
|
79 |
+
of U.S. small-cap firms in the information technology sector, as categorized by
|
80 |
+
the Global Industry Classification Standard. PSCT provides focused exposure to
|
81 |
+
small-cap technology companies across various industries, including computer hardware,
|
82 |
+
software, internet services, electronics, semiconductors, and communication technologies.
|
83 |
+
The fund employs a market-cap-weighted approach, with individual security weights
|
84 |
+
capped at 22.5% and the total weight of securities exceeding 4.5% limited to 45%
|
85 |
+
of the portfolio. To preserve its focus on size, liquidity, and financial viability,
|
86 |
+
the index is rebalanced quarterly, ensuring an adaptive investment strategy that
|
87 |
+
aligns with evolving market conditions.
|
88 |
+
- 'The ALPS Active REIT ETF (ticker: REIT) is a type of investment fund that aims
|
89 |
+
to make money through both income from dividends and increases in the value of
|
90 |
+
its investments. It primarily invests at least 80% of its money in stocks of U.S.
|
91 |
+
Real Estate Investment Trusts (REITs), which are companies that own and manage
|
92 |
+
real estate properties. The fund mainly focuses on common stocks of these REITs
|
93 |
+
but also puts some money into other types of real estate-related stocks, like
|
94 |
+
preferred stocks and companies that operate in real estate. The fund''s managers
|
95 |
+
use a special method to assess the true value of the properties and the REITs
|
96 |
+
to make informed investment choices. It''s important to note that this ETF is
|
97 |
+
non-diversified, meaning it doesn''t spread its investments across many different
|
98 |
+
areas. Additionally, it changed its structure to a more transparent format on
|
99 |
+
August 22, 2023.'
|
100 |
+
- The First Trust Amex Biotech Index ETF (FBT) aims to replicate the performance
|
101 |
+
of the NYSE Arca Biotechnology Index by investing at least 90% of its net assets
|
102 |
+
in the index's securities. This equal-dollar weighted index comprises 30 leading
|
103 |
+
biotechnology companies, offering exposure to firms involved in biological processes
|
104 |
+
for product development and services. FBT's portfolio, reconstituted and rebalanced
|
105 |
+
quarterly, provides a concentrated yet broad exposure to the biotech sector, potentially
|
106 |
+
including pharmaceuticals and medical technology. The ETF's strategy ensures a
|
107 |
+
diversified investment in the dynamic biotech industry, reflecting both price
|
108 |
+
and yield movements before fees and expenses.
|
109 |
+
- source_sentence: The First Trust Utilities AlphaDEX ETF (FXU) seeks to achieve investment
|
110 |
+
results that correspond to the StrataQuant® Utilities Index, focusing on large-
|
111 |
+
and mid-cap utility firms in the US. The fund invests at least 90% of its net
|
112 |
+
assets in securities from the index, which is a modified equal-dollar weighted
|
113 |
+
index derived from the Russell 1000® Index. FXU employs the AlphaDEX® selection
|
114 |
+
methodology, using a quant-based model to select stocks based on growth and value
|
115 |
+
metrics, aiming to generate positive alpha. This smart beta approach results in
|
116 |
+
a portfolio with a significant tilt toward mid-caps and includes a notable allocation
|
117 |
+
to telecom companies. The index is reconstituted and rebalanced quarterly, offering
|
118 |
+
a strategic alternative to traditional market-like sector exposure.
|
119 |
+
sentences:
|
120 |
+
- The Goldman Sachs Future Consumer Equity ETF (GBUY) is an actively managed investment
|
121 |
+
vehicle aimed at delivering long-term capital appreciation by allocating a minimum
|
122 |
+
of 80% of its net assets to equity securities of both U.S. and international companies.
|
123 |
+
This ETF strategically targets global equities that resonate with the evolving
|
124 |
+
preferences and spending patterns of younger consumers, with a strong emphasis
|
125 |
+
on key themes such as technology adoption and lifestyle choices. GBUY utilizes
|
126 |
+
a fundamental investment approach, where the adviser plays a pivotal role in identifying
|
127 |
+
companies with robust growth potential and attractive valuations, without limitations
|
128 |
+
on market capitalization or geographic location. As a non-diversified fund, GBUY
|
129 |
+
possesses the flexibility to adjust its thematic investments over time, ensuring
|
130 |
+
responsiveness to the ever-changing landscape of consumer trends. This dynamic
|
131 |
+
approach allows investors to gain exposure to innovative sectors that are shaping
|
132 |
+
the future of consumer behavior.
|
133 |
+
- The Fidelity MSCI Utilities Index ETF (FUTY) is strategically designed to mirror
|
134 |
+
the performance of the MSCI USA IMI Utilities 25/50 Index, which encompasses the
|
135 |
+
U.S. utilities sector. The management strategy emphasizes a market-cap-weighted
|
136 |
+
approach, directing at least 80% of the fund's assets into securities that align
|
137 |
+
with this index. While the ETF may not replicate every security within the index,
|
138 |
+
it adheres to strict diversification guidelines mandated by the U.S. Internal
|
139 |
+
Revenue Code. This includes a limit where no single issuer exceeds 25% of the
|
140 |
+
fund's assets and the combined weight of issuers over 5% is capped at 50%. By
|
141 |
+
focusing exclusively on the utilities sector, FUTY targets companies involved
|
142 |
+
in essential services such as electric, gas, and water utilities, as well as renewable
|
143 |
+
energy providers. This sector concentration allows for a nuanced investment strategy
|
144 |
+
that can capitalize on the specific dynamics of the utilities market. FUTY competes
|
145 |
+
with similar offerings, such as Vanguard's VPU, providing investors with liquidity
|
146 |
+
and the potential for modest trading spreads.
|
147 |
+
- The Global X U.S. Infrastructure Development ETF (PAVE) aims to replicate the
|
148 |
+
performance of the Indxx U.S. Infrastructure Development Index by allocating a
|
149 |
+
minimum of 80% of its assets to the index's underlying securities. This market-cap-weighted
|
150 |
+
index targets U.S.-listed companies that generate over 50% of their revenue from
|
151 |
+
domestic infrastructure development. PAVE encompasses a diverse range of sectors,
|
152 |
+
including construction, engineering, raw materials production, industrial transportation,
|
153 |
+
and heavy construction equipment, while deliberately excluding Master Limited
|
154 |
+
Partnerships (MLPs), Real Estate Investment Trusts (REITs), and Business Development
|
155 |
+
Companies (BDCs). The ETF employs a strategy of diversification through annual
|
156 |
+
reconstitution and rebalancing, maintaining a single security cap of 3% and a
|
157 |
+
minimum allocation of 0.3%. This approach ensures exposure to a balanced mix of
|
158 |
+
large-, mid-, and small-cap companies, aligning with key investment themes in
|
159 |
+
the U.S. infrastructure landscape.
|
160 |
+
- source_sentence: The First Trust Nasdaq Transportation ETF (FTXR) seeks to replicate
|
161 |
+
the performance of the Nasdaq US Smart Transportation TM Index by allocating a
|
162 |
+
minimum of 90% of its net assets to the securities within the index. This non-diversified
|
163 |
+
fund strategically targets 30 U.S. transportation companies, carefully selected
|
164 |
+
for their liquidity and ranked based on key criteria such as growth, value, and
|
165 |
+
volatility. The ETF encompasses a diverse range of sectors within transportation,
|
166 |
+
including delivery, shipping, railroads, trucking, and airlines. The weighting
|
167 |
+
of each stock in the portfolio is based on its growth potential, value proposition,
|
168 |
+
and historical price stability, ensuring that no single investment exceeds 8%
|
169 |
+
of total holdings. To maintain its strategic alignment, the index is reconstituted
|
170 |
+
annually and rebalanced quarterly, reinforcing FTXR's focus on capturing essential
|
171 |
+
trends in the transportation sector.
|
172 |
+
sentences:
|
173 |
+
- 'The WisdomTree Trust WisdomTree Bat ETF (WBAT) utilizes a passive management
|
174 |
+
approach to replicate the performance of the WisdomTree Battery Value Chain and
|
175 |
+
Innovation Index. This index provides comprehensive global exposure to firms primarily
|
176 |
+
engaged in battery and energy storage solutions (BESS) and related innovations.
|
177 |
+
The ETF strategically targets four critical sectors of the value chain: raw materials,
|
178 |
+
manufacturing, enabling technologies, and emerging innovations. To qualify for
|
179 |
+
inclusion, companies must generate at least 50% of their revenue from these areas
|
180 |
+
or from innovative activities. The index employs a multi-factor methodology, assessing
|
181 |
+
companies based on their level of involvement in the sector and a composite risk
|
182 |
+
score, while imposing a 3.5% cap on individual issuers to mitigate concentration
|
183 |
+
risk. As a non-diversified fund, WBAT rebalances semi-annually, ensuring its alignment
|
184 |
+
with the index''s tier-weighted framework.'
|
185 |
+
- "The Invesco Pharmaceuticals ETF (PJP) is an investment fund that focuses on U.S.\
|
186 |
+
\ pharmaceutical companies. These are businesses involved in making and selling\
|
187 |
+
\ medications. The goal of the ETF is to follow the performance of a specific\
|
188 |
+
\ index that tracks these pharmaceutical companies. \n\nThe fund puts at least\
|
189 |
+
\ 90% of its money into stocks from this index, which includes around 30 companies.\
|
190 |
+
\ To choose which stocks to invest in, it uses a special method that looks at\
|
191 |
+
\ factors like how well a stock's price is doing, how companies are performing\
|
192 |
+
\ financially, and their overall value. This approach often favors smaller and\
|
193 |
+
\ mid-sized companies rather than very large ones, which helps spread out the\
|
194 |
+
\ risk.\n\nThe ETF is re-evaluated and adjusted every few months (in February,\
|
195 |
+
\ May, August, and November) to keep it aligned with the index. It is considered\
|
196 |
+
\ non-diversified, meaning it focuses on a specific area rather than a wide range\
|
197 |
+
\ of sectors. Before August 28, 2023, this ETF was called the Invesco Dynamic\
|
198 |
+
\ Pharmaceuticals ETF."
|
199 |
+
- The Vanguard Real Estate ETF (VNQ) employs a strategic management approach aimed
|
200 |
+
at generating substantial income and moderate long-term capital appreciation by
|
201 |
+
closely tracking the MSCI US Investable Market Real Estate 25/50 Index. This index
|
202 |
+
encompasses a diverse range of publicly traded equity Real Estate Investment Trusts
|
203 |
+
(REITs) and other real estate-related entities within the United States. VNQ's
|
204 |
+
investment strategy involves allocating nearly all of its assets to the stocks
|
205 |
+
that comprise the index, meticulously maintaining each stock's proportional weighting
|
206 |
+
to ensure alignment with index performance. The fund primarily targets the commercial
|
207 |
+
REIT sector, displaying a notable bias toward this area over specialized REITs,
|
208 |
+
which allows for focused exposure to income-generating properties such as office
|
209 |
+
buildings, retail spaces, and industrial facilities. Despite the minor inconvenience
|
210 |
+
of monthly holdings disclosure, VNQ is recognized for its efficient management
|
211 |
+
practices, often resulting in actual costs that fall below its stated expense
|
212 |
+
ratio. It is important to note that distributions from the fund are taxed as ordinary
|
213 |
+
income, consistent with typical REIT investment structures.
|
214 |
+
- source_sentence: The KraneShares Emerging Markets Consumer Technology ETF (KEMQ)
|
215 |
+
aims to track the Solactive Emerging Market Consumer Technology Index, investing
|
216 |
+
at least 80% of its net assets in instruments within or similar to its underlying
|
217 |
+
index. This index comprises the equity securities of the 50 largest companies
|
218 |
+
by market capitalization, primarily from emerging and frontier markets, focusing
|
219 |
+
on the consumer and technology sectors. KEMQ offers concentrated exposure to emerging
|
220 |
+
market tech companies, selected by a committee and tier-weighted based on market
|
221 |
+
cap. The largest 10 securities are weighted at 3.5% each, the next 20 at 2.5%
|
222 |
+
each, and the remaining 20 at 0.75% each. The index is reviewed and adjusted quarterly
|
223 |
+
to ensure it reflects the most relevant market opportunities.
|
224 |
+
sentences:
|
225 |
+
- The First Trust Consumer Discretionary AlphaDEX® ETF (FXD) is designed to outperform
|
226 |
+
the US consumer discretionary sector by tracking the StrataQuant® Consumer Discretionary
|
227 |
+
Index. This index is a modified equal-dollar weighted benchmark that selects stocks
|
228 |
+
from the Russell 1000® using the innovative AlphaDEX® methodology. This approach
|
229 |
+
incorporates both value and growth criteria to identify stocks with the potential
|
230 |
+
for positive alpha. FXD strategically invests at least 90% of its net assets in
|
231 |
+
these selected securities, resulting in notable mid-cap exposure and distinct
|
232 |
+
industry tilts that differentiate it from traditional sector-focused investments.
|
233 |
+
The fund employs a quasi-active selection process, reconstituted and rebalanced
|
234 |
+
on a quarterly basis, making it an appealing choice for investors seeking higher
|
235 |
+
returns rather than mere sector replication.
|
236 |
+
- "The Invesco S&P 500 Equal Weight Health Care ETF (RSPH) is an investment fund\
|
237 |
+
\ that aims to match the performance of a specific group of health care companies\
|
238 |
+
\ in the S&P 500. This ETF puts most of its money—at least 90%—into stocks of\
|
239 |
+
\ these health care companies. The goal is to give investors a way to invest in\
|
240 |
+
\ the health care sector, which includes everything from pharmaceuticals to medical\
|
241 |
+
\ devices. \n\nWhat makes this ETF special is its equal weight strategy. This\
|
242 |
+
\ means that each company in the fund has the same importance in the performance\
|
243 |
+
\ of the ETF, regardless of how big or small it is. This approach helps to spread\
|
244 |
+
\ risk, as it prevents any one company from having too much influence on how the\
|
245 |
+
\ ETF performs. Overall, RSPH offers a balanced way to invest in health care stocks\
|
246 |
+
\ without being overly dependent on a few large companies."
|
247 |
+
- The SPDR S&P Global Infrastructure ETF (GII) employs a strategic management approach
|
248 |
+
aimed at closely tracking the S&P Global Infrastructure Index. To achieve this,
|
249 |
+
the ETF allocates a minimum of 80% of its assets to the securities included in
|
250 |
+
the index and their related depositary receipts. The index comprises 75 of the
|
251 |
+
largest publicly listed infrastructure companies worldwide, selected based on
|
252 |
+
stringent investability criteria. GII specifically targets firms within the energy,
|
253 |
+
transportation, and utility sectors, maintaining a diversified portfolio with
|
254 |
+
a composition of 30 transportation companies, 30 utility companies, and 15 energy
|
255 |
+
companies. To enhance diversification and mitigate concentration risk, sector
|
256 |
+
weights are capped at 40% for transportation and utilities, and 20% for energy.
|
257 |
+
Furthermore, the fund limits the weight of any single security to a maximum of
|
258 |
+
5%. Within each sector, stocks are weighted according to market capitalization.
|
259 |
+
GII undergoes substantial adjustments during its semi-annual rebalancing, ensuring
|
260 |
+
alignment with the evolving market landscape while adhering to its investment
|
261 |
+
strategy.
|
262 |
+
datasets:
|
263 |
+
- suhwan3/stage1_v1
|
264 |
+
pipeline_tag: sentence-similarity
|
265 |
+
library_name: sentence-transformers
|
266 |
+
---
|
267 |
+
|
268 |
+
# SentenceTransformer based on sentence-transformers/all-MiniLM-L12-v2
|
269 |
+
|
270 |
+
This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/all-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2) on the [stage1_v1](https://huggingface.co/datasets/suhwan3/stage1_v1) dataset. It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
271 |
+
|
272 |
+
## Model Details
|
273 |
+
|
274 |
+
### Model Description
|
275 |
+
- **Model Type:** Sentence Transformer
|
276 |
+
- **Base model:** [sentence-transformers/all-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2) <!-- at revision c004d8e3e901237d8fa7e9fff12774962e391ce5 -->
|
277 |
+
- **Maximum Sequence Length:** 128 tokens
|
278 |
+
- **Output Dimensionality:** 384 dimensions
|
279 |
+
- **Similarity Function:** Cosine Similarity
|
280 |
+
- **Training Dataset:**
|
281 |
+
- [stage1_v1](https://huggingface.co/datasets/suhwan3/stage1_v1)
|
282 |
+
<!-- - **Language:** Unknown -->
|
283 |
+
<!-- - **License:** Unknown -->
|
284 |
+
|
285 |
+
### Model Sources
|
286 |
+
|
287 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
288 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
289 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
290 |
+
|
291 |
+
### Full Model Architecture
|
292 |
+
|
293 |
+
```
|
294 |
+
SentenceTransformer(
|
295 |
+
(0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel
|
296 |
+
(1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
297 |
+
(2): Normalize()
|
298 |
+
)
|
299 |
+
```
|
300 |
+
|
301 |
+
## Usage
|
302 |
+
|
303 |
+
### Direct Usage (Sentence Transformers)
|
304 |
+
|
305 |
+
First install the Sentence Transformers library:
|
306 |
+
|
307 |
+
```bash
|
308 |
+
pip install -U sentence-transformers
|
309 |
+
```
|
310 |
+
|
311 |
+
Then you can load this model and run inference.
|
312 |
+
```python
|
313 |
+
from sentence_transformers import SentenceTransformer
|
314 |
+
|
315 |
+
# Download from the 🤗 Hub
|
316 |
+
model = SentenceTransformer("sentence_transformers_model_id")
|
317 |
+
# Run inference
|
318 |
+
sentences = [
|
319 |
+
'The KraneShares Emerging Markets Consumer Technology ETF (KEMQ) aims to track the Solactive Emerging Market Consumer Technology Index, investing at least 80% of its net assets in instruments within or similar to its underlying index. This index comprises the equity securities of the 50 largest companies by market capitalization, primarily from emerging and frontier markets, focusing on the consumer and technology sectors. KEMQ offers concentrated exposure to emerging market tech companies, selected by a committee and tier-weighted based on market cap. The largest 10 securities are weighted at 3.5% each, the next 20 at 2.5% each, and the remaining 20 at 0.75% each. The index is reviewed and adjusted quarterly to ensure it reflects the most relevant market opportunities.',
|
320 |
+
'The First Trust Consumer Discretionary AlphaDEX® ETF (FXD) is designed to outperform the US consumer discretionary sector by tracking the StrataQuant® Consumer Discretionary Index. This index is a modified equal-dollar weighted benchmark that selects stocks from the Russell 1000® using the innovative AlphaDEX® methodology. This approach incorporates both value and growth criteria to identify stocks with the potential for positive alpha. FXD strategically invests at least 90% of its net assets in these selected securities, resulting in notable mid-cap exposure and distinct industry tilts that differentiate it from traditional sector-focused investments. The fund employs a quasi-active selection process, reconstituted and rebalanced on a quarterly basis, making it an appealing choice for investors seeking higher returns rather than mere sector replication.',
|
321 |
+
'The SPDR S&P Global Infrastructure ETF (GII) employs a strategic management approach aimed at closely tracking the S&P Global Infrastructure Index. To achieve this, the ETF allocates a minimum of 80% of its assets to the securities included in the index and their related depositary receipts. The index comprises 75 of the largest publicly listed infrastructure companies worldwide, selected based on stringent investability criteria. GII specifically targets firms within the energy, transportation, and utility sectors, maintaining a diversified portfolio with a composition of 30 transportation companies, 30 utility companies, and 15 energy companies. To enhance diversification and mitigate concentration risk, sector weights are capped at 40% for transportation and utilities, and 20% for energy. Furthermore, the fund limits the weight of any single security to a maximum of 5%. Within each sector, stocks are weighted according to market capitalization. GII undergoes substantial adjustments during its semi-annual rebalancing, ensuring alignment with the evolving market landscape while adhering to its investment strategy.',
|
322 |
+
]
|
323 |
+
embeddings = model.encode(sentences)
|
324 |
+
print(embeddings.shape)
|
325 |
+
# [3, 384]
|
326 |
+
|
327 |
+
# Get the similarity scores for the embeddings
|
328 |
+
similarities = model.similarity(embeddings, embeddings)
|
329 |
+
print(similarities.shape)
|
330 |
+
# [3, 3]
|
331 |
+
```
|
332 |
+
|
333 |
+
<!--
|
334 |
+
### Direct Usage (Transformers)
|
335 |
+
|
336 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
337 |
+
|
338 |
+
</details>
|
339 |
+
-->
|
340 |
+
|
341 |
+
<!--
|
342 |
+
### Downstream Usage (Sentence Transformers)
|
343 |
+
|
344 |
+
You can finetune this model on your own dataset.
|
345 |
+
|
346 |
+
<details><summary>Click to expand</summary>
|
347 |
+
|
348 |
+
</details>
|
349 |
+
-->
|
350 |
+
|
351 |
+
<!--
|
352 |
+
### Out-of-Scope Use
|
353 |
+
|
354 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
355 |
+
-->
|
356 |
+
|
357 |
+
<!--
|
358 |
+
## Bias, Risks and Limitations
|
359 |
+
|
360 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
361 |
+
-->
|
362 |
+
|
363 |
+
<!--
|
364 |
+
### Recommendations
|
365 |
+
|
366 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
367 |
+
-->
|
368 |
+
|
369 |
+
## Training Details
|
370 |
+
|
371 |
+
### Training Dataset
|
372 |
+
|
373 |
+
#### stage1_v1
|
374 |
+
|
375 |
+
* Dataset: [stage1_v1](https://huggingface.co/datasets/suhwan3/stage1_v1) at [9be9e9c](https://huggingface.co/datasets/suhwan3/stage1_v1/tree/9be9e9c7f0032b5abd62092b14f9501b7aac58c8)
|
376 |
+
* Size: 2,752 training samples
|
377 |
+
* Columns: <code>query</code>, <code>positive</code>, and <code>negative</code>
|
378 |
+
* Approximate statistics based on the first 1000 samples:
|
379 |
+
| | query | positive | negative |
|
380 |
+
|:--------|:-------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
|
381 |
+
| type | string | string | string |
|
382 |
+
| details | <ul><li>min: 123 tokens</li><li>mean: 128.0 tokens</li><li>max: 128 tokens</li></ul> | <ul><li>min: 123 tokens</li><li>mean: 128.0 tokens</li><li>max: 128 tokens</li></ul> | <ul><li>min: 128 tokens</li><li>mean: 128.0 tokens</li><li>max: 128 tokens</li></ul> |
|
383 |
+
* Samples:
|
384 |
+
| query | positive | negative |
|
385 |
+
|:------|:------|:------|
|
386 |
+
| <code>The Global X Aging Population ETF (AGNG) is a fund designed to invest in companies that benefit from the growing number of older people in the world. It focuses on businesses in developed countries that help improve and extend the lives of seniors. This includes companies that work in areas like biotechnology, medical devices, pharmaceuticals, senior living facilities, and healthcare services. The fund aims to support the aging population trend by investing over 80% of its money in these sectors. <br><br>AGNG uses a special method to choose its investments, looking at a variety of businesses, including those in insurance and consumer products. The fund is updated once a year to make sure it stays balanced and diverse, meaning it spreads its investments across different kinds of companies. Before April 2021, it was called the Global X Longevity Thematic ETF and went by the ticker LNGR. This ETF is a way for investors to tap into the growing market of services and products for seniors.</code> | <code>The Amplify High Income ETF (YYY) is a fund of funds that aims to replicate the performance of the ISE High Income™ Index by investing at least 80% of its net assets in securities of the index. This index comprises the top 60 U.S. exchange-listed closed-end funds (CEFs), selected and weighted based on yield, discount to NAV, and trading volume. YYY typically holds about 30 CEFs, with a maximum weight of 4.25% per fund at rebalance, and can include funds across major asset classes. The ETF's strategy focuses on acquiring discounted CEFs with high yields and sufficient liquidity to minimize trading costs. YYY's fee structure includes the expenses of its constituent funds. 
The fund was reorganized under Amplify ETFs in 2019, maintaining its investment objectives and index.</code> | <code>The iShares Copper and Metals Mining ETF (ICOP) is strategically designed to replicate the performance of the STOXX Global Copper and Metals Mining Index, concentrating on equities from both U.S. and international companies primarily involved in copper and metal ore extraction. The fund commits at least 80% of its assets to the index's component securities, allowing for up to 20% allocation to derivatives such as futures, options, and swaps, as well as cash and equivalents. ICOP employs a market-capitalization weighted strategy, categorizing companies into three tiers based on their revenue exposure to copper mining: Tier 1 encompasses firms with over 50% revenue from copper, Tier 2 includes those with 25-50%, and Tier 3 comprises companies determined by market share. The index undergoes quarterly rebalancing, implementing caps of 8% on individual holdings and limiting those exceeding 4.5% to a combined weight of 45%. This non-diversified fund provides concentrated exposure specificall...</code> |
|
387 |
+
| <code>The Global X Aging Population ETF (AGNG) seeks to track the performance of the Indxx Aging Population Thematic Index, investing over 80% of its assets in securities from developed markets that support the demographic trend of longer life spans. The fund targets companies involved in biotechnology, medical devices, pharmaceuticals, senior living facilities, and specialized healthcare services, focusing on enhancing and extending the lives of senior citizens. AGNG employs a proprietary research and analysis process, crossing traditional sector lines to include diverse businesses such as insurance and consumer products. The ETF is reconstituted and rebalanced annually, using a modified market-cap weighting with specific caps and floors to ensure diversification. Prior to April 2021, it was known as the Global X Longevity Thematic ETF under the ticker LNGR.</code> | <code>The iShares Biotechnology ETF (IBB) aims to track the performance of the NYSE Biotechnology Index, which comprises U.S.-listed biotechnology companies. These companies are involved in the research and development of therapeutic treatments and the production of tools or systems for biotechnology processes, excluding those focused on mass pharmaceutical production. IBB invests at least 80% of its assets in the index's component securities and up to 20% in futures, options, swap contracts, cash, and equivalents. The fund employs a modified market-cap-weighted methodology, capping the five largest constituents at 8% and others at 4%. It is non-diversified, rebalances quarterly, and fully reconstitutes annually in December. Prior to June 21, 2021, it was known as the iShares Nasdaq Biotechnology ETF.</code> | <code>The Invesco Global Clean Energy ETF (PBD) is designed to track the WilderHill New Energy Global Innovation Index, dedicating a minimum of 90% of its assets to securities within this index, which includes American Depositary Receipts (ADRs) and Global Depositary Receipts (GDRs). 
The index predominantly features companies committed to **clean energy technologies**, **conservation**, **efficiency**, and the **advancement of renewable energy**. While PBD is passively managed, it employs a strategy akin to active management by focusing on companies with significant **capital appreciation potential**, particularly emphasizing **pure-play small- and mid-cap firms**. The fund boasts a **global diversification**, with approximately half of its assets allocated internationally, while maintaining a limit of 5% on its largest holdings. The index undergoes **quarterly rebalancing and reconstitution**, ensuring a dynamic and varied portfolio that reflects the evolving landscape of the clean energy s...</code> |
|
388 |
+
| <code>The Global X Aging Population ETF (AGNG) is strategically designed to track the performance of the Indxx Aging Population Thematic Index, focusing on the investment potential arising from the global demographic shift towards longer life spans. The ETF allocates over 80% of its assets to securities primarily in developed markets that are aligned with this trend. Target sectors include biotechnology, medical devices, pharmaceuticals, senior living facilities, and specialized healthcare services, all aimed at improving the quality of life for senior citizens. Additionally, AGNG incorporates a broader investment approach by including companies from diverse sectors such as insurance and consumer products, which are relevant to aging populations. The fund employs a proprietary research and analysis methodology that transcends traditional sector boundaries. It is reconstituted and rebalanced annually, utilizing a modified market-cap weighting approach that includes specific caps and floors to...</code> | <code>The iShares U.S. Health Care Providers ETF (IHF) employs a strategy aimed at closely tracking the performance of the Dow Jones U.S. Select Health Care Providers Index. This ETF is managed by investing at least 80% of its assets in the securities of companies that constitute the index, which primarily includes U.S. firms operating within the healthcare services sector. The remaining 20% of the fund's assets may be allocated to various financial instruments such as futures, options, swaps, cash, and cash equivalents to enhance liquidity and manage risk. IHF strategically targets key sectors within the healthcare provider landscape, focusing on managed healthcare, healthcare facilities, and health insurance companies, while deliberately excluding pharmaceutical firms. 
This approach allows IHF to offer cap-weighted exposure tailored to the healthcare provider space, providing investors with a concentrated yet comprehensive investment vehicle that captures the dynamics of health insurance a...</code> | <code>The First Trust Indxx NextG ETF (NXTG) seeks to replicate the performance of the Indxx 5G & NextG Thematic Index by investing at least 90% of its net assets in the index's securities. This index tracks global equities of companies that are significantly investing in the research, development, and application of fifth generation (5G) and next generation digital cellular technologies. NXTG includes mid- and large-cap companies from two main sub-themes: 5G infrastructure & hardware, which encompasses data center REITs, cell tower REITs, equipment manufacturers, network testing and validation equipment, and mobile phone manufacturers; and telecommunication service providers operating cellular and wireless communication networks with 5G access. Prior to May 29, 2019, NXTG was known as the First Trust NASDAQ Smartphone Index Fund (ticker FONE), focusing more broadly on the cellular phone industry.</code> |
|
389 |
+
* Loss: [<code>TripletLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#tripletloss) with these parameters:
|
390 |
+
```json
|
391 |
+
{
|
392 |
+
"distance_metric": "TripletDistanceMetric.COSINE",
|
393 |
+
"triplet_margin": 0.5
|
394 |
+
}
|
395 |
+
```
|
396 |
+
|
397 |
+
### Evaluation Dataset
|
398 |
+
|
399 |
+
#### stage1_v1
|
400 |
+
|
401 |
+
* Dataset: [stage1_v1](https://huggingface.co/datasets/suhwan3/stage1_v1) at [9be9e9c](https://huggingface.co/datasets/suhwan3/stage1_v1/tree/9be9e9c7f0032b5abd62092b14f9501b7aac58c8)
|
402 |
+
* Size: 688 evaluation samples
|
403 |
+
* Columns: <code>query</code>, <code>positive</code>, and <code>negative</code>
|
404 |
+
* Approximate statistics based on the first 688 samples:
|
405 |
+
| | query | positive | negative |
|
406 |
+
|:--------|:--------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------|
|
407 |
+
| type | string | string | string |
|
408 |
+
| details | <ul><li>min: 123 tokens</li><li>mean: 127.99 tokens</li><li>max: 128 tokens</li></ul> | <ul><li>min: 123 tokens</li><li>mean: 127.99 tokens</li><li>max: 128 tokens</li></ul> | <ul><li>min: 120 tokens</li><li>mean: 127.99 tokens</li><li>max: 128 tokens</li></ul> |
|
409 |
+
* Samples:
|
410 |
+
| query | positive | negative |
|
411 |
+
|:------|:------|:------|
|
412 |
+
| <code>The Global X Aging Population ETF (AGNG) aims to replicate the performance of the Indxx Aging Population Thematic Index by investing over 80% of its assets in securities from developed markets that capitalize on the trend of increasing life expectancies. The fund primarily focuses on companies engaged in biotechnology, medical devices, pharmaceuticals, senior living facilities, and specialized healthcare services, all aimed at enhancing and extending the quality of life for senior citizens. AGNG employs a proprietary research methodology that transcends traditional sector boundaries, incorporating a diverse range of industries, including insurance and consumer products. The ETF is reconstituted and rebalanced annually, utilizing a modified market-cap weighting approach with specific caps and floors to maintain diversification. Previously known as the Global X Longevity Thematic ETF under the ticker LNGR until April 2021, AGNG continues to align its investments with key demographic shif...</code> | <code>The SPDR S&P Biotech ETF (XBI) employs a strategic management approach aimed at closely tracking the performance of the S&P Biotechnology Select Industry Index through a sampling strategy. By investing a minimum of 80% of its total assets in the securities of this index, XBI focuses specifically on the biotechnology sector, which is a subset of the broader S&P Total Market Index. The ETF is distinguished by its equal-weighted methodology, which ensures diversified exposure across U.S. biotech stocks, particularly emphasizing small- and micro-cap companies. This approach mitigates single-name risk by reducing the influence of larger companies, resulting in a lower weighted-average market capitalization relative to its competitors. Additionally, the ETF's structure limits overlap with the pharmaceutical industry, allowing for a more concentrated investment in innovative biotech firms. 
The index undergoes quarterly rebalancing, which supports its commitment to maintaining a focused invest...</code> | <code>The VanEck Mortgage REIT Income ETF (MORT) employs a strategic approach to replicate the performance of the MVIS® US Mortgage REITs Index, focusing on a diverse range of mortgage real estate investment trusts (REITs). By allocating at least 80% of its total assets to securities within this benchmark, MORT targets companies across various market capitalizations, including small-, medium-, and large-cap mortgage REITs. The ETF is managed with a market-cap-weighted strategy, ensuring that larger companies have a more significant influence on its performance. While MORT features a lower expense ratio compared to its peer, the iShares Mortgage Real Estate Capped ETF (REM), it does experience challenges with liquidity. The fund maintains a concentrated portfolio, heavily aligned with its top holdings, which allows for targeted exposure to the mortgage REIT sector. This management strategy positions MORT as a compelling choice for investors seeking specialized investments in the mortgage REIT...</code> |
|
413 |
+
| <code>The Global X Aging Population ETF (AGNG) aims to replicate the performance of the Indxx Aging Population Thematic Index by investing over 80% of its assets in securities from developed markets that capitalize on the trend of increasing life expectancies. The fund primarily focuses on companies engaged in biotechnology, medical devices, pharmaceuticals, senior living facilities, and specialized healthcare services, all aimed at enhancing and extending the quality of life for senior citizens. AGNG employs a proprietary research methodology that transcends traditional sector boundaries, incorporating a diverse range of industries, including insurance and consumer products. The ETF is reconstituted and rebalanced annually, utilizing a modified market-cap weighting approach with specific caps and floors to maintain diversification. Previously known as the Global X Longevity Thematic ETF under the ticker LNGR until April 2021, AGNG continues to align its investments with key demographic shif...</code> | <code>The Range Cancer Therapeutics ETF (CNCR) is designed to track the Range Oncology Therapeutics Index, targeting U.S. exchange-listed pharmaceutical and biotechnology stocks, as well as American Depository Receipts (ADRs) with market capitalizations exceeding $250 million. Launched in 2023 by Range Fund Holdings, CNCR strategically allocates a minimum of 80% of its assets to the securities within the index. This ETF provides equal-weighted exposure to companies engaged in the research, development, and commercialization of oncology drugs, placing a spotlight on smaller firms with significant growth potential. CNCR is particularly appealing to investors focused on the cancer therapeutics sector. 
The ETF, formerly known as the Loncar Cancer Immunotherapy ETF, broadened its investment scope in October 2023 by merging with the Loncar China BioPharma ETF, thereby enhancing its exposure to promising opportunities in the Chinese markets.</code> | <code>The Invesco S&P 500 Equal Weight Energy ETF (RSPG) is designed to replicate the performance of the S&P 500® Equal Weight Energy Index by investing a minimum of 90% of its total assets in securities that compose this index. This index includes all companies within the S&P 500® Energy Index that fall under the energy sector, as defined by the Global Industry Classification Standard (GICS). As a large-cap sector fund, RSPG offers equal-weight exposure to a diverse array of U.S. energy companies across various sub-industries, enhancing portfolio diversification. The fund is rebalanced quarterly to ensure a minimum inclusion of 22 companies, and it may also incorporate leading firms from the S&P MidCap 400 Index if necessary to maintain this threshold. Notably, prior to June 7, 2023, RSPG was traded under the ticker RYE.</code> |
|
414 |
+
| <code>The First Trust RBA American Industrial Renaissance ETF (AIRR) is designed to closely track the performance of the Richard Bernstein Advisors American Industrial Renaissance® Index. This passively managed fund allocates a minimum of 90% of its net assets to equity securities within the index, emphasizing small and mid-cap U.S. companies primarily in the industrial and community banking sectors. Key industries targeted include Commercial Services & Supplies, Construction & Engineering, Electrical Equipment, Machinery, and Banks. The index utilizes a multifactor selection approach, systematically excluding firms with more than 25% of sales from outside the U.S. and community banks situated outside traditional Midwestern manufacturing regions. A proprietary optimization method is applied for weighting, limiting the banking sector to 10% and individual issuers to 4%. The index undergoes quarterly reconstitution and rebalancing, maintaining a focus on companies with a favorable 12-month for...</code> | <code>The Invesco Global Water ETF (PIO) aims to track the investment results of the NASDAQ OMX Global Water Index, investing at least 90% of its assets in securities within the index, including ADRs and GDRs. This index comprises global exchange-listed companies from the U.S., developed, and emerging markets that produce water conservation and purification products for homes, businesses, and industries. PIO employs a liquidity-weighted strategy, resulting in a concentrated portfolio dominated by large- to mid-cap firms. Eligible companies must participate in the Green Economy, as determined by SustainableBusiness.com LLC. The fund uses full replication to track its index, with quarterly rebalancing and annual reconstitution, while maintaining country and issuer diversification limits.</code> | <code>The Jacob Funds Inc. Jacob Forward ETF (JFWD) is actively managed with a focus on achieving long-term capital growth by investing in equity securities of U.S. companies engaged in innovative and disruptive technologies. The fund primarily holds common stocks but may also include other equity securities like preferred stocks, rights, or warrants. It targets companies of all sizes, with a significant emphasis on those in the early stages of development, particularly within the healthcare and information technology sectors. JFWD employs a forward-looking investment strategy, selecting securities based on a qualitative and quantitative assessment of companies' potential for above-average growth. The fund may also gain up to 25% foreign market exposure through global operations of U.S. companies. Notably, JFWD is non-diversified and will be delisted, with its last trading day on December 23, 2024.</code> |
|
415 |
+
* Loss: [<code>TripletLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#tripletloss) with these parameters:
|
416 |
+
```json
|
417 |
+
{
|
418 |
+
"distance_metric": "TripletDistanceMetric.COSINE",
|
419 |
+
"triplet_margin": 0.5
|
420 |
+
}
|
421 |
+
```
|
422 |
+
|
423 |
+
### Training Hyperparameters
|
424 |
+
#### Non-Default Hyperparameters
|
425 |
+
|
426 |
+
- `eval_strategy`: steps
|
427 |
+
- `per_device_train_batch_size`: 16
|
428 |
+
- `per_device_eval_batch_size`: 16
|
429 |
+
- `num_train_epochs`: 10
|
430 |
+
- `warmup_ratio`: 0.1
|
431 |
+
- `bf16`: True
|
432 |
+
- `dataloader_drop_last`: True
|
433 |
+
- `load_best_model_at_end`: True
|
434 |
+
- `batch_sampler`: no_duplicates
|
435 |
+
|
436 |
+
#### All Hyperparameters
|
437 |
+
<details><summary>Click to expand</summary>
|
438 |
+
|
439 |
+
- `overwrite_output_dir`: False
|
440 |
+
- `do_predict`: False
|
441 |
+
- `eval_strategy`: steps
|
442 |
+
- `prediction_loss_only`: True
|
443 |
+
- `per_device_train_batch_size`: 16
|
444 |
+
- `per_device_eval_batch_size`: 16
|
445 |
+
- `per_gpu_train_batch_size`: None
|
446 |
+
- `per_gpu_eval_batch_size`: None
|
447 |
+
- `gradient_accumulation_steps`: 1
|
448 |
+
- `eval_accumulation_steps`: None
|
449 |
+
- `torch_empty_cache_steps`: None
|
450 |
+
- `learning_rate`: 5e-05
|
451 |
+
- `weight_decay`: 0.0
|
452 |
+
- `adam_beta1`: 0.9
|
453 |
+
- `adam_beta2`: 0.999
|
454 |
+
- `adam_epsilon`: 1e-08
|
455 |
+
- `max_grad_norm`: 1.0
|
456 |
+
- `num_train_epochs`: 10
|
457 |
+
- `max_steps`: -1
|
458 |
+
- `lr_scheduler_type`: linear
|
459 |
+
- `lr_scheduler_kwargs`: {}
|
460 |
+
- `warmup_ratio`: 0.1
|
461 |
+
- `warmup_steps`: 0
|
462 |
+
- `log_level`: passive
|
463 |
+
- `log_level_replica`: warning
|
464 |
+
- `log_on_each_node`: True
|
465 |
+
- `logging_nan_inf_filter`: True
|
466 |
+
- `save_safetensors`: True
|
467 |
+
- `save_on_each_node`: False
|
468 |
+
- `save_only_model`: False
|
469 |
+
- `restore_callback_states_from_checkpoint`: False
|
470 |
+
- `no_cuda`: False
|
471 |
+
- `use_cpu`: False
|
472 |
+
- `use_mps_device`: False
|
473 |
+
- `seed`: 42
|
474 |
+
- `data_seed`: None
|
475 |
+
- `jit_mode_eval`: False
|
476 |
+
- `use_ipex`: False
|
477 |
+
- `bf16`: True
|
478 |
+
- `fp16`: False
|
479 |
+
- `fp16_opt_level`: O1
|
480 |
+
- `half_precision_backend`: auto
|
481 |
+
- `bf16_full_eval`: False
|
482 |
+
- `fp16_full_eval`: False
|
483 |
+
- `tf32`: None
|
484 |
+
- `local_rank`: 0
|
485 |
+
- `ddp_backend`: None
|
486 |
+
- `tpu_num_cores`: None
|
487 |
+
- `tpu_metrics_debug`: False
|
488 |
+
- `debug`: []
|
489 |
+
- `dataloader_drop_last`: True
|
490 |
+
- `dataloader_num_workers`: 0
|
491 |
+
- `dataloader_prefetch_factor`: None
|
492 |
+
- `past_index`: -1
|
493 |
+
- `disable_tqdm`: False
|
494 |
+
- `remove_unused_columns`: True
|
495 |
+
- `label_names`: None
|
496 |
+
- `load_best_model_at_end`: True
|
497 |
+
- `ignore_data_skip`: False
|
498 |
+
- `fsdp`: []
|
499 |
+
- `fsdp_min_num_params`: 0
|
500 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
501 |
+
- `tp_size`: 0
|
502 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
503 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
504 |
+
- `deepspeed`: None
|
505 |
+
- `label_smoothing_factor`: 0.0
|
506 |
+
- `optim`: adamw_torch
|
507 |
+
- `optim_args`: None
|
508 |
+
- `adafactor`: False
|
509 |
+
- `group_by_length`: False
|
510 |
+
- `length_column_name`: length
|
511 |
+
- `ddp_find_unused_parameters`: None
|
512 |
+
- `ddp_bucket_cap_mb`: None
|
513 |
+
- `ddp_broadcast_buffers`: False
|
514 |
+
- `dataloader_pin_memory`: True
|
515 |
+
- `dataloader_persistent_workers`: False
|
516 |
+
- `skip_memory_metrics`: True
|
517 |
+
- `use_legacy_prediction_loop`: False
|
518 |
+
- `push_to_hub`: False
|
519 |
+
- `resume_from_checkpoint`: None
|
520 |
+
- `hub_model_id`: None
|
521 |
+
- `hub_strategy`: every_save
|
522 |
+
- `hub_private_repo`: None
|
523 |
+
- `hub_always_push`: False
|
524 |
+
- `gradient_checkpointing`: False
|
525 |
+
- `gradient_checkpointing_kwargs`: None
|
526 |
+
- `include_inputs_for_metrics`: False
|
527 |
+
- `include_for_metrics`: []
|
528 |
+
- `eval_do_concat_batches`: True
|
529 |
+
- `fp16_backend`: auto
|
530 |
+
- `push_to_hub_model_id`: None
|
531 |
+
- `push_to_hub_organization`: None
|
532 |
+
- `mp_parameters`:
|
533 |
+
- `auto_find_batch_size`: False
|
534 |
+
- `full_determinism`: False
|
535 |
+
- `torchdynamo`: None
|
536 |
+
- `ray_scope`: last
|
537 |
+
- `ddp_timeout`: 1800
|
538 |
+
- `torch_compile`: False
|
539 |
+
- `torch_compile_backend`: None
|
540 |
+
- `torch_compile_mode`: None
|
541 |
+
- `include_tokens_per_second`: False
|
542 |
+
- `include_num_input_tokens_seen`: False
|
543 |
+
- `neftune_noise_alpha`: None
|
544 |
+
- `optim_target_modules`: None
|
545 |
+
- `batch_eval_metrics`: False
|
546 |
+
- `eval_on_start`: False
|
547 |
+
- `use_liger_kernel`: False
|
548 |
+
- `eval_use_gather_object`: False
|
549 |
+
- `average_tokens_across_devices`: False
|
550 |
+
- `prompts`: None
|
551 |
+
- `batch_sampler`: no_duplicates
|
552 |
+
- `multi_dataset_batch_sampler`: proportional
|
553 |
+
|
554 |
+
</details>
|
555 |
+
|
556 |
+
### Training Logs
|
557 |
+
| Epoch | Step | Training Loss | Validation Loss |
|
558 |
+
|:------:|:----:|:-------------:|:---------------:|
|
559 |
+
| 0.0581 | 10 | 0.4273 | - |
|
560 |
+
| 0.1163 | 20 | 0.3954 | - |
|
561 |
+
| 0.1744 | 30 | 0.2946 | - |
|
562 |
+
| 0.2326 | 40 | 0.2368 | - |
|
563 |
+
| 0.2907 | 50 | 0.1625 | - |
|
564 |
+
| 0.3488 | 60 | 0.1752 | - |
|
565 |
+
| 0.4070 | 70 | 0.1091 | - |
|
566 |
+
| 0.4651 | 80 | 0.1102 | - |
|
567 |
+
| 0.5233 | 90 | 0.0671 | - |
|
568 |
+
| 0.5814 | 100 | 0.0753 | 0.0678 |
|
569 |
+
| 0.6395 | 110 | 0.061 | - |
|
570 |
+
| 0.6977 | 120 | 0.0218 | - |
|
571 |
+
| 0.7558 | 130 | 0.0676 | - |
|
572 |
+
| 0.8140 | 140 | 0.0591 | - |
|
573 |
+
| 0.8721 | 150 | 0.0454 | - |
|
574 |
+
| 0.9302 | 160 | 0.0554 | - |
|
575 |
+
| 0.9884 | 170 | 0.0344 | - |
|
576 |
+
| 1.0523 | 180 | 0.0295 | - |
|
577 |
+
| 1.1105 | 190 | 0.0347 | - |
|
578 |
+
| 1.1686 | 200 | 0.032 | 0.0274 |
|
579 |
+
| 1.2267 | 210 | 0.0163 | - |
|
580 |
+
| 1.2849 | 220 | 0.0346 | - |
|
581 |
+
| 1.3430 | 230 | 0.0209 | - |
|
582 |
+
| 1.4012 | 240 | 0.0209 | - |
|
583 |
+
| 1.4593 | 250 | 0.0112 | - |
|
584 |
+
| 1.5174 | 260 | 0.0095 | - |
|
585 |
+
| 1.5756 | 270 | 0.016 | - |
|
586 |
+
| 1.6337 | 280 | 0.0123 | - |
|
587 |
+
| 1.6919 | 290 | 0.0173 | - |
|
588 |
+
| 1.75 | 300 | 0.0144 | 0.0171 |
|
589 |
+
| 1.8081 | 310 | 0.0182 | - |
|
590 |
+
| 1.8663 | 320 | 0.0223 | - |
|
591 |
+
| 1.9244 | 330 | 0.0103 | - |
|
592 |
+
| 1.9826 | 340 | 0.0071 | - |
|
593 |
+
| 2.0407 | 350 | 0.0085 | - |
|
594 |
+
| 2.0988 | 360 | 0.0045 | - |
|
595 |
+
| 2.1570 | 370 | 0.0058 | - |
|
596 |
+
| 2.2151 | 380 | 0.001 | - |
|
597 |
+
| 2.2733 | 390 | 0.0053 | - |
|
598 |
+
| 2.3314 | 400 | 0.0108 | 0.0093 |
|
599 |
+
| 2.3895 | 410 | 0.0017 | - |
|
600 |
+
| 2.4477 | 420 | 0.0024 | - |
|
601 |
+
| 2.5058 | 430 | 0.0075 | - |
|
602 |
+
| 2.5640 | 440 | 0.0022 | - |
|
603 |
+
| 2.6221 | 450 | 0.0044 | - |
|
604 |
+
| 2.6802 | 460 | 0.0001 | - |
|
605 |
+
| 2.7384 | 470 | 0.0022 | - |
|
606 |
+
| 2.7965 | 480 | 0.0016 | - |
|
607 |
+
| 2.8547 | 490 | 0.0078 | - |
|
608 |
+
| 2.9128 | 500 | 0.0 | 0.0045 |
|
609 |
+
|
610 |
+
|
611 |
+
### Framework Versions
|
612 |
+
- Python: 3.10.12
|
613 |
+
- Sentence Transformers: 4.1.0
|
614 |
+
- Transformers: 4.51.3
|
615 |
+
- PyTorch: 2.1.0+cu118
|
616 |
+
- Accelerate: 1.6.0
|
617 |
+
- Datasets: 3.5.0
|
618 |
+
- Tokenizers: 0.21.1
|
619 |
+
|
620 |
+
## Citation
|
621 |
+
|
622 |
+
### BibTeX
|
623 |
+
|
624 |
+
#### Sentence Transformers
|
625 |
+
```bibtex
|
626 |
+
@inproceedings{reimers-2019-sentence-bert,
|
627 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
628 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
629 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
630 |
+
month = "11",
|
631 |
+
year = "2019",
|
632 |
+
publisher = "Association for Computational Linguistics",
|
633 |
+
url = "https://arxiv.org/abs/1908.10084",
|
634 |
+
}
|
635 |
+
```
|
636 |
+
|
637 |
+
#### TripletLoss
|
638 |
+
```bibtex
|
639 |
+
@misc{hermans2017defense,
|
640 |
+
title={In Defense of the Triplet Loss for Person Re-Identification},
|
641 |
+
author={Alexander Hermans and Lucas Beyer and Bastian Leibe},
|
642 |
+
year={2017},
|
643 |
+
eprint={1703.07737},
|
644 |
+
archivePrefix={arXiv},
|
645 |
+
primaryClass={cs.CV}
|
646 |
+
}
|
647 |
+
```
|
648 |
+
|
649 |
+
<!--
|
650 |
+
## Glossary
|
651 |
+
|
652 |
+
*Clearly define terms in order to be accessible across audiences.*
|
653 |
+
-->
|
654 |
+
|
655 |
+
<!--
|
656 |
+
## Model Card Authors
|
657 |
+
|
658 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
659 |
+
-->
|
660 |
+
|
661 |
+
<!--
|
662 |
+
## Model Card Contact
|
663 |
+
|
664 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
665 |
+
-->
|
config.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"BertModel"
|
4 |
+
],
|
5 |
+
"attention_probs_dropout_prob": 0.1,
|
6 |
+
"classifier_dropout": null,
|
7 |
+
"gradient_checkpointing": false,
|
8 |
+
"hidden_act": "gelu",
|
9 |
+
"hidden_dropout_prob": 0.1,
|
10 |
+
"hidden_size": 384,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"intermediate_size": 1536,
|
13 |
+
"layer_norm_eps": 1e-12,
|
14 |
+
"max_position_embeddings": 512,
|
15 |
+
"model_type": "bert",
|
16 |
+
"num_attention_heads": 12,
|
17 |
+
"num_hidden_layers": 12,
|
18 |
+
"pad_token_id": 0,
|
19 |
+
"position_embedding_type": "absolute",
|
20 |
+
"torch_dtype": "float32",
|
21 |
+
"transformers_version": "4.51.3",
|
22 |
+
"type_vocab_size": 2,
|
23 |
+
"use_cache": true,
|
24 |
+
"vocab_size": 30522
|
25 |
+
}
|
config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "4.1.0",
|
4 |
+
"transformers": "4.51.3",
|
5 |
+
"pytorch": "2.1.0+cu118"
|
6 |
+
},
|
7 |
+
"prompts": {},
|
8 |
+
"default_prompt_name": null,
|
9 |
+
"similarity_fn_name": "cosine"
|
10 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2b37a099a515758aca62bf5d65b4f6237d8918548a17c6afe4671767604d4e46
|
3 |
+
size 133462128
|
modules.json
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"idx": 2,
|
16 |
+
"name": "2",
|
17 |
+
"path": "2_Normalize",
|
18 |
+
"type": "sentence_transformers.models.Normalize"
|
19 |
+
}
|
20 |
+
]
|
optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:420007947ceee29f376c6d93ea26b84ab47d8798e4d413b8c6c7d352691873d7
|
3 |
+
size 265862074
|
rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6e5caecae6ec382fa5cc6a7f2e27c0a0ea8d7eab1c18807a2411ac2a2c3b2d86
|
3 |
+
size 14244
|
scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:43ca0319b1a074e751f2def653a9a4d68f33309a4a9ed1a9bc9130f8ed762352
|
3 |
+
size 1064
|
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 128,
|
3 |
+
"do_lower_case": false
|
4 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": {
|
3 |
+
"content": "[CLS]",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"mask_token": {
|
10 |
+
"content": "[MASK]",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": {
|
17 |
+
"content": "[PAD]",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"sep_token": {
|
24 |
+
"content": "[SEP]",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"unk_token": {
|
31 |
+
"content": "[UNK]",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
}
|
37 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": false,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_basic_tokenize": true,
|
47 |
+
"do_lower_case": true,
|
48 |
+
"extra_special_tokens": {},
|
49 |
+
"mask_token": "[MASK]",
|
50 |
+
"max_length": 128,
|
51 |
+
"model_max_length": 128,
|
52 |
+
"never_split": null,
|
53 |
+
"pad_to_multiple_of": null,
|
54 |
+
"pad_token": "[PAD]",
|
55 |
+
"pad_token_type_id": 0,
|
56 |
+
"padding_side": "right",
|
57 |
+
"sep_token": "[SEP]",
|
58 |
+
"stride": 0,
|
59 |
+
"strip_accents": null,
|
60 |
+
"tokenize_chinese_chars": true,
|
61 |
+
"tokenizer_class": "BertTokenizer",
|
62 |
+
"truncation_side": "right",
|
63 |
+
"truncation_strategy": "longest_first",
|
64 |
+
"unk_token": "[UNK]"
|
65 |
+
}
|
trainer_state.json
ADDED
@@ -0,0 +1,433 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_global_step": 500,
|
3 |
+
"best_metric": 0.004451283253729343,
|
4 |
+
"best_model_checkpoint": "models/minilm_16_v1/checkpoint-500",
|
5 |
+
"epoch": 2.9127906976744184,
|
6 |
+
"eval_steps": 100,
|
7 |
+
"global_step": 500,
|
8 |
+
"is_hyper_param_search": false,
|
9 |
+
"is_local_process_zero": true,
|
10 |
+
"is_world_process_zero": true,
|
11 |
+
"log_history": [
|
12 |
+
{
|
13 |
+
"epoch": 0.05813953488372093,
|
14 |
+
"grad_norm": 2.1814687252044678,
|
15 |
+
"learning_rate": 2.616279069767442e-06,
|
16 |
+
"loss": 0.4273,
|
17 |
+
"step": 10
|
18 |
+
},
|
19 |
+
{
|
20 |
+
"epoch": 0.11627906976744186,
|
21 |
+
"grad_norm": 2.4306142330169678,
|
22 |
+
"learning_rate": 5.523255813953489e-06,
|
23 |
+
"loss": 0.3954,
|
24 |
+
"step": 20
|
25 |
+
},
|
26 |
+
{
|
27 |
+
"epoch": 0.1744186046511628,
|
28 |
+
"grad_norm": 2.1577110290527344,
|
29 |
+
"learning_rate": 8.430232558139535e-06,
|
30 |
+
"loss": 0.2946,
|
31 |
+
"step": 30
|
32 |
+
},
|
33 |
+
{
|
34 |
+
"epoch": 0.23255813953488372,
|
35 |
+
"grad_norm": 2.1654093265533447,
|
36 |
+
"learning_rate": 1.1337209302325581e-05,
|
37 |
+
"loss": 0.2368,
|
38 |
+
"step": 40
|
39 |
+
},
|
40 |
+
{
|
41 |
+
"epoch": 0.29069767441860467,
|
42 |
+
"grad_norm": 2.2517313957214355,
|
43 |
+
"learning_rate": 1.424418604651163e-05,
|
44 |
+
"loss": 0.1625,
|
45 |
+
"step": 50
|
46 |
+
},
|
47 |
+
{
|
48 |
+
"epoch": 0.3488372093023256,
|
49 |
+
"grad_norm": 2.3195061683654785,
|
50 |
+
"learning_rate": 1.7151162790697676e-05,
|
51 |
+
"loss": 0.1752,
|
52 |
+
"step": 60
|
53 |
+
},
|
54 |
+
{
|
55 |
+
"epoch": 0.4069767441860465,
|
56 |
+
"grad_norm": 1.3817775249481201,
|
57 |
+
"learning_rate": 2.0058139534883722e-05,
|
58 |
+
"loss": 0.1091,
|
59 |
+
"step": 70
|
60 |
+
},
|
61 |
+
{
|
62 |
+
"epoch": 0.46511627906976744,
|
63 |
+
"grad_norm": 1.4942753314971924,
|
64 |
+
"learning_rate": 2.296511627906977e-05,
|
65 |
+
"loss": 0.1102,
|
66 |
+
"step": 80
|
67 |
+
},
|
68 |
+
{
|
69 |
+
"epoch": 0.5232558139534884,
|
70 |
+
"grad_norm": 4.637234687805176,
|
71 |
+
"learning_rate": 2.5872093023255818e-05,
|
72 |
+
"loss": 0.0671,
|
73 |
+
"step": 90
|
74 |
+
},
|
75 |
+
{
|
76 |
+
"epoch": 0.5813953488372093,
|
77 |
+
"grad_norm": 2.108654737472534,
|
78 |
+
"learning_rate": 2.8779069767441864e-05,
|
79 |
+
"loss": 0.0753,
|
80 |
+
"step": 100
|
81 |
+
},
|
82 |
+
{
|
83 |
+
"epoch": 0.5813953488372093,
|
84 |
+
"eval_loss": 0.06778410077095032,
|
85 |
+
"eval_runtime": 1.8475,
|
86 |
+
"eval_samples_per_second": 372.386,
|
87 |
+
"eval_steps_per_second": 23.274,
|
88 |
+
"step": 100
|
89 |
+
},
|
90 |
+
{
|
91 |
+
"epoch": 0.6395348837209303,
|
92 |
+
"grad_norm": 2.9043750762939453,
|
93 |
+
"learning_rate": 3.168604651162791e-05,
|
94 |
+
"loss": 0.061,
|
95 |
+
"step": 110
|
96 |
+
},
|
97 |
+
{
|
98 |
+
"epoch": 0.6976744186046512,
|
99 |
+
"grad_norm": 2.4431095123291016,
|
100 |
+
"learning_rate": 3.4593023255813954e-05,
|
101 |
+
"loss": 0.0218,
|
102 |
+
"step": 120
|
103 |
+
},
|
104 |
+
{
|
105 |
+
"epoch": 0.7558139534883721,
|
106 |
+
"grad_norm": 1.2607183456420898,
|
107 |
+
"learning_rate": 3.7500000000000003e-05,
|
108 |
+
"loss": 0.0676,
|
109 |
+
"step": 130
|
110 |
+
},
|
111 |
+
{
|
112 |
+
"epoch": 0.813953488372093,
|
113 |
+
"grad_norm": 1.0044856071472168,
|
114 |
+
"learning_rate": 4.0406976744186046e-05,
|
115 |
+
"loss": 0.0591,
|
116 |
+
"step": 140
|
117 |
+
},
|
118 |
+
{
|
119 |
+
"epoch": 0.872093023255814,
|
120 |
+
"grad_norm": 6.34187650680542,
|
121 |
+
"learning_rate": 4.3313953488372096e-05,
|
122 |
+
"loss": 0.0454,
|
123 |
+
"step": 150
|
124 |
+
},
|
125 |
+
{
|
126 |
+
"epoch": 0.9302325581395349,
|
127 |
+
"grad_norm": 2.5847198963165283,
|
128 |
+
"learning_rate": 4.622093023255814e-05,
|
129 |
+
"loss": 0.0554,
|
130 |
+
"step": 160
|
131 |
+
},
|
132 |
+
{
|
133 |
+
"epoch": 0.9883720930232558,
|
134 |
+
"grad_norm": 1.1073358058929443,
|
135 |
+
"learning_rate": 4.912790697674419e-05,
|
136 |
+
"loss": 0.0344,
|
137 |
+
"step": 170
|
138 |
+
},
|
139 |
+
{
|
140 |
+
"epoch": 1.052325581395349,
|
141 |
+
"grad_norm": 1.8052096366882324,
|
142 |
+
"learning_rate": 4.977390180878553e-05,
|
143 |
+
"loss": 0.0295,
|
144 |
+
"step": 180
|
145 |
+
},
|
146 |
+
{
|
147 |
+
"epoch": 1.1104651162790697,
|
148 |
+
"grad_norm": 3.707095146179199,
|
149 |
+
"learning_rate": 4.945090439276486e-05,
|
150 |
+
"loss": 0.0347,
|
151 |
+
"step": 190
|
152 |
+
},
|
153 |
+
{
|
154 |
+
"epoch": 1.1686046511627908,
|
155 |
+
"grad_norm": 0.6383767127990723,
|
156 |
+
"learning_rate": 4.912790697674419e-05,
|
157 |
+
"loss": 0.032,
|
158 |
+
"step": 200
|
159 |
+
},
|
160 |
+
{
|
161 |
+
"epoch": 1.1686046511627908,
|
162 |
+
"eval_loss": 0.027401413768529892,
|
163 |
+
"eval_runtime": 1.8147,
|
164 |
+
"eval_samples_per_second": 379.12,
|
165 |
+
"eval_steps_per_second": 23.695,
|
166 |
+
"step": 200
|
167 |
+
},
|
168 |
+
{
|
169 |
+
"epoch": 1.2267441860465116,
|
170 |
+
"grad_norm": 2.500821828842163,
|
171 |
+
"learning_rate": 4.8804909560723514e-05,
|
172 |
+
"loss": 0.0163,
|
173 |
+
"step": 210
|
174 |
+
},
|
175 |
+
{
|
176 |
+
"epoch": 1.2848837209302326,
|
177 |
+
"grad_norm": 1.1830706596374512,
|
178 |
+
"learning_rate": 4.848191214470285e-05,
|
179 |
+
"loss": 0.0346,
|
180 |
+
"step": 220
|
181 |
+
},
|
182 |
+
{
|
183 |
+
"epoch": 1.3430232558139534,
|
184 |
+
"grad_norm": 1.1032847166061401,
|
185 |
+
"learning_rate": 4.815891472868217e-05,
|
186 |
+
"loss": 0.0209,
|
187 |
+
"step": 230
|
188 |
+
},
|
189 |
+
{
|
190 |
+
"epoch": 1.4011627906976745,
|
191 |
+
"grad_norm": 1.3562167882919312,
|
192 |
+
"learning_rate": 4.78359173126615e-05,
|
193 |
+
"loss": 0.0209,
|
194 |
+
"step": 240
|
195 |
+
},
|
196 |
+
{
|
197 |
+
"epoch": 1.4593023255813953,
|
198 |
+
"grad_norm": 0.0,
|
199 |
+
"learning_rate": 4.751291989664083e-05,
|
200 |
+
"loss": 0.0112,
|
201 |
+
"step": 250
|
202 |
+
},
|
203 |
+
{
|
204 |
+
"epoch": 1.5174418604651163,
|
205 |
+
"grad_norm": 0.0,
|
206 |
+
"learning_rate": 4.7189922480620156e-05,
|
207 |
+
"loss": 0.0095,
|
208 |
+
"step": 260
|
209 |
+
},
|
210 |
+
{
|
211 |
+
"epoch": 1.5755813953488373,
|
212 |
+
"grad_norm": 1.6855190992355347,
|
213 |
+
"learning_rate": 4.686692506459949e-05,
|
214 |
+
"loss": 0.016,
|
215 |
+
"step": 270
|
216 |
+
},
|
217 |
+
{
|
218 |
+
"epoch": 1.6337209302325582,
|
219 |
+
"grad_norm": 0.0,
|
220 |
+
"learning_rate": 4.6543927648578814e-05,
|
221 |
+
"loss": 0.0123,
|
222 |
+
"step": 280
|
223 |
+
},
|
224 |
+
{
|
225 |
+
"epoch": 1.691860465116279,
|
226 |
+
"grad_norm": 2.0956130027770996,
|
227 |
+
"learning_rate": 4.622093023255814e-05,
|
228 |
+
"loss": 0.0173,
|
229 |
+
"step": 290
|
230 |
+
},
|
231 |
+
{
|
232 |
+
"epoch": 1.75,
|
233 |
+
"grad_norm": 1.038135290145874,
|
234 |
+
"learning_rate": 4.589793281653747e-05,
|
235 |
+
"loss": 0.0144,
|
236 |
+
"step": 300
|
237 |
+
},
|
238 |
+
{
|
239 |
+
"epoch": 1.75,
|
240 |
+
"eval_loss": 0.01709042489528656,
|
241 |
+
"eval_runtime": 1.821,
|
242 |
+
"eval_samples_per_second": 377.81,
|
243 |
+
"eval_steps_per_second": 23.613,
|
244 |
+
"step": 300
|
245 |
+
},
|
246 |
+
{
|
247 |
+
"epoch": 1.808139534883721,
|
248 |
+
"grad_norm": 0.0,
|
249 |
+
"learning_rate": 4.55749354005168e-05,
|
250 |
+
"loss": 0.0182,
|
251 |
+
"step": 310
|
252 |
+
},
|
253 |
+
{
|
254 |
+
"epoch": 1.8662790697674418,
|
255 |
+
"grad_norm": 0.8108630180358887,
|
256 |
+
"learning_rate": 4.525193798449613e-05,
|
257 |
+
"loss": 0.0223,
|
258 |
+
"step": 320
|
259 |
+
},
|
260 |
+
{
|
261 |
+
"epoch": 1.9244186046511627,
|
262 |
+
"grad_norm": 0.9377063512802124,
|
263 |
+
"learning_rate": 4.4928940568475455e-05,
|
264 |
+
"loss": 0.0103,
|
265 |
+
"step": 330
|
266 |
+
},
|
267 |
+
{
|
268 |
+
"epoch": 1.9825581395348837,
|
269 |
+
"grad_norm": 0.0,
|
270 |
+
"learning_rate": 4.460594315245478e-05,
|
271 |
+
"loss": 0.0071,
|
272 |
+
"step": 340
|
273 |
+
},
|
274 |
+
{
|
275 |
+
"epoch": 2.0406976744186047,
|
276 |
+
"grad_norm": 0.9668266177177429,
|
277 |
+
"learning_rate": 4.428294573643411e-05,
|
278 |
+
"loss": 0.0085,
|
279 |
+
"step": 350
|
280 |
+
},
|
281 |
+
{
|
282 |
+
"epoch": 2.0988372093023258,
|
283 |
+
"grad_norm": 0.0,
|
284 |
+
"learning_rate": 4.395994832041344e-05,
|
285 |
+
"loss": 0.0045,
|
286 |
+
"step": 360
|
287 |
+
},
|
288 |
+
{
|
289 |
+
"epoch": 2.1569767441860463,
|
290 |
+
"grad_norm": 1.6278727054595947,
|
291 |
+
"learning_rate": 4.3636950904392764e-05,
|
292 |
+
"loss": 0.0058,
|
293 |
+
"step": 370
|
294 |
+
},
|
295 |
+
{
|
296 |
+
"epoch": 2.2151162790697674,
|
297 |
+
"grad_norm": 0.0,
|
298 |
+
"learning_rate": 4.3313953488372096e-05,
|
299 |
+
"loss": 0.001,
|
300 |
+
"step": 380
|
301 |
+
},
|
302 |
+
{
|
303 |
+
"epoch": 2.2732558139534884,
|
304 |
+
"grad_norm": 0.0,
|
305 |
+
"learning_rate": 4.299095607235142e-05,
|
306 |
+
"loss": 0.0053,
|
307 |
+
"step": 390
|
308 |
+
},
|
309 |
+
{
|
310 |
+
"epoch": 2.3313953488372094,
|
311 |
+
"grad_norm": 1.1249583959579468,
|
312 |
+
"learning_rate": 4.2667958656330754e-05,
|
313 |
+
"loss": 0.0108,
|
314 |
+
"step": 400
|
315 |
+
},
|
316 |
+
{
|
317 |
+
"epoch": 2.3313953488372094,
|
318 |
+
"eval_loss": 0.009344412013888359,
|
319 |
+
"eval_runtime": 1.7959,
|
320 |
+
"eval_samples_per_second": 383.1,
|
321 |
+
"eval_steps_per_second": 23.944,
|
322 |
+
"step": 400
|
323 |
+
},
|
324 |
+
{
|
325 |
+
"epoch": 2.3895348837209305,
|
326 |
+
"grad_norm": 0.6023396253585815,
|
327 |
+
"learning_rate": 4.234496124031008e-05,
|
328 |
+
"loss": 0.0017,
|
329 |
+
"step": 410
|
330 |
+
},
|
331 |
+
{
|
332 |
+
"epoch": 2.447674418604651,
|
333 |
+
"grad_norm": 1.0035721063613892,
|
334 |
+
"learning_rate": 4.2021963824289405e-05,
|
335 |
+
"loss": 0.0024,
|
336 |
+
"step": 420
|
337 |
+
},
|
338 |
+
{
|
339 |
+
"epoch": 2.505813953488372,
|
340 |
+
"grad_norm": 2.06754732131958,
|
341 |
+
"learning_rate": 4.169896640826874e-05,
|
342 |
+
"loss": 0.0075,
|
343 |
+
"step": 430
|
344 |
+
},
|
345 |
+
{
|
346 |
+
"epoch": 2.563953488372093,
|
347 |
+
"grad_norm": 0.0,
|
348 |
+
"learning_rate": 4.137596899224806e-05,
|
349 |
+
"loss": 0.0022,
|
350 |
+
"step": 440
|
351 |
+
},
|
352 |
+
{
|
353 |
+
"epoch": 2.6220930232558137,
|
354 |
+
"grad_norm": 0.6082500219345093,
|
355 |
+
"learning_rate": 4.1052971576227395e-05,
|
356 |
+
"loss": 0.0044,
|
357 |
+
"step": 450
|
358 |
+
},
|
359 |
+
{
|
360 |
+
"epoch": 2.6802325581395348,
|
361 |
+
"grad_norm": 0.0,
|
362 |
+
"learning_rate": 4.072997416020672e-05,
|
363 |
+
"loss": 0.0001,
|
364 |
+
"step": 460
|
365 |
+
},
|
366 |
+
{
|
367 |
+
"epoch": 2.738372093023256,
|
368 |
+
"grad_norm": 0.0,
|
369 |
+
"learning_rate": 4.0406976744186046e-05,
|
370 |
+
"loss": 0.0022,
|
371 |
+
"step": 470
|
372 |
+
},
|
373 |
+
{
|
374 |
+
"epoch": 2.796511627906977,
|
375 |
+
"grad_norm": 0.0,
|
376 |
+
"learning_rate": 4.008397932816538e-05,
|
377 |
+
"loss": 0.0016,
|
378 |
+
"step": 480
|
379 |
+
},
|
380 |
+
{
|
381 |
+
"epoch": 2.854651162790698,
|
382 |
+
"grad_norm": 0.0,
|
383 |
+
"learning_rate": 3.9760981912144704e-05,
|
384 |
+
"loss": 0.0078,
|
385 |
+
"step": 490
|
386 |
+
},
|
387 |
+
{
|
388 |
+
"epoch": 2.9127906976744184,
|
389 |
+
"grad_norm": 0.0,
|
390 |
+
"learning_rate": 3.943798449612403e-05,
|
391 |
+
"loss": 0.0,
|
392 |
+
"step": 500
|
393 |
+
},
|
394 |
+
{
|
395 |
+
"epoch": 2.9127906976744184,
|
396 |
+
"eval_loss": 0.004451283253729343,
|
397 |
+
"eval_runtime": 1.823,
|
398 |
+
"eval_samples_per_second": 377.398,
|
399 |
+
"eval_steps_per_second": 23.587,
|
400 |
+
"step": 500
|
401 |
+
}
|
402 |
+
],
|
403 |
+
"logging_steps": 10,
|
404 |
+
"max_steps": 1720,
|
405 |
+
"num_input_tokens_seen": 0,
|
406 |
+
"num_train_epochs": 10,
|
407 |
+
"save_steps": 500,
|
408 |
+
"stateful_callbacks": {
|
409 |
+
"EarlyStoppingCallback": {
|
410 |
+
"args": {
|
411 |
+
"early_stopping_patience": 10,
|
412 |
+
"early_stopping_threshold": 0.0
|
413 |
+
},
|
414 |
+
"attributes": {
|
415 |
+
"early_stopping_patience_counter": 0
|
416 |
+
}
|
417 |
+
},
|
418 |
+
"TrainerControl": {
|
419 |
+
"args": {
|
420 |
+
"should_epoch_stop": false,
|
421 |
+
"should_evaluate": false,
|
422 |
+
"should_log": false,
|
423 |
+
"should_save": true,
|
424 |
+
"should_training_stop": false
|
425 |
+
},
|
426 |
+
"attributes": {}
|
427 |
+
}
|
428 |
+
},
|
429 |
+
"total_flos": 0.0,
|
430 |
+
"train_batch_size": 16,
|
431 |
+
"trial_name": null,
|
432 |
+
"trial_params": null
|
433 |
+
}
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8b46dbf4988c08cab1f3cc765d429ab129b0514625c264f5a5a8d2d178443d62
|
3 |
+
size 5560
|
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|