bibibi12345 committed
Commit 0185608 · 1 Parent(s): cfdf66d

Added thinking support; added nothinking and maxthinking modes.

src/config.py CHANGED
@@ -131,17 +131,110 @@ def _generate_search_variants():
         search_models.append(search_variant)
     return search_models
 
-# Supported Models (includes both base models and search variants)
-SUPPORTED_MODELS = BASE_MODELS + _generate_search_variants()
+# Generate thinking variants for applicable models
+def _generate_thinking_variants():
+    """Generate nothinking and maxthinking variants for models that support thinking."""
+    thinking_models = []
+    for model in BASE_MODELS:
+        # Only add thinking variants for models that support content generation
+        # and contain "gemini-2.5-flash" or "gemini-2.5-pro" in their name
+        if ("generateContent" in model["supportedGenerationMethods"] and
+                ("gemini-2.5-flash" in model["name"] or "gemini-2.5-pro" in model["name"])):
+
+            # Add -nothinking variant
+            nothinking_variant = model.copy()
+            nothinking_variant["name"] = model["name"] + "-nothinking"
+            nothinking_variant["displayName"] = model["displayName"] + " (No Thinking)"
+            nothinking_variant["description"] = model["description"] + " (thinking disabled)"
+            thinking_models.append(nothinking_variant)
+
+            # Add -maxthinking variant
+            maxthinking_variant = model.copy()
+            maxthinking_variant["name"] = model["name"] + "-maxthinking"
+            maxthinking_variant["displayName"] = model["displayName"] + " (Max Thinking)"
+            maxthinking_variant["description"] = model["description"] + " (maximum thinking budget)"
+            thinking_models.append(maxthinking_variant)
+    return thinking_models
+
+# Generate combined variants (search + thinking combinations)
+def _generate_combined_variants():
+    """Generate combined search and thinking variants."""
+    combined_models = []
+    for model in BASE_MODELS:
+        # Only add combined variants for models that support content generation
+        # and contain "gemini-2.5-flash" or "gemini-2.5-pro" in their name
+        if ("generateContent" in model["supportedGenerationMethods"] and
+                ("gemini-2.5-flash" in model["name"] or "gemini-2.5-pro" in model["name"])):
+
+            # search + nothinking
+            search_nothinking = model.copy()
+            search_nothinking["name"] = model["name"] + "-search-nothinking"
+            search_nothinking["displayName"] = model["displayName"] + " with Google Search (No Thinking)"
+            search_nothinking["description"] = model["description"] + " (includes Google Search grounding, thinking disabled)"
+            combined_models.append(search_nothinking)
+
+            # search + maxthinking
+            search_maxthinking = model.copy()
+            search_maxthinking["name"] = model["name"] + "-search-maxthinking"
+            search_maxthinking["displayName"] = model["displayName"] + " with Google Search (Max Thinking)"
+            search_maxthinking["description"] = model["description"] + " (includes Google Search grounding, maximum thinking budget)"
+            combined_models.append(search_maxthinking)
+    return combined_models
+
+# Supported Models (includes base models, search variants, and thinking variants)
+SUPPORTED_MODELS = BASE_MODELS + _generate_search_variants() + _generate_thinking_variants()
 
-# Helper function to get base model name from search variant
+# Helper function to get base model name from any variant
 def get_base_model_name(model_name):
-    """Convert search variant model name to base model name."""
-    if model_name.endswith("-search"):
-        return model_name[:-7]  # Remove "-search" suffix
+    """Convert variant model name to base model name."""
+    # Remove all possible suffixes in order
+    suffixes = ["-maxthinking", "-nothinking", "-search"]
+    for suffix in suffixes:
+        if model_name.endswith(suffix):
+            return model_name[:-len(suffix)]
     return model_name
 
 # Helper function to check if model uses search grounding
 def is_search_model(model_name):
     """Check if model name indicates search grounding should be enabled."""
-    return model_name.endswith("-search")
+    return "-search" in model_name
+
+# Helper function to check if model uses no thinking
+def is_nothinking_model(model_name):
+    """Check if model name indicates thinking should be disabled."""
+    return "-nothinking" in model_name
+
+# Helper function to check if model uses max thinking
+def is_maxthinking_model(model_name):
+    """Check if model name indicates maximum thinking budget should be used."""
+    return "-maxthinking" in model_name
+
+# Helper function to get thinking budget for a model
+def get_thinking_budget(model_name):
+    """Get the appropriate thinking budget for a model based on its name and variant."""
+    base_model = get_base_model_name(model_name)
+
+    if is_nothinking_model(model_name):
+        if "gemini-2.5-flash" in base_model:
+            return 0  # No thinking for flash
+        elif "gemini-2.5-pro" in base_model:
+            return 128  # Limited thinking for pro
+    elif is_maxthinking_model(model_name):
+        if "gemini-2.5-flash" in base_model:
+            return 24576
+        elif "gemini-2.5-pro" in base_model:
+            return 32768
+    else:
+        # Default thinking budget for regular models
+        return -1  # Default for all models
+
+# Helper function to check if thinking should be included in output
+def should_include_thoughts(model_name):
+    """Check if thoughts should be included in the response."""
+    if is_nothinking_model(model_name):
+        # For nothinking mode, still include thoughts if it's a pro model
+        base_model = get_base_model_name(model_name)
+        return "gemini-2.5-pro" in base_model
+    else:
+        # For all other modes, include thoughts
+        return True
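
Taken together, the new helpers resolve a model-name suffix into a thinking configuration. A minimal sketch of the expected behavior, assuming these functions are importable from src.config and that base model names look like "models/gemini-2.5-pro" (the exact names come from BASE_MODELS, which is outside this diff):

    from src.config import (
        get_base_model_name,
        is_search_model,
        get_thinking_budget,
        should_include_thoughts,
    )

    # Expected values follow directly from the code above.
    assert get_base_model_name("models/gemini-2.5-pro-maxthinking") == "models/gemini-2.5-pro"
    assert get_thinking_budget("models/gemini-2.5-pro-maxthinking") == 32768
    assert get_thinking_budget("models/gemini-2.5-flash-nothinking") == 0
    assert get_thinking_budget("models/gemini-2.5-pro-nothinking") == 128  # pro keeps a small budget
    assert get_thinking_budget("models/gemini-2.5-pro") == -1              # -1 = server-side default
    assert is_search_model("models/gemini-2.5-flash-search")
    assert should_include_thoughts("models/gemini-2.5-pro-nothinking")     # pro still surfaces thoughts
    assert not should_include_thoughts("models/gemini-2.5-flash-nothinking")
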
src/google_api_client.py CHANGED
@@ -11,7 +11,14 @@ from google.auth.transport.requests import Request as GoogleAuthRequest
 
 from .auth import get_credentials, save_credentials, get_user_project_id, onboard_user
 from .utils import get_user_agent
-from .config import CODE_ASSIST_ENDPOINT, DEFAULT_SAFETY_SETTINGS, get_base_model_name, is_search_model
+from .config import (
+    CODE_ASSIST_ENDPOINT,
+    DEFAULT_SAFETY_SETTINGS,
+    get_base_model_name,
+    is_search_model,
+    get_thinking_budget,
+    should_include_thoughts
+)
 import asyncio
 
 
@@ -307,8 +314,12 @@ def build_gemini_payload_from_native(native_request: dict, model_from_path: str)
     if "thinkingConfig" not in native_request["generationConfig"]:
         native_request["generationConfig"]["thinkingConfig"] = {}
 
-    native_request["generationConfig"]["thinkingConfig"]["includeThoughts"] = True
-    native_request["generationConfig"]["thinkingConfig"]["thinkingBudget"] = -1
+    # Configure thinking based on model variant
+    thinking_budget = get_thinking_budget(model_from_path)
+    include_thoughts = should_include_thoughts(model_from_path)
+
+    native_request["generationConfig"]["thinkingConfig"]["includeThoughts"] = include_thoughts
+    native_request["generationConfig"]["thinkingConfig"]["thinkingBudget"] = thinking_budget
 
     # Add Google Search grounding for search models
     if is_search_model(model_from_path):
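
For the native (Gemini-format) path, the net effect is that thinkingConfig is no longer hardcoded but derived from the requested variant. A hedged sketch of the outcome (the surrounding payload shape is assumed; only the two thinkingConfig keys are set by this hunk):

    # Hypothetical input; build_gemini_payload_from_native fills thinkingConfig.
    native_request = {"generationConfig": {}}
    # After processing with model_from_path = "models/gemini-2.5-flash-maxthinking",
    # the code above yields:
    #   native_request["generationConfig"]["thinkingConfig"]
    #   == {"includeThoughts": True, "thinkingBudget": 24576}
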
src/models.py CHANGED
@@ -5,6 +5,7 @@ from typing import List, Optional, Union, Dict, Any
 class OpenAIChatMessage(BaseModel):
     role: str
     content: Union[str, List[Dict[str, Any]]]
+    reasoning_content: Optional[str] = None
 
 class OpenAIChatCompletionRequest(BaseModel):
     model: str
@@ -37,6 +38,7 @@ class OpenAIChatCompletionResponse(BaseModel):
 
 class OpenAIDelta(BaseModel):
     content: Optional[str] = None
+    reasoning_content: Optional[str] = None
 
 class OpenAIChatCompletionStreamChoice(BaseModel):
     index: int
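
reasoning_content is not part of the official OpenAI chat schema; it follows the convention used by reasoning-capable OpenAI-compatible APIs, and because the field is Optional with a None default, clients that don't know about it are unaffected. A quick sketch of the serialized shapes (model_dump() assumes pydantic v2; on v1 the equivalent is .dict()):

    from typing import Optional
    from pydantic import BaseModel

    class OpenAIDelta(BaseModel):
        content: Optional[str] = None
        reasoning_content: Optional[str] = None

    print(OpenAIDelta(content="4").model_dump(exclude_none=True))
    # {'content': '4'}
    print(OpenAIDelta(content="4", reasoning_content="2+2 is 4 because...").model_dump(exclude_none=True))
    # {'content': '4', 'reasoning_content': '2+2 is 4 because...'}
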
src/openai_transformers.py CHANGED
@@ -8,7 +8,13 @@ import uuid
 from typing import Dict, Any
 
 from .models import OpenAIChatCompletionRequest, OpenAIChatCompletionResponse
-from .config import DEFAULT_SAFETY_SETTINGS, is_search_model, get_base_model_name
+from .config import (
+    DEFAULT_SAFETY_SETTINGS,
+    is_search_model,
+    get_base_model_name,
+    get_thinking_budget,
+    should_include_thoughts
+)
 
 
 def openai_request_to_gemini(openai_request: OpenAIChatCompletionRequest) -> Dict[str, Any]:
@@ -103,6 +109,14 @@ def openai_request_to_gemini(openai_request: OpenAIChatCompletionRequest) -> Dict[str, Any]:
     if is_search_model(openai_request.model):
         request_payload["tools"] = [{"googleSearch": {}}]
 
+    # Add thinking configuration for thinking models
+    thinking_budget = get_thinking_budget(openai_request.model)
+    if thinking_budget is not None:
+        request_payload["generationConfig"]["thinkingConfig"] = {
+            "thinkingBudget": thinking_budget,
+            "includeThoughts": should_include_thoughts(openai_request.model)
+        }
+
     return request_payload
 
 
@@ -126,18 +140,34 @@ def gemini_response_to_openai(gemini_response: Dict[str, Any], model: str) -> Dict[str, Any]:
         if role == "model":
             role = "assistant"
 
-        # Extract text content from parts
+        # Extract and separate thinking tokens from regular content
         parts = candidate.get("content", {}).get("parts", [])
         content = ""
-        if parts and len(parts) > 0:
-            content = parts[0].get("text", "")
+        reasoning_content = ""
+
+        for part in parts:
+            if not part.get("text"):
+                continue
+
+            # Check if this part contains thinking tokens
+            if part.get("thought", False):
+                reasoning_content += part.get("text", "")
+            else:
+                content += part.get("text", "")
+
+        # Build message object
+        message = {
+            "role": role,
+            "content": content,
+        }
+
+        # Add reasoning_content if there are thinking tokens
+        if reasoning_content:
+            message["reasoning_content"] = reasoning_content
 
         choices.append({
             "index": candidate.get("index", 0),
-            "message": {
-                "role": role,
-                "content": content,
-            },
+            "message": message,
             "finish_reason": _map_finish_reason(candidate.get("finishReason")),
         })
 
@@ -171,17 +201,31 @@ def gemini_stream_chunk_to_openai(gemini_chunk: Dict[str, Any], model: str, resp
         if role == "model":
             role = "assistant"
 
-        # Extract text content from parts
+        # Extract and separate thinking tokens from regular content
         parts = candidate.get("content", {}).get("parts", [])
         content = ""
-        if parts and len(parts) > 0:
-            content = parts[0].get("text", "")
+        reasoning_content = ""
+
+        for part in parts:
+            if not part.get("text"):
+                continue
+
+            # Check if this part contains thinking tokens
+            if part.get("thought", False):
+                reasoning_content += part.get("text", "")
+            else:
+                content += part.get("text", "")
+
+        # Build delta object
+        delta = {}
+        if content:
+            delta["content"] = content
+        if reasoning_content:
+            delta["reasoning_content"] = reasoning_content
 
         choices.append({
             "index": candidate.get("index", 0),
-            "delta": {
-                "content": content,
-            },
+            "delta": delta,
             "finish_reason": _map_finish_reason(candidate.get("finishReason")),
         })
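
Both transformer functions now share the same splitting rule: Gemini marks thought parts with a boolean "thought" flag, and the code routes those into reasoning_content while everything else stays in content. A self-contained sketch of that rule on invented sample data:

    # Standalone sketch of the part-splitting logic above, applied to a
    # hypothetical Gemini candidate (the sample data is invented for illustration).
    candidate = {
        "content": {
            "parts": [
                {"text": "Let me compare the two options...", "thought": True},
                {"text": "Option A is cheaper overall."},
            ]
        }
    }

    content, reasoning_content = "", ""
    for part in candidate["content"]["parts"]:
        if not part.get("text"):
            continue
        if part.get("thought", False):
            reasoning_content += part["text"]
        else:
            content += part["text"]

    delta = {}
    if content:
        delta["content"] = content
    if reasoning_content:
        delta["reasoning_content"] = reasoning_content

    print(delta)
    # {'content': 'Option A is cheaper overall.',
    #  'reasoning_content': 'Let me compare the two options...'}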