Jinglong Xiong committed on
Commit
9fdf27a
·
1 Parent(s): 15369ca

fix duplicate id in data

Browse files
data/descriptions.csv CHANGED
@@ -14,7 +14,6 @@ id,description,category
14
  12,a starlit night over snow-covered peaks,landscape
15
  13,khaki triangles and azure crescents,abstract
16
  14,a maroon dodecahedron interwoven with teal threads,abstract
17
- 14,a peaceful meadow under a bright blue sky,landscapes
18
  15,a bright coral beach at midday,landscapes
19
  16,a misty morning over a tranquil fjord,landscapes
20
  17,an arctic tundra blanketed in snow,landscapes
@@ -164,3 +163,4 @@ id,description,category
164
  161,quilted puffer vest in bright red with zip pockets,fashion
165
  162,polka dot silk scarf in navy with white spots,fashion
166
  163,sheer kimono in floral chiffon with wide sleeves,fashion
 
 
14
  12,a starlit night over snow-covered peaks,landscape
15
  13,khaki triangles and azure crescents,abstract
16
  14,a maroon dodecahedron interwoven with teal threads,abstract
 
17
  15,a bright coral beach at midday,landscapes
18
  16,a misty morning over a tranquil fjord,landscapes
19
  17,an arctic tundra blanketed in snow,landscapes
 
163
  161,quilted puffer vest in bright red with zip pockets,fashion
164
  162,polka dot silk scarf in navy with white spots,fashion
165
  163,sheer kimono in floral chiffon with wide sleeves,fashion
166
+ 164,a peaceful meadow under a bright blue sky,landscapes
data/eval.csv CHANGED
@@ -14,7 +14,6 @@ id,question,choices,answer
14
  12,"[""What kind of peaks are depicted in the image?"", ""What is the color of the sky in the image?"", ""Are there any stars visible in the image?"", ""What season does the image represent?""]","[[""rocky"", ""snow-covered"", ""forest-covered"", ""flat""], [""blue"", ""black"", ""gray"", ""orange""], [""yes"", ""no""], [""spring"", ""summer"", ""autumn"", ""winter""]]","[""snow-covered"", ""black"", ""yes"", ""winter""]"
15
  13,"[""What color are the triangles?"", ""What shape do the azure elements resemble?"", ""How many different colors are used in the image?"", ""Which color is associated with the triangles?"", ""Are the crescents depicted in a solid color?""]","[[""green"", ""khaki"", ""blue"", ""red""], [""squares"", ""crescents"", ""rectangles"", ""triangles""], [""one"", ""two"", ""three"", ""four""], [""khaki"", ""azure"", ""yellow"", ""purple""], [""yes"", ""no"", ""partially"", ""unknown""]]","[""khaki"", ""crescents"", ""two"", ""khaki"", ""no""]"
16
  14,"[""What color is the dodecahedron?"", ""What color are the threads interwoven with the dodecahedron?"", ""How many faces does the dodecahedron have?"", ""What is the primary geometric shape depicted in the image?""]","[[""red"", ""maroon"", ""blue"", ""green""], [""teal"", ""yellow"", ""pink"", ""black""], [""6"", ""12"", ""20"", ""8""], [""cube"", ""tetrahedron"", ""dodecahedron"", ""octahedron""]]","[""maroon"", ""teal"", ""12"", ""dodecahedron""]"
17
- 14,"[""What color is the sky in the image?"", ""What type of vegetation is predominant in the meadow?"", ""Are there any clouds in the sky?"", ""Is there any water present in the meadow?"", ""What is the overall mood conveyed by the image?""]","[[""blue"", ""gray"", ""white"", ""green""], [""grass"", ""desert"", ""trees"", ""flowers""], [""yes"", ""no""], [""yes"", ""no""], [""peaceful"", ""chaotic"", ""gloomy"", ""dark""]]","[""blue"", ""grass"", ""no"", ""no"", ""peaceful""]"
18
  15,"[""What color is the beach in the image?"", ""What time of day is depicted in the image?"", ""Are there any palm trees on the beach?"", ""What is the condition of the sky in the image?""]","[[""bright coral"", ""dark brown"", ""sand yellow"", ""deep blue""], [""morning"", ""midday"", ""evening"", ""night""], [""yes"", ""no""], [""clear"", ""cloudy"", ""stormy"", ""sunset""]]","[""bright coral"", ""midday"", ""yes"", ""clear""]"
19
  16,"[""What is the predominant weather condition depicted in the image?"", ""What color is the water of the fjord?"", ""Are there any mountains or cliffs visible in the background?"", ""Is there any visible wildlife in the image?"", ""What type of sky is depicted in the image?""]","[[""clear"", ""misty"", ""rainy"", ""snowy""], [""blue"", ""gray"", ""green"", ""brown""], [""yes"", ""no""], [""yes"", ""no""], [""sunny"", ""cloudy"", ""misty"", ""stormy""]]","[""misty"", ""gray"", ""yes"", ""no"", ""misty""]"
20
  17,"[""What is the predominant color of the landscape?"", ""Are there any trees visible in the image?"", ""What type of animal might be present in the tundra?"", ""Is the sky clear or cloudy?""]","[[""blue"", ""green"", ""white"", ""brown""], [""yes"", ""no""], [""penguin"", ""caribou"", ""lion"", ""elephant""], [""clear"", ""cloudy"", ""sunset"", ""stormy""]]","[""white"", ""no"", ""caribou"", ""cloudy""]"
@@ -164,3 +163,4 @@ id,question,choices,answer
164
  161,"[""What color is the vest?"", ""What type of pockets does the vest have?"", ""Is the vest designed with a pattern?"", ""What is the style of the vest?""]","[[""blue"", ""green"", ""bright red"", ""black""], [""zip pockets"", ""button pockets"", ""no pockets""], [""striped"", ""plaid"", ""quilted"", ""polka dot""], [""puffer"", ""sleeveless"", ""long-sleeved"", ""hooded""]]","[""bright red"", ""zip pockets"", ""quilted"", ""puffer""]"
165
  162,"[""What is the primary color of the scarf?"", ""What is the color of the spots on the scarf?"", ""What pattern is featured on the scarf?"", ""Is the scarf made of a patterned fabric?"", ""Does the scarf have a solid color background?""]","[[""red"", ""navy"", ""green"", ""black""], [""blue"", ""white"", ""red"", ""yellow""], [""stripes"", ""polka dots"", ""plaid"", ""floral""], [""yes"", ""no""], [""yes"", ""no""]]","[""navy"", ""white"", ""polka dots"", ""yes"", ""no""]"
166
  163,"[""What is the primary fabric of the kimono?"", ""How would you describe the sleeves of the kimono?"", ""What type of pattern is featured on the kimono?"", ""Is the kimono designed to be fitted or loose?"", ""What color scheme is likely present in the floral chiffon?""]","[[""silk"", ""cotton"", ""chiffon"", ""wool""], [""narrow"", ""wide"", ""long"", ""short""], [""striped"", ""floral"", ""solid"", ""geometric""], [""fitted"", ""loose"", ""tailored"", ""structured""], [""monochrome"", ""pastel"", ""bold"", ""neon""]]","[""chiffon"", ""wide"", ""floral"", ""loose"", ""pastel""]"
 
 
14
  12,"[""What kind of peaks are depicted in the image?"", ""What is the color of the sky in the image?"", ""Are there any stars visible in the image?"", ""What season does the image represent?""]","[[""rocky"", ""snow-covered"", ""forest-covered"", ""flat""], [""blue"", ""black"", ""gray"", ""orange""], [""yes"", ""no""], [""spring"", ""summer"", ""autumn"", ""winter""]]","[""snow-covered"", ""black"", ""yes"", ""winter""]"
15
  13,"[""What color are the triangles?"", ""What shape do the azure elements resemble?"", ""How many different colors are used in the image?"", ""Which color is associated with the triangles?"", ""Are the crescents depicted in a solid color?""]","[[""green"", ""khaki"", ""blue"", ""red""], [""squares"", ""crescents"", ""rectangles"", ""triangles""], [""one"", ""two"", ""three"", ""four""], [""khaki"", ""azure"", ""yellow"", ""purple""], [""yes"", ""no"", ""partially"", ""unknown""]]","[""khaki"", ""crescents"", ""two"", ""khaki"", ""no""]"
16
  14,"[""What color is the dodecahedron?"", ""What color are the threads interwoven with the dodecahedron?"", ""How many faces does the dodecahedron have?"", ""What is the primary geometric shape depicted in the image?""]","[[""red"", ""maroon"", ""blue"", ""green""], [""teal"", ""yellow"", ""pink"", ""black""], [""6"", ""12"", ""20"", ""8""], [""cube"", ""tetrahedron"", ""dodecahedron"", ""octahedron""]]","[""maroon"", ""teal"", ""12"", ""dodecahedron""]"
 
17
  15,"[""What color is the beach in the image?"", ""What time of day is depicted in the image?"", ""Are there any palm trees on the beach?"", ""What is the condition of the sky in the image?""]","[[""bright coral"", ""dark brown"", ""sand yellow"", ""deep blue""], [""morning"", ""midday"", ""evening"", ""night""], [""yes"", ""no""], [""clear"", ""cloudy"", ""stormy"", ""sunset""]]","[""bright coral"", ""midday"", ""yes"", ""clear""]"
18
  16,"[""What is the predominant weather condition depicted in the image?"", ""What color is the water of the fjord?"", ""Are there any mountains or cliffs visible in the background?"", ""Is there any visible wildlife in the image?"", ""What type of sky is depicted in the image?""]","[[""clear"", ""misty"", ""rainy"", ""snowy""], [""blue"", ""gray"", ""green"", ""brown""], [""yes"", ""no""], [""yes"", ""no""], [""sunny"", ""cloudy"", ""misty"", ""stormy""]]","[""misty"", ""gray"", ""yes"", ""no"", ""misty""]"
19
  17,"[""What is the predominant color of the landscape?"", ""Are there any trees visible in the image?"", ""What type of animal might be present in the tundra?"", ""Is the sky clear or cloudy?""]","[[""blue"", ""green"", ""white"", ""brown""], [""yes"", ""no""], [""penguin"", ""caribou"", ""lion"", ""elephant""], [""clear"", ""cloudy"", ""sunset"", ""stormy""]]","[""white"", ""no"", ""caribou"", ""cloudy""]"
 
163
  161,"[""What color is the vest?"", ""What type of pockets does the vest have?"", ""Is the vest designed with a pattern?"", ""What is the style of the vest?""]","[[""blue"", ""green"", ""bright red"", ""black""], [""zip pockets"", ""button pockets"", ""no pockets""], [""striped"", ""plaid"", ""quilted"", ""polka dot""], [""puffer"", ""sleeveless"", ""long-sleeved"", ""hooded""]]","[""bright red"", ""zip pockets"", ""quilted"", ""puffer""]"
164
  162,"[""What is the primary color of the scarf?"", ""What is the color of the spots on the scarf?"", ""What pattern is featured on the scarf?"", ""Is the scarf made of a patterned fabric?"", ""Does the scarf have a solid color background?""]","[[""red"", ""navy"", ""green"", ""black""], [""blue"", ""white"", ""red"", ""yellow""], [""stripes"", ""polka dots"", ""plaid"", ""floral""], [""yes"", ""no""], [""yes"", ""no""]]","[""navy"", ""white"", ""polka dots"", ""yes"", ""no""]"
165
  163,"[""What is the primary fabric of the kimono?"", ""How would you describe the sleeves of the kimono?"", ""What type of pattern is featured on the kimono?"", ""Is the kimono designed to be fitted or loose?"", ""What color scheme is likely present in the floral chiffon?""]","[[""silk"", ""cotton"", ""chiffon"", ""wool""], [""narrow"", ""wide"", ""long"", ""short""], [""striped"", ""floral"", ""solid"", ""geometric""], [""fitted"", ""loose"", ""tailored"", ""structured""], [""monochrome"", ""pastel"", ""bold"", ""neon""]]","[""chiffon"", ""wide"", ""floral"", ""loose"", ""pastel""]"
166
+ 164,"[""What color is the sky in the image?"", ""What type of vegetation is predominant in the meadow?"", ""Are there any clouds in the sky?"", ""Is there any water present in the meadow?"", ""What is the overall mood conveyed by the image?""]","[[""blue"", ""gray"", ""white"", ""green""], [""grass"", ""desert"", ""trees"", ""flowers""], [""yes"", ""no""], [""yes"", ""no""], [""peaceful"", ""chaotic"", ""gloomy"", ""dark""]]","[""blue"", ""grass"", ""no"", ""no"", ""peaceful""]"
data/gen_descriptions.py CHANGED
@@ -222,7 +222,7 @@ def main():
222
  existing_descriptions = set()
223
  if append and os.path.exists(csv_path):
224
  global starting_id
225
- starting_id = pd.read_csv(csv_path)["id"].max()
226
  existing_descriptions = read_existing_descriptions(csv_path)
227
  print(f"Found {len(existing_descriptions)} existing descriptions")
228
 
 
222
  existing_descriptions = set()
223
  if append and os.path.exists(csv_path):
224
  global starting_id
225
+ starting_id = pd.read_csv(csv_path)["id"].max() + 1
226
  existing_descriptions = read_existing_descriptions(csv_path)
227
  print(f"Found {len(existing_descriptions)} existing descriptions")
228
 
starter.ipynb CHANGED
@@ -233,6 +233,80 @@
233
  "print(f\"Aesthetic Score: {aesthetic_score}\")\n",
234
  "print(f\"Final Fidelity Score: {instance_score}\")"
235
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
  }
237
  ],
238
  "metadata": {
 
233
  "print(f\"Aesthetic Score: {aesthetic_score}\")\n",
234
  "print(f\"Final Fidelity Score: {instance_score}\")"
235
  ]
236
+ },
237
+ {
238
+ "cell_type": "code",
239
+ "execution_count": 13,
240
+ "metadata": {},
241
+ "outputs": [
242
+ {
243
+ "name": "stdout",
244
+ "output_type": "stream",
245
+ "text": [
246
+ "No duplicate IDs found in data/descriptions.csv\n",
247
+ "Sorted rows by ID\n",
248
+ "Fixed and sorted CSV saved to data/descriptions.csv\n",
249
+ "No duplicate IDs found in data/eval.csv\n",
250
+ "Sorted data/eval.csv by ID\n"
251
+ ]
252
+ }
253
+ ],
254
+ "source": [
255
+ "# Fix duplicate IDs in descriptions.csv and order rows by id\n",
256
+ "def fix_duplicate_ids(csv_path):\n",
257
+ " \"\"\"\n",
258
+ " Fix duplicate IDs in a CSV file by assigning new unique IDs to duplicates.\n",
259
+ " Then order rows by ID.\n",
260
+ " \"\"\"\n",
261
+ " # Read the CSV file\n",
262
+ " df = pd.read_csv(csv_path)\n",
263
+ " \n",
264
+ " # Check for duplicate IDs\n",
265
+ " duplicate_mask = df['id'].duplicated(keep='first')\n",
266
+ " duplicate_count = duplicate_mask.sum()\n",
267
+ " \n",
268
+ " if duplicate_count > 0:\n",
269
+ " print(f\"Found {duplicate_count} duplicate IDs in {csv_path}\")\n",
270
+ " \n",
271
+ " # Get the maximum ID value\n",
272
+ " max_id = df['id'].max()\n",
273
+ " \n",
274
+ " # Assign new IDs to duplicates\n",
275
+ " new_ids = list(range(max_id + 1, max_id + 1 + duplicate_count))\n",
276
+ " df.loc[duplicate_mask, 'id'] = new_ids\n",
277
+ " \n",
278
+ " print(f\"Assigned new IDs to duplicates\")\n",
279
+ " else:\n",
280
+ " print(f\"No duplicate IDs found in {csv_path}\")\n",
281
+ " \n",
282
+ " # Sort the dataframe by ID\n",
283
+ " df = df.sort_values(by='id')\n",
284
+ " print(f\"Sorted rows by ID\")\n",
285
+ " \n",
286
+ " # Save the fixed and sorted CSV\n",
287
+ " df.to_csv(csv_path, index=False)\n",
288
+ " print(f\"Fixed and sorted CSV saved to {csv_path}\")\n",
289
+ " \n",
290
+ " # Return the fixed dataframe\n",
291
+ " return df\n",
292
+ "\n",
293
+ "# Fix descriptions.csv\n",
294
+ "fixed_descriptions_df = fix_duplicate_ids('data/descriptions.csv')\n",
295
+ "\n",
296
+ "# Fix eval.csv if needed\n",
297
+ "# First check if eval.csv has the same issue\n",
298
+ "eval_df = pd.read_csv('data/eval.csv')\n",
299
+ "duplicate_eval_ids = eval_df['id'].duplicated(keep='first').sum()\n",
300
+ "\n",
301
+ "if duplicate_eval_ids > 0:\n",
302
+ " fixed_eval_df = fix_duplicate_ids('data/eval.csv')\n",
303
+ "else:\n",
304
+ " print(\"No duplicate IDs found in data/eval.csv\")\n",
305
+ " # Still sort by ID even if no duplicates\n",
306
+ " eval_df = eval_df.sort_values(by='id')\n",
307
+ " eval_df.to_csv('data/eval.csv', index=False)\n",
308
+ " print(\"Sorted data/eval.csv by ID\")\n"
309
+ ]
310
  }
311
  ],
312
  "metadata": {