jan-hq committed on
Commit
c0ff3e9
·
verified ·
1 Parent(s): 57ff4e4

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +13 -4
README.md CHANGED
@@ -20,9 +20,13 @@ Our key contributions are as follows:
20
 
21
 
22
  ## Model Details
23
- * Developed by: Alan Dao, Dinh Bach Vu, Tuan Le Duc Anh, Bui Quang Huy (Menlo Research)
24
- * Model type: Qwen 2.5 3B Instruct, fine-tuned for hand pose estimation
 
 
25
  * License: Apache-2.0 license
 
 
26
 
27
  ## How to Get Started
28
 
@@ -34,7 +38,7 @@ from qwen_vl_utils import process_vision_info
34
 
35
  # 1. Load model and processor
36
  device = "cuda" if torch.cuda.is_available() else "cpu"
37
- model_path = "path/to/qwen2.5_vl/checkpoint-1500/"
38
 
39
  model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
40
  model_path,
@@ -53,8 +57,13 @@ processor = AutoProcessor.from_pretrained(
53
  image = Image.open("your_hand_image.png").convert("RGB")
54
 
55
  # 3. Create messages
 
 
 
 
 
56
  messages = [
57
- {"role": "system", "content": "You are a specialized Vision Language Model designed to accurately estimate joint angles from hand pose images..."},
58
  {
59
  "role": "user",
60
  "content": [
 
20
 
21
 
22
  ## Model Details
23
+ * Model architecture: Qwen 2.5 3B Instruct, fine-tuned for hand pose estimation
24
+ * Dataset:
25
+ * Training: [homebrewltd/robot-hand-poses-train](https://huggingface.co/datasets/homebrewltd/robot-hand-poses-train)
26
+ * Eval: [homebrewltd/robotic-hand-poses-eval](https://huggingface.co/datasets/homebrewltd/robotic-hand-poses-eval)
27
  * License: Apache-2.0 license
28
+ * Developed by: Alan Dao, Dinh Bach Vu, Tuan Le Duc Anh, Bui Quang Huy (Menlo Research)
29
+
30
 
31
  ## How to Get Started
32
 
 
38
 
39
  # 1. Load model and processor
40
  device = "cuda" if torch.cuda.is_available() else "cpu"
41
+ model_path = "homebrewltd/Poseless-3B"
42
 
43
  model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
44
  model_path,
 
57
  image = Image.open("your_hand_image.png").convert("RGB")
58
 
59
  # 3. Create messages
60
+ SYSTEM_PROMPT = """You are a specialized Vision Language Model designed to accurately estimate joint angles from hand pose images. Your task is to analyze images of a human or robotic hand and output precise angle measurements for each joint. Output joint angles in radians.
61
+ Output Format:
62
+ <lh_WRJ2>angle</lh_WRJ2><lh_WRJ1>angle</lh_WRJ1><lh_FFJ4>angle</lh_FFJ4><lh_FFJ3>angle</lh_FFJ3><lh_FFJ2>angle</lh_FFJ2><lh_FFJ1>angle</lh_FFJ1><lh_MFJ4>angle</lh_MFJ4><lh_MFJ3>angle</lh_MFJ3><lh_MFJ2>angle</lh_MFJ2><lh_MFJ1>angle</lh_MFJ1><lh_RFJ4>angle</lh_RFJ4><lh_RFJ3>angle</lh_RFJ3><lh_RFJ2>angle</lh_RFJ2><lh_RFJ1>angle</lh_RFJ1><lh_LFJ5>angle</lh_LFJ5><lh_LFJ4>angle</lh_LFJ4><lh_LFJ3>angle</lh_LFJ3><lh_LFJ2>angle</lh_LFJ2><lh_LFJ1>angle</lh_LFJ1><lh_THJ5>angle</lh_THJ5><lh_THJ4>angle</lh_THJ4><lh_THJ3>angle</lh_THJ3><lh_THJ2>angle</lh_THJ2><lh_THJ1>angle</lh_THJ1>
63
+ """
64
+
65
  messages = [
66
+ {"role": "system", "content": f"{SYSTEM_PROMPT}"},
67
  {
68
  "role": "user",
69
  "content": [