expansion/gw16168: image_detect.py

File image_detect.py, 6.6 KB (added by Tim Harvey, 22 hours ago)

image detection command line app

Line 
1#!/usr/bin/env python3
2"""
3Ara NPU Multi-Format Universal Image Decoder
4============================================
5"""
6
7import ctypes
8import os
9import sys
10import subprocess
11import gi
12
13gi.require_version('Gst', '1.0')
14from gi.repository import Gst
15
16Gst.init(None)
17
# COCO class-id -> human-readable label table used when printing detections.
# The names are listed in class-id order, so each name's position in the
# tuple is its id (0 = "person" ... 79 = "toothbrush").
COCO_CLASSES = dict(enumerate((
    # 0-9
    "person", "bicycle", "car", "motorcycle", "airplane",
    "bus", "train", "truck", "boat", "traffic light",
    # 10-19
    "fire hydrant", "stop sign", "parking meter", "bench", "bird",
    "cat", "dog", "horse", "sheep", "cow",
    # 20-29
    "elephant", "bear", "zebra", "giraffe", "backpack",
    "umbrella", "handbag", "tie", "suitcase", "frisbee",
    # 30-39
    "skis", "snowboard", "sports ball", "kite", "baseball bat",
    "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
    # 40-49
    "wine glass", "cup", "fork", "knife", "spoon",
    "bowl", "banana", "apple", "sandwich", "orange",
    # 50-59
    "broccoli", "carrot", "hot dog", "pizza", "donut",
    "cake", "chair", "couch", "potted plant", "bed",
    # 60-69
    "dining table", "toilet", "tv", "laptop", "mouse",
    "remote", "keyboard", "cell phone", "microwave", "oven",
    # 70-79
    "toaster", "sink", "refrigerator", "book", "clock",
    "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
)))
37
class AraDetection(ctypes.Structure):
    """One byte-packed detection record, decoded with ``from_buffer_copy``.

    Field order: four ``float`` box corners (``xmin``, ``ymin``, ``xmax``,
    ``ymax``), a ``float`` ``confidence``, a 32-bit ``class_id`` and a
    pointer-sized ``class_name_ptr`` slot.
    """

    # Layout/packing are declared before ``_fields_`` so they are in effect
    # when ctypes computes the field offsets.
    _layout_ = "ms"
    _pack_ = 1
    _fields_ = [
        *((corner, ctypes.c_float) for corner in ("xmin", "ymin", "xmax", "ymax")),
        ("confidence", ctypes.c_float),
        ("class_id", ctypes.c_int32),
        ("class_name_ptr", ctypes.c_void_p),
    ]
47
def _read_image_size(image_path):
    """Return ``(width, height)`` of *image_path* as reported by ``identify``.

    Raises ``OSError`` if ``identify`` cannot be started,
    ``subprocess.CalledProcessError`` if it exits non-zero, and
    ``ValueError``/``IndexError`` if its output cannot be parsed.
    """
    # Argument list, no shell: a path containing spaces or shell
    # metacharacters is handed to ImageMagick verbatim.
    output = subprocess.check_output(
        ["identify", "-format", "%w %h\\n", image_path]
    ).decode()
    # One line is printed per frame; only the first frame's size is used.
    width, height = output.splitlines()[0].split()[:2]
    return int(width), int(height)


def _run_inference(input_image, model, model_w, model_h):
    """Run the GStreamer pipeline and return the last appsink buffer as bytes.

    The image is decoded, scaled to ``model_w`` x ``model_h``, converted to
    BGRA and passed through the ``dvPre``/``dvInf``/``dvPost`` elements.
    Returns ``None`` when the sink produced no sample.
    """
    pipe_str = (
        f"multifilesrc name=src loop=false num-buffers=2 ! decodebin name=d ! "
        f"videoconvert ! videoscale ! video/x-raw,width={model_w},height={model_h} ! "
        f"videoconvert ! video/x-raw,format=BGRA ! "
        f"dvPre model={model} ! "
        f"dvInf model={model} sock=/var/run/proxy.sock use-shm=true shm-path=/dev/shm/ara_inf_ ! "
        f"dvPost model={model} orig-width={model_w} orig-height={model_h} ! "
        f"appsink name=mysink sync=false async=false emit-signals=true"
    )

    # Before creating the pipeline, lower the rank of v4l2jpegdec to zero so
    # decodebin skips it (it doesn't support BGRA output).
    feature = Gst.Registry.get().lookup_feature("v4l2jpegdec")
    if feature:
        feature.set_rank(0)

    pipeline = Gst.parse_launch(pipe_str)
    # Set the file location as a property instead of splicing it into the
    # launch string, so a path containing spaces is not split by the parser.
    pipeline.get_by_name("src").set_property("location", input_image)
    sink = pipeline.get_by_name("mysink")

    last_raw = None
    try:
        pipeline.set_state(Gst.State.PLAYING)
        # Drain the sink; only the most recent buffer is kept.
        while True:
            sample = sink.emit("pull-sample")
            if not sample:
                break
            buffer = sample.get_buffer()
            last_raw = buffer.extract_dup(0, buffer.get_size())
    finally:
        # Always shut the pipeline down, even if pulling a sample raised.
        pipeline.set_state(Gst.State.NULL)
    return last_raw


def _parse_detections(raw, scale_x, scale_y):
    """Decode the detection buffer into ``(declared_count, detections)``.

    The buffer starts with a 4-byte little-endian count followed by packed
    ``AraDetection`` records. Each decoded entry is a tuple
    ``(class_id, name, confidence, x1, y1, x2, y2)`` with the box corners
    multiplied by ``scale_x``/``scale_y``. ``declared_count`` is 0 when the
    buffer is missing, too short, or the count is outside ``1..999``.
    """
    if not raw or len(raw) < 4:
        return 0, []

    declared = int.from_bytes(raw[:4], byteorder="little")
    # NOTE(review): the 1000 upper bound is kept from the original code;
    # presumably a sanity limit against a garbage header - confirm.
    if not 0 < declared < 1000:
        return 0, []

    record_size = ctypes.sizeof(AraDetection)
    detections = []
    for index in range(declared):
        offset = 4 + index * record_size
        if offset + record_size > len(raw):
            # Buffer is shorter than the header claims; keep what was read.
            break
        det = AraDetection.from_buffer_copy(raw, offset)
        detections.append((
            det.class_id,
            COCO_CLASSES.get(det.class_id, "unknown"),
            det.confidence,
            det.xmin * scale_x,
            det.ymin * scale_y,
            det.xmax * scale_x,
            det.ymax * scale_y,
        ))
    return declared, detections


def _annotate_image(input_image, output_image, detections):
    """Draw a box and label per detection with ImageMagick ``convert``.

    Raises ``OSError`` if ``convert`` cannot be started and
    ``subprocess.CalledProcessError`` if it exits non-zero.
    """
    cmd = ["convert", input_image]
    for _class_id, name, conf, x1, y1, x2, y2 in detections:
        ix1, iy1, ix2, iy2 = int(x1), int(y1), int(x2), int(y2)
        label = f"{name} {conf*100:.1f}%"
        cmd += [
            "-stroke", "green", "-strokewidth", "2", "-fill", "none",
            "-draw", f"rectangle {ix1},{iy1} {ix2},{iy2}",
        ]
        # Signed formatting yields a valid geometry offset ("+10-3") even
        # when the label position above the box goes negative.
        cmd += [
            "-stroke", "none", "-fill", "white", "-pointsize", "16",
            "-annotate", f"{ix1:+d}{iy1 - 6:+d}", label,
        ]
    cmd.append(output_image)
    # Argument list, no shell: file names and labels are never re-parsed.
    subprocess.run(cmd, check=True)


def main():
    """Detect objects in an image and write an annotated copy.

    Command line: ``<input_image> <output_image> [model]``. The image size
    is read with ImageMagick, the image is run through the GStreamer
    inference pipeline, the detections are printed, and boxes with labels
    are drawn onto ``output_image``.

    Exits with status 1 on bad usage, a missing input file, a failure to
    read the image size, or a failed rendering step.
    """
    if len(sys.argv) < 3:
        print(f"Usage: {sys.argv[0]} <input_image> <output_image> [model]")
        sys.exit(1)

    input_image, output_image = sys.argv[1], sys.argv[2]
    model = sys.argv[3] if len(sys.argv) > 3 else "/usr/share/cnn/detection/yolov8n/model.dvm"

    if not os.path.exists(input_image):
        print(f"ERROR: File '{input_image}' could not be located.")
        sys.exit(1)

    # Fetch native dimensions using ImageMagick
    try:
        w_native, h_native = _read_image_size(input_image)
    except (OSError, subprocess.CalledProcessError, ValueError, IndexError) as e:
        print(f"ERROR: Failed to read image properties using ImageMagick: {e}")
        sys.exit(1)

    print(f"\nmodel: {model}")
    print(f"image: {os.path.basename(input_image)} {w_native}x{h_native}")

    MODEL_W, MODEL_H = 640, 640

    raw = _run_inference(input_image, model, MODEL_W, MODEL_H)

    # Boxes are scaled from the model's input size back to the native image.
    declared, detections = _parse_detections(
        raw, w_native / MODEL_W, h_native / MODEL_H
    )

    if declared:
        print(f"DETECTIONS LOGGED: FOUND {declared} ACTIVE OBJECTS")
        print("-" * 70)
        for number, (class_id, name, conf, x1, y1, x2, y2) in enumerate(detections, 1):
            print(f"Object {number}: ID={class_id} | Name={name} | Confidence={conf * 100:.1f}%")
            print(f" Bounding Box -> [{int(x1)}, {int(y1)}] to [{int(x2)}, {int(y2)}]")
            print("-" * 70)

    if not detections:
        print("INFO: No operational object targets were captured by the NPU context.\n")
        return

    # Render final multi-object annotated canvas
    try:
        _annotate_image(input_image, output_image, detections)
    except (OSError, subprocess.CalledProcessError):
        print("ERROR: ImageMagick rendering execution failed.\n")
        # Match the other error paths: report failure to the caller.
        sys.exit(1)
    print(f"SUCCESS: Mapped all boxes and text labels onto -> '{output_image}'\n")
160
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()