Context Navigation

Back to expansion/gw16168

expansion/gw16168: image_detect.py

File image_detect.py, 6.6 KB (added by Tim Harvey, 22 hours ago)
image detection command line app

Line
1	#!/usr/bin/env python3
2	"""
3	Ara NPU Multi-Format Universal Image Decoder
4	============================================
5	"""
6
7	import ctypes
8	import os
9	import sys
10	import subprocess
11	import gi
12
13	gi.require_version('Gst', '1.0')
14	from gi.repository import Gst
15
16	Gst.init(None)
17
# Human-readable COCO labels, keyed by the integer class id reported with
# each detection.  The tuple position of a name is its class id, so the
# mapping is generated with enumerate() instead of spelling out every key.
COCO_CLASSES = dict(enumerate((
    # 0-9
    "person", "bicycle", "car", "motorcycle", "airplane",
    "bus", "train", "truck", "boat", "traffic light",
    # 10-19
    "fire hydrant", "stop sign", "parking meter", "bench", "bird",
    "cat", "dog", "horse", "sheep", "cow",
    # 20-29
    "elephant", "bear", "zebra", "giraffe", "backpack",
    "umbrella", "handbag", "tie", "suitcase", "frisbee",
    # 30-39
    "skis", "snowboard", "sports ball", "kite", "baseball bat",
    "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
    # 40-49
    "wine glass", "cup", "fork", "knife", "spoon",
    "bowl", "banana", "apple", "sandwich", "orange",
    # 50-59
    "broccoli", "carrot", "hot dog", "pizza", "donut",
    "cake", "chair", "couch", "potted plant", "bed",
    # 60-69
    "dining table", "toilet", "tv", "laptop", "mouse",
    "remote", "keyboard", "cell phone", "microwave", "oven",
    # 70-79
    "toaster", "sink", "refrigerator", "book", "clock",
    "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
)))
37
class AraDetection(ctypes.Structure):
    """One fixed-size detection record, laid out without padding.

    The record holds five consecutive 32-bit floats (the box corners
    ``xmin``, ``ymin``, ``xmax``, ``ymax`` and the ``confidence``), a
    32-bit signed ``class_id`` and a pointer-sized ``class_name_ptr``.
    ``_pack_ = 1`` removes alignment padding so ``ctypes.sizeof`` equals
    the sum of the field sizes.
    """

    _pack_ = 1
    _layout_ = "ms"
    _fields_ = [
        (float_field, ctypes.c_float)
        for float_field in ("xmin", "ymin", "xmax", "ymax", "confidence")
    ] + [
        ("class_id", ctypes.c_int32),
        # Raw pointer value only; it is carried in the record but this
        # structure gives no way to dereference it.
        ("class_name_ptr", ctypes.c_void_p),
    ]
47
def _read_image_size(image_path):
    """Return ``(width, height)`` of *image_path* using ImageMagick ``identify``.

    Raises:
        subprocess.CalledProcessError: ``identify`` exited with an error.
        OSError: the ``identify`` executable could not be started.
        ValueError, IndexError: the output was not two integers.
    """
    # An argument list (no shell) passes the path verbatim, so file names
    # containing spaces or shell metacharacters are neither split nor executed.
    output = subprocess.check_output(["identify", "-format", "%w %h", image_path])
    fields = output.decode().split()
    return int(fields[0]), int(fields[1])


def _run_pipeline(input_image, model, model_w, model_h):
    """Run the image through the GStreamer pipeline and return the last buffer.

    Returns the bytes of the last sample pulled from the appsink, or ``None``
    when the pipeline reached end-of-stream without producing any sample.
    Exits with status 1 when no sample was produced and the pipeline posted
    an error message on its bus.
    """
    # NOTE(review): input_image and model are interpolated unquoted into the
    # launch string, so paths containing spaces will not parse -- confirm
    # whether such paths need to be supported.
    pipe_str = (
        f"multifilesrc location={input_image} loop=false num-buffers=2 ! decodebin name=d ! "
        f"videoconvert ! videoscale ! video/x-raw,width={model_w},height={model_h} ! "
        f"videoconvert ! video/x-raw,format=BGRA ! "
        f"dvPre model={model} ! "
        f"dvInf model={model} sock=/var/run/proxy.sock use-shm=true shm-path=/dev/shm/ara_inf_ ! "
        f"dvPost model={model} orig-width={model_w} orig-height={model_h} ! "
        f"appsink name=mysink sync=false async=false emit-signals=true"
    )

    # Before creating the launcher, adjust the system plugin registry ranking
    # so GStreamer ignores v4l2jpegdec element (as it doesn't support BGRA output)
    registry = Gst.Registry.get()
    feature = registry.lookup_feature("v4l2jpegdec")
    if feature:
        # Lower its rank to ZERO so decodebin skips over it permanently
        feature.set_rank(0)

    pipeline = Gst.parse_launch(pipe_str)
    sink = pipeline.get_by_name("mysink")
    pipeline.set_state(Gst.State.PLAYING)

    last_valid_raw_bytes = None
    try:
        # Drain the sink; only the most recent buffer is kept.
        while True:
            sample = sink.emit("pull-sample")
            if not sample:
                break
            buffer = sample.get_buffer()
            last_valid_raw_bytes = buffer.extract_dup(0, buffer.get_size())
        # Read any pending error before teardown, while it is still queued
        # on the bus.
        error_message = pipeline.get_bus().pop_filtered(Gst.MessageType.ERROR)
    finally:
        # Always release the pipeline, even if pulling a sample raised.
        pipeline.set_state(Gst.State.NULL)

    if last_valid_raw_bytes is None and error_message is not None:
        error, _debug = error_message.parse_error()
        print(f"ERROR: GStreamer pipeline failed: {error.message}")
        sys.exit(1)

    return last_valid_raw_bytes


def _parse_detections(raw_bytes, w_native, h_native, model_w, model_h):
    """Decode the detection buffer, print each detection, and return them.

    The buffer is read as a 4-byte little-endian detection count followed by
    consecutive ``AraDetection`` records.  Box coordinates are scaled from
    the model input size to the native image size.

    Returns a list of ``(name, confidence, x1, y1, x2, y2)`` tuples; the list
    is empty when the buffer is missing, too short, or the count is outside
    the accepted range ``1..999``.
    """
    detections = []
    if not raw_bytes or len(raw_bytes) < 4:
        return detections

    num_detections = int.from_bytes(raw_bytes[:4], byteorder='little')
    if not 0 < num_detections < 1000:
        return detections

    print(f"DETECTIONS LOGGED: FOUND {num_detections} ACTIVE OBJECTS")
    print("-" * 70)

    record_size = ctypes.sizeof(AraDetection)
    # Loop-invariant scale from model coordinates to native image coordinates.
    x_scale = w_native / model_w
    y_scale = h_native / model_h

    offset = 4
    for i in range(num_detections):
        # Stop at a truncated buffer instead of reading past its end.
        if offset + record_size > len(raw_bytes):
            break
        det = AraDetection.from_buffer_copy(raw_bytes[offset:offset + record_size])
        offset += record_size

        x1_mapped = det.xmin * x_scale
        x2_mapped = det.xmax * x_scale
        y1_mapped = det.ymin * y_scale
        y2_mapped = det.ymax * y_scale

        coco_name = COCO_CLASSES.get(det.class_id, "unknown")

        print(f"Object {i+1}: ID={det.class_id} | Name={coco_name} | Confidence={det.confidence * 100:.1f}%")
        print(f" Bounding Box -> [{int(x1_mapped)}, {int(y1_mapped)}] to [{int(x2_mapped)}, {int(y2_mapped)}]")
        print("-" * 70)

        detections.append((coco_name, det.confidence, x1_mapped, y1_mapped, x2_mapped, y2_mapped))

    return detections


def _build_draw_command(input_image, output_image, detections):
    """Return the ImageMagick ``convert`` argument list that annotates the image.

    For every detection a green rectangle is drawn and a white label
    ("<name> <confidence>%") is placed 6 pixels above its top-left corner.
    """
    cmd_args = ["convert", input_image]
    for coco_name, conf, x1, y1, x2, y2 in detections:
        ix1, iy1, ix2, iy2 = int(x1), int(y1), int(x2), int(y2)
        label = f"{coco_name} {conf*100:.1f}%"
        cmd_args += [
            "-stroke", "green", "-strokewidth", "2", "-fill", "none",
            "-draw", f"rectangle {ix1},{iy1} {ix2},{iy2}",
        ]
        cmd_args += [
            "-stroke", "none", "-fill", "white", "-pointsize", "16",
            # Explicit signs keep the offset well-formed when a coordinate
            # is negative (e.g. a box touching the top edge).
            "-annotate", f"{ix1:+d}{iy1 - 6:+d}", label,
        ]
    cmd_args.append(output_image)
    return cmd_args


def main():
    """Detect objects in an image and write an annotated copy.

    Command line: ``<input_image> <output_image> [model]``.  The native image
    size is read with ImageMagick, the image is run through the GStreamer
    inference pipeline at 640x640, the resulting detections are printed, and
    boxes plus labels are drawn onto ``output_image`` with ImageMagick.

    Exits with status 1 on a usage error, a missing input file, a failure to
    read the image size, or a pipeline error that produced no data.
    """
    if len(sys.argv) < 3:
        print(f"Usage: {sys.argv[0]} <input_image> <output_image> [model]")
        sys.exit(1)

    input_image = sys.argv[1]
    output_image = sys.argv[2]
    model = "/usr/share/cnn/detection/yolov8n/model.dvm"
    if len(sys.argv) > 3:
        model = sys.argv[3]

    if not os.path.exists(input_image):
        print(f"ERROR: File '{input_image}' could not be located.")
        sys.exit(1)

    # Fetch native dimensions using ImageMagick
    try:
        w_native, h_native = _read_image_size(input_image)
    except (subprocess.CalledProcessError, OSError, ValueError, IndexError) as e:
        print(f"ERROR: Failed to read image properties using ImageMagick: {e}")
        sys.exit(1)

    # Print target properties cleanly
    print(f"\nmodel: {model}")
    print(f"image: {os.path.basename(input_image)} {w_native}x{h_native}")

    MODEL_W, MODEL_H = 640, 640

    last_valid_raw_bytes = _run_pipeline(input_image, model, MODEL_W, MODEL_H)
    processed_detections = _parse_detections(
        last_valid_raw_bytes, w_native, h_native, MODEL_W, MODEL_H
    )

    # Render final multi-object annotated canvas
    if processed_detections:
        draw_cmd = _build_draw_command(input_image, output_image, processed_detections)
        try:
            subprocess.run(draw_cmd, check=True)
            print(f"SUCCESS: Mapped all boxes and text labels onto -> '{output_image}'\n")
        except (subprocess.CalledProcessError, OSError):
            # OSError covers a missing or non-executable `convert` binary.
            print("ERROR: ImageMagick rendering execution failed.\n")
    else:
        print("INFO: No operational object targets were captured by the NPU context.\n")


if __name__ == '__main__':
    main()

Download in other formats:

Original Format