Context Navigation

Back to expansion/gw16168

expansion/gw16168: vision-webapp.py

File vision-webapp.py, 19.2 KB (added by Tim Harvey, 22 hours ago)
video inference webapp

Line
1	#!/usr/bin/env python3
2	"""
3	Ara NPU Basic Video Stream & Inference Hub
4	==========================================
5	"""
6
7	import argparse
8	import ctypes
9	import glob
10	import os
11	import sys
12	import threading
13	import time
14	import logging
15	import cv2
16	import numpy as np
17	from flask import Flask, Response, jsonify, request, render_template_string
18	import gi
19
20	gi.require_version('Gst', '1.0')
21	from gi.repository import Gst
22	Gst.init(None)
23
# The dashboard polls the server once per second; only let Werkzeug report
# errors so that request logging does not flood the console.
log = logging.getLogger('werkzeug')
log.setLevel(logging.ERROR)

# Mutex taken by the HTTP handlers, followed by the Flask application itself.
lock = threading.Lock()
app = Flask(__name__)
30
class AraDetection(ctypes.Structure):
    """One detection record, laid out byte-packed (no padding).

    Fields, in order: the four box coordinates and the confidence as C
    floats, the class id as a 32-bit integer, and a pointer-sized
    class-name field.
    """

    _pack_ = 1
    _fields_ = [
        (float_field, ctypes.c_float)
        for float_field in ("xmin", "ymin", "xmax", "ymax", "confidence")
    ] + [
        ("class_id", ctypes.c_int32),
        ("class_name_ptr", ctypes.c_void_p),
    ]
39
# --- STATE STORAGE ---
# Single mutable dictionary shared by the pipeline thread and the HTTP
# handlers.
STATE_REPO = dict(
    # Latest preview frame and detection list, plus the current selection.
    frame=None,
    detections=[],
    active_source=None,
    active_model_name="yolov8n",
    active_model_path="/usr/share/cnn/detection/yolov8n/model.dvm",
    restart_flag=False,
    source_registry=[],
    model_registry=["yolov8n"],

    # Target pipeline resolutions: preview canvas and model input.
    CANVAS_W=640,
    CANVAS_H=360,
    MODEL_W=640,
    MODEL_H=640,

    # Live telemetry metrics.
    native_w=0,
    native_h=0,
    stream_w=0,
    stream_h=0,
    inference_fps=0.0,
)

# Recent inference completion times, used by the inference callback to
# derive the FPS figure.
inference_timestamps = list()
67
# Class-id -> label lookup for the 80 COCO categories; the position of a name
# in the tuple is its class id.
COCO_LABELS = dict(enumerate((
    'person', 'bicycle', 'car', 'motorcycle', 'airplane',
    'bus', 'train', 'truck', 'boat', 'traffic light',
    'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird',
    'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
    'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
    'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
    'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
    'wine glass', 'cup', 'fork', 'knife', 'spoon',
    'bowl', 'banana', 'apple', 'sandwich', 'orange',
    'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
    'cake', 'chair', 'couch', 'potted plant', 'bed',
    'dining table', 'toilet', 'tv', 'laptop', 'mouse',
    'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
    'toaster', 'sink', 'refrigerator', 'book', 'clock',
    'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush',
)))
86
def build_source_injection_string(source_path):
    """Return the source head of the gst-launch description for *source_path*.

    A path ending in ``.mp4`` is read with ``filesrc ! decodebin``; any other
    path is handed to ``v4l2src`` as a device node.  Both variants convert the
    video and end in a ``tee`` named ``t`` followed by a trailing space, so
    branch descriptions can be appended directly.

    The path is emitted as a double-quoted property value (with embedded
    backslashes and double quotes escaped) so that paths containing spaces do
    not split the pipeline description.
    """
    escaped = source_path.replace("\\", "\\\\").replace('"', '\\"')
    quoted = f'"{escaped}"'
    if source_path.endswith(".mp4"):
        return f"filesrc location={quoted} ! decodebin ! videoconvert ! tee name=t "
    return f"v4l2src device={quoted} ! videoconvert ! tee name=t "
92
def gstreamer_orchestration_loop():
    """Build, run and tear down the GStreamer pipeline in an endless loop.

    Waits until ``STATE_REPO["active_source"]`` is set, then launches a
    pipeline whose ``tee`` feeds three branches: a sink used only to read the
    source's native caps, a scaled BGR preview sink, and the
    dvPre/dvInf/dvPost inference chain.  The appsink callbacks publish the
    latest frame, detections and telemetry into ``STATE_REPO``.

    The pipeline is torn down and rebuilt after a one second pause when
    ``restart_flag`` is raised, when the bus reports an error, or on
    end-of-stream of a non-mp4 source; mp4 sources are rewound to the start
    on end-of-stream instead.  This function never returns.
    """
    # Only needed for the exception type raised by Gst.parse_launch.
    from gi.repository import GLib

    canvas_w = STATE_REPO["CANVAS_W"]
    canvas_h = STATE_REPO["CANVAS_H"]
    model_w = STATE_REPO["MODEL_W"]
    model_h = STATE_REPO["MODEL_H"]
    fps_window = 30  # number of most recent inference timestamps kept

    def on_native_caps(sink):
        """Record the width/height advertised by the unscaled branch."""
        sample = sink.emit("pull-sample")
        if sample:
            struct = sample.get_caps().get_structure(0)
            STATE_REPO["native_w"] = struct.get_value("width")
            STATE_REPO["native_h"] = struct.get_value("height")
        return Gst.FlowReturn.OK

    def on_new_detection(sink):
        """Update the inference FPS and decode one detection buffer."""
        sample = sink.emit("pull-sample")
        if sample:
            # FPS is derived purely from the inference branch's output rate.
            # A monotonic clock keeps it unaffected by wall-clock changes.
            inference_timestamps.append(time.monotonic())
            if len(inference_timestamps) > fps_window:
                inference_timestamps.pop(0)
            elapsed = inference_timestamps[-1] - inference_timestamps[0]
            if elapsed > 0:
                # N timestamps delimit N-1 frame intervals.
                STATE_REPO["inference_fps"] = (len(inference_timestamps) - 1) / elapsed

            buffer = sample.get_buffer()
            raw_bytes = buffer.extract_dup(0, buffer.get_size())
            if raw_bytes and len(raw_bytes) >= 4:
                # The buffer is parsed as a uint32 count followed by packed
                # AraDetection records.
                num_detections = int(np.frombuffer(raw_bytes[:4], dtype=np.uint32)[0])
                record_size = ctypes.sizeof(AraDetection)
                local_dets = []
                offset = 4
                for _ in range(num_detections):
                    if offset + record_size > len(raw_bytes):
                        break  # the count promises more records than the buffer holds
                    det = AraDetection.from_buffer_copy(raw_bytes[offset:offset + record_size])
                    offset += record_size
                    local_dets.append((det.class_id, det.confidence, det.xmin, det.ymin, det.xmax, det.ymax))
                STATE_REPO["detections"] = local_dets
        return Gst.FlowReturn.OK

    def on_new_frame(sink):
        """Publish the newest preview frame as an (h, w, 3) uint8 array."""
        sample = sink.emit("pull-sample")
        if sample:
            buffer = sample.get_buffer()
            struct = sample.get_caps().get_structure(0)
            w = struct.get_value("width")
            h = struct.get_value("height")

            STATE_REPO["stream_w"] = w
            STATE_REPO["stream_h"] = h

            raw_bytes = buffer.extract_dup(0, buffer.get_size())
            if raw_bytes:
                try:
                    frame_arr = np.frombuffer(raw_bytes, dtype=np.uint8).reshape((h, w, 3))
                    STATE_REPO["frame"] = frame_arr.copy()
                except ValueError:
                    # Buffer size does not match h*w*3: drop this frame and
                    # keep showing the previous one.
                    pass
        return Gst.FlowReturn.OK

    while True:
        # Idle until a source has been selected.
        while STATE_REPO["active_source"] is None:
            time.sleep(0.2)
            if STATE_REPO["restart_flag"]:
                break

        current_target_source = STATE_REPO["active_source"]
        current_target_model = STATE_REPO["active_model_path"]
        STATE_REPO["restart_flag"] = False

        if current_target_source is None:
            continue

        source_segment = build_source_injection_string(current_target_source)

        pipe_str = (
            f"{source_segment} "
            f"t. ! queue max-size-buffers=2 leaky=downstream ! appsink name=nativesink sync=false async=false emit-signals=true "
            f"t. ! queue max-size-buffers=2 leaky=downstream ! videoscale ! video/x-raw,width={canvas_w},height={canvas_h} ! videoconvert ! video/x-raw,format=BGR ! appsink name=framesink sync=false async=false emit-signals=true "
            f"t. ! queue max-size-buffers=2 leaky=downstream ! "
            f"videoscale ! video/x-raw,width={model_w},height={model_h} ! videoconvert ! video/x-raw,format=BGRA ! "
            f"dvPre model={current_target_model} ! "
            f"dvInf model={current_target_model} sock=/var/run/proxy.sock use-shm=true shm-path=/dev/shm/ara_inf_ ! "
            f"dvPost model={current_target_model} orig-width={model_w} orig-height={model_h} ! "
            f"appsink name=postsink sync=false async=false emit-signals=true"
        )

        print(f"[LAUNCH PIPELINE]\n {pipe_str}\n")
        try:
            pipeline = Gst.parse_launch(pipe_str)
        except GLib.Error as exc:
            # Without this guard the worker thread would die and no later
            # source/model selection could ever start a pipeline.
            print(f"[PIPELINE ERROR] could not build pipeline: {exc.message}")
            time.sleep(1.0)
            continue

        pipeline.get_by_name("nativesink").connect("new-sample", on_native_caps)
        pipeline.get_by_name("postsink").connect("new-sample", on_new_detection)
        pipeline.get_by_name("framesink").connect("new-sample", on_new_frame)
        pipeline.set_state(Gst.State.PLAYING)

        bus = pipeline.get_bus()
        while True:
            # Short timeout so a restart request is noticed promptly.
            msg = bus.timed_pop_filtered(
                50 * Gst.MSECOND,
                Gst.MessageType.ERROR | Gst.MessageType.EOS,
            )
            if msg:
                if msg.type == Gst.MessageType.EOS and current_target_source.endswith(".mp4"):
                    # Loop file playback by rewinding to the beginning.
                    pipeline.seek_simple(
                        Gst.Format.TIME,
                        Gst.SeekFlags.FLUSH | Gst.SeekFlags.KEY_UNIT,
                        0,
                    )
                    continue
                if msg.type == Gst.MessageType.ERROR:
                    err, debug_info = msg.parse_error()
                    print(f"[PIPELINE ERROR] {err.message} ({debug_info})")
                break

            if STATE_REPO["restart_flag"]:
                break

        # Tear down and clear everything the callbacks published.
        pipeline.set_state(Gst.State.NULL)
        STATE_REPO["frame"] = None
        STATE_REPO["detections"] = []
        STATE_REPO["native_w"] = 0
        STATE_REPO["native_h"] = 0
        STATE_REPO["stream_w"] = 0
        STATE_REPO["stream_h"] = 0
        STATE_REPO["inference_fps"] = 0.0
        inference_timestamps.clear()
        time.sleep(1.0)
219
# Dashboard page.  All dynamic values are passed in as template variables so
# that Jinja's autoescaping applies to them; registry strings are never
# spliced into the template source itself.
_INDEX_TEMPLATE = """<!DOCTYPE html>
<html>
<head>
<title>Ara Stream Client</title>
<style>
body { font-family: sans-serif; background: #0c0c0e; color: #e1e1e6; margin: 0; padding: 20px; display: flex; flex-direction: column; align-items: center; }
.dashboard-layout { display: flex; flex-direction: column; gap: 15px; width: 660px; }
.panel { background: #121216; padding: 12px 15px; border-radius: 6px; border: 1px solid #1f1f24; display: flex; flex-direction: column; gap: 10px; }
.control-row { display: flex; align-items: center; justify-content: space-between; }
label { font-size: 12px; font-weight: bold; color: #8f8f9d; text-transform: uppercase; }
select { background: #0c0c0e; color: #fff; border: 1px solid #04d361; padding: 6px 10px; border-radius: 4px; width: 420px; outline: none; }
.stats-banner { display: flex; justify-content: space-between; background: #17171f; padding: 10px 15px; border: 1px solid #1f1f24; border-radius: 4px; font-family: monospace; font-size: 13px; color: #8f8f9d; }
.stats-banner span strong { color: #04d361; }
.media-container { background: #121216; padding: 8px; border-radius: 6px; border: 1px solid #1f1f24; position: relative; min-height: 480px; display: flex; align-items: center; justify-content: center; }
img { display: block; border-radius: 4px; width: 100%; height: auto; }
.overlay { position: absolute; top: 0; left: 0; width: 100%; height: 100%; background: rgba(12,12,14,0.9); display: flex; flex-direction: column; align-items: center; justify-content: center; border-radius: 6px; text-align: center; }
.prompt-text { color: #04d361; font-weight: bold; font-size: 16px; margin-bottom: 10px; }
</style>
<script>
let streamStarted = {% if active_src %}true{% else %}false{% endif %};

async function switchConfig() {
    const src = document.getElementById('source-picker').value;
    const mdl = document.getElementById('model-picker').value;
    if(!src) return;

    await fetch('/api/swap_config', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ "source": src, "model": mdl })
    });

    streamStarted = true;
    // The overlay is only rendered when the page was served without an
    // active source, so it may legitimately be absent.
    const overlay = document.getElementById('gatekeeper-overlay');
    if (overlay) overlay.style.display = 'none';
    setTimeout(() => {
        document.getElementById('stream-player').src = '/stream.mjpg';
    }, 1000);
}

async function updateStreamMetrics() {
    if (!streamStarted) return;
    try {
        const response = await fetch('/api/stream_info');
        const data = await response.json();

        document.getElementById('metric-res').innerText = 'Source:' + data.native_w + 'x' + data.native_h + ' Canvas:' + data.width + 'x' + data.height;
        document.getElementById('metric-fps').innerText = data.fps.toFixed(1);
        document.getElementById('metric-dets').innerText = data.detections;
    } catch (err) {}
}
setInterval(updateStreamMetrics, 1000);
</script>
</head>
<body>
<h2>Ara Vision Engine</h2>
<div class="dashboard-layout">
<div class="panel">
<div class="control-row">
<label for="source-picker">Media Stream Target:</label>
<select id="source-picker" onchange="switchConfig()">
{% if not sources %}
<option value="" disabled selected>-- NO VALID INPUT SOURCES AVAILABLE --</option>
{% else %}
{% if active_src is none %}<option value="" disabled selected>-- SELECT TARGET SOURCE CHANNEL --</option>{% endif %}
{% for s in sources %}<option value="{{ s }}" {% if s == active_src %}selected{% endif %}>{{ s }}</option>
{% endfor %}
{% endif %}
</select>
</div>
<div class="control-row">
<label for="model-picker">NPU Pipeline Model:</label>
<select id="model-picker" onchange="switchConfig()">
{% for m in models %}<option value="{{ m }}" {% if m == active_model %}selected{% endif %}>{{ m }}</option>
{% endfor %}
</select>
</div>
</div>

<div class="stats-banner">
<span id="metric-res">Source:0x0 Canvas:0x0</span>
<span>NPU Inference: <span id="metric-fps">0.0</span> FPS</span>
<span>Active Detections: <span id="metric-dets">0</span></span>
</div>

<div class="media-container">
{% if not active_src %}
<div class="overlay" id="gatekeeper-overlay">
<div class="prompt-text">Awaiting Source Context</div>
<div style="color: #8f8f9d; font-size: 13px; max-width: 400px;">Please select a media path and model from the drop-downs above to mount your pipeline.</div>
</div>
{% endif %}
<img id="stream-player" {% if active_src %}src="/stream.mjpg"{% endif %} style="max-width: {{ canvas_w }}px;" />
</div>
</div>
</body>
</html>"""


@app.route('/')
def index():
    """Render the dashboard page.

    The page offers one drop-down per registry (sources and models) with the
    active entry pre-selected, a telemetry banner refreshed by the page's
    script, and the MJPEG player.  While no source is active the source
    drop-down starts on a placeholder entry and an overlay covers the player.
    """
    return render_template_string(
        _INDEX_TEMPLATE,
        active_src=STATE_REPO["active_source"],
        sources=STATE_REPO["source_registry"],
        active_model=STATE_REPO["active_model_name"],
        models=STATE_REPO["model_registry"],
        canvas_w=STATE_REPO["CANVAS_W"],
    )
319
@app.route('/api/stream_info')
def stream_info():
    """Return the current telemetry as JSON.

    The response carries the native and preview resolutions, the inference
    FPS and the number of detections currently stored.
    """
    # (response key, STATE_REPO key) pairs that are copied through verbatim.
    passthrough = (
        ("native_w", "native_w"),
        ("native_h", "native_h"),
        ("width", "stream_w"),
        ("height", "stream_h"),
        ("fps", "inference_fps"),
    )
    with lock:
        report = {out_key: STATE_REPO[state_key] for out_key, state_key in passthrough}
        report["detections"] = len(STATE_REPO["detections"])
        return jsonify(report)
331
@app.route('/api/swap_config', methods=['POST'])
def swap_config():
    """Switch the active source and/or model and request a pipeline restart.

    Expects a JSON object with optional ``"source"`` and ``"model"`` keys.
    A value is applied only if it is listed in the corresponding registry
    and differs from the active one; unknown values are ignored.  When
    anything changed, ``restart_flag`` is raised.

    Returns ``{"status": "success"}``, or a 400 response with
    ``{"status": "error", ...}`` when the body is not a JSON object.
    """
    # silent=True yields None instead of raising on a missing or malformed
    # body; anything that is not a JSON object is rejected explicitly rather
    # than failing on .get() with a 500.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({"status": "error", "message": "expected a JSON object"}), 400

    src_selected = payload.get("source")
    mdl_selected = payload.get("model")

    with lock:
        trigger_restart = False
        if src_selected in STATE_REPO["source_registry"] and STATE_REPO["active_source"] != src_selected:
            STATE_REPO["active_source"] = src_selected
            trigger_restart = True
        # The registry check also keeps the model name from being used to
        # build a path outside the model directory.
        if mdl_selected in STATE_REPO["model_registry"] and STATE_REPO["active_model_name"] != mdl_selected:
            base_dir = app.config["MODEL_DIR"]
            STATE_REPO["active_model_name"] = mdl_selected
            STATE_REPO["active_model_path"] = os.path.join(base_dir, mdl_selected, "model.dvm")
            trigger_restart = True
        if trigger_restart:
            STATE_REPO["restart_flag"] = True
    return jsonify({"status": "success"})
351
def _mjpeg_part(image):
    """Return *image* as one JPEG multipart chunk, or None if encoding fails."""
    ok, encoded_img = cv2.imencode(".jpg", image)
    if not ok:
        return None
    return (b'--frame\r\n'
            b'Content-Type: image/jpeg\r\n\r\n' + encoded_img.tobytes() + b'\r\n')


def generate_mjpeg_stream_generator():
    """Yield an endless sequence of multipart JPEG chunks.

    Roughly every 40 ms the latest frame from ``STATE_REPO`` is copied, the
    current detections are drawn onto the copy (boxes are scaled from model
    coordinates to the frame size) and the result is yielded.  While no
    frame is available a fixed "awaiting input" image is yielded instead.
    """
    model_w = float(STATE_REPO["MODEL_W"])
    model_h = float(STATE_REPO["MODEL_H"])
    colour = (0, 255, 97)

    # The placeholder never changes, so it is drawn and encoded only once.
    waiting_canvas = np.zeros((480, 640, 3), dtype=np.uint8)
    cv2.putText(waiting_canvas, "AWAITING MEDIA INPUT SELECTION...", (140, 240), cv2.FONT_HERSHEY_SIMPLEX, 0.5, colour, 1)
    waiting_part = _mjpeg_part(waiting_canvas)

    while True:
        time.sleep(0.04)
        latest_frame = STATE_REPO["frame"]
        local_dets = list(STATE_REPO["detections"])

        if latest_frame is None:
            if waiting_part is not None:
                yield waiting_part
            continue

        # Draw on a private copy so the shared frame stays unannotated.
        frame = latest_frame.copy()
        frame_h, frame_w = frame.shape[:2]
        scale_x = float(frame_w) / model_w
        scale_y = float(frame_h) / model_h

        for class_id, confidence, rx1, ry1, rx2, ry2 in local_dets:
            # int() raises on NaN/inf; skip such a box instead of ending
            # the whole stream.
            if not np.isfinite((rx1, ry1, rx2, ry2)).all():
                continue
            cx1 = int(rx1 * scale_x)
            cx2 = int(rx2 * scale_x)
            cy1 = int(ry1 * scale_y)
            cy2 = int(ry2 * scale_y)
            label = f"{COCO_LABELS.get(class_id, f'Class {class_id}')} ({confidence*100:.1f}%)"
            cv2.rectangle(frame, (cx1, cy1), (cx2, cy2), colour, 2)
            # Keep the label inside the image when the box touches the top.
            cv2.putText(frame, label, (cx1, max(15, cy1 - 5)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, colour, 2)

        part = _mjpeg_part(frame)
        if part is not None:
            yield part
380
@app.route('/stream.mjpg')
def video_feed_stream_route():
    """Serve the annotated video as a multipart MJPEG response."""
    chunk_source = generate_mjpeg_stream_generator()
    return Response(
        chunk_source,
        mimetype='multipart/x-mixed-replace; boundary=frame',
    )
384
def main():
    """Parse the command line, fill the registries and start the server.

    Registers the camera device (if it exists) and every ``*.mp4`` file in
    the ``--mp4`` directory as selectable sources, registers every
    sub-directory of ``--model-dir`` that contains a ``model.dvm`` as a
    selectable model, starts the pipeline thread and then runs the Flask
    server on all interfaces until it exits.  Inputs that cannot be used are
    reported with a warning and skipped.
    """
    parser = argparse.ArgumentParser(description="Wiki Template: Ara Flask Video Engine")
    parser.add_argument("--camera", default=None, help="Camera context device node path")
    parser.add_argument("--mp4", default=None, help="Directory containing target mp4 sample videos")
    parser.add_argument("--port", type=int, default=8080, help="Target port mapping")
    parser.add_argument("--model-dir", default="/usr/share/cnn/detection", help="Directory containing target models")
    parser.add_argument("--model", default="yolov8n", help="Initial model selection")
    args = parser.parse_args()

    app.config["MODEL_DIR"] = args.model_dir

    sources = []
    if args.camera:
        if os.path.exists(args.camera):
            sources.append(args.camera)
        else:
            print(f"[WARN] camera device not found, skipping: {args.camera}")

    if args.mp4:
        if os.path.isdir(args.mp4):
            # Escape the directory so glob metacharacters in its name are
            # matched literally.
            pattern = os.path.join(glob.escape(args.mp4), "*.mp4")
            sources.extend(sorted(glob.glob(pattern)))
        else:
            print(f"[WARN] mp4 directory not found, skipping: {args.mp4}")
    STATE_REPO["source_registry"] = sources

    # isdir rather than exists: os.listdir raises on a regular file.
    discovered_models = []
    if os.path.isdir(args.model_dir):
        for entry in sorted(os.listdir(args.model_dir)):
            full_subdir = os.path.join(args.model_dir, entry)
            if os.path.isdir(full_subdir) and os.path.exists(os.path.join(full_subdir, "model.dvm")):
                discovered_models.append(entry)

    if discovered_models:
        if args.model in discovered_models:
            chosen_model = args.model
        else:
            chosen_model = discovered_models[0]
            print(f"[WARN] model '{args.model}' not found in {args.model_dir}, using '{chosen_model}'")
        STATE_REPO["model_registry"] = discovered_models
        STATE_REPO["active_model_name"] = chosen_model
        STATE_REPO["active_model_path"] = os.path.join(args.model_dir, chosen_model, "model.dvm")
    else:
        # The model entries already present in STATE_REPO are left untouched.
        print(f"[WARN] no models found in {args.model_dir}")

    threading.Thread(target=gstreamer_orchestration_loop, daemon=True).start()

    print(f"Server serving on: http://localhost:{args.port}/")
    app.run(host='0.0.0.0', port=args.port, threaded=True, use_reloader=False, debug=False)


if __name__ == '__main__':
    main()

Download in other formats:

Original Format