expansion/gw16168: vision-webapp.py

File vision-webapp.py, 19.2 KB (added by Tim Harvey, 22 hours ago)

video inference webapp

Line 
1#!/usr/bin/env python3
2"""
3Ara NPU Basic Video Stream & Inference Hub
4==========================================
5"""
6
7import argparse
8import ctypes
9import glob
10import os
11import sys
12import threading
13import time
14import logging
15import cv2
16import numpy as np
17from flask import Flask, Response, jsonify, request, render_template_string
18import gi
19
20gi.require_version('Gst', '1.0')
21from gi.repository import Gst
22Gst.init(None)
23
# Flask application object, plus the mutex the HTTP handlers take while
# reading or changing the shared pipeline state.
app = Flask(__name__)
lock = threading.Lock()

# The dashboard polls its metrics endpoint once per second; raise the
# Werkzeug request logger to ERROR so those polls do not flood the console.
log = logging.getLogger('werkzeug')
log.setLevel(logging.ERROR)
30
class AraDetection(ctypes.Structure):
    """Byte-packed ctypes record describing one detection.

    The layout is five 32-bit floats (box corners and confidence), one
    32-bit signed class index, and a pointer-sized slot for the class name.
    ``_pack_ = 1`` removes alignment padding between the fields.
    """

    _pack_ = 1
    _fields_ = [
        ("xmin", ctypes.c_float),
        ("ymin", ctypes.c_float),
        ("xmax", ctypes.c_float),
        ("ymax", ctypes.c_float),
        ("confidence", ctypes.c_float),
        ("class_id", ctypes.c_int32),
        ("class_name_ptr", ctypes.c_void_p),
    ]
39
# --- STATE STORAGE ---
# One mutable dictionary shared by the GStreamer worker thread (which
# publishes frames, detections and telemetry) and the Flask handlers
# (which read them and change the selected source / model).
STATE_REPO = dict(
    # Latest published frame and detection list.
    frame=None,
    detections=[],

    # Current selection and the flag asking the worker to rebuild its pipeline.
    active_source=None,
    active_model_name="yolov8n",
    active_model_path="/usr/share/cnn/detection/yolov8n/model.dvm",
    restart_flag=False,

    # Choices offered in the dashboard drop-downs.
    source_registry=[],
    model_registry=["yolov8n"],

    # Target Pipeline Resolutions
    CANVAS_W=640,
    CANVAS_H=360,
    MODEL_W=640,
    MODEL_H=640,

    # Live Telemetry Metrics
    native_w=0,
    native_h=0,
    stream_w=0,
    stream_h=0,
    inference_fps=0.0,
)

# Arrival times of recent inference results; the inference callback uses
# this window to derive the reported FPS.
inference_timestamps = list()
67
# Class index -> display name, used when labelling detection boxes.
# The names are listed in index order, so position N is the name of class N.
COCO_LABELS = dict(enumerate((
    # 0-9
    'person', 'bicycle', 'car', 'motorcycle', 'airplane',
    'bus', 'train', 'truck', 'boat', 'traffic light',
    # 10-19
    'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird',
    'cat', 'dog', 'horse', 'sheep', 'cow',
    # 20-29
    'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
    'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
    # 30-39
    'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
    'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
    # 40-49
    'wine glass', 'cup', 'fork', 'knife', 'spoon',
    'bowl', 'banana', 'apple', 'sandwich', 'orange',
    # 50-59
    'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
    'cake', 'chair', 'couch', 'potted plant', 'bed',
    # 60-69
    'dining table', 'toilet', 'tv', 'laptop', 'mouse',
    'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
    # 70-79
    'toaster', 'sink', 'refrigerator', 'book', 'clock',
    'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush',
)))
86
def build_source_injection_string(source_path):
    """Return the head of the pipeline description for *source_path*.

    Paths ending in ".mp4" are read with ``filesrc`` and decoded with
    ``decodebin``; anything else is treated as a V4L2 device node. Either
    way the fragment ends in a ``tee`` named ``t`` for the caller to branch
    from.

    The path is emitted as a double-quoted property value, with embedded
    backslashes and double quotes escaped, so that paths containing spaces
    or quotes do not break parsing of the pipeline description.
    """
    quoted = '"' + source_path.replace("\\", "\\\\").replace('"', '\\"') + '"'
    if source_path.endswith(".mp4"):
        return f"filesrc location={quoted} ! decodebin ! videoconvert ! tee name=t "
    return f"v4l2src device={quoted} ! videoconvert ! tee name=t "
92
def _rolling_fps(timestamps):
    """Return the mean event rate (per second) across *timestamps*.

    N timestamps delimit N - 1 intervals, so the rate is (N - 1) / span.
    Returns None when there are fewer than two samples or the span is not
    positive, so the caller can keep its previous value.
    """
    if len(timestamps) < 2:
        return None
    span = timestamps[-1] - timestamps[0]
    if span <= 0:
        return None
    return (len(timestamps) - 1) / span


def _parse_detection_buffer(raw_bytes):
    """Decode one post-processing buffer into detection tuples.

    The buffer is read as a native-endian uint32 record count followed by
    that many packed AraDetection records. If the buffer is shorter than
    the announced count implies, only the complete records are decoded.

    Returns a list of (class_id, confidence, xmin, ymin, xmax, ymax)
    tuples, or None when the buffer is too short to hold the count.
    """
    header_size = 4
    if not raw_bytes or len(raw_bytes) < header_size:
        return None
    announced = int(np.frombuffer(raw_bytes[:header_size], dtype=np.uint32)[0])
    record_size = ctypes.sizeof(AraDetection)
    # Never trust the announced count beyond what the buffer can hold.
    usable = min(announced, (len(raw_bytes) - header_size) // record_size)
    detections = []
    for index in range(usable):
        det = AraDetection.from_buffer_copy(raw_bytes, header_size + index * record_size)
        detections.append((det.class_id, det.confidence, det.xmin, det.ymin, det.xmax, det.ymax))
    return detections


def gstreamer_orchestration_loop():
    """Build, run and rebuild the GStreamer pipeline forever.

    Intended to run in its own thread. Each cycle waits until a source is
    selected, builds a three-branch pipeline from the currently selected
    source and model, and publishes results into STATE_REPO:

    * ``nativesink`` reports the source's width/height,
    * ``framesink`` delivers canvas-sized BGR frames,
    * ``postsink`` delivers the dvPre/dvInf/dvPost output, decoded into
      detection tuples and used to measure inference FPS.

    The pipeline is torn down and rebuilt when ``restart_flag`` is raised,
    when the bus reports an error, or on end-of-stream of a non-mp4 source
    (mp4 sources are rewound and looped instead).
    """
    # Function-scope import: only needed here to catch pipeline build errors.
    from gi.repository import GLib

    global inference_timestamps
    CANVAS_W = STATE_REPO["CANVAS_W"]
    CANVAS_H = STATE_REPO["CANVAS_H"]
    MODEL_W = STATE_REPO["MODEL_W"]
    MODEL_H = STATE_REPO["MODEL_H"]

    while True:
        # Idle until a source has been chosen (or a restart is requested).
        while STATE_REPO["active_source"] is None:
            time.sleep(0.2)
            if STATE_REPO["restart_flag"]:
                break

        # Clear the flag BEFORE reading the configuration: a swap that lands
        # after this point re-raises the flag and triggers another rebuild,
        # instead of being silently discarded.
        STATE_REPO["restart_flag"] = False
        current_target_source = STATE_REPO["active_source"]
        current_target_model = STATE_REPO["active_model_path"]

        if current_target_source is None:
            continue

        source_segment = build_source_injection_string(current_target_source)

        pipe_str = (
            f"{source_segment} "
            f"t. ! queue max-size-buffers=2 leaky=downstream ! appsink name=nativesink sync=false async=false emit-signals=true "
            f"t. ! queue max-size-buffers=2 leaky=downstream ! videoscale ! video/x-raw,width={CANVAS_W},height={CANVAS_H} ! videoconvert ! video/x-raw,format=BGR ! appsink name=framesink sync=false async=false emit-signals=true "
            f"t. ! queue max-size-buffers=2 leaky=downstream ! "
            f"videoscale ! video/x-raw,width={MODEL_W},height={MODEL_H} ! videoconvert ! video/x-raw,format=BGRA ! "
            f"dvPre model={current_target_model} ! "
            f"dvInf model={current_target_model} sock=/var/run/proxy.sock use-shm=true shm-path=/dev/shm/ara_inf_ ! "
            f"dvPost model={current_target_model} orig-width={MODEL_W} orig-height={MODEL_H} ! "
            f"appsink name=postsink sync=false async=false emit-signals=true"
        )

        print(f"[LAUNCH PIPELINE]\n {pipe_str}\n")
        try:
            pipeline = Gst.parse_launch(pipe_str)
        except GLib.Error as exc:
            # A description that cannot be built must not kill this thread;
            # report it and retry after the usual back-off so a later
            # source/model swap can still recover.
            print(f"[PIPELINE BUILD FAILED] {exc}\n")
            time.sleep(1.0)
            continue

        native_sink = pipeline.get_by_name("nativesink")
        frame_sink = pipeline.get_by_name("framesink")
        post_sink = pipeline.get_by_name("postsink")

        def on_native_caps(sink):
            """Record the unscaled source resolution from the sample caps."""
            sample = sink.emit("pull-sample")
            if sample:
                struct = sample.get_caps().get_structure(0)
                STATE_REPO["native_w"] = struct.get_value("width")
                STATE_REPO["native_h"] = struct.get_value("height")
            return Gst.FlowReturn.OK

        def on_new_detection(sink):
            """Update the FPS estimate and publish the decoded detections."""
            sample = sink.emit("pull-sample")
            if sample:
                # FPS is derived purely from the arrival rate of inference
                # results, over a window of at most 30 samples. A monotonic
                # clock keeps the intervals immune to wall-clock adjustments.
                inference_timestamps.append(time.monotonic())
                if len(inference_timestamps) > 30:
                    inference_timestamps.pop(0)
                fps = _rolling_fps(inference_timestamps)
                if fps is not None:
                    STATE_REPO["inference_fps"] = fps

                buffer = sample.get_buffer()
                raw_bytes = buffer.extract_dup(0, buffer.get_size())
                local_dets = _parse_detection_buffer(raw_bytes)
                if local_dets is not None:
                    STATE_REPO["detections"] = local_dets
            return Gst.FlowReturn.OK

        def on_new_frame(sink):
            """Publish the newest canvas frame as an (h, w, 3) uint8 array."""
            sample = sink.emit("pull-sample")
            if sample:
                buffer = sample.get_buffer()
                struct = sample.get_caps().get_structure(0)
                w = struct.get_value("width")
                h = struct.get_value("height")

                STATE_REPO["stream_w"] = w
                STATE_REPO["stream_h"] = h

                raw_bytes = buffer.extract_dup(0, buffer.get_size())
                if raw_bytes:
                    try:
                        frame_flat = np.frombuffer(raw_bytes, dtype=np.uint8)
                        STATE_REPO["frame"] = frame_flat.reshape((h, w, 3)).copy()
                    except ValueError:
                        # Buffer size does not match h * w * 3: drop this
                        # frame (best effort) and keep the previous one.
                        pass
            return Gst.FlowReturn.OK

        native_sink.connect("new-sample", on_native_caps)
        post_sink.connect("new-sample", on_new_detection)
        frame_sink.connect("new-sample", on_new_frame)
        pipeline.set_state(Gst.State.PLAYING)

        bus = pipeline.get_bus()
        while True:
            # 50 ms poll, expressed as an integer nanosecond count.
            msg = bus.timed_pop_filtered(50 * Gst.MSECOND, Gst.MessageType.ERROR | Gst.MessageType.EOS)
            if msg:
                if msg.type == Gst.MessageType.EOS and current_target_source.endswith(".mp4"):
                    # Loop file playback by rewinding to the start.
                    pipeline.seek_simple(Gst.Format.TIME, Gst.SeekFlags.FLUSH | Gst.SeekFlags.KEY_UNIT, 0)
                    continue
                if msg.type == Gst.MessageType.ERROR:
                    # Report why the pipeline stopped before rebuilding it.
                    err, debug_info = msg.parse_error()
                    print(f"[PIPELINE ERROR] {err.message}\n {debug_info}\n")
                break

            if STATE_REPO["restart_flag"]:
                break

        # Tear down and reset all published state before the next cycle.
        pipeline.set_state(Gst.State.NULL)
        STATE_REPO["frame"] = None
        STATE_REPO["detections"] = []
        STATE_REPO["native_w"] = 0
        STATE_REPO["native_h"] = 0
        STATE_REPO["stream_w"] = 0
        STATE_REPO["stream_h"] = 0
        STATE_REPO["inference_fps"] = 0.0
        inference_timestamps = []
        time.sleep(1.0)
219
@app.route('/')
def index():
    """Render the single-page dashboard.

    The page has a source picker, a model picker, a one-line metrics banner
    refreshed once per second from /api/stream_info, and an <img> bound to
    the /stream.mjpg feed once a source is active.

    Source and model names are handed to the template as context values and
    rendered with ``{{ ... }}``, so they are HTML-escaped by the template
    engine and are never interpreted as template syntax. (They originate
    from file and directory names on disk.)
    """
    html_template = """<!DOCTYPE html>
<html>
<head>
    <title>Ara Stream Client</title>
    <style>
        body { font-family: sans-serif; background: #0c0c0e; color: #e1e1e6; margin: 0; padding: 20px; display: flex; flex-direction: column; align-items: center; }
        .dashboard-layout { display: flex; flex-direction: column; gap: 15px; width: 660px; }
        .panel { background: #121216; padding: 12px 15px; border-radius: 6px; border: 1px solid #1f1f24; display: flex; flex-direction: column; gap: 10px; }
        .control-row { display: flex; align-items: center; justify-content: space-between; }
        label { font-size: 12px; font-weight: bold; color: #8f8f9d; text-transform: uppercase; }
        select { background: #0c0c0e; color: #fff; border: 1px solid #04d361; padding: 6px 10px; border-radius: 4px; width: 420px; outline: none; }
        .stats-banner { display: flex; justify-content: space-between; background: #17171f; padding: 10px 15px; border: 1px solid #1f1f24; border-radius: 4px; font-family: monospace; font-size: 13px; color: #8f8f9d; }
        .stats-banner span strong { color: #04d361; }
        .media-container { background: #121216; padding: 8px; border-radius: 6px; border: 1px solid #1f1f24; position: relative; min-height: 480px; display: flex; align-items: center; justify-content: center; }
        img { display: block; border-radius: 4px; width: 100%; height: auto; }
        .overlay { position: absolute; top: 0; left: 0; width: 100%; height: 100%; background: rgba(12,12,14,0.9); display: flex; flex-direction: column; align-items: center; justify-content: center; border-radius: 6px; text-align: center; }
        .prompt-text { color: #04d361; font-weight: bold; font-size: 16px; margin-bottom: 10px; }
    </style>
    <script>
        let streamStarted = {% if active_src %}true{% else %}false{% endif %};

        async function switchConfig() {
            const src = document.getElementById('source-picker').value;
            const mdl = document.getElementById('model-picker').value;
            if(!src) return;

            await fetch('/api/swap_config', {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify({ "source": src, "model": mdl })
            });

            streamStarted = true;
            document.getElementById('gatekeeper-overlay').style.display = 'none';
            setTimeout(() => {
                document.getElementById('stream-player').src = '/stream.mjpg';
            }, 1000);
        }

        async function updateStreamMetrics() {
            if (!streamStarted) return;
            try {
                const response = await fetch('/api/stream_info');
                const data = await response.json();

                document.getElementById('metric-res').innerText = 'Source:' + data.native_w + 'x' + data.native_h + ' Canvas:' + data.width + 'x' + data.height;
                document.getElementById('metric-fps').innerText = data.fps.toFixed(1);
                document.getElementById('metric-dets').innerText = data.detections;
            } catch (err) {}
        }
        setInterval(updateStreamMetrics, 1000);
    </script>
</head>
<body>
    <h2>Ara Vision Engine</h2>
    <div class="dashboard-layout">
        <div class="panel">
            <div class="control-row">
                <label for="source-picker">Media Stream Target:</label>
                <select id="source-picker" onchange="switchConfig()">
                    {% if not sources %}
                    <option value="" disabled selected>-- NO VALID INPUT SOURCES AVAILABLE --</option>
                    {% else %}
                    {% if active_src is none %}<option value="" disabled selected>-- SELECT TARGET SOURCE CHANNEL --</option>{% endif %}
                    {% for s in sources %}<option value="{{ s }}"{% if s == active_src %} selected{% endif %}>{{ s }}</option>{% endfor %}
                    {% endif %}
                </select>
            </div>
            <div class="control-row">
                <label for="model-picker">NPU Pipeline Model:</label>
                <select id="model-picker" onchange="switchConfig()">
                    {% for m in models %}<option value="{{ m }}"{% if m == active_model %} selected{% endif %}>{{ m }}</option>{% endfor %}
                </select>
            </div>
        </div>

        <div class="stats-banner">
            <span id="metric-res">Source:0x0 Canvas:0x0</span>
            <span>NPU Inference: <span id="metric-fps">0.0</span> FPS</span>
            <span>Active Detections: <span id="metric-dets">0</span></span>
        </div>

        <div class="media-container">
            {% if not active_src %}
            <div class="overlay" id="gatekeeper-overlay">
                <div class="prompt-text">Awaiting Source Context</div>
                <div style="color: #8f8f9d; font-size: 13px; max-width: 400px;">Please select a media path and model from the drop-downs above to mount your pipeline.</div>
            </div>
            {% endif %}
            <img id="stream-player" {% if active_src %}src="/stream.mjpg"{% endif %} style="max-width: {{ canvas_w }}px;" />
        </div>
    </div>
</body>
</html>"""
    return render_template_string(
        html_template,
        active_src=STATE_REPO["active_source"],
        sources=STATE_REPO["source_registry"],
        active_model=STATE_REPO["active_model_name"],
        models=STATE_REPO["model_registry"],
        canvas_w=STATE_REPO["CANVAS_W"],
    )
319
@app.route('/api/stream_info')
def stream_info():
    """Return the live telemetry shown in the dashboard banner as JSON.

    Response keys: ``native_w``/``native_h`` (source resolution),
    ``width``/``height`` (streamed canvas resolution), ``fps`` (inference
    rate) and ``detections`` (number of current detections).
    """
    # Response key -> STATE_REPO key for the values that are copied verbatim.
    copied_fields = (
        ("native_w", "native_w"),
        ("native_h", "native_h"),
        ("width", "stream_w"),
        ("height", "stream_h"),
        ("fps", "inference_fps"),
    )
    with lock:
        body = {out_key: STATE_REPO[state_key] for out_key, state_key in copied_fields}
        body["detections"] = len(STATE_REPO["detections"])
        return jsonify(body)
331
@app.route('/api/swap_config', methods=['POST'])
def swap_config():
    """Select a new source and/or model and request a pipeline restart.

    Expects a JSON object body with optional ``source`` and ``model`` keys.
    A value is applied only if it is listed in the matching registry and
    differs from the current selection; any applied change raises
    ``restart_flag`` for the pipeline thread.

    Returns ``{"status": "success"}`` for any well-formed request, including
    one that changes nothing. A missing, unparsable or non-object body gets
    a 400 response instead of an unhandled exception.
    """
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({"status": "error", "message": "expected a JSON object body"}), 400

    src_selected = payload.get("source")
    mdl_selected = payload.get("model")

    with lock:
        trigger_restart = False
        if src_selected in STATE_REPO["source_registry"] and STATE_REPO["active_source"] != src_selected:
            STATE_REPO["active_source"] = src_selected
            trigger_restart = True
        if mdl_selected in STATE_REPO["model_registry"] and STATE_REPO["active_model_name"] != mdl_selected:
            base_dir = app.config["MODEL_DIR"]
            STATE_REPO["active_model_name"] = mdl_selected
            STATE_REPO["active_model_path"] = os.path.join(base_dir, mdl_selected, "model.dvm")
            trigger_restart = True
        if trigger_restart:
            STATE_REPO["restart_flag"] = True
    return jsonify({"status": "success"})
351
def generate_mjpeg_stream_generator():
    """Yield an endless series of multipart JPEG parts for the MJPEG route.

    About every 40 ms the latest published frame is copied, the current
    detections are drawn on it (box plus "label (confidence%)" text) and the
    result is JPEG-encoded. Detection coordinates are scaled from the model
    input size (MODEL_W x MODEL_H) to the frame size. While no frame is
    available, a fixed "awaiting input" placeholder is sent instead.
    """
    model_w = float(STATE_REPO["MODEL_W"])
    model_h = float(STATE_REPO["MODEL_H"])
    overlay_color = (0, 255, 97)

    def as_part(jpeg_buf):
        # Wrap one encoded image as a part of the multipart stream.
        return (b'--frame\r\n'
                b'Content-Type: image/jpeg\r\n\r\n' + jpeg_buf.tobytes() + b'\r\n')

    # The placeholder never changes, so build and encode it once rather
    # than on every idle tick.
    waiting_canvas = np.zeros((480, 640, 3), dtype=np.uint8)
    cv2.putText(waiting_canvas, "AWAITING MEDIA INPUT SELECTION...", (140, 240), cv2.FONT_HERSHEY_SIMPLEX, 0.5, overlay_color, 1)
    _, waiting_jpeg = cv2.imencode(".jpg", waiting_canvas)
    waiting_part = as_part(waiting_jpeg)

    while True:
        time.sleep(0.04)
        latest_frame = STATE_REPO["frame"]
        if latest_frame is None:
            yield waiting_part
            continue

        local_dets = list(STATE_REPO["detections"])
        # Draw on a private copy so the published frame stays untouched.
        frame = latest_frame.copy()
        frame_h, frame_w = frame.shape[:2]
        # Same scale for every detection in this frame: compute it once.
        scale_x = float(frame_w) / model_w
        scale_y = float(frame_h) / model_h

        for class_id, confidence, rx1, ry1, rx2, ry2 in local_dets:
            cx1 = int(rx1 * scale_x)
            cx2 = int(rx2 * scale_x)
            cy1 = int(ry1 * scale_y)
            cy2 = int(ry2 * scale_y)
            label = f"{COCO_LABELS.get(class_id, f'Class {class_id}')} ({confidence*100:.1f}%)"
            cv2.rectangle(frame, (cx1, cy1), (cx2, cy2), overlay_color, 2)
            # Keep the label inside the image when the box touches the top edge.
            cv2.putText(frame, label, (cx1, max(15, cy1 - 5)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, overlay_color, 2)

        encoded_ok, encoded_img = cv2.imencode(".jpg", frame)
        if not encoded_ok:
            # Skip this tick rather than emit a part with an invalid image.
            continue
        yield as_part(encoded_img)
380
@app.route('/stream.mjpg')
def video_feed_stream_route():
    """Serve the annotated video as a multipart (MJPEG) HTTP response."""
    mjpeg_parts = generate_mjpeg_stream_generator()
    return Response(
        mjpeg_parts,
        mimetype='multipart/x-mixed-replace; boundary=frame',
    )
384
def main():
    """Parse the command line, fill the registries, and start the server.

    Sources offered in the UI are the ``--camera`` path (if it exists)
    followed by the sorted ``*.mp4`` files found in the ``--mp4`` directory.
    Models are the sub-directories of ``--model-dir`` that contain a
    ``model.dvm`` file. The GStreamer loop runs in a daemon thread and the
    Flask server runs in the calling thread.
    """
    parser = argparse.ArgumentParser(description="Wiki Template: Ara Flask Video Engine")
    parser.add_argument("--camera", default=None, help="Camera context device node path")
    parser.add_argument("--mp4", default=None, help="Directory containing target mp4 sample videos")
    parser.add_argument("--port", type=int, default=8080, help="Target port mapping")
    parser.add_argument("--model-dir", default="/usr/share/cnn/detection", help="Directory containing target models")
    parser.add_argument("--model", default="yolov8n", help="Initial model selection")
    args = parser.parse_args()

    app.config["MODEL_DIR"] = args.model_dir

    # Collect the selectable sources: camera first, then the videos.
    sources = []
    if args.camera and os.path.exists(args.camera):
        sources.append(args.camera)
    if args.mp4 and os.path.exists(args.mp4):
        sources.extend(sorted(glob.glob(os.path.join(args.mp4, "*.mp4"))))
    STATE_REPO["source_registry"] = sources

    # Discover models; the built-in defaults stay in place if none are found.
    if os.path.exists(args.model_dir):
        discovered_models = [
            entry
            for entry in sorted(os.listdir(args.model_dir))
            if os.path.isdir(os.path.join(args.model_dir, entry))
            and os.path.exists(os.path.join(args.model_dir, entry, "model.dvm"))
        ]
        if discovered_models:
            # Fall back to the first discovered model when --model is not among them.
            if args.model in discovered_models:
                initial_model = args.model
            else:
                initial_model = discovered_models[0]
            STATE_REPO["model_registry"] = discovered_models
            STATE_REPO["active_model_name"] = initial_model
            STATE_REPO["active_model_path"] = os.path.join(args.model_dir, initial_model, "model.dvm")

    pipeline_thread = threading.Thread(target=gstreamer_orchestration_loop, daemon=True)
    pipeline_thread.start()

    print(f"Server serving on: http://localhost:{args.port}/")
    app.run(host='0.0.0.0', port=args.port, threaded=True, use_reloader=False, debug=False)


# Start the server only when this file is executed as a script.
if __name__ == '__main__':
    main()