Context Navigation

Changes between Version 10 and Version 11 of expansion/gw16168

Timestamp:: 06/05/2026 10:48:13 PM (8 weeks ago)
Author:: Tim Harvey
Comment:: added a video detection example

Legend:

: Unmodified
: Added
: Removed
: Modified

expansion/gw16168

-              v10
+              v11
 }}}
 [=#gstreamer
+[=#gstreamer]
 === GStreamer plugins
 The rt-sdk-ara2 provides a set of gstreamer plugins for inference:
 …
 }}}
+[=#detection]
 === Detection Examples
 Examples:
  * gst-launch pipeline prototypeing:
+ * gst-launch pipeline prototyping:
   - enabling debug level 6 on dvPost will show the number of object detections in its debug output but if you want to do anything with that data you need to write an application that can decode frame buffers. Still this is useful for prototyping:
    * perform detection on a v4l2 video device like a webcam:
 …
    - you would think that if your original image was 1080x1920 and you resized it to the model size of 640x640, telling dvPost orig-width=1080 orig-height=1920 would make it scale the bounding boxes properly; however, in practice it seems it does not unless your image has the same aspect ratio as the model. Mapping it as above (telling dvPost that the image is 640x640 and scaling ourselves) resolves this
+ * Video detection with boxing via Python in a headless webapp
+  - Python is incredibly useful for accessing GStreamer and handling the ARA detection frame data and building webapps
+  - The script uses PyGObject, a Python package that provides bindings for libraries based on GObject Introspection such as GTK, !WebKit, and GStreamer. It allows you to use C-based frameworks in Python. We need to install the C libs for GStreamer for this:
+{{{#!bash
+apt-get install -y \
+  libcairo2-dev \
+  libgirepository-2.0-dev \
+  python3-dev \
+  python3-gst-1.0 \
+  cmake pkg-config
+# we are also going to need to install gstreamer and its dev packages
+apt-get install -y \
+  libgstreamer1.0-dev \
+  libgstreamer-plugins-base1.0-dev \
+  libgstreamer-plugins-bad1.0-dev \
+  gstreamer1.0-plugins-base \
+  gstreamer1.0-plugins-good \
+  gstreamer1.0-plugins-bad \
+  gstreamer1.0-plugins-ugly \
+  gstreamer1.0-libav \
+  gstreamer1.0-tools
+}}}
+  - create a Python virtual env (always a good idea to keep Python dependencies isolated), install the Python libs we need, and write the script out as vision-webapp.py:
+{{{#!bash
+# create a venv (.venv)
+uv venv
+# install our script's dependencies; the script imports gi, cv2, numpy and
+# flask, so all four are listed explicitly
+uv pip install pygobject opencv-python-headless numpy flask
+# write the script to disk; quoting the 'EOF' delimiter stops the shell from
+# expanding anything inside the Python source
+cat << 'EOF' > vision-webapp.py
+#!/usr/bin/env python3
+"""
+Ara NPU Basic Video Stream & Inference Hub
+==========================================
+"""
+import argparse
+import ctypes
+import glob
+import os
+import sys
+import threading
+import time
+import logging
+import cv2
+import numpy as np
+from flask import Flask, Response, jsonify, request, render_template_string
+import gi
+gi.require_version('Gst', '1.0')
+from gi.repository import Gst
+# Initialise GStreamer once at import time, before any other Gst call is made
+Gst.init(None)
+# Quiet down Werkzeug HTTP traffic logging to suppress 1Hz AJAX console pollution
+log = logging.getLogger('werkzeug')
+log.setLevel(logging.ERROR)
+# Flask application object that the @app.route handlers below register on
+app = Flask(__name__)
+# Lock taken by the /api/stream_info and /api/swap_config handlers
+lock = threading.Lock()
+class AraDetection(ctypes.Structure):
+    """One detection record as decoded from a postsink sample buffer.
+
+    Records are read with from_buffer_copy() from the bytes that follow the
+    4-byte detection count at the start of the buffer.
+    NOTE(review): field order, field widths and the byte packing are assumed
+    to match the struct emitted by dvPost - confirm against the SDK headers.
+    """
+    _pack_ = 1  # byte-packed: no alignment padding between fields
+    _fields_ = [
+        # bounding-box corners; the stream generator scales them by
+        # frame size / model size, so presumably model-input pixels - TODO confirm
+        ("xmin", ctypes.c_float), ("ymin", ctypes.c_float),
+        ("xmax", ctypes.c_float), ("ymax", ctypes.c_float),
+        # confidence is rendered as a percentage (x100); class_id indexes COCO_LABELS
+        ("confidence", ctypes.c_float), ("class_id", ctypes.c_int32),
+        # never dereferenced by this script
+        ("class_name_ptr", ctypes.c_void_p)
+    ]
+# --- STATE STORAGE ---
+# Single shared dict through which the GStreamer thread and the Flask request
+# handlers exchange frames, detections, configuration and telemetry.
+STATE_REPO = {
+    # Latest preview frame as an (h, w, 3) uint8 array, or None when no pipeline is running
+    "frame": None,
+    # Latest detections as (class_id, confidence, xmin, ymin, xmax, ymax) tuples
+    "detections": [],
+    # Currently selected input (camera node or mp4 path); None until the user picks one
+    "active_source": None,
+    "active_model_name": "yolov8n",
+    "active_model_path": "/usr/share/cnn/detection/yolov8n/model.dvm",
+    # Set by /api/swap_config to make the pipeline thread tear down and rebuild
+    "restart_flag": False,
+    # Selectable inputs and models; both are filled in by main()
+    "source_registry": [],
+    "model_registry": ["yolov8n"],
+    # Target Pipeline Resolutions
+    "CANVAS_W": 640,
+    "CANVAS_H": 360,
+    "MODEL_W": 640,
+    "MODEL_H": 640,
+    # Live Telemetry Metrics
+    # native_* come from the unscaled branch's caps, stream_* from the preview branch's caps
+    "native_w": 0,
+    "native_h": 0,
+    "stream_w": 0,
+    "stream_h": 0,
+    "inference_fps": 0.0
+}
+# FPS Calculation variables bound directly to the Inference thread
+# (arrival times of the most recent detection buffers, capped at 30 entries)
+inference_timestamps = []
+COCO_LABELS = {
+: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus',
+: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant',
+: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat',
+: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear',
+: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag',
+: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard',
+: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove',
+: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle',
+: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl',
+: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli',
+: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake',
+: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', 60: 'dining table',
+: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard',
+: 'cell phone', 68: 'microwave', 69: 'oven', 70: 'toaster', 71: 'sink',
+: 'refrigerator', 73: 'book', 74: 'clock', 75: 'vase', 76: 'scissors',
+: 'teddy bear', 78: 'hair drier', 79: 'toothbrush'
+}
+def build_source_injection_string(source_path):
+    if source_path.endswith(".mp4"):
+        return f"filesrc location={source_path} ! decodebin ! videoconvert ! tee name=t "
+    else:
+        return f"v4l2src device={source_path} ! videoconvert ! tee name=t "
+def gstreamer_orchestration_loop():
+    global inference_timestamps
+    CANVAS_W = STATE_REPO["CANVAS_W"]
+    CANVAS_H = STATE_REPO["CANVAS_H"]
+    MODEL_W = STATE_REPO["MODEL_W"]
+    MODEL_H = STATE_REPO["MODEL_H"]
+    while True:
+        while STATE_REPO["active_source"] is None:
+            time.sleep(0.2)
+            if STATE_REPO["restart_flag"]:
+                break
+        current_target_source = STATE_REPO["active_source"]
+        current_target_model = STATE_REPO["active_model_path"]
+        STATE_REPO["restart_flag"] = False
+        if current_target_source is None:
+            continue
+        source_segment = build_source_injection_string(current_target_source)
+        pipe_str = (
+            f"{source_segment} "
+            f"t. ! queue max-size-buffers=2 leaky=downstream ! appsink name=nativesink sync=false async=false emit-signals=true "
+            f"t. ! queue max-size-buffers=2 leaky=downstream ! videoscale ! video/x-raw,width={CANVAS_W},height={CANVAS_H} ! videoconvert ! video/x-raw,format=BGR ! appsink name=framesink sync=false async=false emit-signals=true "
+            f"t. ! queue max-size-buffers=2 leaky=downstream ! "
+            f"videoscale ! video/x-raw,width={MODEL_W},height={MODEL_H} ! videoconvert ! video/x-raw,format=BGRA ! "
+            f"dvPre model={current_target_model} ! "
+            f"dvInf model={current_target_model} sock=/var/run/proxy.sock use-shm=true shm-path=/dev/shm/ara_inf_ ! "
+            f"dvPost model={current_target_model} orig-width={MODEL_W} orig-height={MODEL_H} ! "
+            f"appsink name=postsink sync=false async=false emit-signals=true"
+        )
+        print(f"[LAUNCH PIPELINE]\n   {pipe_str}\n")
+        pipeline = Gst.parse_launch(pipe_str)
+        native_sink = pipeline.get_by_name("nativesink")
+        frame_sink = pipeline.get_by_name("framesink")
+        post_sink = pipeline.get_by_name("postsink")
+        def on_native_caps(sink):
+            sample = sink.emit("pull-sample")
+            if sample:
+                caps = sample.get_caps()
+                struct = caps.get_structure(0)
+                STATE_REPO["native_w"] = struct.get_value("width")
+                STATE_REPO["native_h"] = struct.get_value("height")
+            return Gst.FlowReturn.OK
+        def on_new_detection(sink):
+            global inference_timestamps
+            sample = sink.emit("pull-sample")
+            if sample:
+                # Calculate FPS derived purely from the inference hardware return loop
+                now = time.time()
+                inference_timestamps.append(now)
+                if len(inference_timestamps) > 30:
+                    inference_timestamps.pop(0)
+                if len(inference_timestamps) > 1:
+                    STATE_REPO["inference_fps"] = len(inference_timestamps) / (inference_timestamps[-1] - inference_timestamps[0])
+                buffer = sample.get_buffer()
+                raw_bytes = buffer.extract_dup(0, buffer.get_size())
+                if raw_bytes and len(raw_bytes) >= 4:
+                    num_detections = np.frombuffer(raw_bytes[:4], dtype=np.uint32)[0]
+                    local_dets = []
+                    offset = 4
+                    ds = ctypes.sizeof(AraDetection)
+                    for _ in range(num_detections):
+                        if offset + ds > len(raw_bytes): break
+                        det = AraDetection.from_buffer_copy(raw_bytes[offset:offset+ds])
+                        offset += ds
+                        local_dets.append((det.class_id, det.confidence, det.xmin, det.ymin, det.xmax, det.ymax))
+                    STATE_REPO["detections"] = local_dets
+            return Gst.FlowReturn.OK
+        def on_new_frame(sink):
+            sample = sink.emit("pull-sample")
+            if sample:
+                buffer = sample.get_buffer()
+                caps = sample.get_caps()
+                struct = caps.get_structure(0)
+                w = struct.get_value("width")
+                h = struct.get_value("height")
+                STATE_REPO["stream_w"] = w
+                STATE_REPO["stream_h"] = h
+                raw_bytes = buffer.extract_dup(0, buffer.get_size())
+                if raw_bytes:
+                    try:
+                        frame_flat = np.frombuffer(raw_bytes, dtype=np.uint8)
+                        frame_arr = frame_flat.reshape((h, w, 3))
+                        STATE_REPO["frame"] = frame_arr.copy()
+                    except ValueError:
+                        pass
+            return Gst.FlowReturn.OK
+        native_sink.connect("new-sample", on_native_caps)
+        post_sink.connect("new-sample", on_new_detection)
+        frame_sink.connect("new-sample", on_new_frame)
+        pipeline.set_state(Gst.State.PLAYING)
+        bus = pipeline.get_bus()
+        while True:
+            msg = bus.timed_pop_filtered(Gst.SECOND * 0.05, Gst.MessageType.ERROR | Gst.MessageType.EOS)
+            if msg:
+                if msg.type == Gst.MessageType.EOS and current_target_source.endswith(".mp4"):
+                    pipeline.seek_simple(Gst.Format.TIME, Gst.SeekFlags.FLUSH | Gst.SeekFlags.KEY_UNIT, 0)
+                    continue
+                break
+            if STATE_REPO["restart_flag"]:
+                break
+        pipeline.set_state(Gst.State.NULL)
+        STATE_REPO["frame"] = None
+        STATE_REPO["detections"] = []
+        STATE_REPO["native_w"] = 0
+        STATE_REPO["native_h"] = 0
+        STATE_REPO["stream_w"] = 0
+        STATE_REPO["stream_h"] = 0
+        STATE_REPO["inference_fps"] = 0.0
+        inference_timestamps = []
+        time.sleep(1.0)
+@app.route('/')
+def index():
+    """Render the single-page dashboard.
+
+    The <option> lists for the source and model pickers are built here as
+    plain strings and concatenated into the page, which is then rendered with
+    render_template_string so the {% if active_src %} sections are evaluated.
+    """
+    src_active = STATE_REPO["active_source"]
+    # NOTE(review): registry entries are interpolated into the HTML/Jinja text
+    # without escaping; a path containing quotes, '<' or '{{' would alter the
+    # page or the template. Entries come from the command line in this script.
+    if not STATE_REPO["source_registry"]:
+        src_html = '<option value="" disabled selected>-- NO VALID INPUT SOURCES AVAILABLE --</option>'
+    else:
+        # Show a placeholder entry only while no source has been chosen yet
+        src_html = '<option value="" disabled selected>-- SELECT TARGET SOURCE CHANNEL --</option>' if src_active is None else ""
+        src_html += "".join(f'<option value="{s}" {"selected" if s == src_active else ""}>{s}</option>' for s in STATE_REPO["source_registry"])
+    mdl_active = STATE_REPO["active_model_name"]
+    mdl_html = "".join(f'<option value="{m}" {"selected" if m == mdl_active else ""}>{m}</option>' for m in STATE_REPO["model_registry"])
+    # Page script: switchConfig() posts the picker values to /api/swap_config,
+    # updateStreamMetrics() polls /api/stream_info once per second
+    html_template = """<!DOCTYPE html>
+    <html>
+    <head>
+        <title>Ara Stream Client</title>
+        <style>
+            body { font-family: sans-serif; background: #0c0c0e; color: #e1e1e6; margin: 0; padding: 20px; display: flex; flex-direction: column; align-items: center; }
+            .dashboard-layout { display: flex; flex-direction: column; gap: 15px; width: 660px; }
+            .panel { background: #121216; padding: 12px 15px; border-radius: 6px; border: 1px solid #1f1f24; display: flex; flex-direction: column; gap: 10px; }
+            .control-row { display: flex; align-items: center; justify-content: space-between; }
+            label { font-size: 12px; font-weight: bold; color: #8f8f9d; text-transform: uppercase; }
+            select { background: #0c0c0e; color: #fff; border: 1px solid #04d361; padding: 6px 10px; border-radius: 4px; width: 420px; outline: none; }
+            .stats-banner { display: flex; justify-content: space-between; background: #17171f; padding: 10px 15px; border: 1px solid #1f1f24; border-radius: 4px; font-family: monospace; font-size: 13px; color: #8f8f9d; }
+            .stats-banner span strong { color: #04d361; }
+            .media-container { background: #121216; padding: 8px; border-radius: 6px; border: 1px solid #1f1f24; position: relative; min-height: 480px; display: flex; align-items: center; justify-content: center; }
+            img { display: block; border-radius: 4px; width: 100%; height: auto; }
+            .overlay { position: absolute; top: 0; left: 0; width: 100%; height: 100%; background: rgba(12,12,14,0.9); display: flex; flex-direction: column; align-items: center; justify-content: center; border-radius: 6px; text-align: center; }
+            .prompt-text { color: #04d361; font-weight: bold; font-size: 16px; margin-bottom: 10px; }
+        </style>
+        <script>
+            let streamStarted = {% if active_src %}true{% else %}false{% endif %};
+            async function switchConfig() {
+                const src = document.getElementById('source-picker').value;
+                const mdl = document.getElementById('model-picker').value;
+                if(!src) return;
+                await fetch('/api/swap_config', {
+                    method: 'POST',
+                    headers: { 'Content-Type': 'application/json' },
+                    body: JSON.stringify({ "source": src, "model": mdl })
+                });
+                streamStarted = true;
+                document.getElementById('gatekeeper-overlay').style.display = 'none';
+                setTimeout(() => {
+                    document.getElementById('stream-player').src = '/stream.mjpg';
+                }, 1000);
+            }
+            async function updateStreamMetrics() {
+                if (!streamStarted) return;
+                try {
+                    const response = await fetch('/api/stream_info');
+                    const data = await response.json();
+                    document.getElementById('metric-res').innerText = 'Source:' + data.native_w + 'x' + data.native_h + ' Canvas:' + data.width + 'x' + data.height;
+                    document.getElementById('metric-fps').innerText = data.fps.toFixed(1);
+                    document.getElementById('metric-dets').innerText = data.detections;
+                } catch (err) {}
+            }
+            setInterval(updateStreamMetrics, 1000);
+        </script>
+    </head>
+    <body>
+        <h2>Ara Vision Engine</h2>
+        <div class="dashboard-layout">
+            <div class="panel">
+                <div class="control-row">
+                    <label for="source-picker">Media Stream Target:</label>
+                    <select id="source-picker" onchange="switchConfig()">""" + src_html + """</select>
+                </div>
+                <div class="control-row">
+                    <label for="model-picker">NPU Pipeline Model:</label>
+                    <select id="model-picker" onchange="switchConfig()">""" + mdl_html + """</select>
+                </div>
+            </div>
+            <div class="stats-banner">
+                <span id="metric-res">Source:0x0 Canvas:0x0</span>
+                <span>NPU Inference: <span id="metric-fps">0.0</span> FPS</span>
+                <span>Active Detections: <span id="metric-dets">0</span></span>
+            </div>
+            <div class="media-container">
+                {% if not active_src %}
+                <div class="overlay" id="gatekeeper-overlay">
+                    <div class="prompt-text">Awaiting Source Context</div>
+                    <div style="color: #8f8f9d; font-size: 13px; max-width: 400px;">Please select a media path and model from the drop-downs above to mount your pipeline.</div>
+                </div>
+                {% endif %}
+                <img id="stream-player" {% if active_src %}src="/stream.mjpg"{% endif %} style="max-width: """ + str(STATE_REPO["CANVAS_W"]) + """px;" />
+            </div>
+        </div>
+    </body>
+    </html>"""
+    return render_template_string(html_template, active_src=src_active)
+@app.route('/api/stream_info')
+def stream_info():
+    with lock:
+        return jsonify({
+            "native_w": STATE_REPO["native_w"],
+            "native_h": STATE_REPO["native_h"],
+            "width": STATE_REPO["stream_w"],
+            "height": STATE_REPO["stream_h"],
+            "fps": STATE_REPO["inference_fps"],
+            "detections": len(STATE_REPO["detections"])
+        })
+@app.route('/api/swap_config', methods=['POST'])
+def swap_config():
+    payload = request.get_json()
+    src_selected = payload.get("source")
+    mdl_selected = payload.get("model")
+    with lock:
+        trigger_restart = False
+        if src_selected in STATE_REPO["source_registry"] and STATE_REPO["active_source"] != src_selected:
+            STATE_REPO["active_source"] = src_selected
+            trigger_restart = True
+        if mdl_selected in STATE_REPO["model_registry"] and STATE_REPO["active_model_name"] != mdl_selected:
+            base_dir = app.config["MODEL_DIR"]
+            STATE_REPO["active_model_name"] = mdl_selected
+            STATE_REPO["active_model_path"] = os.path.join(base_dir, mdl_selected, "model.dvm")
+            trigger_restart = True
+        if trigger_restart:
+            STATE_REPO["restart_flag"] = True
+    return jsonify({"status": "success"})
+def generate_mjpeg_stream_generator():
+    MODEL_W = float(STATE_REPO["MODEL_W"])
+    MODEL_H = float(STATE_REPO["MODEL_H"])
+    while True:
+        time.sleep(0.04)
+        frame_copy = STATE_REPO["frame"]
+        local_dets = list(STATE_REPO["detections"])
+        if frame_copy is not None:
+            frame = frame_copy.copy()
+            h_native, w_native, _ = frame_copy.shape
+            for class_id, confidence, rx1, ry1, rx2, ry2 in local_dets:
+                cx1 = int(rx1 * (float(w_native) / MODEL_W))
+                cx2 = int(rx2 * (float(w_native) / MODEL_W))
+                cy1 = int(ry1 * (float(h_native) / MODEL_H))
+                cy2 = int(ry2 * (float(h_native) / MODEL_H))
+                label = f"{COCO_LABELS.get(class_id, f'Class {class_id}')} ({confidence*100:.1f}%)"
+                cv2.rectangle(frame, (cx1, cy1), (cx2, cy2), (0, 255, 97), 2)
+                cv2.putText(frame, label, (cx1, max(15, cy1 - 5)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 97), 2)
+            _, encoded_img = cv2.imencode(".jpg", frame)
+            yield (b'--frame\r\n'
+                   b'Content-Type: image/jpeg\r\n\r\n' + encoded_img.tobytes() + b'\r\n')
+        else:
+            waiting_canvas = np.zeros((480, 640, 3), dtype=np.uint8)
+            cv2.putText(waiting_canvas, "AWAITING MEDIA INPUT SELECTION...", (140, 240), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 97), 1)
+            _, encoded_img = cv2.imencode(".jpg", waiting_canvas)
+            yield (b'--frame\r\n'
+                   b'Content-Type: image/jpeg\r\n\r\n' + encoded_img.tobytes() + b'\r\n')
+@app.route('/stream.mjpg')
+def video_feed_stream_route():
+    return Response(generate_mjpeg_stream_generator(), mimetype='multipart/x-mixed-replace; boundary=frame')
+def main():
+    parser = argparse.ArgumentParser(description="Wiki Template: Ara Flask Video Engine")
+    parser.add_argument("--camera", default=None, help="Camera context device node path")
+    parser.add_argument("--mp4", default=None, help="Directory containing target mp4 sample videos")
+    parser.add_argument("--port", type=int, default=8080, help="Target port mapping")
+    parser.add_argument("--model-dir", default="/usr/share/cnn/detection", help="Directory containing target models")
+    parser.add_argument("--model", default="yolov8n", help="Initial model selection")
+    args = parser.parse_args()
+    app.config["MODEL_DIR"] = args.model_dir
+    STATE_REPO["source_registry"] = []
+    if args.camera and os.path.exists(args.camera):
+        STATE_REPO["source_registry"].append(args.camera)
+    if args.mp4 and os.path.exists(args.mp4):
+        local_videos = glob.glob(os.path.join(args.mp4, "*.mp4"))
+        for vid in sorted(local_videos):
+            STATE_REPO["source_registry"].append(vid)
+    if os.path.exists(args.model_dir):
+        discovered_models = []
+        for entry in sorted(os.listdir(args.model_dir)):
+            full_subdir = os.path.join(args.model_dir, entry)
+            if os.path.isdir(full_subdir) and os.path.exists(os.path.join(full_subdir, "model.dvm")):
+                discovered_models.append(entry)
+        if discovered_models:
+            STATE_REPO["model_registry"] = discovered_models
+            STATE_REPO["active_model_name"] = args.model if args.model in discovered_models else discovered_models[0]
+            STATE_REPO["active_model_path"] = os.path.join(args.model_dir, STATE_REPO["active_model_name"], "model.dvm")
+    threading.Thread(target=gstreamer_orchestration_loop, daemon=True).start()
+    print(f"Server serving on: http://localhost:{args.port}/")
+    app.run(host='0.0.0.0', port=args.port, threaded=True, use_reloader=False, debug=False)
+if __name__ == '__main__':
+    main()
+EOF
+}}}
+  - run the script (vision-webapp.py [--port <portno>] [--camera <camera-dev>] [--mp4 <mp4-dir>] [--model-dir <model-dir>] [--model <model-name>]):
+{{{#!bash
+uv run vision-webapp.py --camera /dev/video_webcam --mp4 /usr/share/ara2-vision-examples/sample_videos/
+}}}
+  - you can provide a webcam device path to enable streaming from a webcam and/or an mp4 directory to enable processing the videos in it. Browse to port 8080 of the board (or the port given with --port): dropdowns let you select the input stream and the model, and the browser window shows you the detections and statistics
 [=#eiq-aaf-connector]
 === eIQ AAF Connector