| | 527 | * Video detection with boxing via Python in a headless webapp |
| | 528 | - Python is incredibly useful for accessing GStreamer and handling the ARA detection frame data and building webapps |
| | 529 | - The script uses PyGObject, a Python package that provides bindings for libraries based on GObject Introspection such as GTK, !WebKit, and GStreamer, which lets you use C-based frameworks from Python. We need to install the C libraries for GStreamer for this: |
| | 530 | {{{#!bash |
| | 531 | apt-get install -y \ |
| | 532 | libcairo2-dev \ |
| | 533 | libgirepository-2.0-dev \ |
| | 534 | python3-dev \ |
| | 535 | python3-gst-1.0 \ |
| | 536 | cmake pkg-config |
| | 537 | # we are also going to need to install gstreamer and its dev packages |
| | 538 | apt-get install -y \ |
| | 539 | libgstreamer1.0-dev \ |
| | 540 | libgstreamer-plugins-base1.0-dev \ |
| | 541 | libgstreamer-plugins-bad1.0-dev \ |
| | 542 | gstreamer1.0-plugins-base \ |
| | 543 | gstreamer1.0-plugins-good \ |
| | 544 | gstreamer1.0-plugins-bad \ |
| | 545 | gstreamer1.0-plugins-ugly \ |
| | 546 | gstreamer1.0-libav \ |
| | 547 | gstreamer1.0-tools |
| | 548 | }}} |
| | 549 | - create a Python virtual environment (always a good idea to keep Python dependencies isolated) and install the Python libraries we need: |
| | 550 | {{{#!bash |
| | 551 | # create a venv (.venv) |
| | 552 | uv venv |
| | 553 | # install our script's dependencies (the script imports gi, cv2, numpy and flask) |
| | 554 | uv pip install pygobject opencv-python-headless numpy flask |
| | 555 | cat << 'EOF' > vision-webapp.py |
| | 556 | #!/usr/bin/env python3 |
| | 557 | """ |
| | 558 | Ara NPU Basic Video Stream & Inference Hub |
| | 559 | ========================================== |
| | 560 | """ |
| | 561 | |
| | 562 | import argparse |
| | 563 | import ctypes |
| | 564 | import glob |
| | 565 | import os |
| | 566 | import sys |
| | 567 | import threading |
| | 568 | import time |
| | 569 | import logging |
| | 570 | import cv2 |
| | 571 | import numpy as np |
| | 572 | from flask import Flask, Response, jsonify, request, render_template_string |
| | 573 | import gi |
| | 574 | |
| | 575 | gi.require_version('Gst', '1.0') |
| | 576 | from gi.repository import Gst |
| | 577 | Gst.init(None) |
| | 578 | |
| | 579 | # Quiet down Werkzeug HTTP traffic logging to suppress 1Hz AJAX console pollution |
| | 580 | log = logging.getLogger('werkzeug') |
| | 581 | log.setLevel(logging.ERROR) |
| | 582 | |
| | 583 | app = Flask(__name__) |
| | 584 | lock = threading.Lock() |
| | 585 | |
class AraDetection(ctypes.Structure):
    """One detection record, laid out byte-packed for ``from_buffer_copy``.

    The field order and C types below define the binary layout: four float
    box coordinates, a float confidence, a 32-bit class id and a
    pointer-sized ``class_name_ptr`` slot.
    """

    # No alignment padding between fields.
    _pack_ = 1
    _fields_ = [
        *((edge, ctypes.c_float) for edge in ("xmin", "ymin", "xmax", "ymax")),
        ("confidence", ctypes.c_float),
        ("class_id", ctypes.c_int32),
        ("class_name_ptr", ctypes.c_void_p),
    ]
| | 594 | |
# --- STATE STORAGE ---
# Single shared dictionary read and written by the GStreamer worker thread
# and by the Flask request handlers.
STATE_REPO = dict(
    # Latest canvas frame, latest detection tuples and the current selection.
    frame=None,
    detections=[],
    active_source=None,
    active_model_name="yolov8n",
    active_model_path="/usr/share/cnn/detection/yolov8n/model.dvm",
    restart_flag=False,
    source_registry=[],
    model_registry=["yolov8n"],
    # Target pipeline resolutions: browser canvas and model input.
    CANVAS_W=640,
    CANVAS_H=360,
    MODEL_W=640,
    MODEL_H=640,
    # Live telemetry metrics, filled in by the appsink callbacks.
    native_w=0,
    native_h=0,
    stream_w=0,
    stream_h=0,
    inference_fps=0.0,
)

# Arrival times of recent inference results, used for the FPS figure.
inference_timestamps = []
| | 622 | |
# Class id -> display name. The position in the tuple is the class id.
COCO_LABELS = dict(enumerate((
    # 0-9
    'person', 'bicycle', 'car', 'motorcycle', 'airplane',
    'bus', 'train', 'truck', 'boat', 'traffic light',
    # 10-19
    'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird',
    'cat', 'dog', 'horse', 'sheep', 'cow',
    # 20-29
    'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
    'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
    # 30-39
    'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
    'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
    # 40-49
    'wine glass', 'cup', 'fork', 'knife', 'spoon',
    'bowl', 'banana', 'apple', 'sandwich', 'orange',
    # 50-59
    'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
    'cake', 'chair', 'couch', 'potted plant', 'bed',
    # 60-69
    'dining table', 'toilet', 'tv', 'laptop', 'mouse',
    'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
    # 70-79
    'toaster', 'sink', 'refrigerator', 'book', 'clock',
    'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush',
)))
| | 641 | |
def build_source_injection_string(source_path):
    """Return the head of the pipeline description for *source_path*.

    Paths ending in ``.mp4`` are read with ``filesrc ! decodebin``; anything
    else is treated as a V4L2 device node. Both variants end in a
    ``tee name=t`` so the caller can attach branches with ``t. ! ...``.

    The path is emitted as a double-quoted value so that paths containing
    spaces stay a single property value when the description is parsed.
    Embedded backslashes and double quotes are backslash-escaped.
    """
    # chr(92) is a backslash. It is spelled this way because this script is
    # written out through an unquoted shell heredoc, which would collapse a
    # doubled backslash in the source text.
    backslash = chr(92)
    quoted = source_path.replace(backslash, backslash * 2).replace('"', backslash + '"')

    if source_path.endswith(".mp4"):
        return f'filesrc location="{quoted}" ! decodebin ! videoconvert ! tee name=t '
    return f'v4l2src device="{quoted}" ! videoconvert ! tee name=t '
| | 647 | |
def gstreamer_orchestration_loop():
    """Worker loop: build, run and tear down the GStreamer pipeline on demand.

    Runs forever (intended for a daemon thread). Each iteration:

    1. waits until ``STATE_REPO["active_source"]`` is set,
    2. builds a pipeline with three ``tee`` branches feeding three appsinks:
       ``nativesink`` (source resolution), ``framesink`` (BGR canvas frames)
       and ``postsink`` (output of the dvPre/dvInf/dvPost chain),
    3. polls the bus until an error, a non-looping EOS, or
       ``STATE_REPO["restart_flag"]`` is raised,
    4. stops the pipeline and resets the telemetry fields.

    An ``.mp4`` source is rewound on EOS so it loops. If the pipeline cannot
    be constructed, the error is reported and the source selection is
    cleared so the loop waits for a new choice instead of dying.
    """
    global inference_timestamps
    # Local import: GLib is only needed here, to catch parse_launch failures.
    from gi.repository import GLib

    CANVAS_W = STATE_REPO["CANVAS_W"]
    CANVAS_H = STATE_REPO["CANVAS_H"]
    MODEL_W = STATE_REPO["MODEL_W"]
    MODEL_H = STATE_REPO["MODEL_H"]

    # 50 ms bus poll, as an integer nanosecond count.
    poll_timeout_ns = Gst.SECOND // 20

    while True:
        while STATE_REPO["active_source"] is None:
            time.sleep(0.2)
            if STATE_REPO["restart_flag"]:
                break

        # Clear the flag BEFORE snapshotting the configuration, under the same
        # lock swap_config() uses. A swap that lands after this point raises
        # the flag again and is picked up by the bus loop below.
        with lock:
            STATE_REPO["restart_flag"] = False
            current_target_source = STATE_REPO["active_source"]
            current_target_model = STATE_REPO["active_model_path"]

        if current_target_source is None:
            continue

        source_segment = build_source_injection_string(current_target_source)

        pipe_str = (
            f"{source_segment} "
            f"t. ! queue max-size-buffers=2 leaky=downstream ! appsink name=nativesink sync=false async=false emit-signals=true "
            f"t. ! queue max-size-buffers=2 leaky=downstream ! videoscale ! video/x-raw,width={CANVAS_W},height={CANVAS_H} ! videoconvert ! video/x-raw,format=BGR ! appsink name=framesink sync=false async=false emit-signals=true "
            f"t. ! queue max-size-buffers=2 leaky=downstream ! "
            f"videoscale ! video/x-raw,width={MODEL_W},height={MODEL_H} ! videoconvert ! video/x-raw,format=BGRA ! "
            f"dvPre model={current_target_model} ! "
            f"dvInf model={current_target_model} sock=/var/run/proxy.sock use-shm=true shm-path=/dev/shm/ara_inf_ ! "
            f"dvPost model={current_target_model} orig-width={MODEL_W} orig-height={MODEL_H} ! "
            f"appsink name=postsink sync=false async=false emit-signals=true"
        )

        print(f"[LAUNCH PIPELINE]\n {pipe_str}\n")
        try:
            pipeline = Gst.parse_launch(pipe_str)
        except GLib.Error as exc:
            # E.g. a missing element or a bad property. Park the selection so
            # we do not spin on a description that can never be built.
            print(f"[PIPELINE BUILD FAILED] {exc}", file=sys.stderr)
            with lock:
                if STATE_REPO["active_source"] == current_target_source:
                    STATE_REPO["active_source"] = None
            time.sleep(1.0)
            continue

        native_sink = pipeline.get_by_name("nativesink")
        frame_sink = pipeline.get_by_name("framesink")
        post_sink = pipeline.get_by_name("postsink")

        def on_native_caps(sink):
            """Record the source resolution from the untouched tee branch."""
            sample = sink.emit("pull-sample")
            if sample:
                caps = sample.get_caps()
                struct = caps.get_structure(0)
                STATE_REPO["native_w"] = struct.get_value("width")
                STATE_REPO["native_h"] = struct.get_value("height")
            return Gst.FlowReturn.OK

        def on_new_detection(sink):
            """Update the rolling FPS figure and decode one detection buffer."""
            global inference_timestamps
            sample = sink.emit("pull-sample")
            if sample:
                # FPS is derived purely from the arrival rate of inference
                # results. A monotonic clock keeps wall-clock adjustments from
                # skewing the window.
                stamps = inference_timestamps
                stamps.append(time.monotonic())
                if len(stamps) > 30:
                    stamps.pop(0)
                window = stamps[-1] - stamps[0]
                # N timestamps bound N-1 frame intervals. Skip the update when
                # the window is empty to avoid dividing by zero.
                if len(stamps) > 1 and window > 0:
                    STATE_REPO["inference_fps"] = (len(stamps) - 1) / window

                buffer = sample.get_buffer()
                raw_bytes = buffer.extract_dup(0, buffer.get_size())
                if raw_bytes and len(raw_bytes) >= 4:
                    # Buffer layout as parsed here: a uint32 count followed by
                    # that many AraDetection records.
                    num_detections = int(np.frombuffer(raw_bytes[:4], dtype=np.uint32)[0])
                    local_dets = []
                    offset = 4
                    ds = ctypes.sizeof(AraDetection)
                    for _ in range(num_detections):
                        if offset + ds > len(raw_bytes):
                            break  # count claims more records than the buffer holds
                        det = AraDetection.from_buffer_copy(raw_bytes[offset:offset + ds])
                        offset += ds
                        local_dets.append((det.class_id, det.confidence, det.xmin, det.ymin, det.xmax, det.ymax))
                    STATE_REPO["detections"] = local_dets
            return Gst.FlowReturn.OK

        def on_new_frame(sink):
            """Publish the newest BGR canvas frame as a numpy array."""
            sample = sink.emit("pull-sample")
            if sample:
                buffer = sample.get_buffer()
                caps = sample.get_caps()
                struct = caps.get_structure(0)
                w = struct.get_value("width")
                h = struct.get_value("height")

                STATE_REPO["stream_w"] = w
                STATE_REPO["stream_h"] = h

                raw_bytes = buffer.extract_dup(0, buffer.get_size())
                if raw_bytes:
                    try:
                        frame_flat = np.frombuffer(raw_bytes, dtype=np.uint8)
                        frame_arr = frame_flat.reshape((h, w, 3))
                        STATE_REPO["frame"] = frame_arr.copy()
                    except ValueError:
                        # Buffer size does not match h*w*3: drop this frame
                        # and keep the previous one.
                        pass
            return Gst.FlowReturn.OK

        native_sink.connect("new-sample", on_native_caps)
        post_sink.connect("new-sample", on_new_detection)
        frame_sink.connect("new-sample", on_new_frame)
        pipeline.set_state(Gst.State.PLAYING)

        bus = pipeline.get_bus()
        while True:
            msg = bus.timed_pop_filtered(poll_timeout_ns, Gst.MessageType.ERROR | Gst.MessageType.EOS)
            if msg:
                if msg.type == Gst.MessageType.EOS and current_target_source.endswith(".mp4"):
                    # Loop file playback by seeking back to the start.
                    pipeline.seek_simple(Gst.Format.TIME, Gst.SeekFlags.FLUSH | Gst.SeekFlags.KEY_UNIT, 0)
                    continue
                if msg.type == Gst.MessageType.ERROR:
                    err, debug_info = msg.parse_error()
                    print(f"[PIPELINE ERROR] {err.message} ({debug_info})", file=sys.stderr)
                break

            if STATE_REPO["restart_flag"]:
                break

        # Tear down and reset everything the callbacks published.
        pipeline.set_state(Gst.State.NULL)
        STATE_REPO["frame"] = None
        STATE_REPO["detections"] = []
        STATE_REPO["native_w"] = 0
        STATE_REPO["native_h"] = 0
        STATE_REPO["stream_w"] = 0
        STATE_REPO["stream_h"] = 0
        STATE_REPO["inference_fps"] = 0.0
        inference_timestamps = []
        time.sleep(1.0)
| | 774 | |
@app.route('/')
def index():
    """Render the single-page dashboard.

    The page has two drop-downs (source and model), a statistics banner
    refreshed once per second from ``/api/stream_info``, and an ``<img>``
    bound to ``/stream.mjpg`` once a source is active.

    Source paths and model names are passed to Jinja as template variables
    rather than concatenated into the template text. They are therefore
    HTML-escaped on output, and a filename containing ``{{ ... }}`` or markup
    cannot alter the template or the page.
    """
    html_template = """<!DOCTYPE html>
<html>
<head>
<title>Ara Stream Client</title>
<style>
body { font-family: sans-serif; background: #0c0c0e; color: #e1e1e6; margin: 0; padding: 20px; display: flex; flex-direction: column; align-items: center; }
.dashboard-layout { display: flex; flex-direction: column; gap: 15px; width: 660px; }
.panel { background: #121216; padding: 12px 15px; border-radius: 6px; border: 1px solid #1f1f24; display: flex; flex-direction: column; gap: 10px; }
.control-row { display: flex; align-items: center; justify-content: space-between; }
label { font-size: 12px; font-weight: bold; color: #8f8f9d; text-transform: uppercase; }
select { background: #0c0c0e; color: #fff; border: 1px solid #04d361; padding: 6px 10px; border-radius: 4px; width: 420px; outline: none; }
.stats-banner { display: flex; justify-content: space-between; background: #17171f; padding: 10px 15px; border: 1px solid #1f1f24; border-radius: 4px; font-family: monospace; font-size: 13px; color: #8f8f9d; }
.stats-banner span strong { color: #04d361; }
.media-container { background: #121216; padding: 8px; border-radius: 6px; border: 1px solid #1f1f24; position: relative; min-height: 480px; display: flex; align-items: center; justify-content: center; }
img { display: block; border-radius: 4px; width: 100%; height: auto; }
.overlay { position: absolute; top: 0; left: 0; width: 100%; height: 100%; background: rgba(12,12,14,0.9); display: flex; flex-direction: column; align-items: center; justify-content: center; border-radius: 6px; text-align: center; }
.prompt-text { color: #04d361; font-weight: bold; font-size: 16px; margin-bottom: 10px; }
</style>
<script>
let streamStarted = {% if active_src %}true{% else %}false{% endif %};

async function switchConfig() {
const src = document.getElementById('source-picker').value;
const mdl = document.getElementById('model-picker').value;
if(!src) return;

await fetch('/api/swap_config', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ "source": src, "model": mdl })
});

streamStarted = true;
document.getElementById('gatekeeper-overlay').style.display = 'none';
setTimeout(() => {
document.getElementById('stream-player').src = '/stream.mjpg';
}, 1000);
}

async function updateStreamMetrics() {
if (!streamStarted) return;
try {
const response = await fetch('/api/stream_info');
const data = await response.json();

document.getElementById('metric-res').innerText = 'Source:' + data.native_w + 'x' + data.native_h + ' Canvas:' + data.width + 'x' + data.height;
document.getElementById('metric-fps').innerText = data.fps.toFixed(1);
document.getElementById('metric-dets').innerText = data.detections;
} catch (err) {}
}
setInterval(updateStreamMetrics, 1000);
</script>
</head>
<body>
<h2>Ara Vision Engine</h2>
<div class="dashboard-layout">
<div class="panel">
<div class="control-row">
<label for="source-picker">Media Stream Target:</label>
<select id="source-picker" onchange="switchConfig()">
{%- if not sources -%}
<option value="" disabled selected>-- NO VALID INPUT SOURCES AVAILABLE --</option>
{%- else -%}
{%- if active_src is none -%}
<option value="" disabled selected>-- SELECT TARGET SOURCE CHANNEL --</option>
{%- endif -%}
{%- for s in sources -%}
<option value="{{ s }}" {% if s == active_src %}selected{% endif %}>{{ s }}</option>
{%- endfor -%}
{%- endif -%}
</select>
</div>
<div class="control-row">
<label for="model-picker">NPU Pipeline Model:</label>
<select id="model-picker" onchange="switchConfig()">
{%- for m in models -%}
<option value="{{ m }}" {% if m == active_mdl %}selected{% endif %}>{{ m }}</option>
{%- endfor -%}
</select>
</div>
</div>

<div class="stats-banner">
<span id="metric-res">Source:0x0 Canvas:0x0</span>
<span>NPU Inference: <span id="metric-fps">0.0</span> FPS</span>
<span>Active Detections: <span id="metric-dets">0</span></span>
</div>

<div class="media-container">
{% if not active_src %}
<div class="overlay" id="gatekeeper-overlay">
<div class="prompt-text">Awaiting Source Context</div>
<div style="color: #8f8f9d; font-size: 13px; max-width: 400px;">Please select a media path and model from the drop-downs above to mount your pipeline.</div>
</div>
{% endif %}
<img id="stream-player" {% if active_src %}src="/stream.mjpg"{% endif %} style="max-width: {{ canvas_w }}px;" />
</div>
</div>
</body>
</html>"""
    return render_template_string(
        html_template,
        active_src=STATE_REPO["active_source"],
        active_mdl=STATE_REPO["active_model_name"],
        # Copies, so a concurrent registry change cannot affect rendering.
        sources=list(STATE_REPO["source_registry"]),
        models=list(STATE_REPO["model_registry"]),
        canvas_w=STATE_REPO["CANVAS_W"],
    )
| | 874 | |
@app.route('/api/stream_info')
def stream_info():
    """Return the live telemetry polled by the dashboard, as JSON.

    Keys: ``native_w``/``native_h`` (source size), ``width``/``height``
    (canvas stream size), ``fps`` (inference rate) and ``detections``
    (number of detections currently stored).
    """
    # Response key -> STATE_REPO key, for the values copied through as-is.
    passthrough = (
        ("native_w", "native_w"),
        ("native_h", "native_h"),
        ("width", "stream_w"),
        ("height", "stream_h"),
        ("fps", "inference_fps"),
    )
    with lock:
        report = {public: STATE_REPO[internal] for public, internal in passthrough}
        report["detections"] = len(STATE_REPO["detections"])
        return jsonify(report)
| | 886 | |
@app.route('/api/swap_config', methods=['POST'])
def swap_config():
    """Switch the active source and/or model and request a pipeline restart.

    Expects a JSON object with optional ``"source"`` and ``"model"`` keys.
    A value is applied only if it is present in the matching registry and
    differs from the current selection. Any applied change raises
    ``STATE_REPO["restart_flag"]``.

    Returns ``{"status": "success"}``. A body that is missing, is not valid
    JSON, or is not a JSON object gets HTTP 400 with an error payload
    instead of an unhandled exception.
    """
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({"status": "error", "message": "expected a JSON object body"}), 400

    src_selected = payload.get("source")
    mdl_selected = payload.get("model")

    with lock:
        trigger_restart = False
        if src_selected in STATE_REPO["source_registry"] and STATE_REPO["active_source"] != src_selected:
            STATE_REPO["active_source"] = src_selected
            trigger_restart = True
        if mdl_selected in STATE_REPO["model_registry"] and STATE_REPO["active_model_name"] != mdl_selected:
            # The name was validated against the registry above, so joining
            # it into a path cannot escape the model directory.
            base_dir = app.config["MODEL_DIR"]
            STATE_REPO["active_model_name"] = mdl_selected
            STATE_REPO["active_model_path"] = os.path.join(base_dir, mdl_selected, "model.dvm")
            trigger_restart = True
        if trigger_restart:
            STATE_REPO["restart_flag"] = True
    return jsonify({"status": "success"})
| | 906 | |
def generate_mjpeg_stream_generator():
    """Yield multipart MJPEG parts forever, at roughly 25 parts per second.

    While a frame is available, each part is the latest canvas frame with
    the current detections drawn on it. Box coordinates are scaled from
    model-input pixels (``MODEL_W`` x ``MODEL_H``) to the frame size.
    While no frame is available, a static "awaiting input" placeholder is
    sent instead.

    The placeholder is rendered and JPEG-encoded once per generator rather
    than on every tick. A frame whose JPEG encoding fails is skipped.
    """
    MODEL_W = float(STATE_REPO["MODEL_W"])
    MODEL_H = float(STATE_REPO["MODEL_H"])

    def as_part(jpeg_bytes):
        # One multipart section; the boundary name matches the route's mimetype.
        return (b'--frame\r\n'
                b'Content-Type: image/jpeg\r\n\r\n' + jpeg_bytes + b'\r\n')

    # The placeholder never changes, so build it once up front.
    waiting_canvas = np.zeros((480, 640, 3), dtype=np.uint8)
    cv2.putText(waiting_canvas, "AWAITING MEDIA INPUT SELECTION...", (140, 240), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 97), 1)
    placeholder_ok, placeholder_jpeg = cv2.imencode(".jpg", waiting_canvas)
    waiting_part = as_part(placeholder_jpeg.tobytes()) if placeholder_ok else None

    while True:
        time.sleep(0.04)
        frame_copy = STATE_REPO["frame"]
        local_dets = list(STATE_REPO["detections"])

        if frame_copy is None:
            if waiting_part is not None:
                yield waiting_part
            continue

        # Draw on a private copy so the shared frame is never modified.
        frame = frame_copy.copy()
        h_native, w_native, _ = frame_copy.shape
        scale_x = float(w_native) / MODEL_W
        scale_y = float(h_native) / MODEL_H
        for class_id, confidence, rx1, ry1, rx2, ry2 in local_dets:
            cx1 = int(rx1 * scale_x)
            cx2 = int(rx2 * scale_x)
            cy1 = int(ry1 * scale_y)
            cy2 = int(ry2 * scale_y)
            label = f"{COCO_LABELS.get(class_id, f'Class {class_id}')} ({confidence*100:.1f}%)"
            cv2.rectangle(frame, (cx1, cy1), (cx2, cy2), (0, 255, 97), 2)
            # Keep the label on-screen for boxes touching the top edge.
            cv2.putText(frame, label, (cx1, max(15, cy1 - 5)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 97), 2)

        encoded_ok, encoded_img = cv2.imencode(".jpg", frame)
        if not encoded_ok:
            continue
        yield as_part(encoded_img.tobytes())
| | 935 | |
@app.route('/stream.mjpg')
def video_feed_stream_route():
    """Serve the annotated video as a multipart MJPEG response."""
    mjpeg_parts = generate_mjpeg_stream_generator()
    # The boundary name must match the '--frame' marker the generator emits.
    return Response(
        mjpeg_parts,
        mimetype='multipart/x-mixed-replace; boundary=frame',
    )
| | 939 | |
def main():
    """Parse the command line, fill the registries and start the servers.

    * ``--camera``: device node added to the source registry if it exists.
    * ``--mp4``: directory whose ``*.mp4`` files are added (sorted).
    * ``--model-dir``: each sub-directory holding a ``model.dvm`` becomes a
      selectable model.
    * ``--model``: initial model. Falls back to the first discovered model
      when the requested one was not found.
    * ``--port``: HTTP port.

    When no model is discovered, the active model is still derived from
    ``--model-dir`` and ``--model`` rather than a hard-coded default path.
    This keeps it consistent with the directory ``swap_config`` resolves
    models against. The GStreamer worker runs in a daemon thread; Flask
    runs in the foreground.
    """
    parser = argparse.ArgumentParser(description="Wiki Template: Ara Flask Video Engine")
    parser.add_argument("--camera", default=None, help="Camera context device node path")
    parser.add_argument("--mp4", default=None, help="Directory containing target mp4 sample videos")
    parser.add_argument("--port", type=int, default=8080, help="Target port mapping")
    parser.add_argument("--model-dir", default="/usr/share/cnn/detection", help="Directory containing target models")
    parser.add_argument("--model", default="yolov8n", help="Initial model selection")
    args = parser.parse_args()

    app.config["MODEL_DIR"] = args.model_dir
    STATE_REPO["source_registry"] = []

    if args.camera:
        if os.path.exists(args.camera):
            STATE_REPO["source_registry"].append(args.camera)
        else:
            print(f"[WARN] camera device not found, ignoring: {args.camera}", file=sys.stderr)

    if args.mp4:
        if os.path.exists(args.mp4):
            STATE_REPO["source_registry"].extend(sorted(glob.glob(os.path.join(args.mp4, "*.mp4"))))
        else:
            print(f"[WARN] mp4 directory not found, ignoring: {args.mp4}", file=sys.stderr)

    if not STATE_REPO["source_registry"]:
        print("[WARN] no input sources available; pass --camera and/or --mp4", file=sys.stderr)

    discovered_models = []
    if os.path.exists(args.model_dir):
        for entry in sorted(os.listdir(args.model_dir)):
            full_subdir = os.path.join(args.model_dir, entry)
            if os.path.isdir(full_subdir) and os.path.exists(os.path.join(full_subdir, "model.dvm")):
                discovered_models.append(entry)

    if discovered_models:
        STATE_REPO["model_registry"] = discovered_models
        if args.model in discovered_models:
            active_name = args.model
        else:
            active_name = discovered_models[0]
            print(f"[WARN] model '{args.model}' not found, using '{active_name}'", file=sys.stderr)
    else:
        # Nothing discovered: still honour the CLI arguments. With the
        # default arguments this yields the same name and path as the
        # module defaults.
        print(f"[WARN] no models found under {args.model_dir}", file=sys.stderr)
        STATE_REPO["model_registry"] = [args.model]
        active_name = args.model
    STATE_REPO["active_model_name"] = active_name
    STATE_REPO["active_model_path"] = os.path.join(args.model_dir, active_name, "model.dvm")

    threading.Thread(target=gstreamer_orchestration_loop, daemon=True).start()

    print(f"Server serving on: http://localhost:{args.port}/")
    # The server binds to all interfaces, so it is reachable from other hosts.
    app.run(host='0.0.0.0', port=args.port, threaded=True, use_reloader=False, debug=False)


if __name__ == '__main__':
    main()
| | 978 | EOF |
| | 979 | }}} |
| | 980 | - run the script (vision-webapp.py [--port <portno>] [--camera <camera-dev>] [--mp4 <mp4-dir>] [--model-dir <model-dir>] [--model <model-name>]): |
| | 981 | {{{#!bash |
| | 982 | uv run vision-webapp.py --camera /dev/video_webcam --mp4 /usr/share/ara2-vision-examples/sample_videos/ |
| | 983 | }}} |
| | 984 | - you can provide a webcam device path to enable streaming from a webcam and/or a directory of mp4 files to enable processing those. Drop-downs let you select the input stream and the model, and the browser window then shows the detections and statistics. The server listens on port 8080 unless --port is given. |
| | 985 | |