Context Navigation

Changes between Version 11 and Version 12 of expansion/gw16168

Timestamp:: 06/18/2026 12:10:27 AM (38 hours ago)
Author:: Tim Harvey
Comment:: added VLM webapp example

Legend:

: Unmodified
: Added
: Removed
: Modified

expansion/gw16168

-              v11
+              v12
 }}}
+[=#vlm]
+=== Web based python VLM eIQ example
+The eIQ AAF Connector can be used to analyze video and images.
+Here is an example of a headless web-app based off NXP's [https://github.com/nxp-imx-support/vlm-edge-studio/tree/main/src vlm-edge-studio example] using:
+ - Qwen2.5-VL-7B-Instruct-Ara240
+ - eIQ AAF Connector
+Example:
+ - if you want some video examples you can download NXP's vlm-edge-studio_1.0.0.deb and extract its data:
+{{{#!bash
+# extract data (but don't install the deb)
+dpkg-deb --vextract vlm-edge-studio_1.0.0.deb /
+}}}
+  - this extracts a number of videos to /usr/share/vlm-edge-studio/assets/videos
+ - The AAF connector requires a lot of DRAM when loading large models (e.g. the 12GB Qwen2.5-VL-7B-Instruct model), so we will create a swap file to avoid memory issues when loading the model:
+{{{#!bash
+swapon --show # shows nothing as not enabled
+# pre-allocate space for swap file
+fallocate -l 4G /swapfile
+# make sure it is accessible by root only
+chmod 600 /swapfile
+# format the file as swap
+mkswap /swapfile
+# activate the swapfile
+swapon /swapfile
+# add it to /etc/fstab so that it mounts on boot
+echo '/swapfile none swap sw 0 0' >> /etc/fstab
+}}}
+ - install Qwen2.5-VL-7B-Instruct-Ara240 model
+{{{#!bash
+fetch_models --repo-id nxp/Qwen2.5-VL-7B-Instruct-Ara240 # 12GB
+}}}
+ - To avoid loading unused models into the ARA and running into memory issues, disable all models except Qwen2.5-VL-7B-Instruct in the AAF connector's config file:
+{{{#!bash
+python3 -c 'import json; p="/usr/share/eiq/aaf-connector/server_config.json"; f=open(p,"r+"); d=json.load(f); [m.update({"enabled": (m.get("name") == "Qwen2.5-VL-7B-Instruct")}) for m in d.get("available_models", [])]; f.seek(0); json.dump(d, f, indent=4); f.truncate()'
+# restart AAF connector
+systemctl restart eiq-aaf-connector.service
+# wait for it to be up and running (it will take several minutes to load the 12GB Qwen2.5-VL-7B-Instruct model to the ARA)
+until (echo > /dev/tcp/127.0.0.1/8000) >/dev/null 2>&1; do echo -n .; sleep 1; done
+}}}
+ - create a dir for us to work in and create the python script
+{{{#!bash
+mkdir vlm-webapp; cd vlm-webapp
+cat << \EOF > vlm.py
+import argparse
+import os
+import httpx
+import uvicorn
+import json
+import urllib.request
+import time
+from datetime import datetime
+from fastapi import FastAPI, HTTPException
+from fastapi.responses import HTMLResponse, StreamingResponse
+from fastapi.staticfiles import StaticFiles
+from pydantic import BaseModel
+from typing import List, Dict
+# ═══════════════════════════════════════════════════════════════
+# Command Line Arguments & Global Constants Configuration
+# ═══════════════════════════════════════════════════════════════
+# Command-line options are parsed at import time, so `args` is available to every route below.
+parser = argparse.ArgumentParser(description="VLM Edge Studio WebApp Bridge")
+parser.add_argument("--video-dir", required=True, help="Directory path where video MP4 files are hosted")
+parser.add_argument("--aaf-server", default="http://127.0.0.1:8000", help="AAF Server backend Base URL")
+parser.add_argument("--host", default="0.0.0.0", help="Host binding address for this web application")
+parser.add_argument("--port", type=int, default=8080, help="Port binding for this web application")
+parser.add_argument("--verbose", action="store_true", default=False, help="Enable verbose raw JSON payload terminal dumping")
+# parse_known_args() returns unrecognised options separately instead of exiting; they are discarded here.
+args, _ = parser.parse_known_args()
+# Model name sent with every AAF request (chat completions and metrics) and shown in the page header.
+TARGET_MODEL = "Qwen2.5-VL-7B-Instruct"
+app = FastAPI(title="VLM Video Web Analyzer")
+# Fail at startup rather than on the first request if the video directory is missing.
+if not os.path.isdir(args.video_dir):
+    raise RuntimeError(f"Provided video directory target does not exist: {args.video_dir}")
+# Mount local streaming static location directly from the primary video-dir configuration
+# (the browser's <video> element loads files from /stream/videos/<name>).
+app.mount("/stream/videos", StaticFiles(directory=args.video_dir), name="videos")
+# One turn of the conversation as posted by the browser inside MultiTurnPayload.history.
+class ChatMessage(BaseModel):
+    # "user" marks a question; any other value is labelled as a previous model response
+    # when the history is flattened for the AAF request.
+    role: str
+    # Text of the turn.
+    content: str
+# Request body of POST /api/analyze.
+class MultiTurnPayload(BaseModel):
+    # Name of the video, joined onto --video-dir to build the path sent to the AAF server.
+    video_name: str
+    # Conversation so far, oldest first; the last entry is the question to answer.
+    history: List[ChatMessage]
+def get_timestamp():
+    return datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
+# ═══════════════════════════════════════════════════════════════
+# API Engine Intercept Proxy Routes
+# ═══════════════════════════════════════════════════════════════
+@app.get("/api/videos", tags=["Media"])
+async def get_available_videos():
+    try:
+        if not os.path.exists(args.video_dir):
+            return []
+        files = os.listdir(args.video_dir)
+        valid_extensions = (".mp4", ".mov", ".mkv", ".avi")
+        return [f for f in files if f.lower().endswith(valid_extensions)]
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@app.get("/api/metrics")
+async def proxy_metrics():
+    headers = {"Accept": "application/json", "User-Agent": "AAFConnector/1.0"}
+    async with httpx.AsyncClient() as client:
+        try:
+            url = f"{args.aaf_server}/metrics/"
+            response = await client.get(url, params={"model_name": TARGET_MODEL}, headers=headers, timeout=3.0)
+            return response.json()
+        except Exception as e:
+            return {
+                TARGET_MODEL: {
+                    "llm_average_token_per_second": 0.0,
+                    "llm_first_infer_duration": 0.0,
+                    "generated_token_num": 0
+                }
+            }
+@app.post("/api/analyze")
+async def proxy_analysis_stream(payload: MultiTurnPayload):
+    absolute_video_target_path = os.path.join(args.video_dir, payload.video_name)
+    start_time = time.time()
+    if len(payload.history) > 1:
+        flattened_text = "Here is the conversation history context for this execution sequence:\n"
+        for msg in payload.history[:-1]:
+            label = "User Question" if msg.role == "user" else "Your Previous Response"
+            flattened_text += f"[{label}]: {msg.content}\n"
+        flattened_text += f"\n[New Follow-up Question to Answer]: {payload.history[-1].content}"
+    else:
+        flattened_text = payload.history[0].content
+    aaf_payload = {
+        "model": TARGET_MODEL,
+        "stream": True,
+        "messages": [{
+            "role": "user",
+            "content": [
+                {"type": "text", "text": flattened_text},
+                {"type": "video_url", "video_url": {"url": absolute_video_target_path}}
+            ]
+        }]
+    }
+    print("\n" + "═"*70)
+    print(f"[{get_timestamp()}] [CONVERSATIONAL INFERENCE DISPATCH]")
+    print(f"  Model ID      : {TARGET_MODEL}")
+    print(f"  Target Path   : {absolute_video_target_path}")
+    print(f"  Turn Count    : {len(payload.history)} turns processed in session state.")
+    if args.verbose:
+        print("─"*70)
+        print("[RAW OUTGOING JSON PAYLOAD SENT TO AAF SERVER]:")
+        print(json.dumps(aaf_payload, indent=2))
+    print("═"*70 + "\n")
+    def raw_socket_generator():
+        target_endpoint = f"{args.aaf_server}/v1/chat/completions"
+        data_bytes = json.dumps(aaf_payload).encode('utf-8')
+        req = urllib.request.Request(
+            target_endpoint,
+            data=data_bytes,
+            headers={
+                "Content-Type": "application/json",
+                "Accept": "application/json",
+                "User-Agent": "AAFConnector/1.0"
+            },
+            method="POST"
+        )
+        first_token_received = False
+        try:
+            with urllib.request.urlopen(req, timeout=300.0) as response:
+                while True:
+                    line_bytes = response.readline()
+                    if not line_bytes:
+                        break
+                    line_str = line_bytes.decode('utf-8', errors='ignore')
+                    trimmed = line_str.strip()
+                    if trimmed:
+                        yield f"{trimmed}\n".encode('utf-8')
+                        if trimmed.startswith('data: '):
+                            data_content = trimmed[5:].strip()
+                            if data_content == "[DONE]":
+                                continue
+                            try:
+                                parsed = json.loads(data_content)
+                                token = parsed["choices"][0]["delta"].get("content", "")
+                                if token:
+                                    if not first_token_received:
+                                        ttft_duration = time.time() - start_time
+                                        print(f"[{get_timestamp()}] [TTFT / DECODE PHASE]: {ttft_duration:.2f}s.")
+                                        print(f"[{get_timestamp()}] [STREAMING TEXT TOKENS]: ", end="")
+                                        first_token_received = True
+                                    print(token, end="", flush=True)
+                            except Exception:
+                                pass
+        except urllib.error.HTTPError as http_err:
+            err_body = http_err.read().decode('utf-8', errors='ignore')
+            yield f"data: {{\"error\": \"AAF Server Engine error {http_err.code}: {err_body}\"}}\n\n".encode('utf-8')
+        except Exception as e:
+            yield f"data: {{\"error\": \"Direct socket pipeline fault: {str(e)}\"}}\n\n".encode('utf-8')
+        finally:
+            duration = time.time() - start_time
+            print("\n" + "═"*70)
+            print(f"[{get_timestamp()}] [INFERENCE COMPLETED] Turn Runtime: {duration:.2f}s")
+            print("═"*70 + "\n")
+    return StreamingResponse(raw_socket_generator(), media_type="text/event-stream")
+# ═══════════════════════════════════════════════════════════════
+# User Interface (HTML Layer)
+# ═══════════════════════════════════════════════════════════════
+@app.get("/", response_class=HTMLResponse)
+async def serve_index():
+    # Enforcing a raw python string (r"") so Python never converts or drops backslashes
+    html_content = r"""
+    <!DOCTYPE html>
+    <html lang="en">
+    <head>
+        <meta charset="UTF-8">
+        <title>VLM Edge Studio Analyzer</title>
+        <script src="https://cdn.tailwindcss.com"></script>
+        <style>
+            .skeleton-pulse {
+                background: linear-gradient(-90deg, #1e293b 0%, #334155 50%, #1e293b 100%);
+                background-size: 400% 400%;
+                animation: pulse 1.5s ease-in-out infinite;
+            }
+            @keyframes pulse {
+                0% { background-position: 100% 50%; }
+                100% { background-position: 0% 50%; }
+            }
+        </style>
+    </head>
+    <body class="bg-gray-900 text-gray-100 min-h-screen p-6">
+        <div class="max-w-6xl mx-auto space-y-6">
+            <header class="border-b border-gray-800 pb-4 flex justify-between items-center">
+                <div>
+                    <h1 class="text-2xl font-bold tracking-wide text-indigo-400">VLM Edge Platform Interface</h1>
+                    <p id="metricsPanel" class="text-xs text-gray-400 mt-1 font-mono">Metrics: Waiting for pipeline activity...</p>
+                </div>
+                <div class="flex items-center space-x-3">
+                    <span class="text-xs font-mono bg-gray-800 border border-gray-700 rounded px-2.5 py-1 text-indigo-300">Target Profile: __MODEL_NAME_PLACEHOLDER__</span>
+                    <button id="clearChatBtn" class="bg-red-900/40 hover:bg-red-800 border border-red-700 text-red-200 text-xs py-1.5 px-3 rounded transition-colors">Clear Chat History</button>
+                </div>
+            </header>
+            <div class="grid grid-cols-1 lg:grid-cols-3 gap-6">
+                <div class="lg:col-span-2 space-y-4">
+                    <div class="flex items-center space-x-4">
+                        <label class="font-medium text-sm">Select Stream Source:</label>
+                        <select id="videoSelect" class="flex-1 bg-gray-800 border border-gray-700 rounded p-2 focus:outline-none focus:border-indigo-500"></select>
+                    </div>
+                    <div class="bg-black rounded-lg overflow-hidden aspect-video relative flex items-center justify-center border border-gray-800">
+                        <video id="videoPlayer" controls class="w-full h-full hidden"></video>
+                        <div id="videoPlaceholder" class="text-gray-500 text-sm">No Active Video Stream Sample Loaded</div>
+                    </div>
+                </div>
+                <div class="flex flex-col h-[480px]">
+                    <div class="bg-gray-800 border border-gray-700 rounded-lg p-4 flex-1 flex flex-col min-h-0 relative overflow-hidden">
+                        <div class="flex justify-between items-center mb-3 flex-none">
+                            <h2 class="text-sm font-semibold tracking-wider text-gray-400 uppercase">Conversational History Log</h2>
+                            <div id="busySpinner" class="hidden h-4 w-4 animate-spin rounded-full border-2 border-indigo-500 border-t-transparent"></div>
+                        </div>
+                        <div id="chatHistoryLog" class="flex-1 space-y-4 text-sm overflow-y-auto bg-gray-900 p-3 rounded border border-gray-750 font-mono min-h-0">
+                            <div class="text-gray-500 text-xs italic">System initialized. Awaiting prompt loop...</div>
+                        </div>
+                    </div>
+                    <div class="space-y-2 mt-4 flex-none">
+                        <textarea id="promptInput" rows="2" class="w-full bg-gray-800 border border-gray-700 rounded-lg p-3 text-sm focus:outline-none focus:border-indigo-500 resize-none placeholder-gray-500" placeholder="Ask a follow-up question..."></textarea>
+                        <button id="submitBtn" class="w-full bg-indigo-600 hover:bg-indigo-500 disabled:bg-gray-700 disabled:cursor-not-allowed text-white font-medium py-2.5 px-4 rounded-lg transition-colors flex items-center justify-center space-x-2">
+                            <span id="btnText">Execute Analysis Prompt</span>
+                        </button>
+                    </div>
+                </div>
+            </div>
+        </div>
+        <script>
+            const videoSelect = document.getElementById('videoSelect');
+            const videoPlayer = document.getElementById('videoPlayer');
+            const videoPlaceholder = document.getElementById('videoPlaceholder');
+            const promptInput = document.getElementById('promptInput');
+            const submitBtn = document.getElementById('submitBtn');
+            const btnText = document.getElementById('btnText');
+            const chatHistoryLog = document.getElementById('chatHistoryLog');
+            const metricsPanel = document.getElementById('metricsPanel');
+            const busySpinner = document.getElementById('busySpinner');
+            const clearChatBtn = document.getElementById('clearChatBtn');
+            let chatHistoryBuffer = [];
+            async function initializeApp() {
+                try {
+                    const videoRes = await fetch('/api/videos');
+                    const videos = await videoRes.json();
+                    videos.forEach(v => videoSelect.add(new Option(v, v)));
+                    if(videos.length > 0) handleVideoChange(videos[0]);
+                } catch (e) {
+                    chatHistoryLog.innerHTML = `<div class="text-red-400">Initialization Fault: ${e.message}</div>`;
+                }
+            }
+            function appendMessageBlock(role, text, isSkeleton = false) {
+                const block = document.createElement('div');
+                block.className = `p-2.5 rounded border ${role === 'user' ? 'bg-indigo-950/40 border-indigo-900/60 ml-6' : 'bg-gray-800/60 border-gray-700/50 mr-6'} ${isSkeleton ? 'skeleton-pulse min-h-[40px]' : ''}`;
+                const senderLabel = document.createElement('div');
+                senderLabel.className = `text-[10px] font-bold uppercase tracking-wider mb-1 ${role === 'user' ? 'text-indigo-400' : 'text-gray-400'}`;
+                senderLabel.textContent = role === 'user' ? '● User Prompt' : '● Model Response';
+                const contentText = document.createElement('div');
+                contentText.className = "whitespace-pre-wrap leading-relaxed break-words text-sm font-mono text-gray-100";
+                if (!isSkeleton) contentText.textContent = text;
+                block.appendChild(senderLabel);
+                block.appendChild(contentText);
+                chatHistoryLog.appendChild(block);
+                chatHistoryLog.scrollTop = chatHistoryLog.scrollHeight;
+                return contentText;
+            }
+            async function updateMetrics(clientLatencySec) {
+                try {
+                    const res = await fetch('/api/metrics');
+                    const root = await res.json();
+                    const metrics = Object.values(root)[0];
+                    if (metrics) {
+                        const tps = metrics.llm_average_token_per_second?.toFixed(1) || "0.0";
+                        const ttft = metrics.llm_first_infer_duration?.toFixed(2) || "0.00";
+                        const tokens = metrics.generated_token_num || 0;
+                        metricsPanel.textContent = `Metrics: ${tps} tok/s • TTFT: ${ttft}s • ${tokens} tokens • Latency: ${clientLatencySec.toFixed(2)}s`;
+                    }
+                } catch (e) {
+                    console.error(e);
+                }
+            }
+            function handleVideoChange(filename) {
+                resetChatHistory();
+                if(!filename) {
+                    videoPlayer.classList.add('hidden');
+                    videoPlaceholder.classList.remove('hidden');
+                    return;
+                }
+                videoPlaceholder.classList.add('hidden');
+                videoPlayer.classList.remove('hidden');
+                videoPlayer.src = `/stream/videos/${encodeURIComponent(filename)}`;
+                videoPlayer.load();
+            }
+            function resetChatHistory() {
+                chatHistoryBuffer = [];
+                chatHistoryLog.innerHTML = `<div class="text-gray-500 text-xs italic">Conversation wiped. Ready for prompt input...</div>`;
+                promptInput.value = "what is happening in this video?";
+            }
+            videoSelect.addEventListener('change', (e) => handleVideoChange(e.target.value));
+            clearChatBtn.addEventListener('click', resetChatHistory);
+            submitBtn.addEventListener('click', async () => {
+                const prompt = promptInput.value.trim();
+                const video_name = videoSelect.value;
+                if (!prompt || !video_name) return;
+                const clientStartTime = performance.now();
+                appendMessageBlock('user', prompt);
+                chatHistoryBuffer.push({ role: 'user', content: prompt });
+                promptInput.value = "";
+                submitBtn.disabled = true;
+                btnText.textContent = "Processing Inference...";
+                busySpinner.classList.remove('hidden');
+                const liveResponseNode = appendMessageBlock('assistant', "Connecting...", true);
+                try {
+                    const response = await fetch('/api/analyze', {
+                        method: 'POST',
+                        headers: { 'Content-Type': 'application/json' },
+                        body: JSON.stringify({ video_name, history: chatHistoryBuffer })
+                    });
+                    if (!response.ok) throw new Error("Server engine pipeline connection fault.");
+                    liveResponseNode.parentElement.classList.remove('skeleton-pulse');
+                    liveResponseNode.textContent = "";
+                    const reader = response.body.getReader();
+                    const decoder = new TextDecoder();
+                    let buffer = "";
+                    let fullModelResponse = "";
+                    while (true) {
+                        const { value, done } = await reader.read();
+                        if (done) break;
+                        buffer += decoder.decode(value, { stream: true });
+                        const lines = buffer.split('\n');
+                        buffer = lines.pop();
+                        for (const line of lines) {
+                            const trimmed = line.trim();
+                            if (!trimmed || !trimmed.startsWith('data: ')) continue;
+                            const dataStr = trimmed.slice(5).trim();
+                            if (dataStr === '[DONE]') continue;
+                            try {
+                                const json = JSON.parse(dataStr);
+                                if(json.error) {
+                                    liveResponseNode.textContent += `\n[AAF Error]: ${json.error}`;
+                                    continue;
+                                }
+                                const contentToken = json.choices?.[0]?.delta?.content || "";
+                                if (contentToken) {
+                                    fullModelResponse += contentToken;
+                                    liveResponseNode.textContent = fullModelResponse;
+                                    chatHistoryLog.scrollTop = chatHistoryLog.scrollHeight;
+                                }
+                            } catch(e) {}
+                        }
+                    }
+                    chatHistoryBuffer.push({ role: 'assistant', content: fullModelResponse });
+                    const clientLatencySec = (performance.now() - clientStartTime) / 1000;
+                    setTimeout(() => updateMetrics(clientLatencySec), 500);
+                } catch (err) {
+                    liveResponseNode.parentElement.classList.remove('skeleton-pulse');
+                    liveResponseNode.textContent = `\n[Pipeline Runtime Exception]: ${err.message}`;
+                } finally {
+                    submitBtn.disabled = false;
+                    btnText.textContent = "Execute Analysis Prompt";
+                    busySpinner.classList.add('hidden');
+                }
+            });
+            initializeApp();
+        </script>
+    </body>
+    </html>
+    """
+    return HTMLResponse(content=html_content.replace("__MODEL_NAME_PLACEHOLDER__", TARGET_MODEL))
+# Start the uvicorn server only when executed as a script, bound to --host/--port.
+if __name__ == "__main__":
+    uvicorn.run(app, host=args.host, port=args.port)
+EOF
+}}}
+ - create a python virtual env (always a good idea to keep python dependencies isolated) and install the python modules we need:
+{{{#!bash
+# create a venv (.venv)
+uv venv
+# install our script's dependencies (argparse is part of Python's standard library, so it is not installed from PyPI)
+uv pip install httpx uvicorn fastapi
+}}}
+ - run the app, giving it the host interface and port to listen on, the URL of the AAF server, and the directory of the videos:
+{{{#!bash
+# run the app
+uv run vlm.py  --host 0.0.0.0 --port 8080 --video-dir /usr/share/vlm-edge-studio/assets/videos --aaf-server http://127.0.0.1:8000
+}}}
+  - Note that the AAF server must have access to the video, so if for some reason it's running on a different server, make sure to adjust the URL that is submitted for analysis
+ - open a browser to the host port 8080, select a video and submit a query
 == Troubleshooting