| | 1581 | |
| | 1582 | [=#vlm] |
| | 1583 | === Web based python VLM eIQ example |
| | 1584 | The eIQ AAF Connector can be used to analyze video and images. |
| | 1585 | |
| | 1586 | Here is an example of a headless web-app based off NXP's [https://github.com/nxp-imx-support/vlm-edge-studio/tree/main/src vlm-edge-studio example] using: |
| | 1587 | - Qwen2.5-VL-7B-Instruct-Ara240 |
| | 1588 | - eIQ AAF Connector |
| | 1589 | |
| | 1590 | Example: |
| | 1591 | - if you want some example videos, you can download NXP's vlm-edge-studio_1.0.0.deb and extract its data: |
| | 1592 | {{{#!bash |
| | 1593 | # extract data (but don't install the deb) |
| | 1594 | dpkg-deb --vextract vlm-edge-studio_1.0.0.deb / |
| | 1595 | }}} |
| | 1596 | - this extracts a number of videos to /usr/share/vlm-edge-studio/assets/videos |
| | 1597 | - The AAF connector requires a lot of DRAM when loading large models (e.g. the 12GB Qwen2.5-VL-7B-Instruct model), so we will create a swap file to avoid memory issues when loading the model: |
| | 1598 | {{{#!bash |
| | 1599 | swapon --show # shows nothing as not enabled |
| | 1600 | # pre-allocate space for swap file |
| | 1601 | fallocate -l 4G /swapfile |
| | 1602 | # make sure it is accessible by root only |
| | 1603 | chmod 600 /swapfile |
| | 1604 | # format the file as swap |
| | 1605 | mkswap /swapfile |
| | 1606 | # activate the swapfile |
| | 1607 | swapon /swapfile |
| | 1608 | # add it to /etc/fstab so that it mounts on boot |
| | 1609 | echo '/swapfile none swap sw 0 0' >> /etc/fstab |
| | 1610 | }}} |
| | 1611 | - install Qwen2.5-VL-7B-Instruct-Ara240 model |
| | 1612 | {{{#!bash |
| | 1613 | fetch_models --repo-id nxp/Qwen2.5-VL-7B-Instruct-Ara240 # 12GB |
| | 1614 | }}} |
| | 1615 | - To avoid loading unused models into the ARA and running into memory issues, disable all models except Qwen2.5-VL-7B-Instruct in the AAF connector's config file: |
| | 1616 | {{{#!bash |
| | 1617 | python3 -c 'import json; p="/usr/share/eiq/aaf-connector/server_config.json"; f=open(p,"r+"); d=json.load(f); [m.update({"enabled": (m.get("name") == "Qwen2.5-VL-7B-Instruct")}) for m in d.get("available_models", [])]; f.seek(0); json.dump(d, f, indent=4); f.truncate()' |
| | 1618 | # restart AAF connector |
| | 1619 | systemctl restart eiq-aaf-connector.service |
| | 1620 | # wait for it to be up and running (it will take several minutes to load the 12GB Qwen2.5-VL-7B-Instruct model onto the ARA) |
| | 1621 | until (echo > /dev/tcp/127.0.0.1/8000) >/dev/null 2>&1; do echo -n .; sleep 1; done |
| | 1622 | }}} |
| | 1623 | - create a dir for us to work in and create the python script |
| | 1624 | {{{#!bash |
| | 1625 | mkdir vlm-webapp; cd vlm-webapp |
| | 1626 | cat << \EOF > vlm.py |
| | 1627 | import argparse |
| | 1628 | import os |
| | 1629 | import httpx |
| | 1630 | import uvicorn |
| | 1631 | import json |
| | 1632 | import urllib.request |
| | 1633 | import time |
| | 1634 | from datetime import datetime |
| | 1635 | from fastapi import FastAPI, HTTPException |
| | 1636 | from fastapi.responses import HTMLResponse, StreamingResponse |
| | 1637 | from fastapi.staticfiles import StaticFiles |
| | 1638 | from pydantic import BaseModel |
| | 1639 | from typing import List, Dict |
| | 1640 | |
| | 1641 | # ═══════════════════════════════════════════════════════════════ |
| | 1642 | # Command Line Arguments & Global Constants Configuration |
| | 1643 | # ═══════════════════════════════════════════════════════════════ |
# The command line is parsed at import time; the route handlers below read the
# result through the module-level `args`.
parser = argparse.ArgumentParser(description="VLM Edge Studio WebApp Bridge")
parser.add_argument("--video-dir", required=True, help="Directory path where video MP4 files are hosted")
parser.add_argument("--aaf-server", default="http://127.0.0.1:8000", help="AAF Server backend Base URL")
parser.add_argument("--host", default="0.0.0.0", help="Host binding address for this web application")
parser.add_argument("--port", type=int, default=8080, help="Port binding for this web application")
parser.add_argument("--verbose", action="store_true", default=False, help="Enable verbose raw JSON payload terminal dumping")

# parse_known_args() returns (namespace, leftover_argv); unrecognized options
# are discarded here instead of aborting the program.
args, _ = parser.parse_known_args()

# Model name sent to the AAF server in every request and shown in the page header.
TARGET_MODEL = "Qwen2.5-VL-7B-Instruct"

app = FastAPI(title="VLM Video Web Analyzer")

# Refuse to start when the video directory is missing.
if not os.path.isdir(args.video_dir):
    raise RuntimeError(f"Provided video directory target does not exist: {args.video_dir}")

# Mount local streaming static location directly from the primary video-dir configuration
app.mount("/stream/videos", StaticFiles(directory=args.video_dir), name="videos")
| | 1662 | |
class ChatMessage(BaseModel):
    """One turn of the chat history posted by the browser."""
    # Sender of the turn; the analyze handler labels "user" turns as questions
    # and every other value as a previous model response.
    role: str
    # Text of the turn.
    content: str
| | 1666 | |
class MultiTurnPayload(BaseModel):
    """Request body accepted by POST /api/analyze."""
    # File name that is joined onto --video-dir to build the path handed to the AAF server.
    video_name: str
    # Conversation so far, oldest first; the last entry is the question to answer.
    history: List[ChatMessage]
| | 1670 | |
def get_timestamp():
    """Return the current local time as 'YYYY-MM-DD HH:MM:SS.mmm'."""
    now = datetime.now()
    # timespec="milliseconds" truncates (does not round) to three fractional digits.
    return now.isoformat(sep=" ", timespec="milliseconds")
| | 1673 | |
| | 1674 | # ═══════════════════════════════════════════════════════════════ |
| | 1675 | # API Engine Intercept Proxy Routes |
| | 1676 | # ═══════════════════════════════════════════════════════════════ |
| | 1677 | |
@app.get("/api/videos", tags=["Media"])
async def get_available_videos():
    """List the video file names found directly inside the video directory.

    Only names ending in .mp4, .mov, .mkv or .avi (case-insensitive) are
    returned. A directory that no longer exists yields an empty list; any
    exception is reported as HTTP 500 carrying the exception text.
    """
    video_suffixes = (".mp4", ".mov", ".mkv", ".avi")
    try:
        matches = []
        if os.path.exists(args.video_dir):
            for entry in os.listdir(args.video_dir):
                if entry.lower().endswith(video_suffixes):
                    matches.append(entry)
        return matches
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
| | 1688 | |
@app.get("/api/metrics")
async def proxy_metrics():
    """Relay the AAF server's metrics JSON for TARGET_MODEL.

    If the request or the JSON decoding raises, a zeroed metrics record keyed
    by TARGET_MODEL is returned instead so the page always has values to show.
    """
    metrics_url = f"{args.aaf_server}/metrics/"
    request_headers = {"Accept": "application/json", "User-Agent": "AAFConnector/1.0"}
    zeroed_metrics = {
        TARGET_MODEL: {
            "llm_average_token_per_second": 0.0,
            "llm_first_infer_duration": 0.0,
            "generated_token_num": 0
        }
    }

    async with httpx.AsyncClient() as client:
        try:
            reply = await client.get(
                metrics_url,
                params={"model_name": TARGET_MODEL},
                headers=request_headers,
                timeout=3.0,
            )
            return reply.json()
        except Exception:
            return zeroed_metrics
| | 1705 | |
def _flatten_history(history):
    """Fold the chat turns into the single text prompt sent to the model.

    A one-turn history is passed through unchanged. Otherwise every earlier
    turn is listed under a role label and the last turn is appended as the new
    question. `history` must not be empty.
    """
    if len(history) == 1:
        return history[0].content

    parts = ["Here is the conversation history context for this execution sequence:\n"]
    for msg in history[:-1]:
        label = "User Question" if msg.role == "user" else "Your Previous Response"
        parts.append(f"[{label}]: {msg.content}\n")
    parts.append(f"\n[New Follow-up Question to Answer]: {history[-1].content}")
    return "".join(parts)


def _sse_error_event(message):
    """Encode an error message as one `data:` event whose body is valid JSON.

    json.dumps escapes quotes, backslashes and newlines, so the browser can
    always parse the event, whatever text the upstream error contained.
    """
    body = json.dumps({"error": message})
    return f"data: {body}\n\n".encode('utf-8')


@app.post("/api/analyze")
async def proxy_analysis_stream(payload: MultiTurnPayload):
    """Send one chat turn about a video to the AAF server and stream the reply.

    The chat history is flattened into one text prompt, posted together with
    the video path to `<aaf-server>/v1/chat/completions`, and every non-empty
    line of the upstream response is forwarded to the browser as it arrives.
    Tokens and timings are echoed to the terminal along the way. Upstream
    failures are reported in-band as a `data: {"error": ...}` event.

    Args:
        payload: The selected video name and the conversation so far.

    Returns:
        A StreamingResponse with media type text/event-stream.

    Raises:
        HTTPException: 400 when `history` is empty or `video_name` is not a
            plain file name.
    """
    if not payload.history:
        raise HTTPException(status_code=400, detail="history must contain at least one message")

    # video_name is client-controlled: an absolute path or a ".." component
    # would make os.path.join point outside the video directory.
    if payload.video_name in ("", ".", "..") or os.path.basename(payload.video_name) != payload.video_name:
        raise HTTPException(status_code=400, detail="video_name must be a plain file name inside the video directory")

    absolute_video_target_path = os.path.join(args.video_dir, payload.video_name)
    start_time = time.time()

    flattened_text = _flatten_history(payload.history)

    aaf_payload = {
        "model": TARGET_MODEL,
        "stream": True,
        "messages": [{
            "role": "user",
            "content": [
                {"type": "text", "text": flattened_text},
                {"type": "video_url", "video_url": {"url": absolute_video_target_path}}
            ]
        }]
    }

    print("\n" + "═"*70)
    print(f"[{get_timestamp()}] [CONVERSATIONAL INFERENCE DISPATCH]")
    print(f" Model ID : {TARGET_MODEL}")
    print(f" Target Path : {absolute_video_target_path}")
    print(f" Turn Count : {len(payload.history)} turns processed in session state.")

    if args.verbose:
        print("─"*70)
        print("[RAW OUTGOING JSON PAYLOAD SENT TO AAF SERVER]:")
        print(json.dumps(aaf_payload, indent=2))

    print("═"*70 + "\n")

    def raw_socket_generator():
        """Yield the upstream response line by line, echoing tokens to stdout."""
        target_endpoint = f"{args.aaf_server}/v1/chat/completions"
        data_bytes = json.dumps(aaf_payload).encode('utf-8')

        req = urllib.request.Request(
            target_endpoint,
            data=data_bytes,
            headers={
                "Content-Type": "application/json",
                "Accept": "application/json",
                "User-Agent": "AAFConnector/1.0"
            },
            method="POST"
        )

        first_token_received = False

        try:
            with urllib.request.urlopen(req, timeout=300.0) as response:
                while True:
                    line_bytes = response.readline()
                    if not line_bytes:
                        break

                    trimmed = line_bytes.decode('utf-8', errors='ignore').strip()
                    if not trimmed:
                        continue

                    # Forward the line unchanged before inspecting it for logging.
                    yield f"{trimmed}\n".encode('utf-8')

                    if not trimmed.startswith('data: '):
                        continue
                    data_content = trimmed[5:].strip()
                    if data_content == "[DONE]":
                        continue

                    try:
                        parsed = json.loads(data_content)
                        token = parsed["choices"][0]["delta"].get("content", "")
                        if token:
                            if not first_token_received:
                                ttft_duration = time.time() - start_time
                                print(f"[{get_timestamp()}] [TTFT / DECODE PHASE]: {ttft_duration:.2f}s.")
                                print(f"[{get_timestamp()}] [STREAMING TEXT TOKENS]: ", end="")
                                first_token_received = True

                            print(token, end="", flush=True)
                    except Exception:
                        # The terminal echo is best-effort: a chunk that cannot be
                        # parsed or printed must not interrupt the relay.
                        pass

        except urllib.error.HTTPError as http_err:
            err_body = http_err.read().decode('utf-8', errors='ignore')
            yield _sse_error_event(f"AAF Server Engine error {http_err.code}: {err_body}")
        except Exception as e:
            yield _sse_error_event(f"Direct socket pipeline fault: {str(e)}")
        finally:
            duration = time.time() - start_time
            print("\n" + "═"*70)
            print(f"[{get_timestamp()}] [INFERENCE COMPLETED] Turn Runtime: {duration:.2f}s")
            print("═"*70 + "\n")

    return StreamingResponse(raw_socket_generator(), media_type="text/event-stream")
| | 1806 | |
| | 1807 | # ═══════════════════════════════════════════════════════════════ |
| | 1808 | # User Interface (HTML Layer) |
| | 1809 | # ═══════════════════════════════════════════════════════════════ |
@app.get("/", response_class=HTMLResponse)
async def serve_index():
    """Serve the single-page UI.

    The page is one inline HTML/JavaScript document; the only server-side
    substitution is replacing __MODEL_NAME_PLACEHOLDER__ with TARGET_MODEL.
    Its script calls /api/videos, /api/metrics and /api/analyze and plays the
    selected file from /stream/videos/.
    """
    # Enforcing a raw python string (r"") so Python never converts or drops backslashes
    html_content = r"""
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>VLM Edge Studio Analyzer</title>
<script src="https://cdn.tailwindcss.com"></script>
<style>
.skeleton-pulse {
background: linear-gradient(-90deg, #1e293b 0%, #334155 50%, #1e293b 100%);
background-size: 400% 400%;
animation: pulse 1.5s ease-in-out infinite;
}
@keyframes pulse {
0% { background-position: 100% 50%; }
100% { background-position: 0% 50%; }
}
</style>
</head>
<body class="bg-gray-900 text-gray-100 min-h-screen p-6">
<div class="max-w-6xl mx-auto space-y-6">
<header class="border-b border-gray-800 pb-4 flex justify-between items-center">
<div>
<h1 class="text-2xl font-bold tracking-wide text-indigo-400">VLM Edge Platform Interface</h1>
<p id="metricsPanel" class="text-xs text-gray-400 mt-1 font-mono">Metrics: Waiting for pipeline activity...</p>
</div>
<div class="flex items-center space-x-3">
<span class="text-xs font-mono bg-gray-800 border border-gray-700 rounded px-2.5 py-1 text-indigo-300">Target Profile: __MODEL_NAME_PLACEHOLDER__</span>
<button id="clearChatBtn" class="bg-red-900/40 hover:bg-red-800 border border-red-700 text-red-200 text-xs py-1.5 px-3 rounded transition-colors">Clear Chat History</button>
</div>
</header>

<div class="grid grid-cols-1 lg:grid-cols-3 gap-6">
<div class="lg:col-span-2 space-y-4">
<div class="flex items-center space-x-4">
<label class="font-medium text-sm">Select Stream Source:</label>
<select id="videoSelect" class="flex-1 bg-gray-800 border border-gray-700 rounded p-2 focus:outline-none focus:border-indigo-500"></select>
</div>
<div class="bg-black rounded-lg overflow-hidden aspect-video relative flex items-center justify-center border border-gray-800">
<video id="videoPlayer" controls class="w-full h-full hidden"></video>
<div id="videoPlaceholder" class="text-gray-500 text-sm">No Active Video Stream Sample Loaded</div>
</div>
</div>

<div class="flex flex-col h-[480px]">
<div class="bg-gray-800 border border-gray-700 rounded-lg p-4 flex-1 flex flex-col min-h-0 relative overflow-hidden">
<div class="flex justify-between items-center mb-3 flex-none">
<h2 class="text-sm font-semibold tracking-wider text-gray-400 uppercase">Conversational History Log</h2>
<div id="busySpinner" class="hidden h-4 w-4 animate-spin rounded-full border-2 border-indigo-500 border-t-transparent"></div>
</div>
<div id="chatHistoryLog" class="flex-1 space-y-4 text-sm overflow-y-auto bg-gray-900 p-3 rounded border border-gray-750 font-mono min-h-0">
<div class="text-gray-500 text-xs italic">System initialized. Awaiting prompt loop...</div>
</div>
</div>

<div class="space-y-2 mt-4 flex-none">
<textarea id="promptInput" rows="2" class="w-full bg-gray-800 border border-gray-700 rounded-lg p-3 text-sm focus:outline-none focus:border-indigo-500 resize-none placeholder-gray-500" placeholder="Ask a follow-up question..."></textarea>
<button id="submitBtn" class="w-full bg-indigo-600 hover:bg-indigo-500 disabled:bg-gray-700 disabled:cursor-not-allowed text-white font-medium py-2.5 px-4 rounded-lg transition-colors flex items-center justify-center space-x-2">
<span id="btnText">Execute Analysis Prompt</span>
</button>
</div>
</div>
</div>
</div>

<script>
const videoSelect = document.getElementById('videoSelect');
const videoPlayer = document.getElementById('videoPlayer');
const videoPlaceholder = document.getElementById('videoPlaceholder');
const promptInput = document.getElementById('promptInput');
const submitBtn = document.getElementById('submitBtn');
const btnText = document.getElementById('btnText');
const chatHistoryLog = document.getElementById('chatHistoryLog');
const metricsPanel = document.getElementById('metricsPanel');
const busySpinner = document.getElementById('busySpinner');
const clearChatBtn = document.getElementById('clearChatBtn');

let chatHistoryBuffer = [];

async function initializeApp() {
try {
const videoRes = await fetch('/api/videos');
const videos = await videoRes.json();
videos.forEach(v => videoSelect.add(new Option(v, v)));

if(videos.length > 0) handleVideoChange(videos[0]);
} catch (e) {
chatHistoryLog.innerHTML = `<div class="text-red-400">Initialization Fault: ${e.message}</div>`;
}
}

function appendMessageBlock(role, text, isSkeleton = false) {
const block = document.createElement('div');
block.className = `p-2.5 rounded border ${role === 'user' ? 'bg-indigo-950/40 border-indigo-900/60 ml-6' : 'bg-gray-800/60 border-gray-700/50 mr-6'} ${isSkeleton ? 'skeleton-pulse min-h-[40px]' : ''}`;

const senderLabel = document.createElement('div');
senderLabel.className = `text-[10px] font-bold uppercase tracking-wider mb-1 ${role === 'user' ? 'text-indigo-400' : 'text-gray-400'}`;
senderLabel.textContent = role === 'user' ? '● User Prompt' : '● Model Response';

const contentText = document.createElement('div');
contentText.className = "whitespace-pre-wrap leading-relaxed break-words text-sm font-mono text-gray-100";
if (!isSkeleton) contentText.textContent = text;

block.appendChild(senderLabel);
block.appendChild(contentText);
chatHistoryLog.appendChild(block);
chatHistoryLog.scrollTop = chatHistoryLog.scrollHeight;
return contentText;
}

async function updateMetrics(clientLatencySec) {
try {
const res = await fetch('/api/metrics');
const root = await res.json();
const metrics = Object.values(root)[0];
if (metrics) {
const tps = metrics.llm_average_token_per_second?.toFixed(1) || "0.0";
const ttft = metrics.llm_first_infer_duration?.toFixed(2) || "0.00";
const tokens = metrics.generated_token_num || 0;
metricsPanel.textContent = `Metrics: ${tps} tok/s • TTFT: ${ttft}s • ${tokens} tokens • Latency: ${clientLatencySec.toFixed(2)}s`;
}
} catch (e) {
console.error(e);
}
}

function handleVideoChange(filename) {
resetChatHistory();
if(!filename) {
videoPlayer.classList.add('hidden');
videoPlaceholder.classList.remove('hidden');
return;
}
videoPlaceholder.classList.add('hidden');
videoPlayer.classList.remove('hidden');
videoPlayer.src = `/stream/videos/${encodeURIComponent(filename)}`;
videoPlayer.load();
}

function resetChatHistory() {
chatHistoryBuffer = [];
chatHistoryLog.innerHTML = `<div class="text-gray-500 text-xs italic">Conversation wiped. Ready for prompt input...</div>`;
promptInput.value = "what is happening in this video?";
}

videoSelect.addEventListener('change', (e) => handleVideoChange(e.target.value));
clearChatBtn.addEventListener('click', resetChatHistory);

submitBtn.addEventListener('click', async () => {
const prompt = promptInput.value.trim();
const video_name = videoSelect.value;

if (!prompt || !video_name) return;

const clientStartTime = performance.now();

appendMessageBlock('user', prompt);
chatHistoryBuffer.push({ role: 'user', content: prompt });

promptInput.value = "";
submitBtn.disabled = true;
btnText.textContent = "Processing Inference...";
busySpinner.classList.remove('hidden');

const liveResponseNode = appendMessageBlock('assistant', "Connecting...", true);

try {
const response = await fetch('/api/analyze', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ video_name, history: chatHistoryBuffer })
});

if (!response.ok) throw new Error("Server engine pipeline connection fault.");

liveResponseNode.parentElement.classList.remove('skeleton-pulse');
liveResponseNode.textContent = "";

const reader = response.body.getReader();
const decoder = new TextDecoder();
let buffer = "";
let fullModelResponse = "";

while (true) {
const { value, done } = await reader.read();
if (done) break;

buffer += decoder.decode(value, { stream: true });
const lines = buffer.split('\n');
buffer = lines.pop();

for (const line of lines) {
const trimmed = line.trim();

if (!trimmed || !trimmed.startsWith('data: ')) continue;

const dataStr = trimmed.slice(5).trim();
if (dataStr === '[DONE]') continue;

try {
const json = JSON.parse(dataStr);
if(json.error) {
liveResponseNode.textContent += `\n[AAF Error]: ${json.error}`;
continue;
}

const contentToken = json.choices?.[0]?.delta?.content || "";
if (contentToken) {
fullModelResponse += contentToken;
liveResponseNode.textContent = fullModelResponse;
chatHistoryLog.scrollTop = chatHistoryLog.scrollHeight;
}
} catch(e) {}
}
}

chatHistoryBuffer.push({ role: 'assistant', content: fullModelResponse });

const clientLatencySec = (performance.now() - clientStartTime) / 1000;
setTimeout(() => updateMetrics(clientLatencySec), 500);

} catch (err) {
liveResponseNode.parentElement.classList.remove('skeleton-pulse');
liveResponseNode.textContent = `\n[Pipeline Runtime Exception]: ${err.message}`;
} finally {
submitBtn.disabled = false;
btnText.textContent = "Execute Analysis Prompt";
busySpinner.classList.add('hidden');
}
});

initializeApp();
</script>
</body>
</html>
"""
    # Fill in the model name shown in the page header.
    return HTMLResponse(content=html_content.replace("__MODEL_NAME_PLACEHOLDER__", TARGET_MODEL))
| | 2050 | |
# Start the uvicorn server on the configured host/port only when this file is
# executed as a script.
if __name__ == "__main__":
    uvicorn.run(app, host=args.host, port=args.port)
| | 2053 | EOF |
| | 2054 | }}} |
| | 2055 | - create a python virtual env (always a good idea to keep python dependencies isolated) and install the python modules we need: |
| | 2056 | {{{#!bash |
# create a venv (.venv)
uv venv
# install our script's dependencies (argparse ships with Python's standard library, so it is not installed here)
uv pip install httpx uvicorn fastapi
| | 2061 | }}} |
| | 2062 | - run the app, giving it the host interface and port to listen on, the URL of the AAF server, and the directory of the videos: |
| | 2063 | {{{#!bash |
| | 2064 | # run the app |
| | 2065 | uv run vlm.py --host 0.0.0.0 --port 8080 --video-dir /usr/share/vlm-edge-studio/assets/videos --aaf-server http://127.0.0.1:8000 |
| | 2066 | }}} |
| | 2067 | - Note that the AAF server must have access to the video, so if for some reason it's running on a different server make sure to adjust the video URL that is submitted for analysis |
| | 2068 | - open a browser to port 8080 on the host, select a video and submit a query |
| | 2069 | |
| | 2070 | |
| | 2071 | |