#!/usr/bin/env python3
"""
Ara NPU Multi-Format Universal Image Decoder
============================================
"""

import ctypes
import os
import sys
import subprocess
import gi

gi.require_version('Gst', '1.0')
from gi.repository import Gst

# Initialise the GStreamer library once, at import time, so that it is ready
# before main() queries the plugin registry or parses a pipeline description.
Gst.init(None)

# COCO class-id -> human-readable label table used when printing and drawing
# detections. The names are listed in class-id order (eight per row, so row r
# starts at id 8*r); enumerate() assigns the ids 0..79.
COCO_CLASSES = dict(enumerate((
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck",
    "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
    "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
    "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard",
    "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
    "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
    "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake",
    "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop",
    "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink",
    "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
)))

class AraDetection(ctypes.Structure):
    """Binary layout of one detection record in the post-processing output.

    The structure is byte-packed (``_pack_ = 1``) with the "ms" layout, so the
    fields sit back to back with no padding: five 32-bit floats, one 32-bit
    integer and one pointer-sized slot. ``main()`` decodes records of this
    shape from the raw sink buffer and rescales the box corners from the
    model frame to the source image.
    """

    _layout_ = "ms"
    _pack_ = 1
    _fields_ = [
        # Bounding-box corners.
        ("xmin", ctypes.c_float),
        ("ymin", ctypes.c_float),
        ("xmax", ctypes.c_float),
        ("ymax", ctypes.c_float),
        # Detection score; printed by main() as a percentage (value * 100).
        ("confidence", ctypes.c_float),
        # Numeric class id, looked up in COCO_CLASSES for display.
        ("class_id", ctypes.c_int32),
        # Pointer-sized slot; this script never dereferences it.
        ("class_name_ptr", ctypes.c_void_p),
    ]

def _probe_image_size(image_path):
    """Return ``(width, height)`` of *image_path* as reported by ImageMagick.

    ``identify`` is started from an argument list instead of a shell command
    line, so a path containing spaces or shell metacharacters is handed to the
    tool verbatim and is never interpreted by a shell.

    Raises:
        OSError: ``identify`` could not be started (e.g. not installed).
        subprocess.CalledProcessError: ``identify`` exited with a failure.
        ValueError, IndexError: the output was not two integers.
    """
    output = subprocess.check_output(["identify", "-format", "%w %h", image_path])
    fields = output.decode().split()
    return int(fields[0]), int(fields[1])


def _build_pipeline_description(input_image, model, width, height):
    """Return the gst-launch style description of the inference pipeline.

    NOTE(review): *input_image* and *model* are interpolated unquoted, so a
    path containing spaces will presumably not parse as a single property
    value -- confirm before relying on such paths.
    """
    return (
        f"multifilesrc location={input_image} loop=false num-buffers=2 ! decodebin name=d ! "
        f"videoconvert ! videoscale ! video/x-raw,width={width},height={height} ! "
        f"videoconvert ! video/x-raw,format=BGRA ! "
        f"dvPre model={model} ! "
        f"dvInf model={model} sock=/var/run/proxy.sock use-shm=true shm-path=/dev/shm/ara_inf_ ! "
        f"dvPost model={model} orig-width={width} orig-height={height} ! "
        f"appsink name=mysink sync=false async=false emit-signals=true"
    )


def _pull_last_buffer(pipe_str):
    """Run the pipeline and return the bytes of the last buffer it produced.

    Returns ``None`` when the sink delivered no sample at all.
    """
    # Before creating the launcher, adjust the system plugin registry ranking
    # so GStreamer ignores the v4l2jpegdec element (as it doesn't support
    # BGRA output).
    registry = Gst.Registry.get()
    feature = registry.lookup_feature("v4l2jpegdec")
    if feature:
        # Lower its rank to zero so decodebin skips over it.
        feature.set_rank(0)

    pipeline = Gst.parse_launch(pipe_str)
    sink = pipeline.get_by_name("mysink")

    last_bytes = None
    try:
        pipeline.set_state(Gst.State.PLAYING)
        # Drain the sink, remembering only the most recent buffer; the loop
        # ends as soon as "pull-sample" yields no sample.
        while True:
            sample = sink.emit("pull-sample")
            if not sample:
                break
            buffer = sample.get_buffer()
            last_bytes = buffer.extract_dup(0, buffer.get_size())
    finally:
        # Always shut the pipeline down, even if pulling a sample raised.
        pipeline.set_state(Gst.State.NULL)
    return last_bytes


def _decode_detections(raw, scale_x, scale_y):
    """Decode, print and return the detections contained in *raw*.

    *raw* is read as a 4-byte little-endian record count followed by that
    many ``AraDetection`` records. Box corners are multiplied by *scale_x* /
    *scale_y* to map them onto the source image. Counts outside ``1..999``
    and truncated trailing records are ignored.

    Returns:
        A list of ``(name, confidence, x1, y1, x2, y2)`` tuples.
    """
    detections = []
    if not raw or len(raw) < 4:
        return detections

    count = int.from_bytes(raw[:4], byteorder='little')
    if not 0 < count < 1000:
        return detections

    print(f"DETECTIONS LOGGED: FOUND {count} ACTIVE OBJECTS")
    print("-" * 70)

    record_size = ctypes.sizeof(AraDetection)
    offset = 4
    for index in range(count):
        # Stop at the first record that does not fit completely in the buffer.
        if offset + record_size > len(raw):
            break
        det = AraDetection.from_buffer_copy(raw, offset)
        offset += record_size

        # Compute native image coordinate translation mapping
        x1_mapped = det.xmin * scale_x
        x2_mapped = det.xmax * scale_x
        y1_mapped = det.ymin * scale_y
        y2_mapped = det.ymax * scale_y

        coco_name = COCO_CLASSES.get(det.class_id, "unknown")

        print(f"Object {index+1}: ID={det.class_id} | Name={coco_name} | Confidence={det.confidence * 100:.1f}%")
        print(f"          Bounding Box -> [{int(x1_mapped)}, {int(y1_mapped)}] to [{int(x2_mapped)}, {int(y2_mapped)}]")
        print("-" * 70)

        detections.append((coco_name, det.confidence, x1_mapped, y1_mapped, x2_mapped, y2_mapped))
    return detections


def _render_annotations(input_image, output_image, detections):
    """Draw every detection box and label onto a copy of the input image.

    ImageMagick ``convert`` is started from an argument list (no shell), so
    neither file paths nor labels can be reinterpreted as shell syntax.

    Raises:
        OSError: ``convert`` could not be started.
        subprocess.CalledProcessError: ``convert`` exited with a failure.
    """
    cmd = ["convert", input_image]
    for coco_name, conf, x1, y1, x2, y2 in detections:
        ix1, iy1, ix2, iy2 = int(x1), int(y1), int(x2), int(y2)
        label = f"{coco_name} {conf*100:.1f}%"
        cmd += [
            "-stroke", "green", "-strokewidth", "2", "-fill", "none",
            "-draw", f"rectangle {ix1},{iy1} {ix2},{iy2}",
        ]
        # The label is anchored 6 pixels above the box's top-left corner.
        cmd += [
            "-stroke", "none", "-fill", "white", "-pointsize", "16",
            "-annotate", f"+{ix1}+{iy1 - 6}", label,
        ]
    cmd.append(output_image)
    subprocess.run(cmd, check=True)


def main():
    """Run object detection on one image and write an annotated copy.

    Command line: ``<input_image> <output_image> [model]``.

    The input image size is read with ImageMagick, the image is pushed
    through the GStreamer inference pipeline at 640x640, the detections in
    the last output buffer are decoded and printed, and the boxes and labels
    are drawn onto the output image with ImageMagick.

    Exits with status 1 on a usage error, a missing input file, or when the
    image size cannot be determined. A rendering failure is reported on
    stdout but does not change the exit status.
    """
    if len(sys.argv) < 3:
        print(f"Usage: {sys.argv[0]} <input_image> <output_image> [model]")
        sys.exit(1)

    input_image = sys.argv[1]
    output_image = sys.argv[2]
    model = "/usr/share/cnn/detection/yolov8n/model.dvm"
    if len(sys.argv) > 3:
        model = sys.argv[3]

    if not os.path.exists(input_image):
        print(f"ERROR: File '{input_image}' could not be located.")
        sys.exit(1)

    # Fetch native dimensions using ImageMagick
    try:
        w_native, h_native = _probe_image_size(input_image)
    except (OSError, subprocess.SubprocessError, ValueError, IndexError) as e:
        print(f"ERROR: Failed to read image properties using ImageMagick: {e}")
        sys.exit(1)

    # Print target properties cleanly
    print(f"\nmodel: {model}")
    print(f"image: {os.path.basename(input_image)} {w_native}x{h_native}")

    MODEL_W, MODEL_H = 640, 640

    pipe_str = _build_pipeline_description(input_image, model, MODEL_W, MODEL_H)
    last_valid_raw_bytes = _pull_last_buffer(pipe_str)

    # Boxes are scaled from the model frame back to the native image size.
    processed_detections = _decode_detections(
        last_valid_raw_bytes, w_native / MODEL_W, h_native / MODEL_H
    )

    # Render final multi-object annotated canvas
    if processed_detections:
        try:
            _render_annotations(input_image, output_image, processed_detections)
            print(f"SUCCESS: Mapped all boxes and text labels onto -> '{output_image}'\n")
        except (subprocess.CalledProcessError, OSError):
            # OSError covers a missing `convert` binary, which the shell used
            # to report as a non-zero exit status.
            print("ERROR: ImageMagick rendering execution failed.\n")
    else:
        print("INFO: No operational object targets were captured by the NPU context.\n")

# Run the decoder only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()
