Skip to content

Utilities

Shared helpers used across processors and examples: IoU for bounding-box overlap, JSON extraction and type coercion for cleaning up local-model output, and confidence clamping to keep scores in the [0, 1] range.

utils

Utility modules for gaze.

compute_iou

compute_iou(
    box1: Sequence[float], box2: Sequence[float]
) -> float

Compute Intersection over Union (IoU) of two bounding boxes.

Parameters:

Name Type Description Default
box1 Sequence[float]

First bounding box as [x1, y1, x2, y2]

required
box2 Sequence[float]

Second bounding box as [x1, y1, x2, y2]

required

Returns:

Type Description
float

IoU score between 0 and 1

Raises:

Type Description
ValueError

If boxes don't have exactly 4 coordinates

Source code in src/gaze/utils/iou.py
@beartype
def compute_iou(box1: Sequence[float], box2: Sequence[float]) -> float:
    """Return the Intersection over Union (IoU) of two axis-aligned boxes.

    Each box is described by two opposite corners as ``[x1, y1, x2, y2]``.
    The corners may be listed in either order; coordinates are sorted per
    axis before any area is computed.

    Args:
        box1: First bounding box as [x1, y1, x2, y2]
        box2: Second bounding box as [x1, y1, x2, y2]

    Returns:
        Intersection area divided by union area. 0.0 when the boxes do not
        overlap (boxes that only touch along an edge count as
        non-overlapping) or when the union area is zero.

    Raises:
        ValueError: If boxes don't have exactly 4 coordinates
    """
    for candidate in (box1, box2):
        if len(candidate) != 4:
            raise ValueError("Bounding boxes must have exactly 4 coordinates")

    def _sorted_corners(box):
        # VLMs commonly emit corner coordinates in arbitrary order, so put
        # the smaller value first on each axis: (x_lo, y_lo, x_hi, y_hi).
        return (
            min(box[0], box[2]),
            min(box[1], box[3]),
            max(box[0], box[2]),
            max(box[1], box[3]),
        )

    a_x_lo, a_y_lo, a_x_hi, a_y_hi = _sorted_corners(box1)
    b_x_lo, b_y_lo, b_x_hi, b_y_hi = _sorted_corners(box2)

    # Overlap rectangle: the innermost edge on every side.
    x_lo = max(a_x_lo, b_x_lo)
    y_lo = max(a_y_lo, b_y_lo)
    x_hi = min(a_x_hi, b_x_hi)
    y_hi = min(a_y_hi, b_y_hi)

    # An empty or degenerate overlap means the boxes do not intersect.
    if x_hi <= x_lo or y_hi <= y_lo:
        return 0.0

    overlap = (x_hi - x_lo) * (y_hi - y_lo)

    # Union = both areas minus the part counted twice.
    first_area = (a_x_hi - a_x_lo) * (a_y_hi - a_y_lo)
    second_area = (b_x_hi - b_x_lo) * (b_y_hi - b_y_lo)
    combined = first_area + second_area - overlap

    # Guard the division below.
    if combined == 0:
        return 0.0

    return overlap / combined

coerce_json_types

coerce_json_types(
    response: dict[str, Any], schema: dict[str, Any]
) -> dict[str, Any]

Coerce response values to match JSON schema types, in place.

Recurses into nested objects and array items to arbitrary depth. The response dict is mutated in place and also returned, so the call can be used fluently: parsed = coerce_json_types(parsed, schema).

Parameters:

Name Type Description Default
response dict[str, Any]

Parsed JSON response dict (mutated in place).

required
schema dict[str, Any]

The response_format dict, raw JSON Schema object, or the nested json_schema.schema sub-dict -- all accepted.

required

Returns:

Type Description
dict[str, Any]

The same response dict, after coercion.

Source code in src/gaze/utils/json_coerce.py
@beartype
def coerce_json_types(response: dict[str, Any], schema: dict[str, Any]) -> dict[str, Any]:
    """Coerce the values of ``response`` to the types declared by ``schema``.

    The schema may arrive wrapped: a ``"json_schema"`` key and then a
    ``"schema"`` key are each unwrapped (in that order) when present, and the
    resulting dict is handed to ``_coerce_dict`` together with ``response``.

    ``response`` is mutated in place and also returned, so the call can be
    used fluently: ``parsed = coerce_json_types(parsed, schema)``.

    Args:
        response: Parsed JSON response dict (mutated in place).
        schema: The ``response_format`` dict, raw JSON Schema object, or
                the nested ``json_schema.schema`` sub-dict -- all accepted.

    Returns:
        The same ``response`` dict, after coercion.
    """
    effective_schema = schema
    # Peel off the optional wrapper layers, outermost first.
    for wrapper_key in ("json_schema", "schema"):
        if wrapper_key in effective_schema:
            effective_schema = effective_schema[wrapper_key]

    _coerce_dict(response, effective_schema)
    return response

extract_json_from_text

extract_json_from_text(text: str) -> dict[str, Any] | None

Extract JSON object from model output text.

Handles common formats: Markdown code blocks (```json ... ```), raw JSON objects, and JSON embedded in surrounding text.

Uses Python's JSONDecoder.raw_decode() for robust parsing that correctly handles JSON strings containing braces.

Parameters:

Name Type Description Default
text str

Text that may contain a JSON object

required

Returns:

Type Description
dict[str, Any] | None

Parsed JSON dict, or None if no valid JSON found

Source code in src/gaze/utils/json_extract.py
@beartype
def extract_json_from_text(text: str) -> dict[str, Any] | None:
    """Extract JSON object from model output text.

    Handles common formats:
    - Markdown code blocks (```json ... ```)
    - Raw JSON objects
    - JSON embedded in surrounding text

    Uses Python's JSONDecoder.raw_decode() for robust parsing that correctly
    handles JSON strings containing braces.

    Args:
        text: Text that may contain a JSON object

    Returns:
        Parsed JSON dict, or None if no valid JSON found
    """
    text = text.strip()
    if not text:
        return None

    # Handle markdown code block
    if text.startswith("```"):
        first_newline = text.find("\n")
        if first_newline == -1:
            return None
        closing = text.rfind("```")
        if closing <= first_newline:
            return None
        text = text[first_newline + 1 : closing].strip()

    # Try to parse directly first (most common case)
    try:
        result = json.loads(text)
        if isinstance(result, dict):
            return cast("dict[str, Any]", result)
        return None
    except json.JSONDecodeError:
        pass

    # Use raw_decode to find JSON object - this correctly handles strings with braces
    decoder = json.JSONDecoder()
    # Find all potential JSON start positions
    for i, c in enumerate(text):
        if c != "{":
            continue
        try:
            result, _ = decoder.raw_decode(text, i)
            if isinstance(result, dict):
                return cast("dict[str, Any]", result)
        except json.JSONDecodeError:
            continue

    # Last resort: try repairing truncated JSON by closing unclosed brackets.
    # Local models frequently get cut off mid-generation, producing parseable
    # fragments that just need closing delimiters.
    return _try_repair_truncated(text)

clamp_confidence

clamp_confidence(value: object) -> float | None

Clamp a confidence value to [0.0, 1.0].

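Local models often emit confidence on non-standard scales (e.g. 0-100 or 0-5) or as word labels (e.g. "high", "medium"). Rather than rejecting these as invalid, recognized word labels are mapped to preset scores, and numeric values (including numeric strings such as "0.85") are clamped, not rescaled, to the [0, 1] range, so a value of 85 becomes 1.0 and the rest of the pipeline can proceed.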

Returns None for booleans, NaN or infinite values, strings that are neither a recognized label nor a number, and any other non-numeric input.

Source code in src/gaze/utils/__init__.py
@beartype
def clamp_confidence(value: object) -> float | None:
    """Clamp a confidence value to [0.0, 1.0].

    Local models often emit confidence on non-standard scales (e.g. 0-100
    or 0-5) or as word labels (e.g. "high", "medium").  Rather than
    rejecting these as invalid, we normalize to the expected range so
    the rest of the pipeline can proceed.

    Returns None for boolean, NaN, or infinite inputs.
    """
    if isinstance(value, bool):
        return None
    # Handle word labels from local models (e.g. "high", "medium")
    if isinstance(value, str):
        label = value.strip().lower()
        if label in _CONFIDENCE_WORD_MAP:
            return _CONFIDENCE_WORD_MAP[label]
        # Try parsing as a numeric string (e.g. "0.85")
        try:
            f = float(label)
        except (ValueError, OverflowError):
            return None
        if math.isnan(f) or math.isinf(f):
            return None
        return max(0.0, min(1.0, f))
    if not isinstance(value, int | float):
        return None
    f = float(value)
    if math.isnan(f) or math.isinf(f):
        return None
    return max(0.0, min(1.0, f))