Merge pull request #723 from trinh31201/bugfix/json-extraction-validation

micro-fix(graph): validate LLM JSON extraction to prevent empty/fabricated data
2026-02-01 18:51:51 -08:00
parent 23146c8dae 3ee6d98905
commit a5ae071a03
1 changed file with 11 additions and 1 deletion
@@ -1348,7 +1348,9 @@ Expected output keys: {output_keys}
 LLM Response:
 {raw_response}

-Output ONLY the JSON object, nothing else."""
+Output ONLY the JSON object, nothing else.
+If no valid JSON object exists in the response, output exactly: {{"error": "NO_JSON_FOUND"}}
+Do NOT fabricate data or return empty objects."""

        try:
            result = cleaner_llm.complete(
@@ -1395,6 +1397,14 @@ Output ONLY the JSON object, nothing else."""
                parsed = json.loads(cleaned)
            except json.JSONDecodeError:
                parsed = json.loads(_fix_unescaped_newlines_in_json(cleaned))
+
+            # Validate LLM didn't return empty or fabricated data
+            if parsed.get("error") == "NO_JSON_FOUND":
+                raise ValueError("Cannot parse JSON from response")
+            if not parsed or parsed == {}:
+                raise ValueError("Cannot parse JSON from response")
+            if all(v is None for v in parsed.values()):
+                raise ValueError("Cannot parse JSON from response")
            logger.info("      ✓ LLM cleaned JSON output")
            return parsed