Exploring AI-ML-NLP: This AI FIXES Its Own Mistakes?! Agentic LLMs & Self-Improving Prompts Explained

Introduction.

In this tutorial, we break down the future of AI assistants by exploring Agentic LLMs and Self-Improving Prompts—two techniques that transform chatbots from passive answer machines into reliable, evidence-backed problem solvers. You’ll learn how Agentic LLMs plan, call tools, and fetch real data, while Self-Improving Prompts add a reflection and repair loop that makes answers safer, more consistent, and audit-ready. From finance to healthcare, discover why these methods are the secret to building trustworthy AI systems for high-stakes, real-world use.
Video Tutorial.

Code.

import os, json, requests, time, math, datetime, textwrap
from typing import Any, Dict, List

# ======================
# Config
# ======================
# Chat-completions URL that every request in this script is POSTed to.
ENDPOINT = "https://api.groq.com/openai/v1/chat/completions"
MODEL = os.getenv("GROQ_MODEL", "llama-3.1-8b-instant")  # known-good public model
# Tutorial placeholder text; it is never a usable credential.
_KEY_PLACEHOLDER = "USE YOUR OWN KEYS"
# Default to an empty string so that a missing key is actually detectable below
# (a non-empty placeholder default would make the check unreachable).
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "").strip()
TEMPERATURE = float(os.getenv("GROQ_TEMPERATURE", "0.2"))
# Reference date (YYYY-MM-DD) used by the time-window helpers further down.
TODAY = os.getenv("TODAY", "2025-08-23")

# Fail fast with a clear message instead of sending an unusable key to the API.
if not GROQ_API_KEY or GROQ_API_KEY == _KEY_PLACEHOLDER:
    raise SystemExit("Please set GROQ_API_KEY in this shell. Example: export GROQ_API_KEY='YOUR_REAL_KEY'")

def _mask(k: str) -> str:
    return "<EMPTY>" if not k else f"{k[:4]}…{k[-4:]} (len={len(k)})"

# Echo the effective configuration at startup; the key goes through _mask first.
print("== GROQ CONFIG ==")
print(f"Endpoint: {ENDPOINT}")
print(f"Model   : {MODEL}")
print(f"Key     : {_mask(GROQ_API_KEY)}")

# ======================
# API Request part
# ======================
def _post(payload: dict) -> dict:
    """POST *payload* to ENDPOINT and return the decoded JSON reply.

    On a non-OK HTTP status, the status code and the response body are
    printed before ``raise_for_status()`` raises.
    """
    response = requests.post(
        ENDPOINT,
        headers={
            "Authorization": f"Bearer {GROQ_API_KEY}",
            "Content-Type": "application/json",
            "Accept": "application/json",
        },
        # The body is serialized by hand and passed via ``data=``.
        data=json.dumps(payload),
        timeout=60,
    )
    if response.ok:
        return response.json()

    print("\n--- LLM API ERROR ---")
    print("Status:", response.status_code)
    try:
        print("Body:", response.json())
    except Exception:
        # The error body was not JSON; show the raw text instead.
        print("Body:", response.text)
    response.raise_for_status()
    return response.json()

# ======================
# Smoke test (identical pattern to your probe)
# ======================
def smoke_test():
    """Send a one-shot "pong" prompt through ``_post`` and print the reply.

    A warning is printed when the reply does not contain "pong"
    (compared case-insensitively).
    """
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Reply with the single word: pong"},
    ]
    reply = _post({"model": MODEL, "temperature": 0, "messages": conversation})
    text = reply["choices"][0]["message"]["content"].strip()
    print("Smoke test:", text)
    if "pong" in text.lower():
        return
    print("Warning: unexpected smoke test response. Check model/endpoint if issues persist.")

# ======================
# --- Synthetic data
# ======================
# Synthetic customer (KYC) records keyed by customer_id.
# Each record's "account_ids" entries are used as keys into ACCOUNTS
# by tool_get_accounts.
CUSTOMERS = {
    "CUST-1001": {
        "customer_id": "CUST-1001",
        "name": "Arjun Mehta",
        "dob": "1990-02-14",
        "risk_tier": "Low",
        "country": "IN",
        "kyc_status": "Verified",
        "account_ids": ["ACCT-111", "ACCT-112"]
    },
    "CUST-2002": {
        "customer_id": "CUST-2002",
        "name": "Priya Nair",
        "dob": "1986-11-02",
        "risk_tier": "Medium",
        "country": "IN",
        "kyc_status": "Verified",
        "account_ids": ["ACCT-221"]
    },
    "CUST-3003": {
        "customer_id": "CUST-3003",
        "name": "Mohammed Rahman",
        "dob": "1978-07-29",
        "risk_tier": "High",
        "country": "IN",
        "kyc_status": "Verified",
        "account_ids": ["ACCT-331"]
    }
}

# Synthetic account records keyed by account_id; "customer_id" points back
# to the owning entry in CUSTOMERS.
ACCOUNTS = {
    "ACCT-111": {"account_id": "ACCT-111", "customer_id": "CUST-1001", "type": "debit_card"},
    "ACCT-112": {"account_id": "ACCT-112", "customer_id": "CUST-1001", "type": "savings"},
    "ACCT-221": {"account_id": "ACCT-221", "customer_id": "CUST-2002", "type": "checking"},
    "ACCT-331": {"account_id": "ACCT-331", "customer_id": "CUST-3003", "type": "checking"},
}

# Synthetic transaction history keyed by account_id. Each transaction carries
# an ISO-8601 "ts", an "amount" with "currency", a merchant category code
# ("mcc"), a merchant label, and a lat/lon/country location.
# ACCT-112 has no entry here; lookups use TXNS.get(account_id, []) and so
# yield an empty list for it.
# NOTE(review): the ACCT-221 amounts (9,500-10,000) are far below the
# 0.85 * CTR_REPORTING_THRESHOLD band that near_threshold_structuring looks
# for, so the structuring signal is 0 for that account -- confirm whether the
# amounts or the threshold are off by a factor of ten.
TXNS = {
    "ACCT-111": [
        {"ts": "2025-08-22T18:15:00", "amount": 7999, "currency": "INR", "mcc": "5812", "merchant": "Cafe Brew", "lat": 19.119, "lon": 72.846, "country": "IN"},
        {"ts": "2025-08-22T21:05:00", "amount": 108000, "currency": "INR", "mcc": "6011", "merchant": "ATM Withdrawal", "lat": 19.118, "lon": 72.847, "country": "IN"},
        {"ts": "2025-08-23T01:20:00", "amount": 149999, "currency": "INR", "mcc": "4829", "merchant": "Money Transfer", "lat": 28.556, "lon": 77.100, "country": "IN"},
        {"ts": "2025-08-23T01:55:00", "amount": 149900, "currency": "INR", "mcc": "4829", "merchant": "Money Transfer", "lat": 28.556, "lon": 77.100, "country": "IN"},
    ],
    "ACCT-221": [
        {"ts": "2025-08-22T10:05:00", "amount": 9500, "currency": "INR", "mcc": "5411", "merchant": "Grocery World", "lat": 12.971, "lon": 77.594, "country": "IN"},
        {"ts": "2025-08-22T15:25:00", "amount": 9700, "currency": "INR", "mcc": "5411", "merchant": "Grocery World", "lat": 12.971, "lon": 77.594, "country": "IN"},
        {"ts": "2025-08-22T19:05:00", "amount": 9800, "currency": "INR", "mcc": "5411", "merchant": "Grocery World", "lat": 12.971, "lon": 77.594, "country": "IN"},
        {"ts": "2025-08-22T22:35:00", "amount": 9900, "currency": "INR", "mcc": "5411", "merchant": "Grocery World", "lat": 12.971, "lon": 77.594, "country": "IN"},
        {"ts": "2025-08-23T00:05:00", "amount": 10000, "currency": "INR", "mcc": "5411", "merchant": "Grocery World", "lat": 12.971, "lon": 77.594, "country": "IN"},
    ],
    "ACCT-331": [
        {"ts": "2025-08-21T12:00:00", "amount": 400000, "currency": "INR", "mcc": "4829", "merchant": "Wire Transfer", "lat": 25.204, "lon": 55.271, "country": "AE"},
        {"ts": "2025-08-22T09:30:00", "amount": 385000, "currency": "INR", "mcc": "4829", "merchant": "Wire Transfer", "lat": 25.204, "lon": 55.271, "country": "AE"},
        {"ts": "2025-08-23T02:40:00", "amount": 410000, "currency": "INR", "mcc": "4829", "merchant": "Wire Transfer", "lat": 25.204, "lon": 55.271, "country": "AE"},
    ],
}

# Synthetic sanctions list. sanctions_name_match compares only "name"
# (case-insensitively) and "dob" of each individual; "country" is not
# consulted, and "entities" is currently empty.
SANCTIONS = {
    "individuals": [
        {"name": "Mohammed Rahman", "dob": "1978-07-29", "country": "PK"},
        {"name": "Rahul Sharma", "dob": "1982-05-18", "country": "IN"}
    ],
    "entities": []
}

# Merchant category codes that mcc_risk flags; in the synthetic data above
# 4829 labels money/wire transfers and 6011 labels ATM withdrawals.
HIGH_RISK_MCC = {"4829", "6011"}
# Amount threshold passed to near_threshold_structuring by tool_compute_risk_signals.
CTR_REPORTING_THRESHOLD = 100000

# ======================
# Utilities
# ======================
def haversine_km(lat1, lon1, lat2, lon2) -> float:
    """Return the great-circle distance in kilometres between two points.

    Args:
        lat1, lon1: Latitude and longitude of the first point, in degrees.
        lat2, lon2: Latitude and longitude of the second point, in degrees.

    Returns:
        The haversine distance on a sphere of radius 6371 km.
    """
    R = 6371  # sphere radius in km
    dlat = math.radians(lat2 - lat1)
    dlon = math.radians(lon2 - lon1)
    a = (
        math.sin(dlat / 2) ** 2
        + math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) * math.sin(dlon / 2) ** 2
    )
    # Floating-point rounding can push `a` marginally outside [0, 1] (e.g. for
    # near-antipodal points), which would make sqrt/asin raise ValueError.
    a = min(1.0, max(0.0, a))
    return 2 * R * math.asin(math.sqrt(a))

def parse_ts(ts: str) -> datetime.datetime:
    """Convert an ISO-format timestamp string into a ``datetime`` object."""
    parsed = datetime.datetime.fromisoformat(ts)
    return parsed

def hours_between(a: str, b: str) -> float:
    """Return the absolute gap between timestamps *a* and *b*, in hours."""
    delta = parse_ts(b) - parse_ts(a)
    elapsed_seconds = abs(delta.total_seconds())
    return elapsed_seconds / 3600.0

def last_geojump_km(txns: List[Dict[str, Any]]) -> float:
    """Distance in km between the two most recent transactions.

    Transactions are ordered by their "ts" string; with fewer than two
    transactions the result is 0.0.
    """
    if len(txns) < 2:
        return 0.0
    ordered = sorted(txns, key=lambda txn: txn["ts"])
    previous, latest = ordered[-2], ordered[-1]
    return haversine_km(previous["lat"], previous["lon"], latest["lat"], latest["lon"])

def near_threshold_structuring(txns: List[Dict[str, Any]], threshold: int, window_hours: float = 24.0) -> Dict[str, Any]:
    """Summarize transactions sitting just at or below *threshold*.

    A transaction qualifies when it lies within *window_hours* of midnight
    on TODAY (absolute difference, via ``hours_between``) and its amount is
    between 85% of *threshold* and *threshold*, inclusive.

    Returns:
        A dict with the match "count", their amount "sum", and up to three
        "examples" in timestamp order.
    """
    reference = f"{TODAY}T00:00:00"
    floor = 0.85 * threshold
    flagged = []
    for txn in sorted(txns, key=lambda item: item["ts"]):
        if hours_between(txn["ts"], reference) <= window_hours and floor <= txn["amount"] <= threshold:
            flagged.append(txn)
    total = sum(txn["amount"] for txn in flagged)
    return {"count": len(flagged), "sum": total, "examples": flagged[:3]}

def velocity_spend(txns: List[Dict[str, Any]], hours_window: float = 6.0) -> Dict[str, Any]:
    """Count and total the transactions at or after the window cutoff.

    The cutoff is midnight on TODAY minus *hours_window* hours; every
    transaction whose timestamp is not earlier than that is included.
    """
    midnight = parse_ts(f"{TODAY}T00:00:00")
    cutoff = midnight - datetime.timedelta(hours=hours_window)
    matched = 0
    total = 0
    for txn in txns:
        if parse_ts(txn["ts"]) >= cutoff:
            matched += 1
            total += txn["amount"]
    return {"count": matched, "sum": total}

def mcc_risk(txns: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Report how many transactions use a code listed in HIGH_RISK_MCC.

    Returns:
        A dict with "risky_count" and up to three matching "examples".
    """
    flagged = []
    for txn in txns:
        if txn["mcc"] in HIGH_RISK_MCC:
            flagged.append(txn)
    return {"risky_count": len(flagged), "examples": flagged[:3]}

def sanctions_name_match(name: str, dob: str) -> Dict[str, Any]:
    """Look for a sanctioned individual with the same name and date of birth.

    Names are compared case-insensitively; dates of birth must be equal
    strings. The first matching record is returned under "record".
    """
    candidates = (
        person
        for person in SANCTIONS["individuals"]
        if person["name"].lower() == name.lower() and person["dob"] == dob
    )
    match = next(candidates, None)
    if match is None:
        return {"hit": False}
    return {"hit": True, "record": match}

# ======================
# Tools (simulated)
# ======================
def tool_get_customer(customer_id: str) -> Dict[str, Any]:
    """Simulated tool: look up a customer record in CUSTOMERS.

    "ok" is False and "data" is None when the id is unknown.
    """
    time.sleep(0.02)  # simulated latency
    record = CUSTOMERS.get(customer_id)
    found = bool(record)
    return {"ok": found, "data": record}

def tool_get_accounts(customer_id: str) -> Dict[str, Any]:
    """Simulated tool: list the ACCOUNTS records owned by a customer.

    Returns an error result when the customer id is not in CUSTOMERS.
    """
    time.sleep(0.02)  # simulated latency
    customer = CUSTOMERS.get(customer_id)
    if customer:
        owned = []
        for account_id in customer["account_ids"]:
            owned.append(ACCOUNTS[account_id])
        return {"ok": True, "data": owned}
    return {"ok": False, "error": "customer not found"}

def tool_get_transactions(account_id: str, hours: int = 168) -> Dict[str, Any]:
    """Simulated tool: return an account's transactions within a look-back window.

    Args:
        account_id: Key into TXNS; unknown accounts yield an empty list.
        hours: Look-back window in hours, measured back from midnight on
            TODAY (the same reference point ``velocity_spend`` uses).
            Transactions at or after the resulting cutoff are returned.

    Returns:
        ``{"ok": True, "data": [...]}`` on success, or an error result when
        *hours* cannot be interpreted as a number of hours.
    """
    time.sleep(0.02)  # simulated latency
    tx = TXNS.get(account_id, [])
    try:
        # The value comes from model-generated tool arguments, so coerce it.
        cutoff = parse_ts(f"{TODAY}T00:00:00") - datetime.timedelta(hours=float(hours))
    except (TypeError, ValueError, OverflowError):
        return {"ok": False, "error": f"invalid hours: {hours!r}"}
    # Previously `hours` was accepted but never applied.
    recent = [t for t in tx if parse_ts(t["ts"]) >= cutoff]
    return {"ok": True, "data": recent}

def tool_compute_risk_signals(account_id: str) -> Dict[str, Any]:
    """Simulated tool: compute the four risk signals for one account.

    Combines the geo jump between the last two transactions, the 6-hour
    velocity, the high-risk MCC summary, and the 24-hour structuring
    summary against CTR_REPORTING_THRESHOLD.
    """
    time.sleep(0.02)  # simulated latency
    history = TXNS.get(account_id, [])
    signals = {
        "geo_jump_km_last2": last_geojump_km(history),
        "velocity_6h": velocity_spend(history, 6.0),
        "mcc_risk": mcc_risk(history),
        "structuring_24h": near_threshold_structuring(history, CTR_REPORTING_THRESHOLD, 24.0),
    }
    return {"ok": True, "data": signals}

def tool_check_sanctions(name: str, dob: str) -> Dict[str, Any]:
    """Simulated tool: wrap ``sanctions_name_match`` in an ok/data envelope."""
    time.sleep(0.02)  # simulated latency
    outcome = sanctions_name_match(name, dob)
    return {"ok": True, "data": outcome}

# ======================
# LLM call
# ======================
def call_llm(messages: List[Dict[str, str]], tools=None, tool_choice="auto") -> Dict[str, Any]:
    """Send a chat request built from *messages* and return the raw response.

    "tools" is added to the payload only when *tools* is given, and
    "tool_choice" only when tools are present and *tool_choice* is not None.
    """
    payload = {"model": MODEL, "temperature": TEMPERATURE, "messages": messages}
    if tools is None:
        return _post(payload)
    payload["tools"] = tools
    if tool_choice is not None:
        payload["tool_choice"] = tool_choice
    return _post(payload)

# ======================
# Tool schemas (function-calling)
# ======================
# Function-calling schemas sent to the model as the "tools" payload field.
# Each "name" matches a tool_* function dispatched by tool_router, and each
# "parameters" object mirrors that function's keyword arguments.
TOOLS = [
    {"type":"function","function":{"name":"tool_get_customer","description":"Fetch KYC summary by customer_id.","parameters":{"type":"object","properties":{"customer_id":{"type":"string"}},"required":["customer_id"]}}},
    {"type":"function","function":{"name":"tool_get_accounts","description":"List accounts for a customer.","parameters":{"type":"object","properties":{"customer_id":{"type":"string"}},"required":["customer_id"]}}},
    {"type":"function","function":{"name":"tool_get_transactions","description":"Fetch recent transactions for an account.","parameters":{"type":"object","properties":{"account_id":{"type":"string"},"hours":{"type":"integer","default":168}},"required":["account_id"]}}},
    {"type":"function","function":{"name":"tool_compute_risk_signals","description":"Compute velocity, MCC, structuring, and geo-jump features.","parameters":{"type":"object","properties":{"account_id":{"type":"string"}},"required":["account_id"]}}},
    {"type":"function","function":{"name":"tool_check_sanctions","description":"Check simple sanctions/PEP name+dob match.","parameters":{"type":"object","properties":{"name":{"type":"string"},"dob":{"type":"string"}},"required":["name","dob"]}}}
]

# ======================
# System prompts (Resolver & Critic)
# ======================
# System prompt for the first (drafting) pass; llm_draft sends it as the
# system message. It fixes the JSON output schema and the tools to call.
RESOLVER_SYSTEM = """\
You are FinCrimeResolver v1 — a precise Fraud/AML case triage agent.

OBJECTIVE
- Given an alert describing a suspicious pattern, call tools to fetch KYC, accounts, transactions, risk signals, and sanctions status.
- Produce a structured case disposition with evidence and safe actions.

STRICT OUTPUT SCHEMA (JSON ONLY):
{
  "alert_id": "<string>",
  "customer_id": "<string>",
  "primary_account": "<string>",
  "hypothesis": "<string>",
  "confidence": <float 0..1>,
  "evidence": ["<bullet points>"],
  "signals": {
    "geo_jump_km_last2": <float>,
    "velocity_6h": {"count": <int>, "sum": <float>},
    "mcc_risk": {"risky_count": <int>, "examples": [<tx>]},
    "structuring_24h": {"count": <int>, "sum": <float>, "examples": [<tx>]},
    "sanctions_hit": true/false
  },
  "actions": {
    "immediate": ["<nondestructive steps: contact, soft-block card, VI call, additional auth>"],
    "with_approval": ["<disruptive steps: hard block, law enforcement escalation, SAR filing draft>"]
  },
  "case_notes": "<short narrative for case system>",
  "needs_followup": ["<specific missing information to request>"]
}

MANDATORY BEHAVIOR
- ALWAYS call tools: tool_get_customer, tool_get_accounts, tool_get_transactions (for primary), tool_compute_risk_signals (for primary), and tool_check_sanctions.
- Prefer conservative, reversible actions if confidence < 0.8.
- No hallucinated data; use only tool outputs.
- You are not giving legal advice; decisions must be reviewed by a human analyst.
"""

# System prompt for the second (review) pass; llm_critic sends it as the
# system message. The reply's "improved_draft" is what run_case reads.
CRITIC_SYSTEM = """\
You are FinCrimeCritic v1 — strict auditor for triage quality.

Validate the draft JSON using this rubric:
1) All required tools were effectively used (KYC/accounts/txns/signals/sanctions).
2) Evidence references concrete signals (velocity, MCC risk, geo jump, structuring).
3) Actions are SAFE given confidence (disruptive steps only under 'with_approval' if confidence < 0.8).
4) Case notes are clear and minimal.
5) Needs_followup is specific (e.g., confirm travel, verify device, merchant receipts).

OUTPUT (JSON ONLY):
{
  "ok": true/false,
  "findings": ["<specific gap>"],
  "improved_draft": { <corrected JSON per schema> }
}
If destructive actions were listed under 'immediate' with confidence < 0.8, move them to 'with_approval' and justify.
"""

# ======================
# LLM orchestration
# ======================
def tool_router(name: str, args: Dict[str, Any]) -> Dict[str, Any]:
    """Dispatch a model-requested tool call to the matching ``tool_*`` function.

    Args:
        name: Tool name as emitted by the model.
        args: Decoded JSON arguments for the tool; ``None`` is treated as
            an empty argument set.

    Returns:
        The tool's result dict, or ``{"ok": False, "error": ...}`` when the
        tool is unknown or the arguments do not fit its signature. Returning
        an error (rather than raising) lets the caller feed it back to the
        model instead of aborting the whole run.
    """
    import inspect  # local import: not among the module-level imports

    handlers = {
        "tool_get_customer": tool_get_customer,
        "tool_get_accounts": tool_get_accounts,
        "tool_get_transactions": tool_get_transactions,
        "tool_compute_risk_signals": tool_compute_risk_signals,
        "tool_check_sanctions": tool_check_sanctions,
    }
    handler = handlers.get(name)
    if handler is None:
        return {"ok": False, "error": f"Unknown tool {name}"}

    if args is None:
        args = {}
    if not isinstance(args, dict):
        return {"ok": False, "error": f"Arguments for {name} must be a JSON object"}

    # Validate against the signature first so that only argument mismatches
    # are reported here; errors raised inside the tool still propagate.
    try:
        inspect.signature(handler).bind(**args)
    except TypeError as exc:
        return {"ok": False, "error": f"Bad arguments for {name}: {exc}"}
    return handler(**args)

def safe_json_loads(s: str) -> Any:
    """Parse *s* as JSON, tolerating surrounding non-JSON text.

    If the whole string does not parse, the span from the first ``{`` to the
    last ``}`` is tried. When that fails too, a dict holding the raw text
    under "_raw" and an "_error" message is returned.
    """
    try:
        return json.loads(s)
    except Exception:
        pass

    import re

    unparsed = {"_raw": s, "_error": "Could not parse JSON"}
    braced = re.search(r"\{.*\}", s, flags=re.DOTALL)
    if braced is None:
        return unparsed
    try:
        return json.loads(braced.group(0))
    except Exception:
        return unparsed

def llm_draft(alert_text: str, max_rounds: int = 8) -> Dict[str, Any]:
    """Run the resolver pass: let the model call tools, then parse its JSON draft.

    Args:
        alert_text: The alert description given to the model.
        max_rounds: Upper bound on tool-calling round trips. Once reached,
            one last request is sent with ``tool_choice="none"`` so the
            model has to answer instead of looping indefinitely.

    Returns:
        The parsed draft, or the error dict produced by ``safe_json_loads``
        when the reply is not valid JSON.
    """
    messages = [
        {"role": "system", "content": RESOLVER_SYSTEM},
        {"role": "user", "content": f"TODAY: {TODAY}\nALERT:\n{alert_text}\nOutput strictly JSON per schema."}
    ]

    def _final_answer(message: Dict[str, Any]) -> Dict[str, Any]:
        # "content" may be present but null, so guard before stripping.
        return safe_json_loads((message.get("content") or "").strip())

    # Tool-use loop (OpenAI-compatible)
    for _ in range(max_rounds):
        resp = call_llm(messages, tools=TOOLS, tool_choice="auto")
        msg = resp["choices"][0]["message"]
        tcs = msg.get("tool_calls")
        if not tcs:
            return _final_answer(msg)

        # The assistant turn that requested the tools must precede the tool
        # results; otherwise the "tool" messages reference unknown call ids.
        messages.append({
            "role": "assistant",
            "content": msg.get("content") or "",
            "tool_calls": tcs,
        })
        for tc in tcs:
            fn = tc["function"]["name"]
            raw_args = tc["function"].get("arguments")
            try:
                args = raw_args if isinstance(raw_args, dict) else json.loads(raw_args or "{}")
            except (TypeError, ValueError) as exc:
                args = None
                result = {"ok": False, "error": f"Invalid JSON arguments for {fn}: {exc}"}
            else:
                if isinstance(args, dict):
                    result = tool_router(fn, args)
                else:
                    result = {"ok": False, "error": f"Arguments for {fn} must be a JSON object"}
            # tool message echoes back to the model
            messages.append({
                "role": "tool",
                "tool_call_id": tc["id"],
                "content": json.dumps(result)
            })

    # Round budget exhausted: ask once more with tool calls disabled.
    resp = call_llm(messages, tools=TOOLS, tool_choice="none")
    return _final_answer(resp["choices"][0]["message"])

def llm_critic(draft_json: Dict[str, Any]) -> Dict[str, Any]:
    """Run the critic pass over a draft and return its parsed JSON verdict.

    The draft is pretty-printed into the user message; no tools are offered.
    """
    rendered_draft = json.dumps(draft_json, ensure_ascii=False, indent=2)
    conversation = [
        {"role": "system", "content": CRITIC_SYSTEM},
        {"role": "user", "content": "Evaluate and fix if needed:\n" + rendered_draft},
    ]
    reply = call_llm(conversation)
    verdict_text = reply["choices"][0]["message"]["content"]
    return safe_json_loads(verdict_text.strip())

# ======================
# Programmatic guardrails
# ======================
def guardrails(final_json: Dict[str, Any]) -> Dict[str, Any]:
    """Enforce that disruptive actions need approval when confidence is low.

    When ``confidence`` is below 0.8, any step under ``actions.immediate``
    that looks disruptive is moved to ``actions.with_approval`` and a note
    is appended to ``evidence``. The input dict is modified in place.

    Args:
        final_json: Case disposition following the resolver schema. Anything
            that is not a dict is returned untouched.

    Returns:
        The same object, possibly with actions rearranged.
    """
    import re  # local import: not among the module-level imports

    if not isinstance(final_json, dict):
        return final_json

    # A missing or unparsable confidence is treated as 0 so that the
    # guardrail fails safe instead of being silently skipped.
    try:
        conf = float(final_json.get("confidence", 0))
    except (TypeError, ValueError):
        conf = 0.0
    if conf >= 0.8:
        return final_json

    actions = final_json.get("actions")
    if not isinstance(actions, dict):
        return final_json

    def _as_list(value: Any) -> List[Any]:
        # Normalize None / a bare string / a tuple into a list of steps.
        if value is None:
            return []
        if isinstance(value, (list, tuple)):
            return list(value)
        return [value]

    immediate = _as_list(actions.get("immediate"))
    with_approval = _as_list(actions.get("with_approval"))

    # Phrases are matched as lowercase substrings. Acronyms are matched as
    # whole words, case-insensitively: a plain lowercase substring test would
    # never match "SAR"/"FIR" as written, and a naive lowercase "fir" would
    # wrongly match words such as "confirm".
    destructive_phrases = ("hard block", "close account", "freeze funds", "law enforcement", "report", "police")
    destructive_acronyms = re.compile(r"\b(?:SAR|FIR)\b", re.IGNORECASE)

    def _is_destructive(step: Any) -> bool:
        text = step if isinstance(step, str) else str(step)
        lowered = text.lower()
        return any(p in lowered for p in destructive_phrases) or bool(destructive_acronyms.search(text))

    keep = [step for step in immediate if not _is_destructive(step)]
    move = [step for step in immediate if _is_destructive(step)]
    if not move:
        return final_json

    # Order-preserving de-duplication that also works for unhashable steps.
    merged: List[Any] = []
    for step in [*with_approval, *move]:
        if step not in merged:
            merged.append(step)

    # Build everything first, then assign, so a failure cannot drop steps.
    actions["immediate"] = keep
    actions["with_approval"] = merged

    evidence = final_json.get("evidence")
    if not isinstance(evidence, list):
        evidence = _as_list(evidence)
        final_json["evidence"] = evidence
    evidence.append(
        "Moved potentially disruptive steps to 'with_approval' because confidence < 0.8."
    )
    return final_json

# ======================
# Demo Cases
# ======================
# Demo alerts fed to run_case by main. Each entry has a display "title" and
# the "alert" text passed to the model; the ids in the alert text refer to
# the synthetic CUSTOMERS / ACCOUNTS data.
CASES = [
    {
        "title": "ALRT-901: Sudden geo jump + high-risk money transfers",
        "alert": textwrap.dedent("""
            ALERT_ID: ALRT-901
            CUSTOMER_ID: CUST-1001
            PRIMARY_ACCOUNT: ACCT-111
            CONTEXT: Card used in Mumbai yesterday evening; within ~4 hours, two large money transfers originated from Delhi location.
            SYMPTOMS: High-risk MCC (4829), geo jump > 1000km in short interval; possible account takeover.
        """).strip()
    },
    {
        "title": "ALRT-902: Multiple deposits near CTR threshold (structuring)",
        "alert": textwrap.dedent("""
            ALERT_ID: ALRT-902
            CUSTOMER_ID: CUST-2002
            PRIMARY_ACCOUNT: ACCT-221
            CONTEXT: Repeated cash-like deposits under INR 100,000 clustered in < 24h.
            SYMPTOMS: Pattern suggests potential structuring to avoid reporting thresholds.
        """).strip()
    },
    {
        "title": "ALRT-903: Sanctions name collision false positive?",
        "alert": textwrap.dedent("""
            ALERT_ID: ALRT-903
            CUSTOMER_ID: CUST-3003
            PRIMARY_ACCOUNT: ACCT-331
            CONTEXT: Customer name appears similar to a listed person. Cross-border wires to AE corridor observed.
            SYMPTOMS: Possible PEP/sanctions match; need DOB verification and corridor risk assessment.
        """).strip()
    },
]

# ======================
# Orchestrator
# ======================
def run_case(alert_text: str) -> Dict[str, Any]:
    """Run one alert through draft -> critic -> guardrails.

    Args:
        alert_text: The alert description to triage.

    Returns:
        A dict with the resolver "draft", the "critic" verdict, and the
        "final" disposition after programmatic guardrails.
    """
    import copy  # local import: not among the module-level imports

    draft = llm_draft(alert_text)
    critic = llm_critic(draft)

    # Use the critic's rewrite only when it is a non-empty JSON object; a
    # missing, null, or empty "improved_draft" falls back to the draft.
    improved = critic.get("improved_draft") if isinstance(critic, dict) else None
    if not isinstance(improved, dict) or not improved:
        improved = draft

    # guardrails edits its argument in place; work on a copy so the reported
    # "draft" and "critic" remain exactly what the model produced.
    final = guardrails(copy.deepcopy(improved))
    return {"draft": draft, "critic": critic, "final": final}

def main():
    """Run the smoke test, then print draft, critic, and final output per demo case."""
    smoke_test()
    separator = "\n" + "=" * 120
    stages = (
        ("\n--- DRAFT -------------------", "draft"),
        ("\n--- CRITIC ------------------", "critic"),
        ("\n--- FINAL -------------------", "final"),
    )
    for scenario in CASES:
        print(separator)
        print("CASE:", scenario["title"])
        result = run_case(scenario["alert"])
        for banner, stage in stages:
            print(banner)
            print(json.dumps(result[stage], ensure_ascii=False, indent=2))

if __name__ == "__main__":
    main()
Key References

1. Self-Reflection in LLM Agents
M. Renze and E. Guven, "Self‑Reflection in LLM Agents: Effects on Problem‑Solving Performance," arXiv, May 2024.
2. Self-Refine: Iterative Refinement with Self-Feedback
A. Madaan et al., "Self‑Refine: Iterative Refinement with Self‑Feedback," arXiv, Mar. 2023.
3. Reflexion: Language Agents with Verbal Reinforcement Learning
N. Shinn et al., "Reflexion: Language Agents with Verbal Reinforcement Learning," arXiv, Oct. 2023.
4. Promptbreeder: Self-Referential Self-Improvement Via Prompt Evolution
C. Fernando et al., "Promptbreeder: Self‑Referential Self‑Improvement Via Prompt Evolution," arXiv, Sep. 2023.
5. Agentic Large Language Models: A Survey (Self-Reflection)
A. Plaat et al., "Agentic Large Language Models, a survey," Leiden University, arXiv, Mar. 2025.
Exploring AI-ML-NLP

Wednesday, August 27, 2025

This AI FIXES Its Own Mistakes?! Agentic LLMs & Self-Improving Prompts Explained

Introduction.

Video Tutorial.

Code.

Key References

No comments:

Post a Comment

Blog Archive