import os, json, requests, time
from collections import Counter
# --- Configuration -------------------------------------------------------
# Both API credentials are mandatory: a missing variable raises KeyError
# here, before any network call is attempted.
TF = os.environ["TYPEFORM_TOKEN"]
MV = os.environ["MAVERA_API_KEY"]

# Base URLs and bearer-token headers for the two services.
TF_BASE = "https://api.typeform.com"
MB = "https://app.mavera.io/api/v1"
TF_H = {"Authorization": f"Bearer {TF}"}
MV_H = {"Authorization": f"Bearer {MV}", "Content-Type": "application/json"}

# Form to read real responses from; "your_form_id" is only a placeholder.
TYPEFORM_FORM_ID = os.environ.get("CALIBRATION_FORM_ID", "your_form_id")

# Comma-separated persona IDs. Whitespace is stripped and empty entries are
# dropped, so an unset variable yields [] (not [""]) and inputs such as
# "a, b" or "a,b," never produce a blank or padded ID.
PERSONA_IDS = [
    pid.strip()
    for pid in os.environ.get("PERSONA_IDS", "").split(",")
    if pid.strip()
]

# Questions asked both in the Typeform survey and in the synthetic focus
# group; they are the join key between the two data sets.
SHARED_QUESTIONS = [
    "What is the biggest challenge you face in your role today?",
    "How would you describe our product to a colleague in one sentence?",
    "What nearly stopped you from becoming a customer?",
    "Rate your onboarding experience (1-10) and explain why.",
    "If you could change one thing about our product, what would it be?",
]
# 1. Pull Typeform responses (assumes form already deployed with these questions)
# Fetch the form definition so each answer's field id can be mapped back to
# one of SHARED_QUESTIONS.
form_response = requests.get(
    f"{TF_BASE}/forms/{TYPEFORM_FORM_ID}", headers=TF_H, timeout=30
)
# Fail loudly on a bad token / form id instead of silently continuing with
# an error payload that has no "fields" (which would leave field_map empty).
form_response.raise_for_status()
form = form_response.json()

# field id -> shared question. A field matches the first shared question
# whose first 30 characters (case-insensitive) occur in the field title.
field_map = {}
for f in form.get("fields", []):
    title = (f.get("title") or "").lower()
    for q in SHARED_QUESTIONS:
        if q.lower()[:30] in title:
            field_map[f["id"]] = q
            break

# Page through all responses. A page shorter than PAGE_SIZE is the last one;
# otherwise the last item's token is passed as "before" to get the next page.
PAGE_SIZE = 1000
responses_raw = []
params = {"page_size": PAGE_SIZE}
while True:
    r = requests.get(
        f"{TF_BASE}/forms/{TYPEFORM_FORM_ID}/responses",
        headers=TF_H,
        params=params,
        timeout=30,
    )
    if r.status_code == 429:
        # Rate limited: back off briefly and retry the same page.
        time.sleep(1)
        continue
    r.raise_for_status()
    data = r.json()
    items = data.get("items", [])
    responses_raw.extend(items)
    if len(items) < PAGE_SIZE:
        break
    params["before"] = items[-1]["token"]
    # Small pause between pages to stay under the rate limit.
    time.sleep(0.6)
# Parse real responses
# Bucket every non-blank human answer under the shared question that its
# Typeform field was mapped to; answers for unmapped fields are ignored.
real_answers = {}
for q in SHARED_QUESTIONS:
    real_answers[q] = []

for resp in responses_raw:
    for ans in resp.get("answers", []):
        fid = ans.get("field", {}).get("id", "")
        if fid not in field_map:
            continue
        # Prefer the free-text value; fall back to the numeric value as text.
        text = ans.get("text", "") or str(ans.get("number", ""))
        cleaned = text.strip()
        if not cleaned:
            continue
        real_answers[field_map[fid]].append(cleaned)

# Summary: total responses pulled, then answer count per question.
print(f"Real responses: {len(responses_raw)}")
for q in real_answers:
    answers = real_answers[q]
    print(f" {q[:50]}... → {len(answers)} answers")
# 2. Run Mavera Focus Group with same questions
# Normalise the configured IDs first: strip whitespace and drop blanks so
# that [""] (unset variable) or a trailing comma counts as "no personas".
PERSONA_IDS = [pid.strip() for pid in PERSONA_IDS if pid and pid.strip()]
if not PERSONA_IDS:
    # No persona configured: create a generic one for this study.
    persona_response = requests.post(
        f"{MB}/personas",
        headers=MV_H,
        json={
            "name": "Calibration Persona",
            "description": "General target customer for A/B calibration study.",
        },
        timeout=30,
    )
    # Surface HTTP errors directly rather than as a KeyError on "id" below.
    persona_response.raise_for_status()
    p = persona_response.json()
    PERSONA_IDS = [p["id"]]

fg_response = requests.post(
    f"{MB}/focus-groups",
    headers=MV_H,
    json={
        "name": "Calibration: Typeform vs Mavera",
        "persona_ids": PERSONA_IDS,
        "questions": SHARED_QUESTIONS,
        "context": "Answer these questions as you genuinely would. Be specific and honest.",
        "responses_per_persona": 5,
    },
    timeout=60,
)
fg_response.raise_for_status()
fg = fg_response.json()

# Poll up to 20 times, 5 seconds apart, until the focus group reports
# status "completed".
for _ in range(20):
    time.sleep(5)
    fg_result = requests.get(
        f"{MB}/focus-groups/{fg['id']}", headers=MV_H, timeout=30
    ).json()
    if fg_result.get("status") == "completed":
        break
else:
    # Best effort: keep going with whatever the last poll returned, but make
    # it visible that the synthetic data may be partial or empty.
    print(
        "WARNING: focus group did not reach status 'completed' after 20 polls "
        f"(last status: {fg_result.get('status')!r}); synthetic results may be incomplete."
    )
# Parse synthetic responses
# Bucket each focus-group answer under the shared question it belongs to.
# A response matches the first shared question whose first 30 characters
# (case-insensitive) occur in the response's question text.
synthetic_answers = {q: [] for q in SHARED_QUESTIONS}
for resp in fg_result.get("responses") or []:
    q_text = (resp.get("question") or "").lower()
    # Skip blank / missing answers, mirroring how real answers are filtered,
    # so counts are comparable and later slicing never sees None.
    answer = str(resp.get("answer") or "").strip()
    if not answer:
        continue
    for q in SHARED_QUESTIONS:
        if q.lower()[:30] in q_text:
            synthetic_answers[q].append(answer)
            break
# 3. Compare with Mave
# Build one prompt containing, per question, the total answer counts and a
# short sample (up to 5 real / 3 synthetic answers, each cut to 100 chars).
prompt_parts = [
    "Compare real human survey responses vs synthetic persona responses.\n\n"
]
for q in SHARED_QUESTIONS:
    real = real_answers.get(q, [])
    synth = synthetic_answers.get(q, [])
    real_sample = "; ".join(a[:100] for a in real[:5])
    synth_sample = "; ".join(a[:100] for a in synth[:3])
    prompt_parts.append(f"\n**Question:** {q}\n")
    # Both counts are totals, so the two sides are reported on the same basis.
    prompt_parts.append(f"REAL ({len(real)} total): {real_sample}\n")
    prompt_parts.append(f"SYNTHETIC ({len(synth)} total): {synth_sample}\n")
prompt_parts.append("""
Analyze:
1) Theme overlap: What % of real themes appear in synthetic responses?
2) Sentiment alignment: Do synthetic responses match the emotional tone?
3) Specificity gap: Are real responses more/less specific?
4) Blind spots: What did real humans say that synthetics completely missed?
5) Synthetic advantages: Where did synthetics provide insight humans didn't?
6) Calibration score (0-100): How reliable is synthetic for this question set?
7) Recommendations: Which question types are safe to delegate to synthetic?""")
comparison_prompt = "".join(prompt_parts)

comparison_response = requests.post(
    f"{MB}/mave/chat",
    headers=MV_H,
    json={"message": comparison_prompt},
    timeout=120,
)
# Report an HTTP failure as such instead of printing an empty report.
comparison_response.raise_for_status()
comparison = comparison_response.json()

print(f"\n{'='*60}")
print("CALIBRATION REPORT: Typeform (Real) vs Mavera (Synthetic)")
print(f"{'='*60}")
# "content" may be absent or null; print at most the first 3000 characters.
print((comparison.get("content") or "")[:3000])