import os, requests, time, base64
from collections import defaultdict
# --- Mixpanel settings ------------------------------------------------------
# Every value is a required environment variable; a missing one raises
# KeyError as soon as this module is executed.
MP_SA = os.environ["MIXPANEL_SERVICE_ACCOUNT"]
MP_SECRET = os.environ["MIXPANEL_SECRET"]
MP_PROJECT = os.environ["MIXPANEL_PROJECT_ID"]

# (username, password) pair passed to requests as the `auth=` argument.
mp_auth = (MP_SA, MP_SECRET)

# --- Mavera settings --------------------------------------------------------
MV = os.environ["MAVERA_API_KEY"]
MB = "https://app.mavera.io/api/v1"  # base URL for Mavera API calls
MH = {
    "Authorization": f"Bearer {MV}",
    "Content-Type": "application/json",
}
# Page through Mixpanel's /api/query/engage endpoint and accumulate the
# "results" list of every page into `profiles`.
#
# The loop stops when a page comes back shorter than a full page or once at
# least 5000 profiles have been collected.
profiles = []
page = 0
session_id = None
while True:
    payload = {
        "project_id": MP_PROJECT,
        "filter_by_cohort": "",
        "output_properties": [
            "$email", "$name", "$city", "$country_code",
            "plan", "company_size", "role", "signup_source",
            "total_events", "last_seen", "sessions_count",
        ],
        "page": page,
    }
    if session_id:
        # Send back the session_id received from an earlier response.
        payload["session_id"] = session_id
    r = requests.post(
        "https://mixpanel.com/api/query/engage",
        auth=mp_auth,
        json=payload,
        # requests has no default timeout; without one a stalled connection
        # would block this script forever.
        timeout=60,
    )
    if r.status_code == 429:
        # Rate limited: wait a minute and retry the same page.
        time.sleep(60)
        continue
    r.raise_for_status()
    data = r.json()
    # Keep the previous session_id if this response does not carry one, so
    # later pages are not requested without it.
    session_id = data.get("session_id") or session_id
    results = data.get("results", [])
    profiles.extend(results)
    # Prefer the page size reported by the server; fall back to 1000 when the
    # response does not include one.
    page_size = data.get("page_size") or 1000
    if len(results) < page_size or len(profiles) >= 5000:
        break
    page += 1
    time.sleep(1)  # brief pause between successive page requests
print(f"Fetched {len(profiles)} profiles")
# Bucket each profile's "$properties" dict into one of four engagement tiers
# based on its total_events / sessions_count values (missing or falsy values
# count as 0).
clusters = {tier: [] for tier in ("power_user", "regular", "casual", "dormant")}
for profile in profiles:
    attrs = profile.get("$properties", {})
    n_events = int(attrs.get("total_events", 0) or 0)
    n_sessions = int(attrs.get("sessions_count", 0) or 0)

    if n_events <= 10:
        # Ten events or fewer can never satisfy any of the higher tiers.
        tier = "dormant"
    elif n_events > 500 and n_sessions > 50:
        tier = "power_user"
    elif n_events > 100 and n_sessions > 10:
        tier = "regular"
    else:
        tier = "casual"

    clusters[tier].append(attrs)
def cluster_profile(users):
    """Summarise a list of user-property dicts.

    Args:
        users: A sized, re-iterable collection of dicts. The keys read are
            "role", "plan", "company_size", "signup_source", "total_events"
            and "sessions_count"; all are optional.

    Returns:
        A dict with:
          - "n": number of users,
          - "avg_events" / "avg_sessions": mean of the int-converted
            "total_events" / "sessions_count" values (missing or falsy
            values count as 0; an empty input yields 0.0),
          - "top_roles", "top_plans", "top_sizes", "top_sources": up to three
            most frequent truthy values of the corresponding field, most
            frequent first, ties kept in first-seen order.
    """
    def most_frequent(field, limit=3):
        # Count truthy values of `field`, then rank them by frequency.
        tally = defaultdict(int)
        for user in users:
            value = user.get(field)
            if value:
                tally[value] += 1
        ranked = list(tally)
        # list.sort is stable, so equal counts stay in first-seen order.
        ranked.sort(key=tally.get, reverse=True)
        return ranked[:limit]

    def mean_of(field):
        # max(..., 1) avoids dividing by zero for an empty cluster.
        total = 0
        for user in users:
            total += int(user.get(field, 0) or 0)
        return total / max(len(users), 1)

    summary = {"n": len(users)}
    summary["avg_events"] = mean_of("total_events")
    summary["avg_sessions"] = mean_of("sessions_count")
    summary["top_roles"] = most_frequent("role")
    summary["top_plans"] = most_frequent("plan")
    summary["top_sizes"] = most_frequent("company_size")
    summary["top_sources"] = most_frequent("signup_source")
    return summary
# Create one Mavera persona per non-empty engagement cluster by POSTing a
# summary of that cluster to {MB}/personas. Each created persona is recorded
# in `created` as {"cluster", "id", "n"}.
created = []
for cluster_name, users in clusters.items():
    if not users:
        continue
    prof = cluster_profile(users)
    label = cluster_name.replace("_", " ").title()

    # Property values are not guaranteed to be strings (e.g. a numeric
    # company size); str.join raises TypeError on non-str items, so convert
    # each value before joining.
    roles_text = ", ".join(map(str, prof["top_roles"]))
    plans_text = ", ".join(map(str, prof["top_plans"]))
    sizes_text = ", ".join(map(str, prof["top_sizes"]))
    sources_text = ", ".join(map(str, prof["top_sources"]))

    response = requests.post(
        f"{MB}/personas",
        headers=MH,
        json={
            "name": f"Mixpanel: {label}",
            "description": (
                f"{label} segment from Mixpanel ({prof['n']} users). "
                f"Avg events: {prof['avg_events']:.0f}, Avg sessions: {prof['avg_sessions']:.0f}. "
                f"Roles: {roles_text}. "
                f"Plans: {plans_text}. "
                f"Company sizes: {sizes_text}. "
                f"Signup sources: {sources_text}."
            ),
            "demographic": {
                "job_titles": prof["top_roles"],
                "company_sizes": prof["top_sizes"],
            },
            "psychographic": {
                "usage_intensity": cluster_name,
                "avg_events": prof["avg_events"],
                "avg_sessions": prof["avg_sessions"],
            },
        },
        # requests has no default timeout; bound the wait so a stalled
        # connection cannot hang the script.
        timeout=30,
    )
    # Fail with a clear HTTPError on a 4xx/5xx response instead of a
    # confusing KeyError / JSON decode error further down.
    response.raise_for_status()
    persona = response.json()

    created.append({"cluster": label, "id": persona["id"], "n": prof["n"]})
    print(f" {label}: {persona['id']} ({prof['n']} users, avg {prof['avg_events']:.0f} events)")
    time.sleep(0.3)  # short pause between persona-creation requests
print(f"\nCreated {len(created)} behavior-based personas")