import os, requests, time
from collections import defaultdict
# Credentials come from the environment; a missing variable raises KeyError
# as soon as the script starts.
LI = os.environ["LINKEDIN_ACCESS_TOKEN"]
MV = os.environ["MAVERA_API_KEY"]

# Base URLs that the request paths in this script are appended to.
LI_BASE = "https://api.linkedin.com/rest"
MV_BASE = "https://app.mavera.io/api/v1"

# Headers sent with every LinkedIn request.
LI_H = {
    "Authorization": f"Bearer {LI}",
    "LinkedIn-Version": "202401",
    "X-Restli-Protocol-Version": "2.0.0",
}

# Headers sent with every Mavera request.
MV_H = {
    "Authorization": f"Bearer {MV}",
    "Content-Type": "application/json",
}

# Interpolated into the "urn:li:sponsoredAccount:<id>" owner URN when
# querying lead form responses.
SPONSORED_ACCOUNT_ID = "508000001"
# 1. Pull lead gen form responses (paginated)
#
# Walks the leadFormResponses collection page by page and accumulates every
# element into `responses`. A page shorter than the requested count is taken
# as the last page.
_PAGE_SIZE = 100             # value sent as "count" and used as the page stride
_MAX_RATE_LIMIT_RETRIES = 5  # consecutive 429s tolerated before giving up
_DEFAULT_BACKOFF_S = 60      # wait used when Retry-After is absent or unusable
_LI_TIMEOUT_S = 30           # without a timeout a stalled connection hangs forever

responses, start = [], 0
rate_limit_hits = 0
while True:
    r = requests.get(
        f"{LI_BASE}/leadFormResponses",
        headers=LI_H,
        params={
            "q": "owner",
            "owner": f"urn:li:sponsoredAccount:{SPONSORED_ACCOUNT_ID}",
            "start": start,
            "count": _PAGE_SIZE,
        },
        timeout=_LI_TIMEOUT_S,
    )
    if r.status_code == 429:
        rate_limit_hits += 1
        if rate_limit_hits > _MAX_RATE_LIMIT_RETRIES:
            # Still throttled after several waits: surface the HTTP error
            # instead of looping forever.
            r.raise_for_status()
        try:
            delay = int(r.headers.get("Retry-After", _DEFAULT_BACKOFF_S))
        except ValueError:
            # Retry-After is not guaranteed to be an integer (the header also
            # permits an HTTP-date form); fall back to the default wait.
            delay = _DEFAULT_BACKOFF_S
        time.sleep(max(delay, 0))  # time.sleep rejects negative values
        continue  # retry the same page; `start` is unchanged
    r.raise_for_status()
    rate_limit_hits = 0  # the cap applies to consecutive 429s only
    data = r.json()
    elements = data.get("elements", [])
    responses.extend(elements)
    if len(elements) < _PAGE_SIZE:
        break
    start += _PAGE_SIZE
    time.sleep(0.5)  # brief pause between pages
# 2. Extract and aggregate fields
#
# Groups the form answers of every response under a
# (title cluster, industry, company size) key.
# NOTE(review): assumes each response holds an "answers" list of dicts with
# "fieldName" / "fieldValue" keys - confirm against the API version in use.
segments = defaultdict(list)
for resp in responses:
    answers = {
        a.get("fieldName", ""): a.get("fieldValue", "")
        # `or []` keeps an explicit null "answers" from breaking the iteration.
        for a in (resp.get("answers") or [])
    }
    # `or "Unknown"` covers missing, null and empty answers alike, so a null
    # value can no longer reach .split() and blank answers do not create a
    # separate segment keyed by "".
    title = answers.get("jobTitle") or "Unknown"
    company_size = answers.get("companySize") or "Unknown"
    industry = answers.get("industry") or "Unknown"
    # Only the text before the first comma is used as the cluster label.
    title_cluster = title.split(",")[0].strip() or "Unknown"
    segments[(title_cluster, industry, company_size)].append(answers)
# 3. Create personas from top segments
#
# Takes the largest segments, posts one persona per segment to the Mavera
# API, records the returned id in `created`, and prints a summary.
SIZE_MAP = {
    "1-10": "Startup", "11-50": "Small Business", "51-200": "Mid-Market",
    "201-500": "Mid-Market", "501-1000": "Enterprise", "1001+": "Enterprise",
}
_MAX_PERSONAS = 8       # only the largest segments are considered
_MIN_LEADS = 2          # segments smaller than this are not turned into personas
_MV_TIMEOUT_S = 30      # without a timeout a stalled connection hangs forever

created = []
ranked = sorted(segments.items(), key=lambda x: -len(x[1]))[:_MAX_PERSONAS]
for (title, industry, size), leads in ranked:
    if len(leads) < _MIN_LEADS:
        # `ranked` is in descending size order, so every later segment is
        # too small as well.
        break
    # Company sizes missing from SIZE_MAP fall back to "Mid-Market".
    tier = SIZE_MAP.get(size, "Mid-Market")
    payload = {
        "name": f"LinkedIn Lead: {title} ({tier})",
        "description": (
            f"Derived from {len(leads)} LinkedIn lead gen responses. "
            f"Role: {title}. Industry: {industry}. Company size: {size} ({tier}). "
            f"These leads actively filled out a form — high intent."
        ),
        "demographic": {
            "job_titles": [title],
            "industries": [industry],
            "company_size": tier,
        },
        "psychographic": {
            "intent_level": "high",
            "source": "linkedin_lead_gen_form",
        },
    }
    persona_resp = requests.post(
        f"{MV_BASE}/personas",
        headers=MV_H,
        json=payload,
        timeout=_MV_TIMEOUT_S,
    )
    # Fail with the HTTP error itself rather than a confusing KeyError on
    # "id" when the API rejects the request.
    persona_resp.raise_for_status()
    p = persona_resp.json()
    # NOTE(review): assumes the response body carries the new persona's "id"
    # at the top level - confirm against the Mavera API.
    created.append({"persona_id": p["id"], "title": title, "industry": industry, "tier": tier, "n": len(leads)})
    time.sleep(0.3)  # brief pause between persona creations

print(f"Created {len(created)} personas from {len(responses)} lead gen responses")
for c in created:
    print(f" {c['title']} | {c['industry']} | {c['tier']} | N={c['n']} | {c['persona_id']}")