#!/usr/bin/env python3
"""
Revenue Forecast Model
======================
Pipeline-based revenue forecasting for B2B SaaS.

Models:
  - Weighted pipeline (stage probability × deal value)
  - Historical win rate adjustment (calibrate to actuals)
  - Scenario analysis (conservative / base / upside)
  - Monthly and quarterly projection with confidence ranges

Usage:
  python revenue_forecast_model.py
  python revenue_forecast_model.py --csv pipeline.csv
  python revenue_forecast_model.py --scenario conservative

Input format (CSV):
  deal_id, name, stage, arr_value, close_date, rep, segment

Stdlib only. No dependencies.
"""

import csv
import sys
import json
import argparse
import statistics
from datetime import date, datetime, timedelta
from collections import defaultdict
from io import StringIO


# ---------------------------------------------------------------------------
# Stage configuration
# ---------------------------------------------------------------------------

# Default probability that a deal in each stage eventually closes won, keyed by
# the normalized stage name stored on Deal (lowercase, underscore-separated).
# Used as-is when there are no closed deals to calibrate against, and as the
# starting table that calculate_historical_win_rates() scales.
DEFAULT_STAGE_PROBABILITIES = {
    "discovery":     0.10,
    "qualification": 0.25,
    "demo":          0.40,
    "proposal":      0.55,
    "poc":           0.65,
    "negotiation":   0.80,
    "verbal_commit": 0.92,
    "closed_won":    1.00,
    "closed_lost":   0.00,
}

# Factor applied to a deal's stage probability under each named scenario.
# The code that applies it caps the product at 1.0, so late-stage deals cannot
# exceed certainty in the "upside" case.
SCENARIO_MULTIPLIERS = {
    "conservative": 0.85,  # Win rate 15% below historical
    "base":         1.00,  # Historical win rate
    "upside":       1.15,  # Win rate 15% above historical
}


# ---------------------------------------------------------------------------
# Data model
# ---------------------------------------------------------------------------

class Deal:
    """A single pipeline opportunity.

    Attributes:
        deal_id: Identifier exactly as supplied by the caller.
        name: Display name of the deal.
        stage: Normalized stage key: lowercase, with whitespace and "/"
            collapsed to single underscores ("Verbal Commit" -> "verbal_commit").
        arr_value: Deal value as a finite float.
        close_date: Expected close date as a ``datetime.date``.
        rep: Deal owner (free text).
        segment: Market segment (free text).

    Raises:
        ValueError: If ``arr_value`` is not a finite number or ``close_date``
            cannot be parsed.
    """

    # Tried in order; the first format that parses wins. Because the US form
    # (%m/%d/%Y) precedes the European form (%d/%m/%Y), an ambiguous value such
    # as "03/04/2026" is read as March 4th. ISO dates avoid the ambiguity.
    _DATE_FORMATS = ("%Y-%m-%d", "%m/%d/%Y", "%d/%m/%Y", "%Y/%m/%d")

    def __init__(self, deal_id, name, stage, arr_value, close_date, rep="", segment=""):
        self.deal_id = deal_id
        self.name = name
        self.stage = self._normalize_stage(stage)
        self.arr_value = self._parse_amount(arr_value)
        self.close_date = self._parse_date(close_date)
        self.rep = rep
        self.segment = segment

    def __repr__(self):
        return (f"Deal(deal_id={self.deal_id!r}, stage={self.stage!r}, "
                f"arr_value={self.arr_value!r}, close_date={self.close_date.isoformat()!r})")

    @staticmethod
    def _normalize_stage(stage):
        """Return the lookup key for a raw stage label.

        Surrounding whitespace is dropped and runs of whitespace or "/" become
        a single "_", so " Negotiation " and "closed / won" map to the keys used
        in the stage-probability tables instead of silently scoring 0%.
        A missing stage (None) becomes the empty string.
        """
        text = "" if stage is None else str(stage)
        return "_".join(text.lower().replace("/", " ").split())

    @staticmethod
    def _parse_amount(arr_value):
        """Convert ``arr_value`` to a finite float or raise ValueError."""
        try:
            amount = float(arr_value)
        except TypeError as err:
            # float(None) raises TypeError; surface it as the same exception
            # type a malformed string produces so callers handle one case.
            raise ValueError(f"Invalid ARR value: {arr_value!r}") from err
        # NaN is the only float that differs from itself; NaN/inf would
        # contaminate every pipeline total they are summed into.
        if amount != amount or amount in (float("inf"), float("-inf")):
            raise ValueError(f"ARR value must be finite: {arr_value!r}")
        return amount

    @classmethod
    def _parse_date(cls, value):
        """Parse ``value`` into a ``date``.

        Accepts ``date``/``datetime`` objects directly, or a string in one of
        ``_DATE_FORMATS`` (surrounding whitespace is ignored).

        Raises:
            ValueError: If no supported format matches.
        """
        # datetime is a subclass of date, so it must be checked first.
        if isinstance(value, datetime):
            return value.date()
        if isinstance(value, date):
            return value
        text = str(value).strip()
        for fmt in cls._DATE_FORMATS:
            try:
                return datetime.strptime(text, fmt).date()
            except ValueError:
                continue
        raise ValueError(f"Cannot parse date: {value!r}")

    @property
    def quarter(self):
        """Calendar quarter of the close date, formatted like "Q1 2026"."""
        q = (self.close_date.month - 1) // 3 + 1
        return f"Q{q} {self.close_date.year}"

    @property
    def month_key(self):
        """Close month as "YYYY-MM" (sorts chronologically as a string)."""
        return self.close_date.strftime("%Y-%m")

    def weighted_value(self, stage_probs, scenario="base"):
        """Return ARR weighted by the stage probability under ``scenario``.

        Stages missing from ``stage_probs`` count as 0%; unknown scenarios use
        a multiplier of 1.0. The adjusted probability is clamped to [0, 1].
        """
        prob = stage_probs.get(self.stage, 0.0)
        multiplier = SCENARIO_MULTIPLIERS.get(scenario, 1.0)
        adjusted = min(1.0, max(0.0, prob * multiplier))
        return self.arr_value * adjusted

    def is_open(self):
        """True unless the deal is in a terminal (closed won/lost) stage."""
        return self.stage not in ("closed_won", "closed_lost")

    def is_closed_won(self):
        """True when the deal is in the closed-won stage."""
        return self.stage == "closed_won"


# ---------------------------------------------------------------------------
# Win rate calibration
# ---------------------------------------------------------------------------

def calculate_historical_win_rates(deals, baseline_win_rate=0.25, default_probs=None):
    """
    Calibrate stage probabilities against the observed overall win rate.

    Point-in-time stage history is not available here, so this approximates:
    the fraction of closed deals that were won is compared with
    ``baseline_win_rate`` and every open-stage default probability is scaled
    by that ratio (capped at 1.0).

    Args:
        deals: Iterable of objects exposing ``is_open()`` and ``is_closed_won()``.
        baseline_win_rate: Overall win rate the default table is assumed to
            correspond to. Must be positive. Defaults to 0.25.
        default_probs: Stage → probability table to calibrate. Defaults to
            ``DEFAULT_STAGE_PROBABILITIES``.

    Returns:
        dict: stage → calibrated win rate. Empty when there are no closed
        deals. When closed deals exist but none were won, the defaults are
        returned unscaled rather than zeroing out the whole forecast.

    Raises:
        ValueError: If ``baseline_win_rate`` is not positive.
    """
    if baseline_win_rate <= 0:
        raise ValueError("baseline_win_rate must be positive")

    closed = [d for d in deals if not d.is_open()]
    if not closed:
        return {}

    if default_probs is None:
        default_probs = DEFAULT_STAGE_PROBABILITIES

    won_count = sum(1 for d in closed if d.is_closed_won())
    overall_rate = won_count / len(closed)
    if overall_rate <= 0:
        return dict(default_probs)

    scale = overall_rate / baseline_win_rate
    calibrated = {}
    for stage, default_prob in default_probs.items():
        if stage in ("closed_won", "closed_lost"):
            # Terminal stages are outcomes, not estimates: a won deal stays at
            # 100% and a lost deal at 0% whatever the historical rate is.
            calibrated[stage] = default_prob
        else:
            calibrated[stage] = min(1.0, default_prob * scale)
    return calibrated


# ---------------------------------------------------------------------------
# Forecast engine
# ---------------------------------------------------------------------------

class ForecastEngine:
    """Aggregates a list of deals into weighted forecasts and pipeline metrics.

    Args:
        deals: Deal-like objects (open and closed).
        stage_probs: Stage → win probability table. A missing or empty table
            falls back to ``DEFAULT_STAGE_PROBABILITIES``.
    """

    def __init__(self, deals, stage_probs=None):
        self.deals = deals
        self.stage_probs = stage_probs or DEFAULT_STAGE_PROBABILITIES

    @staticmethod
    def _quarter_sort_key(label):
        """Sort key that orders "Q<n> <year>" labels chronologically.

        Plain string sorting would put "Q1 2027" before "Q4 2026". Labels that
        do not match the expected shape sort last, alphabetically.
        """
        try:
            quarter_part, year_part = str(label).split()
            return (0, int(year_part), int(quarter_part.lstrip("Qq")), "")
        except ValueError:
            return (1, 0, 0, str(label))

    def open_deals(self):
        """Deals that are not in a closed stage."""
        return [d for d in self.deals if d.is_open()]

    def closed_won_deals(self):
        """Deals in the closed-won stage."""
        return [d for d in self.deals if d.is_closed_won()]

    def pipeline_by_month(self, scenario="base"):
        """Returns dict: month_key → weighted ARR, in chronological order."""
        result = defaultdict(float)
        for deal in self.open_deals():
            result[deal.month_key] += deal.weighted_value(self.stage_probs, scenario)
        return dict(sorted(result.items()))

    def pipeline_by_quarter(self, scenario="base"):
        """Returns dict: quarter → weighted ARR, in chronological order."""
        result = defaultdict(float)
        for deal in self.open_deals():
            result[deal.quarter] += deal.weighted_value(self.stage_probs, scenario)
        ordered = sorted(result.items(), key=lambda item: self._quarter_sort_key(item[0]))
        return dict(ordered)

    def coverage_ratio(self, quota, period_filter=None):
        """
        Pipeline coverage = total open (unweighted) pipeline ÷ quota.
        period_filter: if set, only include deals whose quarter label equals it.
        Returns 0.0 when quota is zero or missing.
        """
        pipeline = sum(
            d.arr_value for d in self.open_deals()
            if period_filter is None or d.quarter == period_filter
        )
        return pipeline / quota if quota else 0.0

    def scenario_summary(self, periods=None):
        """
        Returns dict: period → {deal_count, open_pipeline, conservative, base, upside}.
        periods: list of month_keys to include; if None (or empty), all months
        with open deals, in chronological order. Months without open deals are
        omitted.
        """
        # Group once instead of rescanning every deal for every month.
        by_month = defaultdict(list)
        for deal in self.open_deals():
            by_month[deal.month_key].append(deal)
        target_months = periods or sorted(by_month)

        summaries = {}
        for month in target_months:
            deals_in_month = by_month.get(month)
            if not deals_in_month:
                continue
            summaries[month] = {
                "deal_count":    len(deals_in_month),
                "open_pipeline": sum(d.arr_value for d in deals_in_month),
                "conservative":  sum(d.weighted_value(self.stage_probs, "conservative") for d in deals_in_month),
                "base":          sum(d.weighted_value(self.stage_probs, "base") for d in deals_in_month),
                "upside":        sum(d.weighted_value(self.stage_probs, "upside") for d in deals_in_month),
            }
        return summaries

    def rep_performance(self):
        """Returns dict: rep → {pipeline, weighted_base, deal_count, avg_deal_size}."""
        values_by_rep = defaultdict(list)
        weighted_by_rep = defaultdict(float)
        for deal in self.open_deals():
            values_by_rep[deal.rep].append(deal.arr_value)
            weighted_by_rep[deal.rep] += deal.weighted_value(self.stage_probs, "base")

        return {
            rep: {
                "pipeline":      sum(values, 0.0),
                "weighted_base": weighted_by_rep[rep],
                "deal_count":    len(values),
                "avg_deal_size": statistics.mean(values) if values else 0.0,
            }
            for rep, values in values_by_rep.items()
        }

    def segment_breakdown(self, scenario="base"):
        """Returns dict: segment → weighted ARR (blank segments → "unspecified")."""
        result = defaultdict(float)
        for deal in self.open_deals():
            result[deal.segment or "unspecified"] += deal.weighted_value(self.stage_probs, scenario)
        return dict(result)

    def stage_distribution(self):
        """Returns dict: stage → {count, total_arr, avg_arr, probability}."""
        result = defaultdict(lambda: {"count": 0, "total_arr": 0.0})
        for deal in self.open_deals():
            result[deal.stage]["count"] += 1
            result[deal.stage]["total_arr"] += deal.arr_value
        out = {}
        for stage, data in result.items():
            out[stage] = {
                "count":     data["count"],
                "total_arr": data["total_arr"],
                "avg_arr":   data["total_arr"] / data["count"] if data["count"] else 0,
                "probability": self.stage_probs.get(stage, 0.0),
            }
        return out

    def confidence_interval(self, scenario="base", iterations=1000):
        """
        Monte Carlo simulation to generate confidence interval around the forecast.
        Each open deal independently wins with its scenario-adjusted stage
        probability; the total won ARR is recorded for each of ``iterations`` runs.

        The simulation uses its own fixed-seed generator, so results are
        reproducible and the process-wide ``random`` state is left untouched.
        Unknown scenarios use a multiplier of 1.0, matching weighted forecasts.

        Returns (p10, p50, p90) of total simulated ARR.

        Raises:
            ValueError: If ``iterations`` is less than 1.
        """
        import random

        if iterations < 1:
            raise ValueError("iterations must be at least 1")

        rng = random.Random(42)
        multiplier = SCENARIO_MULTIPLIERS.get(scenario, 1.0)
        # Win probability and value per deal do not change between runs.
        outcomes = [
            (min(1.0, max(0.0, self.stage_probs.get(deal.stage, 0.0) * multiplier)),
             deal.arr_value)
            for deal in self.open_deals()
        ]

        totals = sorted(
            sum((value for prob, value in outcomes if rng.random() < prob), 0.0)
            for _ in range(iterations)
        )
        n = len(totals)
        return (
            totals[int(n * 0.10)],  # P10 (conservative)
            totals[int(n * 0.50)],  # P50 (median)
            totals[int(n * 0.90)],  # P90 (upside)
        )


# ---------------------------------------------------------------------------
# Reporting
# ---------------------------------------------------------------------------

def fmt_currency(value):
    """Format a dollar amount compactly: "$850", "$42.0K", "$1.25M".

    Negative amounts keep their scale and carry a leading minus sign
    ("-$5.0K"). The unit is chosen from the rounded figure, so an amount that
    rounds up to 1000 of a unit is promoted to the next one ("$1.00M" rather
    than "$1000.0K").
    """
    magnitude = abs(value)
    if magnitude >= 1_000_000 or f"{magnitude / 1_000:.1f}" == "1000.0":
        body = f"{magnitude / 1_000_000:.2f}M"
    elif magnitude >= 1_000 or f"{magnitude:.0f}" == "1000":
        body = f"{magnitude / 1_000:.1f}K"
    else:
        body = f"{magnitude:.0f}"
    # Do not show a sign on something that rounds to zero ("-$0").
    sign = "-" if value < 0 and body != "0" else ""
    return f"{sign}${body}"


def fmt_pct(value):
    """Render a fraction (e.g. 0.256) as a percentage with one decimal ("25.6%")."""
    percent = value * 100
    return format(percent, ".1f") + "%"


def print_header(title):
    """Print a blank line, then *title* framed above and below by a 70-char "=" rule."""
    rule = "=" * 70
    print("", rule, f"  {title}", rule, sep="\n")


def print_section(title):
    """Print a section divider: a blank line followed by "--- title ---"."""
    print("\n--- {} ---".format(title))


def print_report(engine, quota=None, current_quarter=None):
    """Print the full formatted forecast report to stdout.

    Sections, in order: summary, pipeline coverage (only when ``quota`` is
    set), stage distribution, monthly and quarterly forecasts for all three
    scenarios, Monte Carlo confidence interval, rep performance, segment
    breakdown, and forecast health flags.

    Args:
        engine: ForecastEngine holding the deals and stage probabilities.
        quota: Quota used for coverage; coverage output is skipped when falsy.
        current_quarter: Optional quarter label (e.g. "Q2 2026"). When given,
            both the coverage section and the coverage health flag consider
            only deals closing in that quarter.
    """

    def quarter_order(label):
        # "Q1 2027" must follow "Q4 2026"; plain string sorting gets that
        # wrong. Labels that do not look like "Q<n> <year>" sort last.
        try:
            quarter_part, year_part = str(label).split()
            return (0, int(year_part), int(quarter_part.lstrip("Qq")), "")
        except ValueError:
            return (1, 0, 0, str(label))

    open_deals = engine.open_deals()
    won_deals = engine.closed_won_deals()

    print_header("REVENUE FORECAST MODEL")
    print(f"  Generated: {date.today().isoformat()}")
    print(f"  Open deals: {len(open_deals)}")
    print(f"  Closed Won (in dataset): {len(won_deals)}")
    total_pipeline = sum(d.arr_value for d in open_deals)
    total_won = sum(d.arr_value for d in won_deals)
    print(f"  Total open pipeline: {fmt_currency(total_pipeline)}")
    print(f"  Total closed won:    {fmt_currency(total_won)}")

    # ── Coverage ratio (computed once so the health flags below agree with it)
    coverage = engine.coverage_ratio(quota, period_filter=current_quarter) if quota else None
    if coverage is not None:
        print_section("PIPELINE COVERAGE")
        status = "✅ Healthy" if coverage >= 3.0 else ("⚠️  Thin" if coverage >= 2.0 else "🔴 Critical")
        print(f"  Quota target:    {fmt_currency(quota)}")
        print(f"  Coverage ratio:  {coverage:.1f}x  {status}")
        print("  (Minimum healthy = 3x; < 2x = pipeline emergency)")

    # ── Stage distribution
    print_section("STAGE DISTRIBUTION")
    stage_dist = engine.stage_distribution()
    col_w = [28, 8, 14, 12, 10]
    header = f"  {'Stage':<{col_w[0]}} {'Deals':>{col_w[1]}} {'Pipeline':>{col_w[2]}} {'Avg Size':>{col_w[3]}} {'Win Prob':>{col_w[4]}}"
    print(header)
    print("  " + "-" * (sum(col_w) + 4))
    for stage, data in sorted(stage_dist.items(), key=lambda x: -x[1]["total_arr"]):
        print(f"  {stage:<{col_w[0]}} {data['count']:>{col_w[1]}} "
              f"{fmt_currency(data['total_arr']):>{col_w[2]}} "
              f"{fmt_currency(data['avg_arr']):>{col_w[3]}} "
              f"{fmt_pct(data['probability']):>{col_w[4]}}")

    # ── Scenario forecast by month
    print_section("MONTHLY FORECAST — ALL SCENARIOS")
    summaries = engine.scenario_summary()
    col_w2 = [10, 8, 14, 14, 14, 14]
    h2 = (f"  {'Month':<{col_w2[0]}} {'Deals':>{col_w2[1]}} "
          f"{'Pipeline':>{col_w2[2]}} {'Conservative':>{col_w2[3]}} "
          f"{'Base':>{col_w2[4]}} {'Upside':>{col_w2[5]}}")
    print(h2)
    print("  " + "-" * (sum(col_w2) + 5))
    for month, data in summaries.items():
        print(f"  {month:<{col_w2[0]}} {data['deal_count']:>{col_w2[1]}} "
              f"{fmt_currency(data['open_pipeline']):>{col_w2[2]}} "
              f"{fmt_currency(data['conservative']):>{col_w2[3]}} "
              f"{fmt_currency(data['base']):>{col_w2[4]}} "
              f"{fmt_currency(data['upside']):>{col_w2[5]}}")

    # ── Quarterly rollup
    print_section("QUARTERLY FORECAST ROLLUP")
    q_conservative = defaultdict(float)
    q_base = defaultdict(float)
    q_upside = defaultdict(float)
    q_pipeline = defaultdict(float)
    q_count = defaultdict(int)
    for deal in open_deals:
        q_conservative[deal.quarter] += deal.weighted_value(engine.stage_probs, "conservative")
        q_base[deal.quarter] += deal.weighted_value(engine.stage_probs, "base")
        q_upside[deal.quarter] += deal.weighted_value(engine.stage_probs, "upside")
        q_pipeline[deal.quarter] += deal.arr_value
        q_count[deal.quarter] += 1

    quarters = sorted(q_base, key=quarter_order)
    col_w3 = [10, 8, 14, 14, 14, 14]
    h3 = (f"  {'Quarter':<{col_w3[0]}} {'Deals':>{col_w3[1]}} "
          f"{'Pipeline':>{col_w3[2]}} {'Conservative':>{col_w3[3]}} "
          f"{'Base':>{col_w3[4]}} {'Upside':>{col_w3[5]}}")
    print(h3)
    print("  " + "-" * (sum(col_w3) + 5))
    for quarter in quarters:
        print(f"  {quarter:<{col_w3[0]}} {q_count[quarter]:>{col_w3[1]}} "
              f"{fmt_currency(q_pipeline[quarter]):>{col_w3[2]}} "
              f"{fmt_currency(q_conservative[quarter]):>{col_w3[3]}} "
              f"{fmt_currency(q_base[quarter]):>{col_w3[4]}} "
              f"{fmt_currency(q_upside[quarter]):>{col_w3[5]}}")

    # ── Monte Carlo confidence interval
    print_section("CONFIDENCE INTERVAL (Monte Carlo, 1,000 simulations)")
    p10, p50, p90 = engine.confidence_interval("base")
    print(f"  P10 (conservative floor): {fmt_currency(p10)}")
    print(f"  P50 (median expected):    {fmt_currency(p50)}")
    print(f"  P90 (upside ceiling):     {fmt_currency(p90)}")
    print(f"  Range spread: {fmt_currency(p90 - p10)}")

    # ── Rep performance
    print_section("REP PIPELINE PERFORMANCE")
    rep_perf = engine.rep_performance()
    if rep_perf:
        col_w4 = [20, 8, 14, 14, 12]
        h4 = (f"  {'Rep':<{col_w4[0]}} {'Deals':>{col_w4[1]}} "
              f"{'Pipeline':>{col_w4[2]}} {'Weighted':>{col_w4[3]}} {'Avg Size':>{col_w4[4]}}")
        print(h4)
        print("  " + "-" * (sum(col_w4) + 4))
        for rep, data in sorted(rep_perf.items(), key=lambda x: -x[1]["pipeline"]):
            print(f"  {rep:<{col_w4[0]}} {data['deal_count']:>{col_w4[1]}} "
                  f"{fmt_currency(data['pipeline']):>{col_w4[2]}} "
                  f"{fmt_currency(data['weighted_base']):>{col_w4[3]}} "
                  f"{fmt_currency(data['avg_deal_size']):>{col_w4[4]}}")

    # ── Segment breakdown (bar length is the weighted value's share of the
    # unweighted open pipeline, scaled to 30 characters)
    print_section("SEGMENT BREAKDOWN (Base Forecast)")
    seg = engine.segment_breakdown("base")
    for segment, value in sorted(seg.items(), key=lambda x: -x[1]):
        bar_len = int((value / total_pipeline) * 30) if total_pipeline else 0
        bar = "█" * bar_len
        print(f"  {segment:<20} {fmt_currency(value):>12}  {bar}")

    # ── Red flags
    print_section("FORECAST HEALTH FLAGS")
    flags = []
    # Uses the same (optionally quarter-filtered) ratio as the coverage section
    # so the two cannot disagree; "is not None" keeps a 0.0x ratio flagged.
    if coverage is not None:
        if coverage < 2.0:
            flags.append("🔴 Pipeline coverage below 2x — serious shortfall risk this quarter")
        elif coverage < 3.0:
            flags.append("⚠️  Pipeline coverage below 3x — limited buffer for slippage")

    if total_pipeline > 0:
        # Stage concentration risk
        early_stage_pct = sum(
            d.arr_value for d in open_deals
            if engine.stage_probs.get(d.stage, 0) < 0.30
        ) / total_pipeline
        if early_stage_pct > 0.60:
            flags.append(f"⚠️  {fmt_pct(early_stage_pct)} of pipeline in early stages (< 30% probability)")

        # Deal concentration
        largest_deal = max((d.arr_value for d in open_deals), default=0.0)
        if largest_deal / total_pipeline > 0.25:
            flags.append(f"⚠️  Top deal is {fmt_pct(largest_deal/total_pipeline)} of pipeline — concentration risk")

        # Spread between scenarios
        total_conservative = sum(d.weighted_value(engine.stage_probs, "conservative") for d in open_deals)
        total_upside = sum(d.weighted_value(engine.stage_probs, "upside") for d in open_deals)
        spread = (total_upside - total_conservative) / total_conservative if total_conservative else 0
        if spread > 0.40:
            flags.append(f"⚠️  High scenario spread ({fmt_pct(spread)}) — forecast confidence is low")

    if flags:
        for flag in flags:
            print(f"  {flag}")
    else:
        print("  ✅ No critical flags detected")

    print()


# ---------------------------------------------------------------------------
# Sample data
# ---------------------------------------------------------------------------

# Built-in demo pipeline used by main() when no --csv file is supplied.
# The first line is the header row expected by load_deals_from_csv(); the data
# holds 16 open deals across several stages plus one closed-won and one
# closed-lost deal, so win-rate calibration has something to work with.
SAMPLE_CSV = """deal_id,name,stage,arr_value,close_date,rep,segment
D001,Acme Corp ERP Integration,negotiation,85000,2026-03-15,Sarah Chen,Enterprise
D002,TechStart PLG Expansion,proposal,28000,2026-03-28,Marcus Webb,Mid-Market
D003,Global Retail Co,verbal_commit,220000,2026-03-10,Sarah Chen,Enterprise
D004,BioLab Analytics,poc,62000,2026-04-05,Jamie Park,Mid-Market
D005,FinServ Holdings,demo,150000,2026-04-20,Sarah Chen,Enterprise
D006,MidWest Logistics,qualification,35000,2026-04-30,Marcus Webb,Mid-Market
D007,Edu Platform Inc,negotiation,42000,2026-03-25,Jamie Park,SMB
D008,Healthcare Connect,proposal,95000,2026-05-15,Sarah Chen,Enterprise
D009,Startup Hub Network,demo,18000,2026-04-10,Marcus Webb,SMB
D010,CloudOps Systems,poc,75000,2026-05-01,Jamie Park,Mid-Market
D011,National Bank Corp,verbal_commit,310000,2026-03-31,Sarah Chen,Enterprise
D012,RetailTech Co,qualification,22000,2026-05-20,Marcus Webb,SMB
D013,InsurTech Platform,negotiation,88000,2026-04-15,Jamie Park,Mid-Market
D014,GovTech Solutions,proposal,175000,2026-06-01,Sarah Chen,Enterprise
D015,AgriData Systems,demo,31000,2026-05-10,Marcus Webb,Mid-Market
D016,Legal AI Corp,poc,55000,2026-04-25,Jamie Park,Mid-Market
D017,Closed Won Deal,closed_won,120000,2026-02-15,Sarah Chen,Enterprise
D018,Lost Deal,closed_lost,45000,2026-02-20,Marcus Webb,Mid-Market
"""


# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------

def load_deals_from_csv(csv_text):
    """Parse CSV text into a list of Deal objects.

    Expected columns: deal_id, name, stage, arr_value, close_date, rep, segment.
    Header names are matched case-insensitively and may carry surrounding
    spaces or a leading byte-order mark; a space after each delimiter is
    tolerated. Field values are stripped of surrounding whitespace.

    Rows that cannot be converted (bad number, unparseable date, missing
    amount on a short row) are skipped and reported on stderr, so that stdout
    stays clean for machine-readable output.

    Args:
        csv_text: Full CSV content, including the header row.

    Returns:
        list: Deals built from the valid rows (possibly empty).
    """
    reader = csv.DictReader(StringIO(csv_text), skipinitialspace=True)
    if reader.fieldnames:
        reader.fieldnames = [
            (header or "").lstrip("\ufeff").strip().lower()
            for header in reader.fieldnames
        ]

    def text_field(row, key):
        # Short rows yield None for the missing trailing columns.
        value = row.get(key)
        return value.strip() if isinstance(value, str) else ""

    deals = []
    errors = []
    for row in reader:
        # Physical line number of the row just read; stays accurate when the
        # file contains blank lines, unlike a running counter.
        line_no = reader.line_num
        try:
            raw_amount = row.get("arr_value", 0)
            if raw_amount is None:
                raise ValueError("missing arr_value")
            if isinstance(raw_amount, str):
                raw_amount = raw_amount.strip()
            deal = Deal(
                deal_id=text_field(row, "deal_id") or f"row_{line_no}",
                name=text_field(row, "name"),
                stage=text_field(row, "stage"),
                arr_value=raw_amount,
                close_date=text_field(row, "close_date"),
                rep=text_field(row, "rep"),
                segment=text_field(row, "segment"),
            )
        except (ValueError, KeyError, TypeError) as e:
            errors.append(f"  Row {line_no}: {e}")
        else:
            deals.append(deal)
    if errors:
        print("⚠️  Skipped rows with errors:", file=sys.stderr)
        for err in errors:
            print(err, file=sys.stderr)
    return deals


def main(argv=None):
    """Command-line entry point.

    Loads pipeline data (a CSV file or the built-in sample), calibrates stage
    probabilities from closed deals, and prints either the formatted report or
    a JSON document. Notices and errors go to stderr so that ``--json`` output
    on stdout is always valid JSON.

    The formatted report always shows all three scenarios side by side;
    ``--scenario`` selects which one drives the scenario-specific JSON fields.

    Args:
        argv: Argument list to parse; defaults to ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(
        description="Revenue Forecast Model — pipeline-based ARR forecasting"
    )
    parser.add_argument(
        "--csv", metavar="FILE",
        help="CSV file with pipeline data (uses sample data if not provided)"
    )
    parser.add_argument(
        "--quota", type=float, default=1_000_000,
        help="Quarterly quota target in ARR (default: $1,000,000)"
    )
    parser.add_argument(
        "--quarter", metavar="QUARTER",
        help='Current quarter filter e.g. "Q2 2026" (optional)'
    )
    parser.add_argument(
        "--scenario", choices=["conservative", "base", "upside"],
        default="base",
        help="Primary scenario to report (default: base)"
    )
    parser.add_argument(
        "--json", action="store_true",
        help="Output forecast as JSON instead of formatted report"
    )
    args = parser.parse_args(argv)

    # Load data
    if args.csv:
        try:
            # utf-8-sig also reads plain UTF-8 and drops the byte-order mark
            # that spreadsheet exports commonly prepend to the header row.
            with open(args.csv, "r", encoding="utf-8-sig") as f:
                csv_text = f.read()
        except FileNotFoundError:
            print(f"Error: File not found: {args.csv}", file=sys.stderr)
            sys.exit(1)
        except (OSError, UnicodeDecodeError) as err:
            print(f"Error: Cannot read {args.csv}: {err}", file=sys.stderr)
            sys.exit(1)
    else:
        print("No --csv provided. Using sample pipeline data.\n", file=sys.stderr)
        csv_text = SAMPLE_CSV

    deals = load_deals_from_csv(csv_text)
    if not deals:
        print("No deals loaded. Exiting.", file=sys.stderr)
        sys.exit(1)

    # Calibrate win rates from closed deals
    historical_probs = calculate_historical_win_rates(deals)
    stage_probs = historical_probs if historical_probs else DEFAULT_STAGE_PROBABILITIES

    engine = ForecastEngine(deals, stage_probs=stage_probs)

    if args.json:
        scenario = args.scenario
        output = {
            "generated": date.today().isoformat(),
            "quota": args.quota,
            "scenario": scenario,
            "open_pipeline": sum(d.arr_value for d in engine.open_deals()),
            "coverage_ratio": engine.coverage_ratio(args.quota, args.quarter),
            "monthly_forecast": engine.scenario_summary(),
            "quarterly_base": engine.pipeline_by_quarter("base"),
            # Same rollup for the scenario chosen on the command line.
            "quarterly_forecast": engine.pipeline_by_quarter(scenario),
            "confidence_interval": dict(zip(
                ["p10", "p50", "p90"],
                engine.confidence_interval(scenario)
            )),
            "rep_performance": engine.rep_performance(),
            "segment_breakdown": engine.segment_breakdown(scenario),
        }
        print(json.dumps(output, indent=2))
    else:
        print_report(engine, quota=args.quota, current_quarter=args.quarter)


if __name__ == "__main__":
    main()