#!/usr/bin/env python3
"""
DPIA Generator

Generates Data Protection Impact Assessment documentation based on
processing activity inputs. Creates structured DPIA reports following
GDPR Article 35 requirements.

Usage:
    python dpia_generator.py --interactive
    python dpia_generator.py --input processing_activity.json --output dpia_report.md
    python dpia_generator.py --template > template.json
"""

import argparse
import json
import sys
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional


# DPIA threshold criteria (Art. 35(3) and WP29 Guidelines)
DPIA_TRIGGERS = {
    "systematic_monitoring": {
        "description": "Systematic monitoring of publicly accessible area",
        "article": "Art. 35(3)(c)",
        "weight": 10
    },
    "large_scale_special_category": {
        "description": "Large-scale processing of special category data (Art. 9)",
        "article": "Art. 35(3)(b)",
        "weight": 10
    },
    "automated_decision_making": {
        "description": "Automated decision-making with legal/significant effects",
        "article": "Art. 35(3)(a)",
        "weight": 10
    },
    "evaluation_scoring": {
        "description": "Evaluation or scoring of individuals",
        "article": "WP29 Guidelines",
        "weight": 7
    },
    "sensitive_data": {
        "description": "Processing of sensitive data or highly personal data",
        "article": "WP29 Guidelines",
        "weight": 7
    },
    "large_scale": {
        "description": "Data processed on a large scale",
        "article": "WP29 Guidelines",
        "weight": 6
    },
    "data_matching": {
        "description": "Matching or combining datasets",
        "article": "WP29 Guidelines",
        "weight": 5
    },
    "vulnerable_subjects": {
        "description": "Data concerning vulnerable data subjects",
        "article": "WP29 Guidelines",
        "weight": 7
    },
    "innovative_technology": {
        "description": "Innovative use or applying new technological solutions",
        "article": "WP29 Guidelines",
        "weight": 5
    },
    "cross_border_transfer": {
        "description": "Transfer of data outside the EU/EEA",
        "article": "GDPR Chapter V",
        "weight": 5
    }
}

# Risk categories and mitigation measures
RISK_CATEGORIES = {
    "unauthorized_access": {
        "description": "Risk of unauthorized access to personal data",
        "impact": "high",
        "mitigations": [
            "Implement access controls and authentication",
            "Use encryption for data at rest and in transit",
            "Maintain audit logs of access",
            "Implement least privilege principle"
        ]
    },
    "data_breach": {
        "description": "Risk of data breach or unauthorized disclosure",
        "impact": "high",
        "mitigations": [
            "Implement intrusion detection systems",
            "Establish incident response procedures",
            "Regular security assessments",
            "Employee security training"
        ]
    },
    "excessive_collection": {
        "description": "Risk of collecting more data than necessary",
        "impact": "medium",
        "mitigations": [
            "Implement data minimization principles",
            "Regular review of data collected",
            "Privacy by design approach",
            "Document purpose for each data element"
        ]
    },
    "purpose_creep": {
        "description": "Risk of using data for purposes beyond original scope",
        "impact": "medium",
        "mitigations": [
            "Clear purpose limitation policies",
            "Consent management for new purposes",
            "Technical controls on data access",
            "Regular purpose review"
        ]
    },
    "retention_violation": {
        "description": "Risk of retaining data longer than necessary",
        "impact": "medium",
        "mitigations": [
            "Implement retention schedules",
            "Automated deletion processes",
            "Regular data inventory audits",
            "Document retention justification"
        ]
    },
    "rights_violation": {
        "description": "Risk of failing to fulfill data subject rights",
        "impact": "high",
        "mitigations": [
            "Implement subject access request process",
            "Technical capability for data portability",
            "Deletion/erasure procedures",
            "Staff training on rights requests"
        ]
    },
    "inaccurate_data": {
        "description": "Risk of processing inaccurate or outdated data",
        "impact": "medium",
        "mitigations": [
            "Data quality checks at collection",
            "Regular data verification",
            "Easy update mechanisms for subjects",
            "Automated accuracy validation"
        ]
    },
    "third_party_risk": {
        "description": "Risk from third-party processors",
        "impact": "high",
        "mitigations": [
            "Due diligence on processors",
            "Data Processing Agreements",
            "Regular processor audits",
            "Clear processor instructions"
        ]
    }
}

# Legal bases under Article 6
LEGAL_BASES = {
    "consent": {
        "article": "Art. 6(1)(a)",
        "description": "Data subject has given consent",
        "requirements": [
            "Consent must be freely given",
            "Specific to the purpose",
            "Informed consent with clear information",
            "Unambiguous indication of wishes",
            "Easy to withdraw"
        ]
    },
    "contract": {
        "article": "Art. 6(1)(b)",
        "description": "Processing necessary for contract performance",
        "requirements": [
            "Contract must exist or be in negotiation",
            "Processing must be necessary for the contract",
            "Cannot process more than contractually needed"
        ]
    },
    "legal_obligation": {
        "article": "Art. 6(1)(c)",
        "description": "Processing necessary for legal obligation",
        "requirements": [
            "Legal obligation must be binding",
            "Must be EU or Member State law",
            "Processing must be necessary to comply"
        ]
    },
    "vital_interests": {
        "article": "Art. 6(1)(d)",
        "description": "Processing necessary to protect vital interests",
        "requirements": [
            "Life-threatening situation",
            "No other legal basis available",
            "Typically emergency situations"
        ]
    },
    "public_interest": {
        "article": "Art. 6(1)(e)",
        "description": "Processing necessary for public interest task",
        "requirements": [
            "Task in public interest or official authority",
            "Legal basis in EU or Member State law",
            "Processing must be necessary"
        ]
    },
    "legitimate_interests": {
        "article": "Art. 6(1)(f)",
        "description": "Processing necessary for legitimate interests",
        "requirements": [
            "Identify the legitimate interest",
            "Show processing is necessary",
            "Balance against data subject rights",
            "Not available for public authorities"
        ]
    }
}


def get_template() -> Dict:
    """Return a blank DPIA input template."""
    return {
        "project_name": "",
        "version": "1.0",
        "date": datetime.now().strftime("%Y-%m-%d"),
        "controller": {
            "name": "",
            "contact": "",
            "dpo_contact": ""
        },
        "processing_activity": {
            "description": "",
            "purposes": [],
            "legal_basis": "",
            "legal_basis_justification": ""
        },
        "data_subjects": {
            "categories": [],
            "estimated_number": "",
            "vulnerable_groups": False,
            "vulnerable_groups_details": ""
        },
        "personal_data": {
            "categories": [],
            "special_categories": [],
            "source": "",
            "retention_period": ""
        },
        "processing_operations": {
            "collection_method": "",
            "storage_location": "",
            "access_controls": "",
            "automated_decisions": False,
            "profiling": False
        },
        "data_recipients": {
            "internal": [],
            "external_processors": [],
            "third_countries": []
        },
        "dpia_triggers": [],
        "identified_risks": [],
        "mitigations_planned": []
    }


def assess_dpia_requirement(input_data: Dict) -> Dict:
    """Assess whether DPIA is required based on triggers."""
    triggers_present = input_data.get("dpia_triggers", [])
    total_weight = 0
    triggered_criteria = []

    for trigger in triggers_present:
        if trigger in DPIA_TRIGGERS:
            trigger_info = DPIA_TRIGGERS[trigger]
            total_weight += trigger_info["weight"]
            triggered_criteria.append({
                "trigger": trigger,
                "description": trigger_info["description"],
                "article": trigger_info["article"]
            })

    # Also check data characteristics
    if input_data.get("data_subjects", {}).get("vulnerable_groups"):
        if "vulnerable_subjects" not in triggers_present:
            total_weight += DPIA_TRIGGERS["vulnerable_subjects"]["weight"]
            triggered_criteria.append({
                "trigger": "vulnerable_subjects",
                "description": DPIA_TRIGGERS["vulnerable_subjects"]["description"],
                "article": DPIA_TRIGGERS["vulnerable_subjects"]["article"]
            })

    if input_data.get("personal_data", {}).get("special_categories"):
        if "sensitive_data" not in triggers_present:
            total_weight += DPIA_TRIGGERS["sensitive_data"]["weight"]
            triggered_criteria.append({
                "trigger": "sensitive_data",
                "description": DPIA_TRIGGERS["sensitive_data"]["description"],
                "article": DPIA_TRIGGERS["sensitive_data"]["article"]
            })

    if input_data.get("data_recipients", {}).get("third_countries"):
        if "cross_border_transfer" not in triggers_present:
            total_weight += DPIA_TRIGGERS["cross_border_transfer"]["weight"]
            triggered_criteria.append({
                "trigger": "cross_border_transfer",
                "description": DPIA_TRIGGERS["cross_border_transfer"]["description"],
                "article": DPIA_TRIGGERS["cross_border_transfer"]["article"]
            })

    # DPIA required if 2+ triggers or weight >= 10
    dpia_required = len(triggered_criteria) >= 2 or total_weight >= 10

    return {
        "dpia_required": dpia_required,
        "risk_score": total_weight,
        "triggered_criteria": triggered_criteria,
        "recommendation": "DPIA is mandatory" if dpia_required else "DPIA recommended as best practice"
    }


def assess_risks(input_data: Dict) -> List[Dict]:
    """Assess risks based on processing characteristics."""
    risks = []

    # Check each risk category
    processing = input_data.get("processing_operations", {})
    recipients = input_data.get("data_recipients", {})
    personal_data = input_data.get("personal_data", {})

    # Unauthorized access risk
    if processing.get("storage_location") or processing.get("collection_method"):
        risks.append({
            **RISK_CATEGORIES["unauthorized_access"],
            "likelihood": "medium",
            "residual_risk": "low" if processing.get("access_controls") else "medium"
        })

    # Data breach risk (always present)
    risks.append({
        **RISK_CATEGORIES["data_breach"],
        "likelihood": "medium",
        "residual_risk": "medium"
    })

    # Third party risk
    if recipients.get("external_processors") or recipients.get("third_countries"):
        risks.append({
            **RISK_CATEGORIES["third_party_risk"],
            "likelihood": "medium",
            "residual_risk": "medium"
        })

    # Rights violation risk
    risks.append({
        **RISK_CATEGORIES["rights_violation"],
        "likelihood": "low",
        "residual_risk": "low"
    })

    # Retention violation risk
    if not personal_data.get("retention_period"):
        risks.append({
            **RISK_CATEGORIES["retention_violation"],
            "likelihood": "high",
            "residual_risk": "high"
        })

    # Automated decision risk
    if processing.get("automated_decisions") or processing.get("profiling"):
        risks.append({
            "description": "Risk of unfair automated decisions affecting individuals",
            "impact": "high",
            "likelihood": "medium",
            "residual_risk": "medium",
            "mitigations": [
                "Human review of automated decisions",
                "Transparency about logic involved",
                "Right to contest decisions",
                "Regular algorithm audits"
            ]
        })

    return risks


def generate_dpia_report(input_data: Dict) -> str:
    """Generate DPIA report in Markdown format."""
    requirement = assess_dpia_requirement(input_data)
    risks = assess_risks(input_data)

    project = input_data.get("project_name", "Unnamed Project")
    controller = input_data.get("controller", {})
    processing = input_data.get("processing_activity", {})
    subjects = input_data.get("data_subjects", {})
    personal_data = input_data.get("personal_data", {})
    operations = input_data.get("processing_operations", {})
    recipients = input_data.get("data_recipients", {})

    legal_basis = processing.get("legal_basis", "")
    legal_info = LEGAL_BASES.get(legal_basis, {})

    report = f"""# Data Protection Impact Assessment (DPIA)

## Project: {project}

| Field | Value |
|-------|-------|
| Version | {input_data.get('version', '1.0')} |
| Date | {input_data.get('date', datetime.now().strftime('%Y-%m-%d'))} |
| Controller | {controller.get('name', 'N/A')} |
| DPO Contact | {controller.get('dpo_contact', 'N/A')} |

---

## 1. DPIA Threshold Assessment

**Result: {requirement['recommendation']}**

Risk Score: {requirement['risk_score']}/100

### Triggered Criteria

"""
    if requirement['triggered_criteria']:
        for criteria in requirement['triggered_criteria']:
            report += f"- **{criteria['description']}** ({criteria['article']})\n"
    else:
        report += "- No mandatory triggers identified\n"

    report += f"""
---

## 2. Description of Processing

### Purpose of Processing

{processing.get('description', 'Not specified')}

### Purposes

"""
    for purpose in processing.get('purposes', ['Not specified']):
        report += f"- {purpose}\n"

    report += f"""
### Legal Basis

**{legal_info.get('article', 'Not specified')}**: {legal_info.get('description', processing.get('legal_basis', 'Not specified'))}

**Justification**: {processing.get('legal_basis_justification', 'Not provided')}

"""
    if legal_info.get('requirements'):
        report += "**Requirements to satisfy:**\n"
        for req in legal_info['requirements']:
            report += f"- {req}\n"

    report += f"""
---

## 3. Data Subjects

| Aspect | Details |
|--------|---------|
| Categories | {', '.join(subjects.get('categories', ['Not specified']))} |
| Estimated Number | {subjects.get('estimated_number', 'Not specified')} |
| Vulnerable Groups | {'Yes - ' + subjects.get('vulnerable_groups_details', '') if subjects.get('vulnerable_groups') else 'No'} |

---

## 4. Personal Data Processed

### Data Categories

"""
    for category in personal_data.get('categories', ['Not specified']):
        report += f"- {category}\n"

    if personal_data.get('special_categories'):
        report += "\n### Special Category Data (Art. 9)\n\n"
        for category in personal_data['special_categories']:
            report += f"- **{category}** - Requires Art. 9(2) exception\n"

    report += f"""
### Data Source

{personal_data.get('source', 'Not specified')}

### Retention Period

{personal_data.get('retention_period', 'Not specified')}

---

## 5. Processing Operations

| Operation | Details |
|-----------|---------|
| Collection Method | {operations.get('collection_method', 'Not specified')} |
| Storage Location | {operations.get('storage_location', 'Not specified')} |
| Access Controls | {operations.get('access_controls', 'Not specified')} |
| Automated Decisions | {'Yes' if operations.get('automated_decisions') else 'No'} |
| Profiling | {'Yes' if operations.get('profiling') else 'No'} |

---

## 6. Data Recipients

### Internal Recipients

"""
    for recipient in recipients.get('internal', ['Not specified']):
        report += f"- {recipient}\n"

    report += "\n### External Processors\n\n"
    for processor in recipients.get('external_processors', ['None']):
        report += f"- {processor}\n"

    if recipients.get('third_countries'):
        report += "\n### Third Country Transfers\n\n"
        report += "**Warning**: Transfers require Chapter V safeguards\n\n"
        for country in recipients['third_countries']:
            report += f"- {country}\n"

    report += """
---

## 7. Risk Assessment

"""
    for i, risk in enumerate(risks, 1):
        report += f"""### Risk {i}: {risk['description']}

| Aspect | Assessment |
|--------|------------|
| Impact | {risk.get('impact', 'medium').upper()} |
| Likelihood | {risk.get('likelihood', 'medium').upper()} |
| Residual Risk | {risk.get('residual_risk', 'medium').upper()} |

**Recommended Mitigations:**

"""
        for mitigation in risk.get('mitigations', []):
            report += f"- {mitigation}\n"
        report += "\n"

    report += """---

## 8. Necessity and Proportionality

### Assessment Questions

1. **Is the processing necessary for the stated purpose?**
   - [ ] Yes, no less intrusive alternative exists
   - [ ] Alternative considered: _______________

2. **Is the data collection proportionate?**
   - [ ] Only necessary data is collected
   - [ ] Data minimization applied

3. **Are retention periods justified?**
   - [ ] Retention period is necessary
   - [ ] Deletion procedures in place

---

## 9. DPO Consultation

| Aspect | Details |
|--------|---------|
| DPO Consulted | [ ] Yes / [ ] No |
| DPO Name | |
| Consultation Date | |
| DPO Opinion | |

---

## 10. Sign-Off

| Role | Name | Signature | Date |
|------|------|-----------|------|
| Project Owner | | | |
| Data Protection Officer | | | |
| Controller Representative | | | |

---

## 11. Review Schedule

This DPIA should be reviewed:
- [ ] Annually
- [ ] When processing changes significantly
- [ ] Following a data incident
- [ ] As required by supervisory authority

Next Review Date: _______________

---

*Generated by DPIA Generator - This document requires completion and review by qualified personnel.*
"""
    return report


def main():
    parser = argparse.ArgumentParser(
        description="Generate DPIA documentation"
    )
    parser.add_argument(
        "--input", "-i",
        help="Path to JSON input file with processing activity details"
    )
    parser.add_argument(
        "--output", "-o",
        help="Path to output file (default: stdout)"
    )
    parser.add_argument(
        "--template",
        action="store_true",
        help="Output a blank JSON template"
    )
    parser.add_argument(
        "--interactive",
        action="store_true",
        help="Run in interactive mode"
    )

    args = parser.parse_args()

    if args.template:
        print(json.dumps(get_template(), indent=2))
        return

    if args.interactive:
        print("DPIA Generator - Interactive Mode")
        print("=" * 40)
        print("\nTo use this tool:")
        print("1. Generate a template: python dpia_generator.py --template > input.json")
        print("2. Fill in the template with your processing details")
        print("3. Generate DPIA: python dpia_generator.py --input input.json --output dpia.md")
        return

    if not args.input:
        print("Error: --input required (or use --template to get started)")
        sys.exit(1)

    input_path = Path(args.input)
    if not input_path.exists():
        print(f"Error: Input file not found: {input_path}")
        sys.exit(1)

    with open(input_path, "r") as f:
        input_data = json.load(f)

    report = generate_dpia_report(input_data)

    if args.output:
        with open(args.output, "w") as f:
            f.write(report)
        print(f"DPIA report written to {args.output}")
    else:
        print(report)


if __name__ == "__main__":
    main()
