#!/usr/bin/env python3
"""
Validation script for URIBurner SPARQL Agent Skill.

Validates:
- SKILL.md frontmatter (name, description)
- Directory structure (scripts/, references/, assets/)
- Reference file presence and content quality
- SKILL.md frontmatter parses as valid YAML
- Basic Markdown structure (recommended sections, headers, code blocks)
"""

import os
import sys
import re
import yaml
from pathlib import Path

class SkillValidator:
    """Validate the layout and content of a skill directory.

    Findings are collected in three lists: ``errors`` (validation fails),
    ``warnings`` (recommendations) and ``info`` (checks that passed).
    ``validate()`` runs every check, prints a report and returns ``True``
    when no errors were recorded.
    """

    # Directories a skill may contain; a missing one only yields a warning.
    _OPTIONAL_DIRS = ("scripts", "references", "assets")

    # Sections the SKILL.md body is encouraged to include.
    _RECOMMENDED_SECTIONS = ("Quick Start", "Workflow")

    # File-name fragments marking a reference file as a query reference.
    # "queries" is listed explicitly because "query" is not a substring of it.
    _QUERY_NAME_HINTS = ("query", "queries", "predefined")

    # Closing frontmatter delimiter: ``---`` on its own line, optionally
    # followed by trailing blanks, terminated by a newline or end of file.
    _FRONTMATTER_CLOSE = re.compile(r"\n---[ \t]*(?:\n|\Z)")

    _MISSING_SKILL_MD = "SKILL.md not found in skill root directory"

    def __init__(self, skill_path):
        """Store the skill root and start with empty finding lists.

        Args:
            skill_path: Path (string or path-like) of the skill directory.
        """
        self.skill_path = Path(skill_path)
        self.errors = []
        self.warnings = []
        self.info = []

    def validate(self):
        """Run all validations, print the report and return overall success.

        When the skill root itself is unusable the content checks are
        skipped, so the report holds the root-cause error only instead of
        a cascade of follow-up failures.

        Returns:
            bool: ``True`` when no errors were recorded.
        """
        if self._check_directory_structure():
            self._validate_skill_md()
            self._check_reference_files()
            self._check_script_files()
        return self._report()

    def _check_directory_structure(self):
        """Verify the skill root, SKILL.md and the optional directories.

        Returns:
            bool: ``True`` when the skill root is an existing directory,
            ``False`` when nothing below it can be inspected.
        """
        if not self.skill_path.exists():
            self.errors.append(f"Skill path does not exist: {self.skill_path}")
            return False

        if not self.skill_path.is_dir():
            self.errors.append(f"Skill path is not a directory: {self.skill_path}")
            return False

        self.info.append(f"✓ Skill directory found: {self.skill_path}")

        if (self.skill_path / "SKILL.md").exists():
            self.info.append("✓ SKILL.md present")
        elif self._MISSING_SKILL_MD not in self.errors:
            self.errors.append(self._MISSING_SKILL_MD)

        # A missing SKILL.md does not prevent reporting on the directories.
        for dirname in self._OPTIONAL_DIRS:
            if (self.skill_path / dirname).is_dir():
                self.info.append(f"✓ {dirname}/ directory present")
            else:
                self.warnings.append(f"Optional directory not found: {dirname}/")
        return True

    def _validate_skill_md(self):
        """Validate SKILL.md frontmatter and body."""
        skill_md = self.skill_path / "SKILL.md"

        if not skill_md.exists():
            # Report the absence once, even if the structure check ran first.
            if self._MISSING_SKILL_MD not in self.errors:
                self.errors.append(self._MISSING_SKILL_MD)
            return

        try:
            content = skill_md.read_text(encoding="utf-8")
        except (OSError, UnicodeError) as exc:
            self.errors.append(f"Cannot read SKILL.md: {exc}")
            return

        parsed = self._parse_frontmatter(content)
        if parsed is None:
            return

        frontmatter, body = parsed
        self._check_frontmatter_fields(frontmatter)
        self._check_body(body)

    def _parse_frontmatter(self, content):
        """Split SKILL.md text into its YAML frontmatter and Markdown body.

        Returns:
            tuple | None: ``(frontmatter_dict, body_text)``, or ``None`` after
            recording an error when the frontmatter is missing or invalid.
        """
        if not content.startswith("---"):
            self.errors.append("SKILL.md does not start with '---' frontmatter marker")
            return None

        # Start after the opening marker so it cannot double as the closer.
        closing = self._FRONTMATTER_CLOSE.search(content, 3)
        if closing is None:
            self.errors.append("SKILL.md frontmatter is not properly closed")
            return None

        try:
            frontmatter = yaml.safe_load(content[3:closing.start()])
        except yaml.YAMLError as exc:
            self.errors.append(f"SKILL.md frontmatter YAML error: {exc}")
            return None

        if not isinstance(frontmatter, dict):
            self.errors.append("SKILL.md frontmatter is not valid YAML")
            return None

        self.info.append("✓ SKILL.md frontmatter is valid YAML")
        return frontmatter, content[closing.end():]

    def _check_frontmatter_fields(self, frontmatter):
        """Check the required ``name``/``description`` and optional ``license``."""
        if "name" not in frontmatter:
            self.errors.append("SKILL.md frontmatter missing 'name' field")
        else:
            name = frontmatter["name"]
            if not isinstance(name, str) or not name.strip():
                self.errors.append("SKILL.md 'name' field must be a non-empty string")
            else:
                self.info.append(f"✓ Skill name: {name}")

        if "description" not in frontmatter:
            self.errors.append("SKILL.md frontmatter missing 'description' field")
        else:
            desc = frontmatter["description"]
            if not isinstance(desc, str) or not desc.strip():
                self.errors.append("SKILL.md 'description' field must be a non-empty string")
            elif len(desc) < 50:
                self.warnings.append(
                    f"SKILL.md description is quite short ({len(desc)} chars); consider expanding"
                )
            else:
                self.info.append(f"✓ Description: {len(desc)} chars")

        # The license field is optional but recommended.
        if "license" not in frontmatter:
            self.warnings.append("SKILL.md frontmatter missing optional 'license' field")

    def _check_body(self, body):
        """Check length, recommended sections and code fences of the body."""
        if len(body.strip()) < 100:
            self.warnings.append("SKILL.md body is very short; consider adding more guidance")
        else:
            self.info.append(f"✓ SKILL.md body: {len(body)} chars")

        for section in self._RECOMMENDED_SECTIONS:
            if f"## {section}" not in body:
                self.warnings.append(f"SKILL.md missing recommended section: '## {section}'")

        fence_count = body.count("```")
        if fence_count == 0:
            self.warnings.append("SKILL.md has no code examples")
        else:
            self.info.append(f"✓ SKILL.md includes {fence_count // 2} code blocks")
            # Every block needs an opening and a closing fence.
            if fence_count % 2:
                self.warnings.append(
                    "SKILL.md has an unbalanced code fence (odd number of ``` markers)"
                )

    def _check_reference_files(self):
        """Validate the Markdown reference files in references/."""
        ref_dir = self.skill_path / "references"
        if not ref_dir.is_dir():
            self.warnings.append("No references/ directory found")
            return

        # Sorted so the report order does not depend on directory listing order.
        ref_files = sorted(path for path in ref_dir.glob("*.md") if path.is_file())
        if not ref_files:
            self.warnings.append("references/ directory is empty")
            return

        self.info.append(f"✓ Found {len(ref_files)} reference file(s)")

        for ref_file in ref_files:
            try:
                content = ref_file.read_text(encoding="utf-8")
            except (OSError, UnicodeError) as exc:
                self.errors.append(f"Cannot read reference file {ref_file.name}: {exc}")
                continue

            if len(content) < 100:
                self.warnings.append(
                    f"Reference file very short: {ref_file.name} ({len(content)} chars)"
                )
            else:
                self.info.append(f"  ✓ {ref_file.name}: {len(content)} chars")

            if not re.search(r'^#+\s', content, re.MULTILINE):
                self.warnings.append(f"Reference file missing headers: {ref_file.name}")

            # Query references are expected to show at least one code block.
            lowered_name = ref_file.name.lower()
            is_query_reference = any(hint in lowered_name for hint in self._QUERY_NAME_HINTS)
            if is_query_reference and "```" not in content:
                self.warnings.append(
                    f"Query reference file has no code blocks: {ref_file.name}"
                )

    def _check_script_files(self):
        """Validate the files in scripts/; Python files are syntax-checked."""
        script_dir = self.skill_path / "scripts"
        if not script_dir.is_dir():
            self.info.append("No scripts/ directory (optional)")
            return

        # Only regular files count as scripts; sub-directories such as
        # __pycache__ are ignored. Sorted for a stable report order.
        script_files = sorted(path for path in script_dir.glob("*") if path.is_file())
        if not script_files:
            self.info.append("scripts/ directory is empty (optional)")
            return

        self.info.append(f"✓ Found {len(script_files)} script file(s)")

        for script_file in script_files:
            status = "executable" if os.access(script_file, os.X_OK) else "not executable"
            self.info.append(f"  ✓ {script_file.name} ({status})")

            if script_file.suffix != ".py":
                continue

            try:
                source = script_file.read_text(encoding="utf-8")
            except (OSError, UnicodeError) as exc:
                self.errors.append(f"Cannot read script file {script_file.name}: {exc}")
                continue

            try:
                # Compile only; the script is never executed.
                compile(source, str(script_file), 'exec')
            except (SyntaxError, ValueError) as exc:
                # ValueError covers sources the compiler rejects outright
                # (e.g. embedded null bytes on older Python versions).
                self.errors.append(f"Python syntax error in {script_file.name}: {exc}")
            else:
                self.info.append(f"    ✓ {script_file.name} has valid Python syntax")

    def _report(self):
        """Print the validation report.

        Returns:
            bool: ``True`` when no errors were recorded, ``False`` otherwise.
        """
        print("\n" + "="*70)
        print("URIBurner SPARQL Skill Validation Report")
        print("="*70 + "\n")

        sections = (
            ("ℹ INFO:", self.info),
            ("⚠ WARNINGS:", self.warnings),
            ("✗ ERRORS:", self.errors),
        )
        for heading, messages in sections:
            if not messages:
                continue
            print(heading)
            for msg in messages:
                print(f"  {msg}")
            print()

        print("-"*70)
        if self.errors:
            print(f"Status: FAIL ({len(self.errors)} error(s), {len(self.warnings)} warning(s))")
        elif self.warnings:
            print(f"Status: PASS ({len(self.warnings)} warning(s))")
        else:
            print("Status: PASS ✓")
        return not self.errors

def main():
    """Command-line entry point.

    Expects the skill directory as the first argument. Prints a usage line
    and exits with status 1 when it is missing; otherwise validates the skill
    and exits with 0 on success or 1 on failure.
    """
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: validate-sparql.py <skill-path>")
        sys.exit(1)

    passed = SkillValidator(cli_args[0]).validate()
    exit_code = 1 if not passed else 0
    sys.exit(exit_code)

# Run the validator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
