/**
 * ContextCompressor for OpenClaw Context Optimizer
 *
 * Compresses context by 40-60% while maintaining quality using multiple strategies:
 * - Deduplication: Remove redundant/similar text
 * - Pruning: Remove low-importance context
 * - Summarization: Condense verbose sections
 * - Template Removal: Remove boilerplate/repeated structures
 * - Hybrid: Combine multiple strategies
 */

/**
 * Cosine similarity between two texts, computed over character trigram counts.
 *
 * Both texts are lower-cased and every whitespace run is collapsed to a single
 * space before trigrams are extracted. A text shorter than three characters
 * has no trigram, so the whole normalized text is used as its only feature;
 * this lets two identical short texts compare as equal instead of scoring 0.
 *
 * @param {string} textA - First text
 * @param {string} textB - Second text
 * @returns {number} Similarity score in [0, 1]; 0 when either text is empty/falsy
 */
function textCosineSimilarity(textA, textB) {
  if (!textA || !textB) return 0;

  const GRAM_SIZE = 3;

  // Build a gram -> occurrence-count table for one text.
  const countGrams = (text) => {
    const counts = new Map();
    const normalized = String(text).toLowerCase().replace(/\s+/g, ' ');

    if (normalized.length < GRAM_SIZE) {
      // Too short for a trigram: treat the whole text as a single feature.
      counts.set(normalized, 1);
      return counts;
    }

    for (let start = 0; start + GRAM_SIZE <= normalized.length; start++) {
      const gram = normalized.substring(start, start + GRAM_SIZE);
      counts.set(gram, (counts.get(gram) || 0) + 1);
    }

    return counts;
  };

  // Euclidean length of a count vector.
  const vectorLength = (counts) => {
    let sumOfSquares = 0;
    for (const count of counts.values()) {
      sumOfSquares += count * count;
    }
    return Math.sqrt(sumOfSquares);
  };

  const gramsA = countGrams(textA);
  const gramsB = countGrams(textB);

  // Only grams present in both texts contribute to the dot product.
  let dotProduct = 0;
  for (const [gram, countA] of gramsA) {
    const countB = gramsB.get(gram);
    if (countB) {
      dotProduct += countA * countB;
    }
  }

  const magnitudeA = vectorLength(gramsA);
  const magnitudeB = vectorLength(gramsB);

  if (magnitudeA === 0 || magnitudeB === 0) {
    return 0;
  }

  // Rounding in sqrt() can push the quotient a hair above 1 for identical
  // inputs; clamp so the documented [0, 1] range always holds.
  return Math.min(1, dotProduct / (magnitudeA * magnitudeB));
}

/**
 * ContextCompressor
 *
 * Shrinks free-form context text using line- and sentence-level heuristics.
 * Available strategies: deduplication, pruning, extractive summarization,
 * template/boilerplate removal, and a hybrid pipeline that chains them.
 */
export class ContextCompressor {
  /**
   * @param {object|null} storage - Optional storage handle. It is stored on the
   *   instance; no method of this class reads it.
   * @param {object} options - Configuration overrides
   * @param {number} [options.targetCompressionRatio=0.5] - Fraction of sentences
   *   summarization keeps, and the token ratio the hybrid pipeline aims for
   * @param {number} [options.similarityThreshold=0.9] - Lines more similar than
   *   this are treated as duplicates
   * @param {number} [options.minChunkSize=50] - Stored; not read by this class
   * @param {number} [options.maxChunkSize=500] - Stored; not read by this class
   * @param {boolean} [options.preserveCodeBlocks=true] - Keep fenced code blocks
   *   verbatim during summarization
   * @param {boolean} [options.preserveImportantSections=true] - Never prune
   *   lines that look important
   */
  constructor(storage = null, options = {}) {
    const opts = options || {};

    // Fall back only when the caller left a setting unset, so an explicit
    // `undefined`/`null` cannot erase a default while values like 0 survive.
    const pick = (value, fallback) => (value === undefined || value === null ? fallback : value);

    this.storage = storage;
    this.options = {
      ...opts,
      targetCompressionRatio: pick(opts.targetCompressionRatio, 0.5),
      similarityThreshold: pick(opts.similarityThreshold, 0.9),
      minChunkSize: pick(opts.minChunkSize, 50),
      maxChunkSize: pick(opts.maxChunkSize, 500),
      preserveCodeBlocks: opts.preserveCodeBlocks !== false,
      preserveImportantSections: opts.preserveImportantSections !== false
    };

    // Low-value patterns for pruning
    this.lowValuePatterns = [
      /^(ok|okay|yes|no|sure)[.,!?]*$/i,
      /^(thanks|thank you).*[.,!?]*$/i,
      /^(hello|hi|hey|bye|goodbye)[.,!? ].*$/i,
      /^(please|could you|can you|would you).{0,30}\?/i,
      /^(i see|i understand|got it|makes sense)[.,!?]*$/i,
      /^(let me know|feel free|don't hesitate).{0,50}$/i
    ];

    // Boilerplate/template patterns
    this.templatePatterns = [
      /^-{3,}$/,                                      // Horizontal rules
      /^\*{3,}$/,                                     // Star dividers
      /^={3,}$/,                                      // Equal dividers
      /^<!-- .* -->$/,                                // HTML comments
      /^\/\/ -{10,}$/,                                // Comment dividers
      /^Copyright \(c\).*/i,                          // Copyright notices
      /^Licensed under.*/i,                           // License headers
      /^(Generated by|Auto-generated).*/i,            // Auto-gen notices
    ];

    // Important section markers (to preserve); matched as lower-case substrings
    this.importantMarkers = [
      'error', 'warning', 'critical', 'important', 'note',
      'todo', 'fixme', 'bug', 'issue', 'problem',
      'required', 'must', 'should', 'key point'
    ];
  }

  /**
   * Main compression entry point.
   *
   * Non-string contexts are serialized with `JSON.stringify(context, null, 2)`
   * (which throws on circular structures). Empty or whitespace-only input is
   * returned unchanged with zeroed metrics.
   *
   * @param {string|object} context - Context to compress (string or structured object)
   * @param {string} strategy - 'deduplication', 'pruning', 'summarization',
   *   'template' or 'hybrid' (case-insensitive). Unknown or non-string values
   *   fall back to the hybrid pipeline.
   * @param {object} options - Per-call options forwarded to the strategy
   * @returns {Promise<object>} `{ original, compressed, strategy, metrics }`
   */
  async compress(context, strategy = 'hybrid', options = {}) {
    const startTime = Date.now();
    const callOptions = options || {};
    // `null` bypasses the default parameter, so normalize it here.
    const strategyName = typeof strategy === 'string' ? strategy : 'hybrid';

    const contextText = typeof context === 'string' ? context : JSON.stringify(context, null, 2);

    if (!contextText || contextText.trim().length === 0) {
      return {
        original: contextText,
        compressed: contextText,
        strategy: strategyName,
        metrics: {
          originalTokens: 0,
          compressedTokens: 0,
          tokensRemoved: 0,
          compressionRatio: 0,
          percentageReduction: 0,
          compressionTime: Date.now() - startTime,
          qualityScore: 1.0
        }
      };
    }

    let compressed;

    switch (strategyName.toLowerCase()) {
      case 'deduplication':
        compressed = await this.deduplicateContext(contextText, callOptions);
        break;

      case 'pruning':
        compressed = await this.pruneContext(
          contextText,
          callOptions.patterns || this.lowValuePatterns,
          callOptions
        );
        break;

      case 'summarization':
        compressed = await this.summarizeContext(contextText, callOptions);
        break;

      case 'template':
        compressed = await this.removeTemplates(contextText, callOptions);
        break;

      case 'hybrid':
      default:
        compressed = await this.hybridCompress(contextText, callOptions);
        break;
    }

    const metrics = this.calculateMetrics(contextText, compressed);
    metrics.compressionTime = Date.now() - startTime;
    metrics.qualityScore = this.calculateQualityScore(contextText, compressed);

    return {
      original: contextText,
      compressed,
      strategy: strategyName,
      metrics
    };
  }

  /**
   * Deduplication strategy - drop exact and near-duplicate lines.
   *
   * Lines are trimmed and blank lines discarded. A line is dropped when it
   * equals an already kept line ignoring case and whitespace, or when its
   * similarity to a kept line exceeds the threshold. The first occurrence wins.
   *
   * @param {string} context - Context to deduplicate
   * @param {object} options - Options (`similarityThreshold` overrides the instance value)
   * @returns {Promise<string>} Deduplicated context, lines joined with '\n'
   */
  async deduplicateContext(context, options = {}) {
    const threshold = options.similarityThreshold || this.options.similarityThreshold;

    const lines = context
      .split('\n')
      .map((line) => line.trim())
      .filter((line) => line.length > 0);

    if (lines.length === 0) {
      return context;
    }

    const keptLines = [];
    const seenKeys = new Set();

    for (const line of lines) {
      // Key on the normalized text itself rather than a 32-bit hash of it, so
      // a hash collision can never discard a genuinely different line.
      const key = line.toLowerCase().replace(/\s+/g, '');

      if (seenKeys.has(key)) {
        continue; // Exact duplicate (ignoring case/whitespace)
      }

      const isNearDuplicate = keptLines.some(
        (kept) => textCosineSimilarity(line, kept) > threshold
      );

      if (!isNearDuplicate) {
        keptLines.push(line);
        seenKeys.add(key);
      }
    }

    return keptLines.join('\n');
  }

  /**
   * Pruning strategy - drop lines that match a low-value pattern.
   *
   * Blank lines are kept. Lines judged important by `isImportantSection` are
   * kept unless preservation is disabled per call or on the instance.
   *
   * @param {string} context - Context to prune
   * @param {Array<RegExp>} patterns - Patterns to remove (defaults to `lowValuePatterns`)
   * @param {object} options - Options (`preserveImportantSections`)
   * @returns {Promise<string>} Pruned context
   */
  async pruneContext(context, patterns = null, options = {}) {
    const patternsToUse = patterns || this.lowValuePatterns;
    const requested = options.preserveImportantSections;
    // A per-call value wins; otherwise honor the instance-level setting.
    const preserveImportant =
      (requested === undefined || requested === null
        ? this.options.preserveImportantSections
        : requested) !== false;

    const prunedLines = [];

    for (const line of context.split('\n')) {
      const trimmed = line.trim();

      const keep =
        trimmed.length === 0 ||
        (preserveImportant && this.isImportantSection(trimmed)) ||
        !this.matchesAnyPattern(trimmed, patternsToUse);

      if (keep) {
        prunedLines.push(line);
      }
    }

    return prunedLines.join('\n');
  }

  /**
   * Summarization strategy - extractive: keep the highest-scoring sentences.
   *
   * When code preservation is on, fenced code blocks are swapped for
   * placeholders before sentence splitting and restored afterwards. Sentences
   * carrying a placeholder are always kept, so preserved code is never lost.
   * Selected sentences are emitted in original order, joined by single spaces.
   *
   * @param {string} context - Context to summarize
   * @param {object} options - Options (`targetCompressionRatio`, `preserveCodeBlocks`)
   * @returns {Promise<string>} Summarized context
   */
  async summarizeContext(context, options = {}) {
    const targetRatio = options.targetCompressionRatio || this.options.targetCompressionRatio;
    const requested = options.preserveCodeBlocks;
    // A per-call value wins; otherwise honor the instance-level setting.
    const preserveCode =
      (requested === undefined || requested === null
        ? this.options.preserveCodeBlocks
        : requested) !== false;

    const codeBlocks = [];
    let workingContext = context;

    if (preserveCode) {
      workingContext = context.replace(/```[\s\S]*?```/g, (block) => {
        codeBlocks.push(block);
        return `__CODE_BLOCK_${codeBlocks.length - 1}__`;
      });
    }

    const sentences = this.splitIntoSentences(workingContext);

    if (sentences.length === 0) {
      return context;
    }

    const placeholderPattern = /__CODE_BLOCK_\d+__/;
    const scored = sentences.map((text, index) => ({
      text,
      index,
      // Pass the real position so duplicate sentences are scored correctly.
      score: this.scoreSentenceImportance(text, sentences, index),
      holdsCode: codeBlocks.length > 0 && placeholderPattern.test(text)
    }));

    const targetCount = Math.max(1, Math.ceil(sentences.length * targetRatio));
    const ranked = scored.slice().sort((a, b) => b.score - a.score);
    const selected = ranked.slice(0, targetCount);

    // Force-include sentences that carry extracted code but missed the cut.
    for (const entry of ranked.slice(targetCount)) {
      if (entry.holdsCode) {
        selected.push(entry);
      }
    }

    selected.sort((a, b) => a.index - b.index); // Restore original order

    let summarized = selected.map((entry) => entry.text).join(' ');

    if (codeBlocks.length > 0) {
      // A replacer function inserts the code literally; a replacement string
      // would interpret `$&`, `$$`, `$'` ... inside the code.
      summarized = summarized.replace(/__CODE_BLOCK_(\d+)__/g, (placeholder, blockIndex) => {
        const block = codeBlocks[Number(blockIndex)];
        return block === undefined ? placeholder : block;
      });
    }

    return summarized;
  }

  /**
   * Template removal strategy - drop boilerplate lines and repeated structures.
   *
   * Removes blank lines and lines matching `templatePatterns`, collapses runs
   * of spaces/tabs to one space, then drops repeated lines via
   * `removeRepeatedStructures`.
   *
   * @param {string} context - Context to clean
   * @param {object} options - Options (currently unused)
   * @returns {Promise<string>} Cleaned, trimmed context
   */
  async removeTemplates(context, options = {}) {
    const cleanedLines = context.split('\n').filter((line) => {
      const trimmed = line.trim();
      return trimmed.length > 0 && !this.matchesAnyPattern(trimmed, this.templatePatterns);
    });

    let cleaned = cleanedLines.join('\n');

    // Remove excessive whitespace
    cleaned = cleaned.replace(/\n{3,}/g, '\n\n');
    cleaned = cleaned.replace(/[ \t]{2,}/g, ' ');

    // Detect and remove repeated structural patterns
    cleaned = this.removeRepeatedStructures(cleaned);

    return cleaned.trim();
  }

  /**
   * Hybrid strategy - template removal, then deduplication, then pruning, and
   * finally summarization if the estimated token ratio is still above target.
   *
   * @param {string} context - Context to compress
   * @param {object} options - Options forwarded to each stage
   * @returns {Promise<string>} Compressed context
   */
  async hybridCompress(context, options = {}) {
    let compressed = context;

    // Step 1: Remove templates/boilerplate
    compressed = await this.removeTemplates(compressed, options);

    // Step 2: Deduplicate similar sections
    compressed = await this.deduplicateContext(compressed, options);

    // Step 3: Prune low-value content
    compressed = await this.pruneContext(compressed, null, options);

    // Step 4: If still not meeting target, apply summarization
    const originalTokens = this.estimateTokens(context);

    if (originalTokens === 0) {
      // Nothing measurable to compare against; avoid dividing by zero.
      return compressed;
    }

    const currentRatio = this.estimateTokens(compressed) / originalTokens;
    const targetRatio = options.targetCompressionRatio || this.options.targetCompressionRatio;

    if (currentRatio > targetRatio) {
      // Only the remaining share of the reduction is left to summarization.
      const summarizationTarget = targetRatio / currentRatio;
      compressed = await this.summarizeContext(compressed, {
        ...options,
        targetCompressionRatio: summarizationTarget
      });
    }

    return compressed;
  }

  /**
   * Estimate token count as the rounded mean of a character-based estimate
   * (length / 4) and a word-based estimate (words * 1.3).
   *
   * @param {string} text - Text to estimate
   * @returns {number} Estimated token count (0 for empty/falsy input)
   */
  estimateTokens(text) {
    if (!text) return 0;

    const wordCount = text.split(/\s+/).filter((word) => word.length > 0).length;

    const charBasedEstimate = text.length / 4;
    const wordBasedEstimate = wordCount * 1.3;

    return Math.round((charBasedEstimate + wordBasedEstimate) / 2);
  }

  /**
   * Calculate compression metrics from estimated token counts.
   *
   * @param {string} originalContext - Original context
   * @param {string} compressedContext - Compressed context
   * @returns {object} `{ originalTokens, compressedTokens, tokensRemoved,
   *   compressionRatio, percentageReduction }`; the ratio is the fraction removed
   */
  calculateMetrics(originalContext, compressedContext) {
    const originalTokens = this.estimateTokens(originalContext);
    const compressedTokens = this.estimateTokens(compressedContext);
    const tokensRemoved = originalTokens - compressedTokens;
    const compressionRatio = originalTokens > 0 ? 1 - (compressedTokens / originalTokens) : 0;

    return {
      originalTokens,
      compressedTokens,
      tokensRemoved,
      compressionRatio: parseFloat(compressionRatio.toFixed(3)),
      percentageReduction: parseFloat((compressionRatio * 100).toFixed(1))
    };
  }

  /**
   * Calculate a quality score from how many "key elements" (capitalized
   * phrases, code blocks, numbers, words of 5+ letters, sentences) survive.
   *
   * The mean retention is multiplied by 0.7 when more than 80% of tokens were
   * removed and by 0.9 when less than 30% were removed.
   *
   * @param {string} original - Original context
   * @param {string} compressed - Compressed context
   * @returns {number} Quality score (0 to 1), rounded to 3 decimals
   */
  calculateQualityScore(original, compressed) {
    if (!original || !compressed) return 0;

    const countMatches = (text, pattern) => (text.match(pattern) || []).length;

    const getKeyElements = (text) => ({
      entities: countMatches(text, /\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b/g),
      codeBlocks: countMatches(text, /```[\s\S]*?```/g),
      numbers: countMatches(text, /\b\d+(?:\.\d+)?\b/g),
      keywords: countMatches(text, /\b[a-z]{5,}\b/gi),
      sentences: this.splitIntoSentences(text).length
    });

    const originalElements = getKeyElements(original);
    const compressedElements = getKeyElements(compressed);

    // Retention per element type; types absent from the original are skipped.
    const retentionScores = [];

    for (const key of Object.keys(originalElements)) {
      if (originalElements[key] > 0) {
        const retention = compressedElements[key] / originalElements[key];
        retentionScores.push(Math.min(1, retention)); // Cap at 1.0
      }
    }

    const avgRetention = retentionScores.length > 0
      ? retentionScores.reduce((sum, value) => sum + value, 0) / retentionScores.length
      : 0.5;

    const { compressionRatio } = this.calculateMetrics(original, compressed);

    let qualityScore = avgRetention;

    if (compressionRatio > 0.8) {
      // Very high compression, penalize quality
      qualityScore *= 0.7;
    } else if (compressionRatio < 0.3) {
      // Low compression, slightly penalize (not doing much)
      qualityScore *= 0.9;
    }

    return parseFloat(qualityScore.toFixed(3));
  }

  // ============================================================================
  // Helper Methods
  // ============================================================================

  /**
   * Test a text against a list of patterns.
   *
   * Global/sticky regexes keep a `lastIndex` between `.test()` calls; it is
   * reset first so such patterns behave the same on every line.
   *
   * @param {string} text - Text to test
   * @param {Array<RegExp>} patterns - Patterns to try in order
   * @returns {boolean} True if any pattern matches
   */
  matchesAnyPattern(text, patterns) {
    for (const pattern of patterns) {
      if (pattern.global || pattern.sticky) {
        pattern.lastIndex = 0;
      }

      if (pattern.test(text)) {
        return true;
      }
    }

    return false;
  }

  /**
   * Split text into chunks: paragraphs separated by blank lines, falling back
   * to single lines when that yields nothing.
   *
   * @param {string} text - Text to split
   * @returns {Array<string>} Trimmed, non-empty chunks
   */
  splitIntoChunks(text) {
    const nonEmptyTrimmed = (parts) =>
      parts.map((part) => part.trim()).filter((part) => part.length > 0);

    let chunks = nonEmptyTrimmed(text.split(/\n\n+/));

    if (chunks.length === 0) {
      chunks = nonEmptyTrimmed(text.split(/\n/));
    }

    return chunks;
  }

  /**
   * Split text into sentences. A boundary is whitespace that follows `.`, `!`
   * or `?` and precedes an upper-case ASCII letter.
   *
   * @param {string} text - Text to split
   * @returns {Array<string>} Trimmed, non-empty sentences
   */
  splitIntoSentences(text) {
    return text
      .split(/(?<=[.!?])\s+(?=[A-Z])/)
      .map((sentence) => sentence.trim())
      .filter((sentence) => sentence.length > 0);
  }

  /**
   * Calculate a simple 32-bit rolling hash of the text, ignoring case and
   * whitespace.
   *
   * @param {string} text - Text to hash
   * @returns {string} Hash rendered in base 36
   */
  simpleHash(text) {
    const normalized = text.toLowerCase().replace(/\s+/g, '');
    let hash = 0;

    for (let i = 0; i < normalized.length; i++) {
      hash = ((hash << 5) - hash) + normalized.charCodeAt(i);
      hash = hash & hash; // Convert to 32-bit integer
    }

    return hash.toString(36);
  }

  /**
   * Check if a line should be preserved: it contains an important marker,
   * looks like code, or is a list item.
   *
   * @param {string} text - Text to check
   * @returns {boolean} True if important
   */
  isImportantSection(text) {
    const lower = text.toLowerCase();

    if (this.importantMarkers.some((marker) => lower.includes(marker))) {
      return true;
    }

    // Code fences, or an identifier followed by = : ( {
    if (text.includes('```') || /^\s*[a-z_$][a-z0-9_$]*\s*[=:({]/i.test(text)) {
      return true;
    }

    // Numbered items ("1." / "10)") and bullets ("-", "*", "•")
    return /^\s*(?:\d+[.)]|[-*•])\s+/.test(text);
  }

  /**
   * Score sentence importance for extractive summarization.
   *
   * @param {string} sentence - Sentence to score
   * @param {Array<string>} allSentences - All sentences for context
   * @param {number} [position] - Index of `sentence` within `allSentences`.
   *   Defaults to the first occurrence; pass it explicitly when the list may
   *   contain duplicate sentences.
   * @returns {number} Importance score (higher is more important)
   */
  scoreSentenceImportance(sentence, allSentences, position = allSentences.indexOf(sentence)) {
    let score = 0;

    // Length factor (prefer medium-length sentences)
    const wordCount = sentence.split(/\s+/).filter((word) => word.length > 0).length;

    if (wordCount >= 5 && wordCount <= 25) {
      score += 0.3;
    } else if (wordCount > 25) {
      score += 0.1;
    }

    // Position factor (first and last sentences often important)
    if (position === 0 || position === allSentences.length - 1) {
      score += 0.2;
    }

    // Important markers (counted once)
    const lower = sentence.toLowerCase();
    if (this.importantMarkers.some((marker) => lower.includes(marker))) {
      score += 0.3;
    }

    // Named entities (capitalized words), at most three counted
    const entities = sentence.match(/\b[A-Z][a-z]+/g);
    if (entities && entities.length > 0) {
      score += 0.1 * Math.min(entities.length, 3);
    }

    // Numbers and data
    if (/\b\d+/.test(sentence)) {
      score += 0.1;
    }

    // Call-like technical terms: identifier followed by "("
    if (/\b[a-z_$][a-z0-9_$]*\s*\(/i.test(sentence)) {
      score += 0.15;
    }

    // Code snippets
    if (sentence.includes('```') || /[{}[\]();]/.test(sentence)) {
      score += 0.2;
    }

    return score;
  }

  /**
   * Remove repeated structural lines (e.g. repeated headers/footers).
   *
   * A line longer than 10 characters (after trimming, case-insensitive) that
   * occurs three or more times is kept only at its first occurrence.
   *
   * @param {string} text - Text to process
   * @returns {string} Text with repeated structures removed
   */
  removeRepeatedStructures(text) {
    const lines = text.split('\n');
    const normalize = (line) => line.trim().toLowerCase();

    // A Map, not a plain object: keys such as "constructor" would otherwise
    // collide with properties inherited from Object.prototype.
    const occurrences = new Map();

    for (const line of lines) {
      const key = normalize(line);

      if (key.length > 10) { // Only count substantial lines
        occurrences.set(key, (occurrences.get(key) || 0) + 1);
      }
    }

    const alreadyEmitted = new Set();

    return lines
      .filter((line) => {
        const key = normalize(line);

        if ((occurrences.get(key) || 0) < 3) {
          return true; // Not a highly repeated line
        }

        if (alreadyEmitted.has(key)) {
          return false; // Skip subsequent occurrences
        }

        alreadyEmitted.add(key);
        return true;
      })
      .join('\n');
  }
}

/**
 * Convenience factory: builds a ContextCompressor from the given arguments.
 * @param {object} storage - Storage instance passed to the constructor (optional)
 * @param {object} options - Configuration options passed to the constructor
 * @returns {ContextCompressor} Newly constructed compressor
 */
export function createCompressor(storage = null, options = {}) {
  const compressor = new ContextCompressor(storage, options);
  return compressor;
}
