import { read, utils } from 'xlsx';
import mammoth from 'mammoth';
import * as pdfjs from 'pdfjs-dist'
import { TextItem } from 'pdfjs-dist/types/src/display/api'

// Point PDF.js at the worker script on cdnjs that matches the installed pdfjs-dist version.
// The scheme is spelled out on purpose: a protocol-relative URL ("//host/...") inherits the
// scheme of the hosting page, so it is fetched over plain HTTP on http:// pages and cannot be
// resolved at all when the page is served from a file:// origin.
pdfjs.GlobalWorkerOptions.workerSrc = `https://cdnjs.cloudflare.com/ajax/libs/pdf.js/${pdfjs.version}/pdf.worker.min.js`

/**
 * Reads a user-supplied file and returns its content as a string.
 *
 * The format is chosen from the (lower-cased) filename extension first; when the extension is
 * not recognised, the browser-reported MIME type (`file.type`) is consulted for Word and Excel
 * documents.
 *
 * Output per format:
 * - `txt`, `md`, `markdown`: the raw file text.
 * - `docx`: the HTML produced by `mammoth.convertToHtml`.
 * - `pdf`, `rtf`, `html`/`htm`: whatever the dedicated reader function returns.
 * - `xlsx`, `xls`: the rows of the first worksheet, serialised as pretty-printed JSON.
 *
 * @param file - The file to read.
 * @returns The extracted content.
 * @throws Error with message `Unsupported file format` when neither the extension nor the MIME
 *   type matches a supported format; errors from the individual readers propagate unchanged.
 */
export const readFileContent = async (file: File): Promise<string> => {
  // Converts a Word document to HTML via mammoth.
  const readWordDocument = async (): Promise<string> => {
    const buffer = await file.arrayBuffer()
    const converted = await mammoth.convertToHtml({ arrayBuffer: buffer })
    return converted.value
  }

  // Serialises the rows of the first worksheet as indented JSON.
  const readSpreadsheet = async (): Promise<string> => {
    const workbook = read(await file.arrayBuffer())
    const firstSheetName = workbook.SheetNames[0]
    const firstSheet = firstSheetName === undefined ? undefined : workbook.Sheets[firstSheetName]
    // A workbook without any sheet has no rows to report.
    const rows = firstSheet ? utils.sheet_to_json(firstSheet) : []
    return JSON.stringify(rows, null, 2)
  }

  const extension = file.name.split('.').pop()?.toLowerCase()

  switch (extension) {
    case 'txt':
    case 'md':
    case 'markdown':
      return await file.text()

    case 'docx':
      return await readWordDocument()

    case 'pdf':
      return await readPDFContent(file)

    case 'rtf':
      return await readRTFContent(file)

    case 'html':
    case 'htm':
      return await readHTMLContent(file)

    case 'xlsx':
    case 'xls':
      return await readSpreadsheet()
  }

  // The extension was missing or unknown: fall back to the MIME type. These labels can never
  // equal a filename extension, so they have to be matched against `file.type` instead.
  switch (file.type) {
    case 'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
      return await readWordDocument()

    case 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet':
    case 'application/vnd.ms-excel':
      return await readSpreadsheet()

    default:
      throw new Error('Unsupported file format')
  }
};

/**
 * Extracts the text of every page of a PDF file using PDF.js.
 *
 * The text items of a page are joined with single spaces, pages are separated by a blank line,
 * and the final result is trimmed.
 *
 * @param file - The PDF file to read.
 * @returns The concatenated text of all pages.
 * @throws Error with a generic "Failed to read PDF file" message when loading or text
 *   extraction fails; the underlying error is logged to the console first.
 */
async function readPDFContent(file: File): Promise<string> {
  try {
    const bytes = new Uint8Array(await file.arrayBuffer())
    const loadingTask = pdfjs.getDocument(bytes)

    try {
      const pdf = await loadingTask.promise
      const pageTexts: string[] = []

      // Page numbers are 1-based in PDF.js. Pages are processed one at a time.
      for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
        const page = await pdf.getPage(pageNumber)
        const textContent = await page.getTextContent()
        const pageText = textContent.items
          // Only text items carry a `str`; narrow with a guard instead of an unchecked cast.
          .filter((item): item is TextItem => 'str' in item)
          .map((item) => item.str)
          .join(' ')
        pageTexts.push(pageText)
      }

      return pageTexts.join('\n\n').trim()
    } finally {
      // Release the document and its worker-side resources whether or not extraction succeeded.
      // A failure during cleanup must not mask the extraction result or the original error.
      await loadingTask.destroy().catch(() => undefined)
    }
  } catch (error) {
    console.error('Error reading PDF:', error)
    throw new Error('Failed to read PDF file. The file might be corrupted or password protected.')
  }
}

/**
 * Performs a basic RTF-to-text conversion by stripping RTF markup in a single pass.
 *
 * This is not a full RTF parser: the text inside groups such as font or colour tables is kept,
 * only the markup around it is removed. Handled tokens:
 * - control words (`\word`, `\word123`, `\word-123`) together with the single space that
 *   delimits them are removed;
 * - `\'hh` hex escapes are replaced by the character with that code (NOTE(review): the byte is
 *   treated as a code point; the document's declared code page is not consulted);
 * - `\{`, `\}` and `\\` become the literal character; a backslash before a line break keeps the
 *   line break; any other control symbol is removed;
 * - unescaped group braces are removed.
 *
 * @param file - The RTF file to read.
 * @returns The remaining text, trimmed.
 * @throws Error with message `Failed to read RTF file` when the file cannot be read.
 */
async function readRTFContent(file: File): Promise<string> {
  // Alternatives are tried in order: control word, hex escape (group 1), control symbol
  // (group 2), bare brace. The hex escape must precede the generic control-symbol branch.
  const rtfToken = /\\[a-zA-Z]+(?:-?[0-9]+)? ?|\\'([0-9a-fA-F]{2})|\\([^a-zA-Z])|[{}]/g
  // Control symbols whose second character is kept as literal text.
  const literalSymbols = ['{', '}', '\\', '\n', '\r']

  try {
    const text = await file.text()
    return text
      .replace(rtfToken, (_token: string, hex?: string, symbol?: string): string => {
        if (hex !== undefined) {
          return String.fromCharCode(parseInt(hex, 16))
        }
        if (symbol !== undefined) {
          return literalSymbols.includes(symbol) ? symbol : ''
        }
        // Control word or group brace: pure markup, nothing to emit.
        return ''
      })
      .trim()
  } catch {
    throw new Error('Failed to read RTF file')
  }
}

/**
 * Returns the text content of an HTML file's `<body>`.
 *
 * The markup is parsed with the browser's `DOMParser` as `text/html`, and the `textContent`
 * of the resulting document body is returned.
 *
 * @param file - The HTML file to read.
 * @returns The body's text content, or an empty string when there is none.
 * @throws Error with message `Failed to read HTML file` when reading or parsing fails.
 */
async function readHTMLContent(file: File): Promise<string> {
  try {
    const markup = await file.text()
    const parsedDocument = new DOMParser().parseFromString(markup, 'text/html')
    const bodyText = parsedDocument.body.textContent
    // Null and empty text content both collapse to an empty string.
    return bodyText ? bodyText : ''
  } catch (error) {
    throw new Error('Failed to read HTML file')
  }
}