import { getDocument } from 'pdfjs-dist'
import OpenAI from 'openai'
import { zodResponseFormat } from 'openai/helpers/zod'
import { z } from 'zod'
import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters'
import { AttributeStructure } from '@core/domain/models/data-extraction-plan-attribute'
import { useStorage } from '@vueuse/core'

/** A single answer produced for one text chunk, kept together with that chunk. */
type ChunkAnswer = {
  answer: string
  originalText: string
}

/** All per-chunk answers collected for one question id. */
type ExtractionEntry = {
  question: string
  answers: ChunkAnswer[]
  id: string
}

/** A question/answer pair as returned by the model. */
type QuestionAnswerPair = {
  question: string
  answer: string
  id: string
}

/**
 * Reads the text of every page of a PDF.
 *
 * Items within a page are joined with a space, and pages are joined with a
 * space as well so the last word of one page is not glued to the first word
 * of the next one.
 *
 * The temporary object URL and the pdf.js document are released once the text
 * has been read, even when reading fails.
 */
async function extractPdfText(pdf: Blob): Promise<string> {
  const objectUrl = URL.createObjectURL(pdf)
  try {
    const pdfDocument = await getDocument(objectUrl).promise
    try {
      const pageTexts: string[] = []
      // Pages are read sequentially, in document order.
      for (
        let pageNumber = 1;
        pageNumber <= pdfDocument.numPages;
        pageNumber++
      ) {
        const page = await pdfDocument.getPage(pageNumber)
        const textContent = await page.getTextContent()
        pageTexts.push(
          textContent.items
            // Items without a `str` field contribute an empty string.
            .map((item) => ('str' in item ? (item.str ?? '') : ''))
            .join(' '),
        )
      }
      return pageTexts.join(' ')
    } finally {
      await pdfDocument.destroy()
    }
  } finally {
    URL.revokeObjectURL(objectUrl)
  }
}

/**
 * Groups flat per-chunk answers by question id.
 *
 * The question text of the first answer seen for an id is kept. Ids come from
 * the model, so a prototype-less object is used as the lookup table: ids such
 * as "constructor" or "__proto__" are then ordinary keys instead of colliding
 * with `Object.prototype`.
 */
function groupAnswersById(
  answers: readonly (QuestionAnswerPair & { originalText: string })[],
): ExtractionEntry[] {
  const grouped: Record<string, ExtractionEntry> = Object.create(null)
  for (const { id, question, answer, originalText } of answers) {
    const existing = grouped[id]
    if (existing) {
      existing.answers.push({ answer, originalText })
    } else {
      grouped[id] = { id, question, answers: [{ answer, originalText }] }
    }
  }
  return Object.values(grouped)
}

/**
 * Composable that answers data-extraction questions about a PDF with the help
 * of a chat-completion model reached through the `/ai-assistant` path of the
 * current origin.
 *
 * Prompts, temperature, presence penalty and the latest per-chunk extraction
 * are kept in `useStorage` refs; the extraction key is specific to `studyId`.
 *
 * @param studyId - Identifier used to build the storage key of the extraction.
 * @returns The `generate` function plus the storage-backed refs.
 */
export function useDataExtractionAssistant(studyId: number) {
  const model = 'gpt-4o'
  const extraction = useStorage<ExtractionEntry[]>(
    'bulk-extraction-' + studyId,
    [],
  )

  const systemPrompt = useStorage<string>(
    'system-prompt',
    `You are tasked with extracting information from the provided document to answer specific questions.
Rely solely on the document's content; do not use external sources.
If the document does not contain an answer, respond with a single dash "-".`,
  )

  const summarySystemPrompt = useStorage<string>(
    'summary-system-prompt',
    `You are tasked with summarizing the following question-answer pairs extracted from a document.
Provide concise summaries for each question based solely on the provided answers.
If no answers are available, respond with a dash "-".`,
  )

  const modelTemperature = useStorage<number>('model-temperature', 0.2)
  const modelPresencePenalty = useStorage<number>('model-presence-penalty', 0)

  const openai = new OpenAI({
    baseURL: location.protocol + '//' + location.host + '/ai-assistant',
    dangerouslyAllowBrowser: true,
    apiKey: '',
  })

  const DataExtraction = z.object({
    questionAnswerPairs: z.array(
      z.object({
        question: z.string(),
        answer: z.string(),
        id: z.string(),
      }),
    ),
  })

  /**
   * Sends one structured-output request and returns the parsed pairs.
   *
   * @returns The parsed question/answer pairs, or `undefined` when the
   * response has no choice or no parsed content.
   */
  async function requestPairs(
    developerPrompt: string,
    userPrompt: string,
  ): Promise<QuestionAnswerPair[] | undefined> {
    const completion = await openai.beta.chat.completions.parse({
      model,
      temperature: modelTemperature.value,
      presence_penalty: modelPresencePenalty.value,
      messages: [
        {
          role: 'developer',
          content: developerPrompt,
        },
        {
          role: 'user',
          content: userPrompt,
        },
      ],
      response_format: zodResponseFormat(DataExtraction, 'data_extraction'),
    })
    return completion.choices[0]?.message.parsed?.questionAnswerPairs
  }

  /**
   * Extracts answers for `attributes` from `pdf`.
   *
   * The PDF text is split into overlapping chunks, every chunk is queried in
   * parallel, the per-chunk answers are grouped by id and stored in
   * `extraction`, and finally a summary request condenses them into one
   * answer per question.
   *
   * @param pdf - The PDF document to read.
   * @param attributes - Questions to answer; sent to the model as JSON.
   * @returns The summarized pairs, an empty array when no answer was
   * extracted at all, or `undefined` when the summary response could not be
   * parsed.
   */
  async function generate(
    pdf: Blob,
    attributes: AttributeStructure[],
  ): Promise<QuestionAnswerPair[] | undefined> {
    const pdfContent = await extractPdfText(pdf)

    const textSplitter = new RecursiveCharacterTextSplitter({
      chunkSize: 20000,
      chunkOverlap: 1000,
    })
    const chunks = await textSplitter.splitText(pdfContent)
    const questions = JSON.stringify(attributes)

    const answersPerChunk = await Promise.all(
      chunks.map(async (chunk) => {
        const pairs = await requestPairs(
          systemPrompt.value,
          questions + ' Document: ' + chunk,
        )
        // A chunk without a parsed response simply contributes no answers.
        return (pairs ?? []).map(({ question, answer, id }) => ({
          question,
          answer,
          id,
          originalText: chunk,
        }))
      }),
    )

    extraction.value = groupAnswersById(answersPerChunk.flat())

    // Nothing was extracted, so there is nothing to summarize.
    if (extraction.value.length === 0) {
      return []
    }

    const summaryPromptData = extraction.value.map((entry) => ({
      question: entry.question,
      answers: entry.answers.map((a) => a.answer),
      id: entry.id,
    }))

    const summaryPairs = await requestPairs(
      summarySystemPrompt.value,
      JSON.stringify(summaryPromptData, null, 2),
    )
    return summaryPairs?.map(({ question, answer, id }) => ({
      question,
      answer,
      id,
    }))
  }

  return {
    generate,
    extraction,
    systemPrompt,
    modelTemperature,
    modelPresencePenalty,
    summarySystemPrompt,
  }
}
