import { pdfjs } from 'react-pdf';

// Tell pdf.js where to load its worker script from: the cdnjs-hosted build of
// pdf.js 3.11.174. The URL is protocol-relative (it starts with `//`), so it is
// resolved against the scheme of the page that loads this module.
// NOTE(review): presumably this version has to match the pdf.js build bundled
// with react-pdf — confirm when upgrading either one.
pdfjs.GlobalWorkerOptions.workerSrc = `//cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.worker.js`;

// Options object handed to `page.getTextContent(...)` in `getDocumentText`.
// NOTE(review): the per-flag notes below are derived from the flag names and
// values only; verify against the getTextContent parameters of the installed
// pdf.js version, which may not recognise every flag listed here.
const pdfDocumentOptions: any = {
    disableCombineTextItems: true, // Do not combine adjacent text items
    includeBOM: false,              // Do not include the Byte Order Mark character
    normalizeWhitespace: true,     // Normalize whitespace characters
    disableNewlines: false,        // Leave newline characters enabled
    disableTrimText: true,        // Disable trimming of whitespace
};

/**
 * Loads the PDF at `resultString`, extracts the text of one page, groups it
 * into lines and sentences, and merges the result into caller state through
 * `setNewTextFormatted`.
 *
 * The work is fire-and-forget: the function returns immediately and reports
 * its result only through `setNewTextFormatted`. Any failure (document load,
 * page lookup, text extraction or formatting) is logged to the console and
 * never thrown to the caller.
 *
 * @param resultString URL of the PDF document to load.
 * @param page Page number passed to `pdfDocument.getPage`; it is also stamped
 *   on every extracted sentence fragment.
 * @param setNewTextFormatted State updater invoked with a function that merges
 *   the new sentence map into the previous value.
 */
function getDocumentText(resultString: string, page: number, setNewTextFormatted: any) {
    const extractPageText = async () => {
        const pdfDocument = await pdfjs.getDocument({ url: resultString }).promise;
        try {
            const pdfPage = await pdfDocument.getPage(page);
            const textContent = await pdfPage.getTextContent(pdfDocumentOptions);
            formatText(splitText(textContent), setNewTextFormatted, page);
        } finally {
            // This document was loaded only for this extraction; release it so
            // repeated calls do not accumulate loaded documents.
            await pdfDocument.destroy();
        }
    };

    // One catch covers the whole pipeline, including a failed document load,
    // which previously surfaced as an unhandled promise rejection.
    void extractPageText().catch((error: unknown) => {
        console.error('Error extracting text from page:', error);
    });
}

  /**
   * Appends `item` to `array` unless the item's `str` is exactly the empty
   * string. Items whose `str` is whitespace, or that have no `str` at all,
   * are still appended.
   */
  const itemPush = (item: any, array: any) => {
    const hasEmptyText = item.str === '';
    if (hasEmptyText) {
      return;
    }
    array.push(item);
  };

  /**
   * Groups the text items of a page into lines.
   *
   * Consecutive items that share the same baseline (`transform[5]`) are merged
   * into one line item: a shallow copy of the first item of the line whose
   * `str` is the parts joined with a single space and whose `width` is the sum
   * of the parts' widths. Items with an empty `str` contribute no text, but a
   * baseline change on such an item still ends the current line.
   *
   * Every line is flushed exactly once, on a baseline change or at the end of
   * input. As a result a page with a single item is returned rather than
   * dropped, the last item stays with the line it belongs to, and a trailing
   * empty item no longer causes a crash.
   *
   * @param textContent Object with an `items` array; each text item carries
   *   `str`, `width` and `transform`. Missing `items`, and entries without a
   *   string `str` or without `transform`, are ignored.
   * @returns One merged item per line, in reading order. Input items are not mutated.
   */
  const splitText = (textContent: any) => {
    const lines: any[] = [];
    let current: any[] = [];

    // Emit the line collected so far (if any) and start a fresh one.
    const flushCurrentLine = () => {
      if (current.length === 0) {
        return;
      }
      const merged = { ...current[0] };
      merged.str = current.map((part) => part.str).join(' ');
      merged.width = current.reduce((total, part) => total + part.width, 0);
      lines.push(merged);
      current = [];
    };

    for (const item of textContent?.items ?? []) {
      // Skip entries that are not positioned text; they cannot be placed on a line.
      if (typeof item?.str !== 'string' || item.transform == null) {
        continue;
      }

      const last = current[current.length - 1];
      if (last !== undefined && item.transform[5] !== last.transform[5]) {
        flushCurrentLine();
      }

      // Empty strings add nothing to the line text (same filter as itemPush).
      if (item.str !== '') {
        current.push(item);
      }
    }

    // The final line has no following baseline change to trigger a flush.
    flushCurrentLine();

    return lines;
  };

  /**
   * Splits one text item at every sentence boundary and appends the pieces to
   * `periodSplitText` (via `itemPush`, so empty pieces are dropped).
   *
   * A boundary is the position right after `.`, `!` or `?`, unless the
   * punctuation directly follows "Mr" or "Mrs". Each piece cut off the front is
   * a JSON deep copy of the item it was cut from, with its `str` set to the
   * piece and its `width` scaled by the piece's share of the characters. The
   * remainder is another deep copy whose `transform[4]` is advanced by the
   * width already consumed. An item without any boundary is appended as-is.
   *
   * @param periodSplitText Output array receiving the pieces in order.
   * @param item Text item with `str`, `width` and `transform`.
   */
  const recursion = (periodSplitText: any[], item: any) => {
    const sentenceBoundary = /(?<=(?<!Mr|Mrs)[\.\!\?])/g;
    let pending = item;

    for (;;) {
      // Only the first boundary matters; the remainder is re-scanned next round.
      const [firstBoundary] = pending.str.matchAll(sentenceBoundary);
      if (firstBoundary === undefined) {
        itemPush(pending, periodSplitText);
        return;
      }

      const cut = firstBoundary.index;
      // Width is apportioned by character count (whitespace included).
      const headWidth = pending.width * (cut / pending.str.length);

      // Piece up to and including the punctuation.
      const head = JSON.parse(JSON.stringify(pending));
      head.str = pending.str.substring(0, cut);
      head.width = headWidth;
      itemPush(head, periodSplitText);

      // Everything after the punctuation, shifted right by the consumed width.
      const tail = JSON.parse(JSON.stringify(pending));
      tail.str = pending.str.substring(cut);
      tail.width = pending.width - headWidth;
      tail.transform[4] += headWidth;

      pending = tail;
    }
  };

  /**
   * Turns line items into a sentence map and merges it into caller state.
   *
   * Each line is first cut at sentence boundaries with `recursion`. The
   * resulting fragments are then collected until one contains a boundary (or
   * the last fragment is reached); that group forms one sentence. Every
   * fragment is stamped with `page`. The map key is the group's strings joined
   * with a space and stripped of ASCII punctuation; the value is the array of
   * fragments. Two sentences that produce the same key overwrite each other,
   * the later one winning.
   *
   * @param text Line items (each with `str`, `width`, `transform`).
   * @param setNewTextFormatted State updater; called once with a function that
   *   spreads the previous value and then the new sentence map.
   * @param page Page number recorded on each fragment as `page`.
   */
  const formatText = (text: any[], setNewTextFormatted: any, page: number) => {
    const punctuationRegex = /[!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~]/g;
    const pattern = /(?<=(?<!Mr|Mrs)[.!?])/g;

    // Step 1: cut every line at its sentence boundaries.
    const fragments: any[] = [];
    for (const line of text) {
      recursion(fragments, line);
    }

    // Step 2: gather fragments until one closes a sentence.
    const sentences: { [key: string]: any[] } = {};
    let pending: any[] = [];
    const lastIndex = fragments.length - 1;

    for (const [index, fragment] of fragments.entries()) {
      const boundaries = [...fragment.str.matchAll(pattern)];
      fragment['page'] = page;
      pending.push(fragment);

      const closesSentence = boundaries.length > 0 || index === lastIndex;
      if (!closesSentence) {
        continue;
      }

      const joined = pending.map((part) => part.str).join(' ');
      sentences[joined.replace(punctuationRegex, '')] = pending;
      pending = [];
    }

    setNewTextFormatted((prevValue: any) => ({ ...prevValue, ...sentences }));
  };

  export default getDocumentText;