/**
 * A single selectable entry inside a group of summarization methods.
 *
 * NOTE(review): the name suggests each entry is rendered as a checkbox —
 * confirm against the consuming component.
 */
interface CheckboxItem {
  /** Human-readable name of the entry. */
  label: string;
  /** Identifier of the entry (snake_case strings in this file's data). */
  value: string;
  /** Longer explanatory text describing the entry. */
  description: string;
}
/**
 * A named group of selectable summarization methods.
 */
export interface ILanguageModelOption {
  /** Name of the group (e.g. a category heading). */
  name: string;
  /** Entries belonging to the group; may be omitted. */
  items?: CheckboxItem[];
}

/**
 * Generic dictionary whose keys and values are both strings.
 *
 * Because of the open index signature, any string key type-checks on
 * lookups, including keys that are not actually present.
 */
export interface IStringObject {
  [key: string]: string;
}

/**
 * Summarization methods offered for selection, grouped by category.
 *
 * Each item's `value` is the method identifier; the same identifiers are used
 * as keys of `languageModels` in this file. The `description` strings are
 * user-facing prose.
 */
export const languageModelsOptions: ILanguageModelOption[] = [
  {
    name: 'Heuristic Algorithms',
    items: [
      {
        label: 'LexRank',
        value: 'heuristic_lex_rank',
        description:
          "It is a graph-based method for text summarization. It represents the sentences of a document as nodes in a graph. The edges between these nodes are based on the similarity between sentences, calculated using a measure like cosine similarity. It then uses a method similar to Google's PageRank algorithm to determine the importance of each sentence. The most important sentences are used in the summary.",
      },
      {
        label: 'Luhn',
        value: 'heuristic_luhn',
        description:
          'The Luhn algorithm is an older, heuristic-based method. It relies on the frequency of words to identify significant sentences. It assumes that the frequency of certain words in a sentence can determine its importance. It also uses a fixed-size window to find clusters of frequent words. Luhn is like looking for hotspots in a conversation. It picks sentences that have commonly used important words, thinking these sentences are likely to be key points in the text.',
      },
      {
        label: 'LSA (Latent Semantic Analysis)',
        value: 'heuristic_lsa',
        description:
          'LSA is an advanced version of Luhn, utilizing singular value decomposition (SVD) on a term-sentence matrix to reduce its dimensionality. This process helps in identifying patterns and relationships between terms and sentences, allowing the algorithm to determine the main topics in the text and select sentences that best represent these topics.',
      },
      {
        label: 'TextRank',
        value: 'heuristic_text_rank',
        description:
          'Similar to LexRank, TextRank is also a graph-based method. It builds a graph where nodes are sentences, and edges represent the similarity between sentences. It uses a weighted graph and a voting or recommendation system where each sentence adds to the importance of sentences similar to it. TextRank is like asking each sentence in a document to vote on the importance of other sentences. Sentences that get more votes are considered more important and are likely to be included in the summary.',
      },
    ],
  },
  {
    name: 'LLM Models',
    items: [
      {
        // NOTE(review): the label says "GPT" while the identifier is
        // `llm_mistral`; both are kept as-is — confirm which model is served.
        label: 'GPT',
        value: 'llm_mistral',
        description:
          'GPT is a language model that uses deep learning to generate human-like text. It is trained on a diverse range of internet text and can perform a variety of natural language processing tasks, including text summarization. GPT is known for its ability to generate coherent and contextually relevant text, making it a powerful tool for summarizing long documents or articles.',
      },
    ],
  },
  {
    name: 'Transformer Models',
    items: [
      {
        label: 'Bert-based extractive summarizer',
        value: 'transformers_bert',
        description:
          'This method uses a pre-trained BERT language model. BERT (Bidirectional Encoder Representations from Transformers) analyzes text in both directions (left to right and right to left) to understand the context better. It analyzes the text to find which sentences are most representative of the overall content, based on factors like how often certain words are used and how sentences relate to each other within the context of the text. The pre-trained BERT model has learned to extract the important information.',
      },
      {
        label: 'Falconsai (Flan-T5 based)',
        value: 'transformers_flan_t5',
        description:
          'This method also uses a pre-trained Flan-T5 language model. Falconsai is a fine-tuned version of the Flan-T5 (Text-to-Text Transfer Transformer) model specifically trained for the summarization task. Like BERT, it also helps to generate the extractive summary.',
      },
    ],
  },
  {
    name: 'Naive Algorithms',
    items: [
      {
        label: 'NLTK',
        value: 'naive_nltk',
        description:
          'The Natural Language Toolkit (NLTK) is an open-source Python package for natural language processing. It provides tools for various natural language processing tasks, including tokenization, part-of-speech tagging, and parsing, which can be used for text summarization through custom algorithms.',
      },
    ],
  },
];

/**
 * Short display name for each summarization method identifier.
 *
 * Listed as ordered `[identifier, displayName]` pairs; the resulting object
 * keeps the same key order. The identifiers match the `value` fields in
 * `languageModelsOptions`, plus `existing_summary`, which has no entry there.
 */
export const languageModels: IStringObject = Object.fromEntries<string>([
  ['heuristic_lex_rank', 'LexRank'],
  ['heuristic_luhn', 'Luhn'],
  ['heuristic_lsa', 'LSA'],
  ['heuristic_text_rank', 'TextRank'],
  ['llm_mistral', 'GPT'],
  ['transformers_bert', 'Bert-based extractive summarizer'],
  ['transformers_flan_t5', 'Falconsai (Flan-T5 based)'],
  ['naive_nltk', 'NLTK'],
  ['existing_summary', 'Existing Summary'],
]);

/**
 * Human-readable name for each summary metric key.
 *
 * Listed as ordered `[metricKey, displayName]` pairs; the resulting object
 * keeps the same key order.
 */
export const SummaryMetrics: IStringObject = Object.fromEntries<string>([
  ['readability_score', 'Readability Score'],
  ['topic_coherence', 'Topic Coherence'],
  ['compression_ratio', 'Compression Ratio'],
  ['sentence_similarity', 'Sentence Similarity'],
]);

/**
 * Step titles, in order.
 *
 * NOTE(review): presumably consumed by a stepper UI component — confirm at the call site.
 */
export const stepperSteps: string[] = ['Provide input', 'Summarization methods', 'Outputs'];
