import subprocess
import sys
import nltk
from nltk.corpus import wordnet

# Packages that install_packages() tries to import and, when the import
# fails, installs with pip. Each entry is used both as the import name and
# as the pip package name, so the two must be identical.
REQUIRED_PACKAGES = ["nltk"]
def install_packages():
    """
    Try to import every entry of REQUIRED_PACKAGES and pip-install the ones
    that cannot be imported.

    Installation runs ``pip`` through the current interpreter
    (``sys.executable -m pip install <name>``). A failed installation is
    reported with ``print`` and is not re-raised, so the loop always moves
    on to the next package.
    """
    for pkg_name in REQUIRED_PACKAGES:
        try:
            __import__(pkg_name)
        except ImportError:
            importable = False
        else:
            importable = True

        if importable:
            continue

        print(f"Package '{pkg_name}' is missing. Attempting to install...")
        pip_command = [sys.executable, "-m", "pip", "install", pkg_name]
        try:
            subprocess.check_call(pip_command)
        except Exception as err:
            # Best effort: report the problem and keep going.
            print(f"Failed to install package '{pkg_name}': {err}")
# Try to install any package from REQUIRED_PACKAGES that cannot be imported.
install_packages()

# Download the NLTK resources listed below. These are top-level statements,
# so the download calls are issued every time this module is run or imported.
for _resource in (
    'wordnet',
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
):
    nltk.download(_resource)

# Penn Treebank tags grouped by the WordNet part of speech they map to.
# Tags outside these groups (for example proper nouns, NNP/NNPS) have no
# mapping and are therefore never looked up in WordNet.
_NOUN_TAGS = frozenset({'NN', 'NNS'})
_VERB_TAGS = frozenset({'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ'})
_ADJ_TAGS = frozenset({'JJ', 'JJR', 'JJS'})
_ADV_TAGS = frozenset({'RB', 'RBR', 'RBS'})


def _wordnet_pos(tag):
    """Return the WordNet POS constant for a Penn Treebank tag, or None if unmapped."""
    if tag in _NOUN_TAGS:
        return wordnet.NOUN
    if tag in _VERB_TAGS:
        return wordnet.VERB
    if tag in _ADJ_TAGS:
        return wordnet.ADJ
    if tag in _ADV_TAGS:
        return wordnet.ADV
    return None


def _in_wordnet(word, tag):
    """Return True if *word* has at least one synset under the POS mapped from *tag*."""
    wn_pos = _wordnet_pos(tag)
    if wn_pos is None:
        return False
    return bool(wordnet.synsets(word, pos=wn_pos))


def parse_word_pos(tagged_words, set_stop_word=None, set_negation_word=None, UNK='x', return_string=False, join_negation=False):
    """
    Filter a list of (WORD, POSTAG) tuples, replacing unwanted words with UNK.

    Each word is classified by the first rule that applies:

    1. The word is in set_negation_word: it is kept (and possibly joined with
       the following word, see below).
    2. The word is in set_stop_word: it becomes (UNK, POS).
    3. The POS tag maps to a WordNet part of speech and the word has at least
       one synset for it: it is kept.
    4. Anything else becomes (UNK, POS).

    If join_negation=True and the word following a negation word would itself
    be kept (it is found in WordNet and would not be replaced by UNK as a
    stopword), both are merged into a single ``negation_word`` token that
    carries the POS tag of the second word, and the second word is consumed.
    A following stopword is never joined, because on its own it would be
    emitted as UNK; it is processed as a normal token instead.

    If return_string=True, only the words are returned, joined by single
    spaces.

    :param tagged_words: Sequence of (word, postag) tuples; must support
        ``len()`` and integer indexing.
    :param set_stop_word: Set of words to be treated as stopwords (replaced with UNK).
        Defaults to an empty set.
    :param set_negation_word: Set of words considered negations (retained in the output).
        Defaults to an empty set.
    :param UNK: Token to replace unknown words or stopwords (default='x').
    :param return_string: If True, returns a string of words instead of tuples.
    :param join_negation: If True, joins negation words with the next valid word.
    :return: List of (word or UNK, POS) tuples OR a space-separated string of words.
    """
    if set_stop_word is None:
        set_stop_word = set()
    if set_negation_word is None:
        set_negation_word = set()

    parsed_words = []
    total = len(tagged_words)
    i = 0

    while i < total:
        word, pos = tagged_words[i]

        if word in set_negation_word:
            if join_negation and i + 1 < total:
                next_word, next_pos = tagged_words[i + 1]
                # A stopword would be emitted as UNK on its own, so it must
                # not be glued to the negation. Negation words win over
                # stopwords in the main classification, hence the exclusion.
                next_is_stopword = (
                    next_word in set_stop_word
                    and next_word not in set_negation_word
                )
                if not next_is_stopword and _in_wordnet(next_word, next_pos):
                    parsed_words.append((f"{word}_{next_word}", next_pos))
                    i += 2  # The next word has been consumed by the join.
                    continue

            # Not joining, or the next word is not a valid partner.
            parsed_words.append((word, pos))
        elif word in set_stop_word or not _in_wordnet(word, pos):
            # Stopword, unmapped POS tag, or no synset for the mapped POS.
            parsed_words.append((UNK, pos))
        else:
            parsed_words.append((word, pos))

        i += 1

    if return_string:
        return " ".join(token for token, _ in parsed_words)

    return parsed_words