import os
import sys
import subprocess
import pyzipper
import requests
import re
from pathlib import Path
import pandas as pd

# Names checked by install_packages(): each entry is imported and, when the
# import fails, passed unchanged to ``pip install``.
# NOTE(review): "os", "re" and "subprocess" are standard-library modules, so
# they can never take the install path — confirm whether they belong here.
REQUIRED_PACKAGES = ["os", "pyzipper", "requests", "re", "subprocess","pandas"]
def install_packages():
    """Pip-install every entry of REQUIRED_PACKAGES that cannot be imported.

    Each name is imported first. Only when that import raises ImportError is
    ``<current interpreter> -m pip install <name>`` run. A failed installation
    is reported on stdout and the loop moves on to the next name.
    """
    for name in REQUIRED_PACKAGES:
        try:
            __import__(name)
        except ImportError:
            pass
        else:
            # Already importable: nothing to install.
            continue

        print(f"Package '{name}' is missing. Attempting to install...")
        pip_command = [sys.executable, "-m", "pip", "install", name]
        try:
            subprocess.check_call(pip_command)
        except Exception as exc:
            print(f"Failed to install package '{name}': {exc}")

# Ensure required packages are installed (runs as a side effect of importing
# this module).
# NOTE(review): the third-party imports at the top of this file execute before
# this call, so a missing package raises ImportError there and this line is
# never reached — consider moving those imports below this call.
install_packages()

import os
import pyzipper

import os
import pyzipper
from datetime import datetime

def zip_files(source_path, zip_file_path=None, timestamp=False, password=None):
    """
    Compress a file or directory into a zip archive, with optional AES encryption.

    If zip_file_path is not provided, the archive is created next to the source
    (in its parent directory) and named after it.

    Parameters:
        source_path (str): Path to the file or directory to zip.
        zip_file_path (str): Optional full path to the resulting zip file.
        timestamp (bool): If True, appends _{yyyymmddThhmm} to the generated zip
            filename. Only used when zip_file_path is not provided; an explicit
            zip_file_path is used as given.
        password (str): Optional password. When given, entries are AES-encrypted
            with the UTF-8 encoded password; otherwise the archive is unencrypted.

    Returns:
        str or None: Absolute path to the created zip file, or None on failure
        (the error is printed, not raised).

    Notes:
        For a directory source only files are stored (paths relative to the
        source directory); empty directories are not written to the archive.
    """
    try:
        # Resolve source path
        source_path = os.path.abspath(source_path)
        if not os.path.exists(source_path):
            raise FileNotFoundError(f"Source path does not exist: {source_path}")
        print(f"[INFO] Source path: {source_path}")

        # Determine the output path
        if zip_file_path is None:
            source_name = os.path.basename(os.path.normpath(source_path))
            name_suffix = f"_{datetime.now().strftime('%Y%m%dT%H%M')}" if timestamp else ""
            zip_file_path = os.path.join(
                os.path.dirname(source_path), f"{source_name}{name_suffix}.zip"
            )
        else:
            zip_file_path = os.path.abspath(zip_file_path)

        print(f"[INFO] Output zip path: {zip_file_path}")

        # Opening the archive in 'w' mode truncates it, so writing onto the
        # source file itself would destroy the data before it is read.
        archive_key = os.path.normcase(zip_file_path)
        if os.path.isfile(source_path) and os.path.normcase(source_path) == archive_key:
            raise ValueError("Output zip path must differ from the source file.")

        # Ensure destination directory exists
        zip_dir = os.path.dirname(zip_file_path)
        if zip_dir:
            os.makedirs(zip_dir, exist_ok=True)

        # Create the zip file
        with pyzipper.AESZipFile(zip_file_path, 'w', compression=pyzipper.ZIP_DEFLATED) as zf:
            if password:
                zf.setpassword(password.encode('utf-8'))
                zf.encryption = pyzipper.WZ_AES

            if os.path.isdir(source_path):
                for root, _, files in os.walk(source_path):
                    for file in files:
                        file_path = os.path.join(root, file)
                        # When the archive lives inside the source directory the
                        # walk finds the half-written archive; never add it to itself.
                        if os.path.normcase(file_path) == archive_key:
                            continue
                        arcname = os.path.relpath(file_path, start=source_path)
                        print(f"[ZIP] Adding: {file_path} -> {arcname}")
                        zf.write(file_path, arcname)
            else:
                print(f"[ZIP] Adding file: {source_path}")
                zf.write(source_path, os.path.basename(source_path))

        print(f"[SUCCESS] Zip created at: {zip_file_path}")
        return zip_file_path

    except Exception as e:
        print(f"[ERROR] Failed to create zip: {e}")
        return None





# Function to create a password-protected zip file containing multiple CSV files
def zip_multiple_files(csv_paths, zip_file_path, password):
    """
    Create a password-protected (AES) zip file containing multiple CSV files.

    Every input is validated before the archive is opened, so an invalid path
    does not leave a partially written zip behind. Files are stored flat under
    their base name.

    Parameters:
        csv_paths (iterable of str or path-like): CSV files to add. Must be
            non-empty; every entry must exist and end with '.csv'
            (case-insensitive).
        zip_file_path (str): Path of the zip file to create. Its parent
            directory is created when missing.
        password (str): Non-empty password, UTF-8 encoded for encryption.

    Returns:
        The zip_file_path argument on success, or None on any error (the error
        is printed, not raised).
    """
    try:
        if not csv_paths:
            raise ValueError("The list of CSV paths is empty.")
        if not password:
            raise ValueError("Password cannot be empty.")

        # Normalise to plain strings once so Path objects work with .lower().
        csv_paths = [os.fspath(csv_path) for csv_path in csv_paths]

        # Validate everything up front, before any output is created.
        for csv_path in csv_paths:
            if not os.path.exists(csv_path):
                raise FileNotFoundError(f"File not found: {csv_path}")
            if not csv_path.lower().endswith('.csv'):
                raise ValueError(f"File is not a CSV: {csv_path}")

        # A bare file name has an empty dirname, which os.makedirs rejects.
        zip_dir = os.path.dirname(zip_file_path)
        if zip_dir:
            os.makedirs(zip_dir, exist_ok=True)

        with pyzipper.AESZipFile(zip_file_path, 'w', compression=pyzipper.ZIP_DEFLATED, encryption=pyzipper.WZ_AES) as zf:
            zf.setpassword(password.encode('utf-8'))
            for csv_path in csv_paths:
                zf.write(csv_path, os.path.basename(csv_path))

        # Display success message
        print(f"Successfully created zip file at '{zip_file_path}'.")
        return zip_file_path  # Return the zip file path on success

    except Exception as e:
        print(f"An error occurred: {str(e)}")
        return None  # Return None on error


# Function to unzip a password-protected or regular zip file
def unzip(zip_file_path, target_path=None, password=None):
    """
    Unzip a password-protected or regular zip file to a target directory.

    Parameters:
        zip_file_path (str): Path to the zip archive.
        target_path (str): Directory to extract into; created when missing.
            Defaults to the directory that contains the archive.
        password (str): Optional password for encrypted archives.

    Returns:
        - the member name (str) when the archive holds exactly one entry,
        - a list of member names when it holds several,
        - an empty list when the archive has no entries,
        - None on failure (the error is printed, not raised).
        Names are the archive's member names as reported by namelist(); they
        are not joined with target_path.
    """
    try:
        if not os.path.exists(zip_file_path):
            raise FileNotFoundError(f"Zip file not found: {zip_file_path}")

        if not target_path:
            # Resolve first: a bare file name has an empty dirname, which
            # os.makedirs rejects.
            target_path = os.path.dirname(os.path.abspath(zip_file_path))

        os.makedirs(target_path, exist_ok=True)

        with pyzipper.AESZipFile(zip_file_path, 'r') as zf:
            if password is not None:
                zf.setpassword(password.encode())
            zf.extractall(path=target_path)

            # Get a list of all extracted members
            extracted_files = zf.namelist()

        if not extracted_files:
            # Successful extraction of an empty archive: distinguishable from
            # the None returned on failure.
            print("Archive is empty; no files were extracted.")
            return []

        if len(extracted_files) == 1:
            print("File extracted successfully:", extracted_files[0])
            return extracted_files[0]  # Return single file name if only one file extracted

        print("Files extracted successfully:", extracted_files)
        return extracted_files  # Return list of file names if multiple files extracted

    except RuntimeError:
        print("ERROR: Incorrect password or failed to extract files. Please check the password and try again.")
        return None  # Return None on failure
    except Exception as e:
        print(f"ERROR: An unexpected error occurred: {str(e)}")
        return None  # Return None on other errors

# Function to download a file from Google Drive and save it with its actual name
def download_gdrive_file(url, save_folder="downloads"):
    """
    Download a file from a Google Drive URL and save it under its actual name.

    The file ID is taken from a ``/d/<id>`` path segment or an ``id=<id>``
    query parameter. The file name is read from the ``<title>`` of the file's
    view page; the download itself is delegated to the ``gdown`` command-line
    tool, which must be available on PATH.

    Parameters:
        url (str): Google Drive sharing URL.
        save_folder (str): Directory to save into; created when missing.

    Returns:
        str or None: Path of the downloaded file, or None on any error (the
        error is printed, not raised).
    """
    from html import unescape  # local import: only needed for the page title

    try:
        # Extract the file ID from the URL using regex
        match = re.search(r"(?:/d/|[?&]id=)([a-zA-Z0-9_-]+)", url)
        if not match:
            raise ValueError("Invalid Google Drive URL")
        file_id = match.group(1)

        # Generate the direct download URL
        download_url = f"https://drive.google.com/uc?id={file_id}&export=download"

        # Create the save folder if it doesn't exist
        os.makedirs(save_folder, exist_ok=True)

        # Fetch the HTML content to extract the file name. The timeout keeps a
        # stalled connection from blocking forever.
        with requests.Session() as session:
            response = session.get(
                f"https://drive.google.com/file/d/{file_id}/view",
                allow_redirects=True,
                timeout=30,
            )
        if response.status_code != 200:
            raise RuntimeError(f"Failed to fetch file metadata: HTTP {response.status_code}")

        # Extract file name from the HTML title
        file_name = ""
        file_name_match = re.search(r'<title>(.*?)</title>', response.text, re.DOTALL)
        if file_name_match:
            file_name = unescape(file_name_match.group(1)).replace(" - Google Drive", "").strip()
            # The title is remote content: strip separators and other characters
            # that are invalid in file names so it cannot escape save_folder.
            file_name = re.sub(r'[\\/:*?"<>|\x00-\x1f]', "_", file_name)
        if not file_name or file_name in (".", ".."):
            file_name = f"file_{file_id}"  # Fallback to a default file name

        # Full path to save the file
        save_path = os.path.join(save_folder, file_name)

        # Use gdown to download the file
        subprocess.run(["gdown", download_url, "-O", save_path], check=True)

        # Display success message
        print(f"Successfully downloaded file to '{save_path}'.")
        return save_path  # Return the path of the downloaded file
    except Exception as e:
        print(f"An error occurred: {str(e)}")
        return None  # Return None on error
    
def download_file(url, save_folder="downloads"):
    """
    Download a file over HTTP(S) and save it in save_folder.

    The local file name is the last path segment of the URL with any query
    string or fragment removed; when the URL has no such segment (for example
    it ends with '/'), "downloaded_file" is used.

    Parameters:
        url (str): URL of the file to download.
        save_folder (str): Directory to save into; created when missing.

    Returns:
        str or None: Path of the saved file, or None when the request or the
        write fails (the error is printed, not raised).
    """
    # Drop "#fragment" and "?query" so they do not end up in the file name.
    url_path = url.split("#", 1)[0].split("?", 1)[0]
    file_name = os.path.basename(url_path) or "downloaded_file"

    # Create the save folder if it doesn't exist
    os.makedirs(save_folder, exist_ok=True)

    # Full path to save the file
    save_path = os.path.join(save_folder, file_name)

    try:
        # Download the file; the timeout keeps a stalled server from blocking forever
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Raise an error for bad responses (4xx or 5xx)

        # Save the content to a local file
        with open(save_path, 'wb') as file:
            file.write(response.content)

        print("File downloaded successfully:", save_path)
        return save_path  # Return the saved file path after successful download

    except requests.exceptions.HTTPError as http_err:
        print("HTTP error occurred:", http_err)
        return None  # Return None if there was an HTTP error
    except Exception as err:
        print("An error occurred:", err)
        return None  # Return None for any other errors


def save_dfdataset(df, path, file_name, file_format, password):
    """
    Saves a DataFrame to a file and creates a password-protected zip archive.

    The data file is written to ``<path>/<file_name>.<file_format>`` and the
    archive to ``<path>/zip/<file_name>.zip``. Progress and errors are printed.
    When the format is unsupported, or saving raises ValueError, or zipping
    fails, the function stops at that step and does not report success.

    Args:
        df (pd.DataFrame): The DataFrame to save.
        path (str): Output folder for the dataset; created when missing.
        file_name (str): Name of the file to save (without extension).
        file_format (str): Format to save the file ('parquet' or 'csv').
        password (str): Password for the zip file.

    Returns:
        None
    """
    # Reject unknown formats before anything is created on disk.
    if file_format not in ("parquet", "csv"):
        print(f"Unsupported file format: {file_format}")
        return

    # Define directories and file paths
    output_dir = Path(path)
    output_dir.mkdir(parents=True, exist_ok=True)
    zip_dir = output_dir / "zip"
    zip_dir.mkdir(parents=True, exist_ok=True)

    file_path = output_dir / f"{file_name}.{file_format}"
    print(f"Dataset will be saved at: {file_path}")

    # Save the DataFrame
    try:
        if file_format == "parquet":
            df.to_parquet(file_path)
        else:
            df.to_csv(file_path, header=True, index=False)
    except ValueError as e:
        # Nothing usable was written, so there is nothing to zip.
        print(e)
        return
    print(f"File saved successfully as {file_format}.")

    # Create a zip archive with password protection
    zip_path = zip_dir / f"{file_name}.zip"
    print(f"Dataset will be zipped at: {zip_path}")
    # zip_files reports its own error and returns None on failure.
    if zip_files(file_path, zip_file_path=zip_path, password=password) is None:
        print("Zipping failed; the dataset file was saved but not archived.")
        return

    print("Save and zip completed successfully.")