import os
import re
import requests
import PyPDF2
import anthropic
import hashlib
from pathlib import Path
import bid_prep_automation as bpa
import bid_queries as bq

import argparse
import sys
import json

from google import genai
from google.genai import types

from document_extractor import extract_documents_text_compatible, create_document_extractor

# Configuration
COMPANY_INFO_DOC = "/Path/to/Company/Info/summary"
# Read API keys from the environment so secrets are never committed to source control
ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY", "")
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "")

# Initialize Gemini client
gemini_client = genai.Client(api_key=GEMINI_API_KEY)
# Initialize Claude client
claude_client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)

llm_model = "claude"  # set to "gemini" to use the Gemini client instead

class color:
    PURPLE = '\033[95m'
    CYAN = '\033[96m'
    DARKCYAN = '\033[36m'
    BLUE = '\033[94m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
    END = '\033[0m'
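
# Minimal usage sketch for the ANSI helpers above (rendering depends on the terminal):
#   print(f"{color.BOLD}{color.GREEN}Eligible{color.END}")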

def extract_links_from_pdf(pdf_path):
    """Extract embedded hyperlinks from PDF files, skipping specific display text"""
    # Define the display texts to skip
    skip_phrases = [
        'attached categories',
        'General Terms and Conditions',
        'Service Level Agreement'
    ]
    
    try:
        with open(pdf_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)

            # Initialize list to store all links
            links = []

            # Extract links from each page
            for page_num, page in enumerate(pdf_reader.pages):
                # Check if we're in a disclaimer section - simple text-based check
                page_text = page.extract_text().lower()
                if "disclaimer" in page_text:
                    continue  # Skip disclaimer sections

                # Access annotations (which include hyperlinks)
                if '/Annots' in page:
                    annotations = page['/Annots']
                    if annotations:
                        # Process each annotation
                        for annotation in annotations:
                            annotation_object = annotation.get_object()
                            # Check if it's a link annotation
                            if annotation_object.get('/Subtype') == '/Link':
                                # Extract the actual URL
                                if '/A' in annotation_object and '/URI' in annotation_object['/A']:
                                    uri = annotation_object['/A']['/URI']
                                    if isinstance(uri, str):
                                        # Check if this link should be skipped based on its display text
                                        skip_link = False
                                        
                                        # Try to get the display text from the annotation
                                        display_text = None
                                        
                                        # Look for text in various annotation properties
                                        if '/Contents' in annotation_object:
                                            display_text = annotation_object['/Contents']
                                        elif '/T' in annotation_object:
                                            display_text = annotation_object['/T']
                                        elif '/TU' in annotation_object:
                                            display_text = annotation_object['/TU']
                                        
                                        # If we still don't have display text, fall back to the full
                                        # page text. Note this is coarse: the link will be skipped if
                                        # a skip phrase appears anywhere on the page, not just near
                                        # the annotation rectangle.
                                        if not display_text and '/Rect' in annotation_object:
                                            try:
                                                display_text = page.extract_text()
                                            except Exception:
                                                pass
                                        
                                        # Check if display text contains any skip phrases
                                        if display_text and isinstance(display_text, str):
                                            display_text_lower = display_text.lower()
                                            for skip_phrase in skip_phrases:
                                                if skip_phrase.lower() in display_text_lower:
                                                    skip_link = True
                                                    break
                                        
                                        # Only add the link if it shouldn't be skipped
                                        if not skip_link:
                                            links.append({
                                                'url': uri,
                                                'page': page_num + 1
                                            })

                # Some PyPDF2/pypdf builds expose a get_links() helper; this is
                # guarded below so its absence on this version is harmless
                try:
                    page_links = page.get_links()
                    for link in page_links:
                        if hasattr(link, 'url') and link.url:
                            # Check if this link should be skipped
                            skip_link = False
                            
                            # Try to get display text for this link
                            if hasattr(link, 'text'):
                                display_text = link.text
                                if display_text and isinstance(display_text, str):
                                    display_text_lower = display_text.lower()
                                    for skip_phrase in skip_phrases:
                                        if skip_phrase.lower() in display_text_lower:
                                            skip_link = True
                                            break
                            
                            # Only add the link if it shouldn't be skipped
                            if not skip_link:
                                links.append({
                                    'url': link.url,
                                    'page': page_num + 1
                                })
                                
                except (AttributeError, TypeError):
                    # get_links method not available or failed
                    pass

            # Fall back to text extraction for visible URLs if no embedded links
            # were found. This must run inside the `with` block: PyPDF2 reads page
            # content lazily from the open file handle.
            if not links:
                for page_num, page in enumerate(pdf_reader.pages):
                    page_text = page.extract_text()

                    # Skip disclaimer sections
                    if "disclaimer" in page_text.lower():
                        continue

                    # Extract URLs from the visible text as a fallback;
                    # split into lines so surrounding context can be checked
                    lines = page_text.split('\n')

                    for i, line in enumerate(lines):
                        urls = re.findall(r'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+', line)
                        for url in urls:
                            # Check if the line or surrounding lines contain skip phrases
                            skip_url = False

                            # Check current line and previous/next lines for context
                            context_lines = []
                            if i > 0:
                                context_lines.append(lines[i-1])
                            context_lines.append(line)
                            if i < len(lines) - 1:
                                context_lines.append(lines[i+1])

                            context_text = ' '.join(context_lines).lower()

                            for skip_phrase in skip_phrases:
                                if skip_phrase.lower() in context_text:
                                    skip_url = True
                                    break

                            # Only add the URL if it shouldn't be skipped
                            if not skip_url:
                                links.append({
                                    'url': url,
                                    'page': page_num + 1,
                                    'note': 'Extracted from text (not embedded)'
                                })

        # Format the results
        formatted_links = []
        for link in links:
            formatted_links.append(f"{link['url']}")

        if not formatted_links:
            print("No links found in the PDF document")
            return []

        return formatted_links

    except Exception as e:
        print(f"Error extracting links from PDF: {str(e)}")
        return []
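
# Minimal usage sketch (the PDF path is a hypothetical placeholder):
#   for url in extract_links_from_pdf("tender_docs/rfp_main.pdf"):
#       print(url)
# An empty list is returned when nothing is found or extraction fails,
# so the caller can iterate the result unconditionally.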

def download_linked_documents(bid_dir, links):
    """
    Download additional documents from links found in tender documents
    
    Args:
        bid_dir (str): Directory to save downloaded files to
        links (list): List of links to download
        
    Returns:
        list: List of paths to downloaded files
    """
    downloaded_files = []
    
    for link in links:
        try:
            # Skip if not a web URL (simple check)
            if not link.startswith(('http://', 'https://')):
                continue
                
            # Extract filename from URL
            filename = os.path.basename(link)
            if not filename.endswith('.pdf'):
                filename = f"linked_doc_{len(downloaded_files)+1}.pdf"
                
            # Download the file, streaming in chunks so large files don't sit in memory
            response = requests.get(link, stream=True, timeout=60)
            if response.status_code == 200:
                file_path = os.path.join(bid_dir, filename)
                
                with open(file_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)
                    
                downloaded_files.append(file_path)
                print(f"Downloaded {filename} from {link}")
            else:
                print(f"Failed to download {link}, status code: {response.status_code}")
                
        except Exception as e:
            print(f"Error downloading {link}: {str(e)}")
    
    return downloaded_files
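
# Minimal end-to-end sketch, assuming a hypothetical bid directory:
#   bid_dir = "/tmp/bid_123"
#   links = extract_links_from_pdf(os.path.join(bid_dir, "rfp_main.pdf"))
#   files = download_linked_documents(bid_dir, links)
#   print(f"Fetched {len(files)} linked documents")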

def calculate_file_hash(file_path):
    """
    Calculate SHA-256 hash of file content
    
    Args:
        file_path (Path): Path to the file
        
    Returns:
        str: Hexadecimal hash of the file content
    """
    sha256_hash = hashlib.sha256()
    
    # Read and update hash in chunks to handle large files efficiently
    with open(file_path, "rb") as f:
        for byte_block in iter(lambda: f.read(4096), b""):
            sha256_hash.update(byte_block)
    
    return sha256_hash.hexdigest()
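
# A small convenience sketch built on calculate_file_hash: two files are
# duplicates exactly when their SHA-256 digests match, which is the same
# test remove_duplicate_pdfs applies below.
def files_are_identical(path_a, path_b):
    """Return True when two files have byte-identical content."""
    return calculate_file_hash(path_a) == calculate_file_hash(path_b)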

def remove_duplicate_pdfs(directory_path):
    """
    Remove duplicate PDF files from a directory based on content.
    For each set of identical files, the first one found is kept and others are removed.
    
    Args:
        directory_path (str): Path to directory containing PDF files
        
    Returns:
        tuple: (kept_files, removed_files) lists of filenames
    """
    print(f"🔍 Checking for duplicate PDFs in: {directory_path}")
    
    # Get all PDF files in the directory
    pdf_files = list(Path(directory_path).glob("*.pdf"))
    
    if not pdf_files:
        print("No PDF files found")
        return [], []
    
    # Dictionary to store hash -> [file_paths]
    hash_map = {}
    
    # Calculate hash for each file and group by hash
    for file_path in pdf_files:
        print(f"Analyzing: {file_path.name}")
        file_hash = calculate_file_hash(file_path)
        
        if file_hash in hash_map:
            hash_map[file_hash].append(file_path)
        else:
            hash_map[file_hash] = [file_path]
    
    # Keep track of which files were kept and which were removed
    kept_files = []
    removed_files = []
    
    # Process each group of files with the same hash
    for file_hash, file_paths in hash_map.items():
        # Keep the first file
        kept_file = file_paths[0]
        kept_files.append(kept_file)
        
        # Remove all duplicates
        for duplicate in file_paths[1:]:
            try:
                os.remove(duplicate)
                removed_files.append(duplicate)
                print(f"Removed duplicate: {duplicate.name} (same as {kept_file.name})")
            except Exception as e:
                print(f"Error removing {duplicate.name}: {str(e)}")
    
    print(f"Kept {len(kept_files)} unique files, removed {len(removed_files)} duplicates")
    return [f.name for f in kept_files], [f.name for f in removed_files]

def list_files_in_directory(directory_path):
    """
    Create a list of all files in the specified directory with their full paths.
    
    Args:
        directory_path (str): Path to the directory to scan
        
    Returns:
        list: List of full paths to all files in the directory
    """
    # Convert to Path object for easier handling
    dir_path = Path(directory_path)
    
    # Check if the directory exists
    if not dir_path.exists():
        print(f"Directory does not exist: {directory_path}")
        return []
    
    if not dir_path.is_dir():
        print(f"Path is not a directory: {directory_path}")
        return []
    
    # List to store the full paths
    file_paths = []
    
    # Iterate through all items in the directory
    for item in dir_path.iterdir():
        # Only include files, not directories
        if item.is_file():
            # Add the full path as a string
            file_paths.append(str(item.absolute()))
    
    print(f"Found {len(file_paths)} files in {directory_path}")
    return file_paths
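
# Example (the directory path is a hypothetical placeholder):
#   for path in list_files_in_directory("/tmp/bid_123"):
#       print(path)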

def analyze_tender_with_LLM(documents_text):
    """
    Analyze tender documents with Claude LLM
    
    Args:
        documents_text (dict): Dictionary mapping file paths to their text content
        
    Returns:
        dict: Extracted information from tender documents
    """
    # Information to extract
    info_to_extract = [
        "Eligibility/Qualification Criteria or conditions for bidder",
        "Evaluation criteria or method",
        "Documents needed to submit the bid",
        "Scope of work of the whole project",
        "Amount of EMD fee",
        "Relaxation or preference given to any kind of company or bidder",
        "Payment terms",
        "BOQ requirements",
        "Annexures or forms or formats"
    ]
    
    # Initialize the result dictionary with empty strings
    extracted_info = {item: "" for item in info_to_extract}
    
    # Process all documents, not just the main one
    # First, calculate the total text size to determine chunking strategy
    total_text_size = sum(len(text) for text in documents_text.values())
    print(f"Total text size across all documents: {total_text_size} characters")
    
    # Approach: Process each document separately and then combine the results
    doc_analyses = []
    
    for doc_path, doc_text in documents_text.items():
        doc_name = os.path.basename(doc_path)
        print(f"Analyzing document: {doc_name} ({len(doc_text)} characters)")
        
        # Skip empty documents
        if not doc_text.strip():
            print(f"Skipping empty document: {doc_name}")
            continue
        
        # Create chunks based on document size
        chunk_size = 50000  # Adjust based on LLM's token limits
        doc_chunks = []
        
        if len(doc_text) > chunk_size:
            # Split into chunks, but try to break at paragraph boundaries
            start = 0
            while start < len(doc_text):
                end = start + chunk_size
                
                # Adjust to end at paragraph boundary if possible
                if end < len(doc_text):
                    # Look for double newline (paragraph break) before the cutoff
                    paragraph_end = doc_text.rfind('\n\n', start, end)
                    # If found and not too far from the chunk size, use it
                    if paragraph_end > start + (chunk_size * 0.7):
                        end = paragraph_end
                    else:
                        # Otherwise look for single newline
                        line_end = doc_text.rfind('\n', start, end)
                        if line_end > start + (chunk_size * 0.8):
                            end = line_end
                
                # Add the chunk
                doc_chunks.append(doc_text[start:end])
                start = end
        else:
            # Document is small enough to process in one chunk
            doc_chunks = [doc_text]
        
        print(f"Split document into {len(doc_chunks)} chunks")
        
        # Process each chunk with the selected LLM
        for chunk_idx, chunk in enumerate(doc_chunks):
            prompt = f"""
            You are analyzing tender documents. I'll provide you with a chunk ({chunk_idx+1}/{len(doc_chunks)}) 
            from the document {doc_name}.
            
            Please extract the following information if present in this chunk:
            1. Eligibility/Qualification Criteria or conditions for bidder
            2. Evaluation criteria or method
            3. Documents needed to submit the bid 
            4. Scope of work of the whole project
            5. Amount of EMD fee
            6. Relaxation or preference given to any kind of company or bidder
            7. Payment terms
            8. BOQ requirements
            9. Annexures or forms or formats
            
            For each category, provide the exact text from the document. If the information isn't in this chunk, 
            just say "Not found in this chunk." Please structure your response clearly with appropriate headers
            for each section. For evaluation criteria or method, extract the complete scoring table if specified.
            For Annexures or specific document formats, separate all annexures found with their names or numbers.
            Also, for the Annexures or formats found, please mention the 'Doc Page Numbers' on which the annexure
            format is spread in the RFP documents. 'Doc Page Number:' is given in the document's text provided.
            DO NOT mention the Doc Page Numbers where just the reference of Annexure is given and not the actual
            Annexure format.
            
            Here is the document chunk:
            {chunk}
            """
            
            try:
                if llm_model == 'gemini':
                    response = gemini_client.models.generate_content(
                        model="gemini-2.5-flash-preview-04-17",  # alternative: "gemini-2.0-flash"
                        contents=[prompt],
                        config=types.GenerateContentConfig(
                            system_instruction="You are an expert in analyzing tender documents. Extract the requested information accurately.",
                            max_output_tokens=6000,
                            temperature=0.1
                        )
                    )
                    response_text = response.text

                elif llm_model == 'claude':
                    # Call Claude API
                    response = claude_client.messages.create(
                        model="claude-3-7-sonnet-latest",  # alternative: "claude-3-5-haiku-20241022"
                        max_tokens=6000,
                        temperature=0,
                        system="You are an expert in analyzing tender documents. Extract the requested information accurately.",
                        messages=[
                            {"role": "user", "content": prompt}
                        ]
                    )
                    
                    # Get the response text
                    response_text = response.content[0].text
                
                # Save this analysis
                doc_analyses.append({
                    "doc_name": doc_name,
                    "chunk_idx": chunk_idx,
                    "response": response_text
                })
                
            except Exception as e:
                print(f"Error analyzing chunk {chunk_idx+1} with {llm_model}: {str(e)}")
    
    # Now process all the analysis responses to extract the information,
    # using section markers that tolerate the LLM's varied heading styles
    for analysis in doc_analyses:
        response_text = analysis["response"]
        doc_name = analysis["doc_name"]
        chunk_idx = analysis["chunk_idx"]
        
        print(f"Processing analysis of {doc_name} (chunk {chunk_idx+1})")
        
        # Process each category of information to extract
        for idx, item in enumerate(info_to_extract):
            # Possible section headers the LLM might use
            section_markers = [
                f"{idx+1}. {item}",  # 1. Eligibility/Qualification Criteria
                f"## {idx+1}. {item}",  # ## 1. Eligibility/Qualification Criteria 
                f"**{idx+1}. {item}**",  # **1. Eligibility/Qualification Criteria**
                f"#{idx+1} {item}",  # #1 Eligibility/Qualification Criteria
                f"{item}:",  # Eligibility/Qualification Criteria:
                f"**{item}**",  # **Eligibility/Qualification Criteria**
                f"## {item}",  # ## Eligibility/Qualification Criteria
                f"### {item}",  # ### Eligibility/Qualification Criteria
                item  # Plain text
            ]
            
            # Find the section
            section_start = -1
            used_marker = ""
            
            for marker in section_markers:
                pos = response_text.find(marker)
                if pos != -1:
                    section_start = pos
                    used_marker = marker
                    break
            
            if section_start == -1:
                # Section not found
                continue
            
            # Find the end of this section (start of next section or end of response)
            section_end = len(response_text)
            
            # Check where the next section starts
            for next_idx, next_item in enumerate(info_to_extract):
                if next_idx <= idx:  # Skip current and previous sections
                    continue
                
                # Check all possible markers for the next section
                for marker in [
                    f"{next_idx+1}. {next_item}", 
                    f"## {next_idx+1}. {next_item}", 
                    f"**{next_idx+1}. {next_item}**",
                    f"#{next_idx+1} {next_item}",
                    f"{next_item}:", 
                    f"**{next_item}**",
                    f"## {next_item}",
                    f"### {next_item}",
                    next_item
                ]:
                    next_pos = response_text.find(marker, section_start)
                    if next_pos != -1 and next_pos < section_end:
                        section_end = next_pos
                        break
            
            # Extract the section content
            section_content = response_text[section_start + len(used_marker):section_end].strip()
            
            # Skip if the content indicates "not found"
            if any(phrase in section_content.lower() for phrase in [
                "not found in this chunk", 
                "not mentioned in this chunk",
                "no information found",
                "not provided in this chunk",
                "not specified in this chunk"
            ]):
                continue
            
            # Add the extracted content to the result
            if section_content:
                # If we already have content for this item, add a separator
                if extracted_info[item]:
                    extracted_info[item] += f"\n\n--- From {doc_name} (chunk {chunk_idx+1}) ---\n"
                else:
                    extracted_info[item] += f"--- From {doc_name} (chunk {chunk_idx+1}) ---\n"
                
                extracted_info[item] += section_content
    
    # Final cleanup - remove any empty sections and format for readability
    for item in info_to_extract:
        if not extracted_info[item]:
            extracted_info[item] = "Not found in any document"
        else:
            # Clean up formatting and remove duplicative information
            lines = extracted_info[item].split('\n')
            cleaned_lines = []
            seen_content = set()
            
            for line in lines:
                # Skip empty lines and source markers at this stage
                if not line.strip() or line.strip().startswith('---'):
                    cleaned_lines.append(line)
                    continue
                
                # Normalize and hash the line for deduplication
                normalized = ' '.join(line.lower().split())
                if normalized not in seen_content and len(normalized) > 5:
                    seen_content.add(normalized)
                    cleaned_lines.append(line)
            
            # Combine back while preserving source markers
            extracted_info[item] = '\n'.join(cleaned_lines)
    
    # Print a summary of what was found
    found_items = [item for item, content in extracted_info.items() if content != "Not found in any document"]
    print(f"Successfully extracted information for {len(found_items)} categories:")
    for item in found_items:
        content_preview = extracted_info[item].split('\n', 1)[0]
        print(f"- {item}: {content_preview[:50]}...")
    
    return extracted_info, doc_analyses
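
# A standalone sketch of the paragraph-aware chunking used inside
# analyze_tender_with_LLM, factored out here so it can be tested in
# isolation; the 0.7/0.8 back-off thresholds mirror the values above.
def chunk_text(text, chunk_size=50000):
    """Split text into chunks, preferring paragraph then line boundaries."""
    chunks = []
    start = 0
    while start < len(text):
        end = start + chunk_size
        if end < len(text):
            # Prefer to break at a paragraph boundary near the end of the chunk
            paragraph_end = text.rfind('\n\n', start, end)
            if paragraph_end > start + int(chunk_size * 0.7):
                end = paragraph_end
            else:
                # Otherwise fall back to the last line break
                line_end = text.rfind('\n', start, end)
                if line_end > start + int(chunk_size * 0.8):
                    end = line_end
        chunks.append(text[start:end])
        start = end
    return chunks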

def save_extracted_info(bid_dir, extracted_info):
    """
    Save extracted information to a file
    
    Args:
        bid_dir (str): Directory to save the file to
        extracted_info (dict): Extracted information to save
        
    Returns:
        tuple: (path to the saved JSON file, eligibility criteria text)
    """
    output_path = os.path.join(bid_dir, "tender_analysis")
    os.makedirs(output_path, exist_ok=True) # Create the directory. If the target directory already exists, do not raise an exception.
    output_text_file = os.path.join(output_path, "tender_analysis.txt")
    output_json_file = os.path.join(output_path, "tender_analysis.json")

    # Write to a JSON file
    with open(output_json_file, "w") as file:
        json.dump(extracted_info, file, indent=4)
    
    # Write to a TXT file
    eligibility = ""  # captured from the Eligibility section, if present
    with open(output_text_file, 'w', encoding='utf-8') as f:
        f.write("TENDER ANALYSIS REPORT\n")
        f.write("=" * 50 + "\n\n")
        
        for category, info in extracted_info.items():
            f.write(f"{category}\n")
            f.write("-" * len(category) + "\n")
            f.write(info.strip() or "Not found in the documents")
            f.write("\n\n" + "=" * 50 + "\n\n")
            if "Eligibility" in category:
                eligibility = info.strip()
    
    print(f"Saved extracted information to {output_text_file}")
    return output_json_file, eligibility

def get_company_info():
    """
    Get company information from Google Docs
    
    Returns:
        str: Company information text
    """
    # In a real implementation, this would use the Google Docs API
    # For now, we'll simulate this with a placeholder
    print(f"Getting company information from {COMPANY_INFO_DOC}")
    
    # This is a placeholder - in a real implementation, you would:
    # 1. Authenticate with Google
    # 2. Use the Docs API to get the document content
    # 3. Parse and return the content
    
    # For demo purposes, let's return a sample company info
    return """
    Yugasa Company Information for Government tendering:

Company Name: Yugasa Software Labs Pvt Ltd
Office addresses: 
Gurgaon Address: Yugasa Software Labs, 3rd floor, Tower B, Unitech Cyber Park, Sector 39, Gurgaon 122001, Haryana
Lucknow Address: Yugasa Software Labs, 3rd floor, TC-14, Vibhuti Khand, Gomti Nagar, Lucknow, Uttar Pradesh 226010
US Address: Yugasa Software LLC, 370 Campus Drive, Somerset, New Jersey 08873

Company registration:
Yugasa Software Labs Pvt Ltd is a legal entity in India registered under Indian Companies Act, 2013. Registered as Private Limited Company with Registrar of Companies, Delhi.
The CIN of the company is U72900HR2015PTC056837

Company website: www.yugasa.com
Company Phone: +918800522257
Company Email: contact@yugasa.com
Contact Person: Dharmesh Jaggi
Person Authorized to sign Bid Documents: Dharmesh Jaggi

PAN of Yugasa: AAACY7582J

Certifications:
CMMI 3
ISO 27001:2022
ISO 9001:2015

Valid GST registration. GST Number of Yugasa: 06AAACY7582J1ZU

Yugasa is the official Meta Business Partner as ISV solution provider for WhatsApp.

Turnover of previous years:

2024-25: INR 3.52 Crores
2023-24: INR 3.29 Crores
2022-23: INR 3.19 Crores
2021-22: INR 3.35 Crores
2020-21: INR 2.18 Crores

Yugasa Software Labs Pvt Ltd is not barred or blacklisted by any PSU, government department, or private sector entity.

Yugasa Software Labs Pvt Ltd is an MSME and a registered Startup

Manpower on Yugasa’s payroll:
Currently Yugasa has 40 employees on its payroll.

Some previously done projects of Yugasa:

Project 1
Client Name: Narayana Hospitals
Project Title: Development and Implementation of WhatsApp Business API Solution and AI-enabled Chatbot for support automation
Project Scope:
- Integration of WhatsApp Business API with NH's existing systems
- Development of AI-enabled chatbot for patient interaction and support management
- Multi-language support including English and regional languages
- Real-time response and query resolution for patients
- Continuous support and maintenance of the chatbot system
Project Value: The total value of the project till date is INR 30,35,605, and the project is ongoing.
Project Duration: September 30th, 2022 to Present (Ongoing)

Project 2
Client Name: NSC Guwahati
NSC Guwahati is a Ministry of Culture organization 
Project Title: Cashless ticket booking chatbot on WhatsApp.
Project Value: INR 14 lakhs 

    """

def check_eligibility(extracted_info, company_info):
    """
    Check if the company is eligible for the bid
    
    Args:
        extracted_info (dict): Extracted tender information
        company_info (str): Company information
        
    Returns:
        tuple: (is_eligible, reason)
    """
    # Prepare a prompt for the LLM to assess eligibility
    eligibility_criteria = extracted_info.get("Eligibility/Qualification Criteria or conditions for bidder", "")
    
    prompt = f"""
    You need to determine if the company is eligible to apply for a tender based on the eligibility criteria and company information.
    
    Eligibility Criteria:
    {eligibility_criteria}
    
    Company Information:
    {company_info}
    
    Please analyze if the company meets all the eligibility criteria. Return your answer in the following format:
    
    Eligible: [Yes/No]
    Reason: [Detailed explanation of why the company is eligible or not]
    Missing Requirements: [List any requirements the company doesn't meet, if applicable]
    """
    
    try:
        if llm_model == 'gemini':
            response = gemini_client.models.generate_content(
                model="gemini-2.5-flash-preview-04-17",  # alternative: "gemini-2.0-flash"
                contents=[prompt],
                config=types.GenerateContentConfig(
                    system_instruction="You are an expert in tender eligibility assessment. Be thorough and accurate in your analysis.",
                    max_output_tokens=2000,
                    temperature=0.1
                )
            )
            response_text = response.text

        elif llm_model == 'claude':
            # Call Claude API
            response = claude_client.messages.create(
                model="claude-3-7-sonnet-latest",
                max_tokens=2000,
                temperature=0,
                system="You are an expert in tender eligibility assessment. Be thorough and accurate in your analysis.",
                messages=[
                    {"role": "user", "content": prompt}
                ]
            )
            
            # Parse response
            response_text = response.content[0].text
        
        # Extract eligibility decision
        is_eligible = "eligible: yes" in response_text.lower()
        
        # Extract reason
        reason_match = re.search(r'Reason:\s*(.*?)(?:\n\n|\n[A-Z]|$)', response_text, re.DOTALL)
        reason = reason_match.group(1).strip() if reason_match else "No detailed reason provided"
        
        return is_eligible, reason
        
    except Exception as e:
        print(f"Error checking eligibility with {llm_model}: {str(e)}")
        return False, f"Error during eligibility check: {str(e)}"

def create_tender_docs_directory(bid_dir):
    """
    Create directory for tender documents if eligible
    
    Args:
        bid_dir (str): Bid directory
        
    Returns:
        str: Path to the created directory
    """
    tender_docs_dir = os.path.join(bid_dir, "tender_documents")
    os.makedirs(tender_docs_dir, exist_ok=True)
    print(f"Created tender documents directory: {tender_docs_dir}")
    return tender_docs_dir

def process_tender_from_dir(bid_dir):
    """
    Process tender analysis workflow
    
    Args:
        bid_dir (str): Directory path containing RFP documents 
        
    Returns:
        list: List of processed bid directory paths
    """
    
    processed_bids = []
    
    # Process the bid
    print(f"\nProcessing bid from: {bid_dir}")

    kept, removed = remove_duplicate_pdfs(bid_dir)
    print(f"Kept files: {kept}")
    print(f"Removed Duplicate files: {removed}")

    # Use the unified document extractor instead of individual PDF processing
    output_dir = os.path.join(bid_dir, "tender_analysis")
    os.makedirs(output_dir, exist_ok=True)
    documents_text = extract_documents_text_compatible(bid_dir, output_dir, ANTHROPIC_API_KEY)
    
    if not documents_text:
        print("No text was extracted from documents")
        return processed_bids

    # Create a document extractor instance to report the total word count
    extractor = create_document_extractor(ANTHROPIC_API_KEY)
    word_count = extractor.count_total_words(documents_text)
    print(f"Total word count across documents: {word_count}")

    # Analyze documents with the configured LLM
    extracted_info, docs_analysis = analyze_tender_with_LLM(documents_text)
    
    # Save extracted information
    analysis_file, eligibility = save_extracted_info(bid_dir, extracted_info)
    
    # Get company information
    company_info = get_company_info()
    
    # Check eligibility
    is_eligible, reason = check_eligibility(extracted_info, company_info)
    
    # Create tender documents directory if eligible
    if is_eligible:
        # tender_docs_dir = create_tender_docs_directory(bid_dir)
        print(f"🚀 Company is eligible for the RFP {bid_dir}. Reason: {reason}")
    else:
        print(f"⚠️ Company is NOT eligible for RFP {bid_dir}. Reason: {reason}")
    
    processed_bids.append(bid_dir)
    
    return processed_bids
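
# Command-line usage sketch (script name and directory are placeholders):
#   python tender_analysis.py --rfp-docs-dir /path/to/bid_folder
# Without --rfp-docs-dir, the interactive menu in main() is shown instead.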


def main():
    """Main function to run the tender processing workflow"""
    # Set up argument parser
    parser = argparse.ArgumentParser(description='Tender Processing Tool')
    parser.add_argument('--rfp-docs-dir', type=str, help='Directory path containing RFP documents')
    
    # Parse arguments
    args = parser.parse_args()
    
    # If directory path is provided as argument, directly process the tenders
    if args.rfp_docs_dir:
        print("\n" + "="*50)
        print(f"🔄 Processing tenders from directory: {args.rfp_docs_dir}")
        print("="*50)
        processed_bids = process_tender_from_dir(args.rfp_docs_dir)
        
        print(f"\n✅ Completed processing {len(processed_bids)} bids:")
        for bid in processed_bids:
            print(f"- {bid}")
    else:
        # Ask user what they want to do
        while True:
            print("\nWhat would you like to do?")
            print("1. Search for new tenders")
            print("2. Process existing RFP documents")
            print("3. Prepare bid documents")
            
            choice = input("Enter your choice (1 or 2 or 3): ").strip()
            
            if choice == '1':
                # Get keywords from user
                keywords_input = input("Enter comma-separated keywords: ").strip()
                
                # Process keywords
                if keywords_input:
                    keywords = [keyword.strip() for keyword in keywords_input.split(',')]
                    print(f"Searching for tenders with keywords: {keywords}")
                    
                    # Tender search is not wired up yet; enable once implemented
                    # result = search_tender(keywords)
                    print("Search not yet implemented.")
                else:
                    print("❌ No keywords provided. Please try again.")
                    continue
                break
                
            elif choice == '2':
                # Get directory path from user
                rfp_docs_dir = input("Enter the path to the directory containing RFP documents: ").strip()
                
                if rfp_docs_dir:
                    print("\n" + "="*50)
                    print(f"🔄 Processing tenders from directory: {rfp_docs_dir}")
                    print("="*50)
                    processed_bids = process_tender_from_dir(rfp_docs_dir)
                    
                    print(f"\n✅ Completed processing {len(processed_bids)} bids:")
                    for bid in processed_bids:
                        print(f"- {bid}")
                else:
                    print("❌ No directory path provided. Please try again.")
                    continue
                break
                
            elif choice == '3':
                # Get directory path from user
                rfp_docs_dir = input("Enter the path to the directory containing RFP documents: ").strip()
                std_company_docs = input("Enter the path to the directory containing Company documents: ").strip()
                
                if rfp_docs_dir:
                    print(f"Preparing bid documents for RFP documents from directory: {rfp_docs_dir}")
                    final_docs_dir = bpa.prepare_bid_documents(rfp_docs_dir, std_company_docs, get_company_info())
                    
                    print(f"\nCompleted preparing documents at: {final_docs_dir}")
                else:
                    print("❌ No directory path provided. Please try again.")
                    continue
                break

            elif choice == '4':
                # Get directory path from user
                rfp_docs_dir = input("Enter the path to the directory containing RFP documents: ").strip()
                
                if rfp_docs_dir:
                    print(f"Preparing for bid documentation RAG from: {rfp_docs_dir}")
                    status = bq.process_task(rfp_docs_dir)
                    
                    print(f"\nCompleted preparing chunks at: {rfp_docs_dir}/tender_analysis/chunks.xlsx")
                else:
                    print("❌ No directory path provided. Please try again.")
                    continue
                break
                
            else:
                print("Invalid choice. Please enter 1 or 2 or 3.")

if __name__ == "__main__":
    main()
