# coding: UTF-8
import csv
import hmac
import json
import logging
import os
import re
from io import StringIO
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from flask import Response, Flask, request, render_template
from flask_httpauth import HTTPBasicAuth
from openai import OpenAI

# Load environment variables from .env file
load_dotenv()
BASE_URL = os.getenv('BASE_URL')
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

# The OpenAI credential comes from the environment (OPENAI_API_KEY) so that no
# secret lives in source control. Any key that was ever committed to this file
# must be treated as leaked and revoked.
client = OpenAI(api_key=OPENAI_API_KEY)

# Logging in terminal
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# The application is served under the /ai-form-bot prefix.
app = Flask(__name__, static_url_path='/ai-form-bot/static')
app.config['APPLICATION_ROOT'] = '/ai-form-bot'
auth = HTTPBasicAuth()

# HTTP Basic credentials, keyed by username. They can be overridden through the
# BASIC_AUTH_USERNAME / BASIC_AUTH_PASSWORD environment variables; the previous
# hard-coded values remain as defaults so existing deployments keep working.
users = {
    os.getenv('BASIC_AUTH_USERNAME', 'admin'): os.getenv('BASIC_AUTH_PASSWORD', 'secret')
}
if 'BASIC_AUTH_PASSWORD' not in os.environ:
    logger.warning("BASIC_AUTH_PASSWORD is not set; falling back to the built-in default password")

@auth.verify_password
def verify_password(username, password):
    """Validate HTTP Basic credentials against the module-level ``users`` dict.

    Registered with flask_httpauth through ``auth.verify_password``.

    Args:
        username: User name supplied by the client.
        password: Password supplied by the client.

    Returns:
        The username when the credentials match, otherwise ``None``.
    """
    expected = users.get(username)
    if expected is not None and password is not None:
        # Constant-time comparison avoids leaking how many leading characters
        # matched. Bytes are compared because hmac.compare_digest rejects
        # non-ASCII str arguments.
        if hmac.compare_digest(password.encode('utf-8'), expected.encode('utf-8')):
            return username

    logger.warning(f"Failed authentication for user: {username}")
    return None

@app.route('/favicon.ico')
def favicon():
    """Answer favicon requests with an empty body and HTTP 204 (No Content)."""
    empty_body, no_content_status = '', 204
    return empty_body, no_content_status

@app.route('/', methods=['GET', 'POST'])
@auth.login_required
def upload_file():
    """Show the upload page, or process an uploaded CSV of URLs.

    GET (or a POST without a file) renders ``upload.html``.

    POST expects a multipart upload named ``file`` containing a CSV with a
    ``URL`` column, plus the sender details as form fields. Each URL is
    crawled, its forms are extracted and mapped to the sender details, and the
    outcome is returned as a downloadable CSV report.

    Returns:
        The rendered upload page, a ``(message, 400)`` tuple when the upload
        cannot be used, or a ``text/csv`` attachment response.
    """
    if request.method != 'POST':
        return render_template('upload.html')

    # .get() so that a POST without the file part re-renders the form instead
    # of aborting with a bare 400.
    file = request.files.get('file')
    if not file or not file.filename:
        return render_template('upload.html')

    try:
        df = pd.read_csv(file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as e:
        logger.warning(f"Could not parse uploaded CSV: {e}")
        return 'The uploaded file is not a valid CSV file.', 400

    if 'URL' not in df.columns:
        return "The uploaded CSV must contain a 'URL' column.", 400

    # Blank cells are read as NaN; drop them and surrounding whitespace so
    # only real URLs reach the crawler.
    urls = [u for u in df['URL'].dropna().astype(str).str.strip() if u]

    user_info_fields = (
        'company_name',
        'full_name',
        'kana',
        'email',
        'phone_number',
        'address',
        'postal_code',
        'department',
        'position',
        'site_url',
        'message',
    )
    user_info = {field: request.form.get(field) for field in user_info_fields}

    primary_navigation_links = get_primary_navigation_links(urls)
    forms_data = get_forms_data(primary_navigation_links)
    mapped_forms_data = map_forms_data(forms_data, user_info)

    # Generate the CSV report
    report_csv = generate_csv_report(urls, primary_navigation_links, forms_data, mapped_forms_data)

    # Create a response with the CSV file
    response = Response(report_csv.getvalue(), mimetype='text/csv')
    response.headers.set('Content-Disposition', 'attachment', filename='form_processing_report.csv')
    return response

def get_primary_navigation_links(urls, timeout=10):
    """Collect the page links found on each of the given URLs.

    Every URL is fetched and its ``<a href>`` targets are gathered. Links that
    contain a fragment (``#``) or point at a PDF/image file are skipped.
    Root-relative links (starting with ``/``) are resolved against the page
    URL; links starting with ``http`` are kept as they are; anything else is
    ignored.

    Args:
        urls: Iterable of page URLs to fetch.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        Dict mapping each successfully fetched URL to its list of links. URLs
        whose request failed are logged and left out of the dict.
    """
    skipped_extensions = ('.pdf', '.jpg', '.jpeg', '.png', '.gif', '.bmp')
    primary_navigation_dict = {}
    for url in urls:
        logger.info(f"Processing: {url}")
        try:
            # Without a timeout a single unresponsive host would block the
            # whole request forever.
            response = requests.get(url, timeout=timeout)
        except requests.exceptions.RequestException as e:
            logger.error(f"Error fetching URL {url}: {e}")
            continue

        soup = BeautifulSoup(response.content, 'html.parser')
        primary_links = []
        for anchor in soup.find_all('a', href=True):
            link = anchor.get('href')
            if not link or '#' in link:
                continue
            # Case-insensitive so that e.g. "Brochure.PDF" is skipped as well.
            if link.lower().endswith(skipped_extensions):
                continue
            if link.startswith('/'):
                # urljoin resolves against the site root (and handles
                # protocol-relative "//host/..." links); os.path.join would
                # append to the page path and use "\\" on Windows.
                primary_links.append(urljoin(url, link))
            elif link.startswith('http'):
                primary_links.append(link)
        primary_navigation_dict[url] = primary_links
    return primary_navigation_dict

def _extract_form_fields(form):
    """Return tag/name (and label, when matched) for each named control in *form*."""
    fields = []
    for input_field in form.find_all(['input', 'textarea', 'select']):
        name = input_field.get('name')
        if not name:
            continue
        field_info = {
            'tag': str(input_field),
            'name': name,
        }

        # Attach the closest preceding <label> when its "for" attribute
        # equals the control's id.
        label = input_field.find_previous('label')
        if label and label.get('for') == input_field.get('id'):
            field_info['label'] = str(label)

        fields.append(field_info)
    return fields


def get_forms_data(primary_navigation_dict, timeout=10):
    """Fetch every linked page and describe the HTML forms found on it.

    Args:
        primary_navigation_dict: Dict mapping a main URL to the list of page
            URLs discovered on it.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        Nested dict ``{main_url: {page_url: [form_info, ...]}}`` where each
        ``form_info`` has the keys ``action`` (resolved to an absolute URL
        when present), ``method`` (lower-cased, default ``get``) and
        ``fields``. Pages whose request failed are logged and omitted.
    """
    forms_data = {}
    for main_url, primary_urls in primary_navigation_dict.items():
        forms_data[main_url] = {}
        for primary_url in primary_urls:
            logger.info(f"Processing form data for: {primary_url}")
            try:
                # Without a timeout a single unresponsive host would block
                # the whole request forever.
                response = requests.get(primary_url, timeout=timeout)
            except requests.exceptions.RequestException as e:
                logger.error(f"Error fetching URL {primary_url}: {e}")
                continue

            soup = BeautifulSoup(response.content, 'html.parser')
            form_info_list = []
            for form in soup.find_all('form'):
                action = form.get('action')
                if action:
                    # urljoin resolves root-relative and page-relative actions
                    # against the page URL and leaves absolute URLs alone;
                    # os.path.join would append to the page path instead.
                    action = urljoin(primary_url, action)
                form_info_list.append({
                    'action': action,
                    'method': form.get('method', 'get').lower(),
                    'fields': _extract_form_fields(form),
                })
            forms_data[main_url][primary_url] = form_info_list
    return forms_data

def _find_contact_form(primary_forms, user_info):
    """Return ``(page_url, mapped_form)`` for the first contact form, or ``None``."""
    for primary_url, forms in primary_forms.items():
        for form in forms:
            form_mapping = map_fields_with_openai(form['fields'], user_info)
            # The mapping comes from a language model, so tolerate a reply
            # that is not a dict or lacks the expected keys.
            if not isinstance(form_mapping, dict) or not form_mapping.get('is_contact_form'):
                continue
            # Build a copy so the caller's forms_data keeps the raw field list.
            mapped_form = {**form, 'fields': form_mapping.get('form_mapping', {})}
            return primary_url, mapped_form
    return None


def map_forms_data(forms_data, user_info):
    """Pick the first contact form of each site and fill it with the user info.

    Pages and forms are examined in dict/list order; the search for a site
    stops at the first form that is classified as a contact form.

    Args:
        forms_data: Nested dict ``{main_url: {page_url: [form_info, ...]}}``.
        user_info: Dict of sender details passed on to the field mapper.

    Returns:
        Dict ``{main_url: {page_url: form_info}}`` holding at most one entry
        per main URL; the form's ``fields`` value is replaced by the mapping
        of field names to values. Sites without a contact form map to ``{}``.
    """
    mapped_forms_data = {}
    for main_url, primary_forms in forms_data.items():
        mapped_forms_data[main_url] = {}
        match = _find_contact_form(primary_forms, user_info)
        if match is not None:
            primary_url, mapped_form = match
            mapped_forms_data[main_url][primary_url] = mapped_form
    return mapped_forms_data

def map_fields_with_openai(fields, user_info):
    """Ask the OpenAI chat API to classify a form and map user info onto it.

    Args:
        fields: List of field descriptions (JSON-serialisable) for one form.
        user_info: Dict with the keys ``company_name``, ``full_name``,
            ``kana``, ``email``, ``phone_number``, ``address``,
            ``postal_code``, ``department``, ``position``, ``site_url`` and
            ``message``.

    Returns:
        The parsed reply as a dict that always contains a boolean
        ``is_contact_form`` and a dict ``form_mapping``. When the reply is
        empty, is not valid JSON or is not a JSON object, the result is
        ``{"is_contact_form": False, "form_mapping": {}}``.
    """
    prompt = f"""
    Given the following form fields and user information, identify the corresponding input names in the form and map them to the provided values. Handle checkboxes, radio buttons, and select fields appropriately. Don't leave anything empty, fill all inputs and select the most appropriate option in checkboxes or radio buttons.
    select the option Other for the question that means what you would like to inquire about, or what is the purpose of your inquiry.
    
    Determine if the form is a contact form. If it is, provide the form mapping in the following JSON format:
    {{
        "is_contact_form": true,
        "form_mapping": {{
            "field_name_1": "value_1",
            "field_name_2": "value_2",
            ...
        }}
    }}
    If it is not a contact form, return:
    {{
        "is_contact_form": false
    }}
    
    Form Fields:
    {json.dumps(fields, ensure_ascii=False, indent=2)}
    
    User Information:
    会社名: {user_info['company_name']}
    名前（姓名）: {user_info['full_name']}
    ふりがな: {user_info['kana']}
    フリガナ: {user_info['kana']}
    メールアドレス: {user_info['email']}
    電話番号: {user_info['phone_number']}
    住所: {user_info['address']}
    郵便番号: {user_info['postal_code']}
    部署名: {user_info['department']}
    役職: {user_info['position']}
    サイトURL: {user_info['site_url']}
    メッセージ: {user_info['message']}

    For checkboxes and radio buttons, set the value to "true" for the closest option to the user information, otherwise "false".
    For select fields, choose the most appropriate option based on the user information.
    """
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a sales representative. Please respond with a JSON object mapping form field names to user information values. For checkboxes and radio buttons, use boolean values. For select fields, provide the value of the most appropriate option. Indicate if the form is a contact form."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=500
    )
    response_content = response.choices[0].message.content
    logger.info(f"OpenAI Response: {response_content}")  # Log the response content

    not_a_contact_form = {"is_contact_form": False, "form_mapping": {}}

    # The message content can be None or empty; there is nothing to parse then.
    if not response_content:
        logger.error("OpenAI returned an empty response")
        return not_a_contact_form

    # Remove a surrounding Markdown code fence, with or without a language tag
    # (```json ... ``` as well as plain ``` ... ```).
    cleaned_response = re.sub(
        r'^```[a-zA-Z]*\s*|\s*```$', '', response_content.strip(), flags=re.MULTILINE
    )

    try:
        response_json = json.loads(cleaned_response)
    except json.JSONDecodeError as e:
        logger.error(f"JSON decode error: {e}")
        return not_a_contact_form

    # Valid JSON is not necessarily the requested object (it could be a list,
    # a string, ...); callers index into the result, so enforce the shape.
    if not isinstance(response_json, dict):
        logger.error("OpenAI response is not a JSON object")
        return not_a_contact_form

    is_contact_form = response_json.get('is_contact_form')
    if isinstance(is_contact_form, str):
        # A quoted "false" would otherwise be truthy.
        is_contact_form = is_contact_form.strip().lower() == 'true'
    response_json['is_contact_form'] = bool(is_contact_form)

    if not isinstance(response_json.get('form_mapping'), dict):
        response_json['form_mapping'] = {}

    return response_json

def generate_csv_report(urls, primary_navigation_links, forms_data, mapped_forms_data):
    """Build the CSV report summarising what was found for each main URL.

    One row is written per entry in ``urls``, in order, after a header row.

    Args:
        urls: Main URLs that were submitted.
        primary_navigation_links: Dict ``{main_url: [page_url, ...]}``.
        forms_data: Dict ``{main_url: {page_url: [form_info, ...]}}``.
        mapped_forms_data: Dict ``{main_url: {page_url: form_info}}`` holding
            the contact form chosen for each main URL, if any.

    Returns:
        A ``StringIO`` positioned at the start, containing the CSV text.
        Main URLs missing from any of the dicts yield zero/empty cells.
    """
    output = StringIO()
    writer = csv.writer(output)
    writer.writerow([
        'Main URL',
        'Number of Primary URLs Found',
        'Primary URLs with Forms',
        'Number of Forms Found per Primary URL',
        'Primary URL with Contact Form',
        'Mapped Form',
    ])

    for main_url in urls:
        primary_urls = primary_navigation_links.get(main_url, [])

        # The "with forms" columns describe every page on which at least one
        # form was found, so they are derived from forms_data. (Reading them
        # from mapped_forms_data would only ever list the contact-form page.)
        pages_with_forms = [
            (page_url, len(forms))
            for page_url, forms in forms_data.get(main_url, {}).items()
            if forms
        ]

        primary_url_with_contact_form = None
        mapped_form = None
        for page_url, form_info in mapped_forms_data.get(main_url, {}).items():
            if form_info:
                primary_url_with_contact_form = page_url
                mapped_form = json.dumps(form_info.get('fields', {}), ensure_ascii=False)
                break

        writer.writerow([
            main_url,
            len(primary_urls),
            ', '.join(page_url for page_url, _ in pages_with_forms),
            ', '.join(str(count) for _, count in pages_with_forms),
            primary_url_with_contact_form,
            mapped_form,
        ])

    output.seek(0)
    return output

if __name__ == '__main__':
    # Debug mode enables the interactive Werkzeug debugger, which lets anyone
    # who can reach the server execute code. Keep it off unless explicitly
    # requested through the FLASK_DEBUG environment variable.
    debug_enabled = os.getenv('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
    app.run(debug=debug_enabled)
