import re
import json
import asyncio
from datetime import datetime
import aiohttp
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor

from config import LOGIN_URL, QUESTION_URL, BASE_URL, OUTPUT_FILE, MAX_CONSECUTIVE_EMPTY
from image_utils import build_full_url, download_image
from bot_handlers import send_message

# Shared thread pool; extract_question() submits BeautifulSoup parsing and the
# extract_* helpers to it so that work does not run on the event loop thread.
executor = ThreadPoolExecutor(max_workers=4)
# Parser name passed to every BeautifulSoup(...) call in this module.
PARSER = "html.parser"  # or "lxml" if it is installed


async def login() -> "aiohttp.ClientSession | None":
    """Log in to the site and return the authenticated session.

    Fetches ``LOGIN_URL``, fills in the first ``<form>`` on the page
    (credential fields are recognised by their ``name``; every other named
    input keeps its ``value`` attribute), posts it and inspects the URL the
    server finally redirects to.

    Returns:
        The open ``aiohttp.ClientSession`` when the final URL contains
        ``/teacher/`` or ``/student/``. ``None`` when the page has no form,
        the final URL does not match, or an exception is raised along the
        way. The session is closed on every path that does not return it,
        so the caller only owns a session it actually received.
    """
    # Function-scope import so this block does not depend on the file header.
    from urllib.parse import urljoin

    timeout = aiohttp.ClientTimeout(total=30, sock_read=10)
    connector = aiohttp.TCPConnector(limit=10, limit_per_host=5, ttl_dns_cache=300)
    session = aiohttp.ClientSession(connector=connector, timeout=timeout)

    # Flipped only right before the session is handed to the caller; the
    # ``finally`` clause closes the session in every other case (including
    # task cancellation, which ``except Exception`` does not see).
    logged_in = False
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'fa-IR,fa;q=0.9,en;q=0.8',
            'Connection': 'keep-alive'
        }

        async with session.get(LOGIN_URL, headers=headers) as resp:
            # URL of the page that actually served the form (after redirects);
            # the form action is resolved against it below.
            page_url = str(resp.url)
            soup = BeautifulSoup(await resp.text(), PARSER)

        form = soup.find("form")
        if not form:
            return None

        # NOTE(review): credentials are hard-coded in source; consider moving
        # them to configuration or the environment.
        payload = {}
        for inp in form.find_all("input"):
            name = inp.get("name")
            if not name:
                continue
            if name in ("username", "user", "email"):
                payload[name] = "0923254927"
            elif name in ("password", "pass", "pwd"):
                payload[name] = "0923254927"
            else:
                # Preserve hidden/default fields (e.g. tokens) as served.
                payload[name] = inp.get("value", "")

        payload["login"] = "true"

        # urljoin covers every action form: empty -> the page itself,
        # "/path" -> same host, "relative" -> relative to the page,
        # "http(s)://..." -> used unchanged.
        post_url = urljoin(page_url, form.get("action", ""))

        async with session.post(post_url, data=payload, allow_redirects=True) as resp:
            final_url = str(resp.url)

        # A successful login lands on a teacher or student area.
        if "/teacher/" in final_url or "/student/" in final_url:
            print("✅ ورود موفق!")
            logged_in = True
            return session
        return None

    except Exception as e:
        print(f"💥 خطا در لاگین: {e}")
        return None
    finally:
        if not logged_in:
            await session.close()


def check_empty_page(html: str) -> bool:
    """Tell whether *html* is the "no questions" placeholder page.

    Returns True only when the page has a ``div`` with class
    ``studentQuestions`` whose text contains the phrase
    'هیچ سوالی وجود ندارد'; otherwise False.
    """
    container = BeautifulSoup(html, PARSER).find("div", class_="studentQuestions")
    if not container:
        return False
    return "هیچ سوالی وجود ندارد" in container.get_text(strip=True)


def extract_images_from_h3(soup: BeautifulSoup) -> list:
    """Collect the images that are direct children of the question ``<h3>``.

    The ``<h3>`` whose ``style`` matches ``font-family:Tahoma,Arial`` is
    preferred; otherwise the first ``<h3>`` on the page is used. Only direct
    children of that tag are inspected (``recursive=False``).

    Returns:
        A list of ``{"src", "href", "alt"}`` dicts: first every direct
        ``<a>`` child that wraps an ``<img>`` (``href`` taken from the link),
        then every direct ``<img>`` child with a non-empty ``src`` that has
        not been recorded yet (``href`` is ``""``). Empty list when the page
        has no ``<h3>``.
    """
    heading = soup.find("h3", style=re.compile(r"font-family:Tahoma,Arial")) or soup.find("h3")
    if not heading:
        return []

    # Linked images: pair each direct <a> child with the <img> it wraps.
    wrapped = ((link, link.find("img")) for link in heading.find_all("a", recursive=False))
    found = [
        {
            "src": picture.get("src", ""),
            "href": link.get("href", ""),
            "alt": picture.get("alt", ""),
        }
        for link, picture in wrapped
        if picture
    ]

    # Bare images: skip those without a src or whose src is already listed.
    known_sources = {entry["src"] for entry in found}
    for picture in heading.find_all("img", recursive=False):
        source = picture.get("src", "")
        if not source or source in known_sources:
            continue
        known_sources.add(source)
        found.append({
            "src": source,
            "href": "",
            "alt": picture.get("alt", ""),
        })

    return found


def extract_text_from_h3(soup: BeautifulSoup) -> str:
    """Extract the question text from the question ``<h3>``.

    The ``<h3>`` whose ``style`` matches ``font-family:Tahoma,Arial`` is
    preferred; otherwise the first ``<h3>`` on the page is used.

    Text is gathered in three stages, each tried only if the previous one
    produced nothing:
      1. the text of every ``<p>`` inside the h3 that contains no ``<img>``;
      2. the text of a re-parsed copy of the h3 with all ``<img>`` and
         ``<a>`` elements removed;
      3. the full text of the h3.

    Returns:
        ``""`` when the page has no ``<h3>``; the collected pieces joined by
        a single space; or the placeholder 'متن سوال پیدا نشد' when every
        stage came up empty.
    """
    heading = soup.find("h3", style=re.compile(r"font-family:Tahoma,Arial")) or soup.find("h3")
    if not heading:
        return ""

    # Stage 1: paragraphs without an image contribute their stripped text.
    paragraph_texts = (
        paragraph.get_text(strip=True)
        for paragraph in heading.find_all("p")
        if not paragraph.find("img")
    )
    pieces = [text for text in paragraph_texts if text]

    if not pieces:
        # Stage 2: work on a re-parsed copy so the caller's tree is untouched.
        clone = BeautifulSoup(str(heading), PARSER)
        for node in clone.find_all(["img", "a"]):
            node.decompose()
        remainder = clone.get_text(strip=True)
        if remainder:
            pieces.append(remainder)

    if not pieces:
        # Stage 3: last resort, everything the heading contains.
        whole = heading.get_text(strip=True)
        if whole:
            pieces.append(whole)

    if pieces:
        return " ".join(pieces)
    return "متن سوال پیدا نشد"


def extract_options_and_answer(soup: BeautifulSoup, question_id: int):
    """Extract the radio-button options and the pre-selected answer.

    Args:
        soup: Parsed question page. It is only read: label text is taken
            with ``get_text``, which already ignores ``<img>`` elements
            (they carry no text), so nothing is removed from the tree.
        question_id: Not used by this function; kept so existing callers
            keep working.

    Returns:
        ``(options, correct_answer)`` where ``options`` maps
        ``"option_<value>"`` to the option text for every
        ``<input type="radio">`` on the page, and ``correct_answer`` is the
        text of the last radio carrying a ``checked`` attribute (``None`` if
        there is none). Both are empty/``None`` when the page has no radios.
    """
    options = {}
    correct_answer = None

    all_radios = soup.find_all("input", {"type": "radio"})
    if not all_radios:
        return options, correct_answer

    # Index <label for="..."> elements by the id they point at.
    labels_by_id = {
        label.get("for", ""): label
        for label in soup.find_all("label")
        if label.get("for")
    }

    for radio in all_radios:
        value = radio.get("value", "")
        is_checked = radio.get("checked") is not None
        radio_id = radio.get("id", "")

        option_text = None

        # 1) A label explicitly bound to this radio via for=<id>.
        if radio_id and radio_id in labels_by_id:
            option_text = labels_by_id[radio_id].get_text(strip=True)

        # 2) A label that wraps the radio.
        if not option_text:
            parent_label = radio.find_parent("label")
            if parent_label:
                option_text = parent_label.get_text(strip=True)

        # 3) Whatever node immediately follows the radio.
        if not option_text:
            next_sib = radio.next_sibling
            if next_sib and hasattr(next_sib, 'string') and next_sib.string:
                option_text = next_sib.string.strip()
            elif next_sib and hasattr(next_sib, 'get_text'):
                option_text = next_sib.get_text(strip=True)

        # 4) Nothing readable (e.g. image-only option): fall back to a
        #    generic caption built from the radio value.
        if not option_text:
            option_text = f"گزینه {value}"

        options[f"option_{value}"] = option_text
        if is_checked:
            correct_answer = option_text

    return options, correct_answer


async def extract_question(session: aiohttp.ClientSession, question_id: int) -> "dict | str | None":
    """Fetch one question page and extract everything from it.

    Args:
        session: Logged-in session used for the page and image requests.
        question_id: ID substituted into ``QUESTION_URL``.

    Returns:
        * the string ``"empty"`` when the response status is not 200 or
          ``check_empty_page`` recognises the "no questions" placeholder;
        * a dict with keys ``id``, ``question``, ``type`` (``"essay"`` or
          ``"multiple_choice"``), ``has_images``, ``images``,
          ``image_urls``, ``options`` (``None`` when there are none),
          ``correct_answer``, ``url`` and ``extracted_at``;
        * ``None`` on timeout or any other exception (a message is printed).
    """
    url = QUESTION_URL.format(question_id)

    try:
        headers = {
            'Cache-Control': 'no-cache',
            'Pragma': 'no-cache'
        }

        # Keep the response context open only while the body is being read.
        async with session.get(url, headers=headers) as resp:
            if resp.status != 200:
                return "empty"
            html = await resp.text()

        loop = asyncio.get_event_loop()

        # The empty-page check parses the whole document, so it runs in the
        # thread pool like the rest of the parsing instead of blocking the
        # event loop.
        if await loop.run_in_executor(executor, check_empty_page, html):
            return "empty"

        soup = await loop.run_in_executor(executor, BeautifulSoup, html, PARSER)

        # An answer textarea with id q_<n>_textarea marks an essay question.
        is_essay = bool(soup.find("textarea", id=re.compile(r"q_\d+_textarea")))

        question_text, h3_images = await asyncio.gather(
            loop.run_in_executor(executor, extract_text_from_h3, soup),
            loop.run_in_executor(executor, extract_images_from_h3, soup)
        )

        downloaded_images = []
        image_full_urls = []

        # Prefer the link target (href) over the <img> src; drop entries
        # that have neither.
        candidates = (img.get("href") or img.get("src") for img in h3_images)
        image_srcs = [src for src in candidates if src]

        if image_srcs:
            # Only the first three images are downloaded.
            tasks = [download_image(session, src, question_id) for src in image_srcs[:3]]
            results = await asyncio.gather(*tasks, return_exceptions=True)

            # return_exceptions=True hands failures back as values; keep
            # only real, non-empty download results.
            downloaded_images = [
                result for result in results
                if result and not isinstance(result, BaseException)
            ]

            # Every image (not just the downloaded ones) gets its absolute
            # URL recorded once, in page order.
            for src in image_srcs:
                full_url = build_full_url(src)
                if full_url not in image_full_urls:
                    image_full_urls.append(full_url)

        options = {}
        correct_answer = None
        if not is_essay:
            options, correct_answer = await loop.run_in_executor(
                executor, extract_options_and_answer, soup, question_id
            )

        return {
            "id": question_id,
            "question": question_text,
            "type": "essay" if is_essay else "multiple_choice",
            "has_images": len(downloaded_images) > 0,
            "images": downloaded_images,
            "image_urls": image_full_urls,
            "options": options if options else None,
            "correct_answer": correct_answer,
            "url": url,
            "extracted_at": datetime.now().isoformat()
        }

    except asyncio.TimeoutError:
        print(f"  ⏱️ تایم اوت سوال {question_id}")
        return None
    except Exception as e:
        print(f"  ⚠️ خطا در استخراج سوال {question_id}: {e}")
        return None


async def auto_update_questions(lms_session: aiohttp.ClientSession, chat_id: int, last_known_id: int):
    """Scan forward for new questions and append them to the JSON database.

    Scanning starts at ``max(highest saved ID + 1, last_known_id)`` (or at
    ``last_known_id`` when the database is empty) and proceeds in batches of
    five IDs fetched concurrently. It stops when ``MAX_CONSECUTIVE_EMPTY``
    empty pages are seen in a row ("end of database"), or when too many
    fetches in a row fail outright (timeouts/errors), so a dead connection
    cannot keep the loop running forever.

    Progress and the final report are sent to ``chat_id`` via
    ``send_message``; the database is written to ``OUTPUT_FILE`` after every
    tenth new question and once more at the end.

    Returns:
        ``(new_count, reached_end, new_last_id)``: number of questions
        added, whether the end of the database was detected, and the last
        valid question ID (falling back to the last scanned ID when no valid
        ID is known).
    """
    # Load the current database; a missing or unreadable file means "empty".
    try:
        with open(OUTPUT_FILE, "r", encoding="utf-8") as f:
            all_questions = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        all_questions = []

    existing_ids = {q["id"] for q in all_questions}

    # Never rescan IDs that are already stored.
    if all_questions:
        max_saved_id = max(q["id"] for q in all_questions)
        start_id = max(max_saved_id + 1, last_known_id)
    else:
        start_id = last_known_id

    await send_message(chat_id, f"🔄 **آپدیت خودکار**\n🔍 چک کردن سوالات جدید از ID {start_id}...")

    BATCH_SIZE = 5
    # Fetches that return None (timeout/exception) say nothing about whether
    # the ID exists; give up after this many in a row.
    MAX_CONSECUTIVE_FAILURES = 20

    new_count = 0
    current_id = start_id
    reached_end = False
    aborted = False
    consecutive_empty = 0
    consecutive_failures = 0
    last_valid_id = max(existing_ids) if existing_ids else 0  # last valid ID known so far

    while not reached_end and not aborted:
        # Next window of IDs, minus those already stored.
        batch_ids = [
            qid for qid in range(current_id, current_id + BATCH_SIZE)
            if qid not in existing_ids
        ]

        if not batch_ids:
            current_id += BATCH_SIZE
            continue

        # Fetch the whole batch concurrently.
        tasks = [extract_question(lms_session, qid) for qid in batch_ids]
        results = await asyncio.gather(*tasks)

        for qid, result in zip(batch_ids, results):
            if result == "empty":
                consecutive_failures = 0
                consecutive_empty += 1
                print(f"❌ [ID: {qid}] صفحه خالی ({consecutive_empty}/{MAX_CONSECUTIVE_EMPTY})")

                # Enough empty pages in a row: treat it as the end.
                if consecutive_empty >= MAX_CONSECUTIVE_EMPTY:
                    reached_end = True
                    await send_message(chat_id, f"🏁 **به انتهای دیتابیس رسیدیم!**\n\n{MAX_CONSECUTIVE_EMPTY} سوال خالی پشت سر هم در آیدی‌های {qid - MAX_CONSECUTIVE_EMPTY + 1} تا {qid}\n📝 آخرین سوال معتبر: ID {last_valid_id}")
                    break
            elif result:
                consecutive_failures = 0
                new_count += 1
                all_questions.append(result)
                existing_ids.add(result["id"])
                last_valid_id = result["id"]  # newest valid ID
                consecutive_empty = 0  # a real question breaks the empty streak

                # Checkpoint every 10 new questions.
                if new_count % 10 == 0:
                    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
                        json.dump(all_questions, f, ensure_ascii=False, indent=2)
                    print(f"💾 ذخیره شد: {new_count} سوال جدید")
            else:
                # None: the fetch itself failed.
                consecutive_failures += 1
                if consecutive_failures >= MAX_CONSECUTIVE_FAILURES:
                    aborted = True
                    print(f"⚠️ توقف آپدیت: {consecutive_failures} خطای متوالی در دریافت سوالات")
                    break

            current_id = qid + 1

        # Short pause between batches.
        await asyncio.sleep(0.02)

    # Final save.
    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        json.dump(all_questions, f, ensure_ascii=False, indent=2)

    # Report the last valid ID rather than the scan position when one exists.
    new_last_id = last_valid_id if last_valid_id > 0 else (current_id - 1)

    # Final report.
    if new_count > 0:
        report = f"""
✅ **آپدیت خودکار کامل شد!**

📊 **سوالات جدید:** {new_count}
🔢 **بررسی تا ID {current_id}**
🏁 {'به انتهای دیتابیس رسیدیم' if reached_end else 'ادامه دارد'}

📝 **آخرین سوال معتبر:** ID {new_last_id}
📁 **کل سوالات:** {len(all_questions)}
        """
        await send_message(chat_id, report)
    elif reached_end:
        await send_message(chat_id, f"🏁 **پایان دیتابیس**\n\n✅ سوال جدیدی وجود ندارد.\n📝 آخرین ID معتبر: {new_last_id}")
    else:
        await send_message(chat_id, f"ℹ️ **آپدیت خودکار:** سوال جدیدی پیدا نشد.\n📝 آخرین ID بررسی شده: {current_id - 1}\n📝 آخرین ID معتبر: {new_last_id}")

    return new_count, reached_end, new_last_id
    
async def process_range(lms_session: aiohttp.ClientSession, chat_id: int, start: int, end: int):
    """Extract every not-yet-stored question with an ID in ``[start, end]``.

    IDs are fetched concurrently in batches of ten. New questions are
    appended to the database loaded from ``OUTPUT_FILE``; the file is
    rewritten after every twentieth new question and once at the end.
    Processing stops early when ``MAX_CONSECUTIVE_EMPTY`` empty pages are
    seen in a row, which is reported as the end of the database.

    Progress messages and the final report (new / multiple-choice / essay /
    with-image counts, plus the total number of empty pages seen when the
    end was not reached) are sent to ``chat_id`` via ``send_message``.
    Nothing is returned.
    """
    await send_message(chat_id, f"🔍 شروع استخراج سوالات از {start} تا {end}")

    # Load the current database; a missing or unreadable file means "empty".
    try:
        with open(OUTPUT_FILE, "r", encoding="utf-8") as f:
            all_questions = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        all_questions = []

    existing_ids = {q["id"] for q in all_questions}

    BATCH_SIZE = 10
    found_count = 0
    essay_count = 0
    image_count = 0
    empty_count = 0  # empty pages across the whole range, for the report
    consecutive_empty = 0
    reached_end = False
    last_valid_id = max(existing_ids) if existing_ids else 0  # last valid ID known so far

    for batch_start in range(start, end + 1, BATCH_SIZE):
        if reached_end:
            break

        batch_end = min(batch_start + BATCH_SIZE - 1, end)
        batch_ids = [qid for qid in range(batch_start, batch_end + 1) if qid not in existing_ids]

        if not batch_ids:
            continue

        # Fetch the whole batch concurrently.
        tasks = [extract_question(lms_session, qid) for qid in batch_ids]
        results = await asyncio.gather(*tasks)

        for qid, result in zip(batch_ids, results):
            if result == "empty":
                empty_count += 1
                consecutive_empty += 1
                print(f"❌ [ID: {qid}] صفحه خالی ({consecutive_empty}/{MAX_CONSECUTIVE_EMPTY})")

                if consecutive_empty >= MAX_CONSECUTIVE_EMPTY:
                    reached_end = True
                    await send_message(chat_id, f"🏁 **به انتهای دیتابیس رسیدیم!**\n\n{MAX_CONSECUTIVE_EMPTY} سوال خالی پشت سر هم در آیدی‌های {qid - MAX_CONSECUTIVE_EMPTY + 1} تا {qid}\n📝 آخرین سوال معتبر: ID {last_valid_id}")
                    break
            elif result:
                found_count += 1
                all_questions.append(result)
                existing_ids.add(result["id"])
                last_valid_id = result["id"]  # newest valid ID
                consecutive_empty = 0  # a real question breaks the empty streak

                if result['type'] == 'essay':
                    essay_count += 1
                if result['has_images']:
                    image_count += 1

                # Checkpoint and progress message every 20 new questions.
                if found_count % 20 == 0:
                    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
                        json.dump(all_questions, f, ensure_ascii=False, indent=2)
                    await send_message(chat_id, f"📊 پیشرفت: {found_count} سوال جدید پیدا شد")
            # A None result (timeout/error) is skipped without touching the
            # counters.

        # Short pause between batches.
        await asyncio.sleep(0.05)

    # Final save.
    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        json.dump(all_questions, f, ensure_ascii=False, indent=2)

    # Final report.
    if reached_end:
        report = f"""
🏁 **پایان دیتابیس شناسایی شد!**

📊 **گزارش نهایی:**
✅ جدید: {found_count} | 📋 تستی: {found_count - essay_count} | 📝 تشریحی: {essay_count} | 📸 با عکس: {image_count}
🏁 {MAX_CONSECUTIVE_EMPTY} سوال خالی متوالی

📝 **کل سوالات در دیتابیس:** {len(all_questions)}
📝 **آخرین ID معتبر:** {last_valid_id}
        """
    else:
        report = f"""
📊 **گزارش نهایی بازه {start} تا {end}:**

✅ جدید: {found_count} | 📋 تستی: {found_count - essay_count} | 📝 تشریحی: {essay_count} | 📸 با عکس: {image_count}
❌ خالی: {empty_count}

📝 **کل سوالات در دیتابیس:** {len(all_questions)}
        """

    await send_message(chat_id, report)