import argparse
import logging
import sys
import time
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime, timedelta
from urllib.parse import parse_qs, quote, unquote, urlparse

import requests
from bs4 import BeautifulSoup

# Logging setup: only WARNING and above are emitted, each record prefixed
# with its timestamp and level.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.WARNING,
)

# --- TOP LEVEL VARIABLES ---
# Pause, in seconds, after each full pass over the requested dates.
LISTING_INTERVAL = 1
# Size of the thread pool that runs booking attempts.
MAX_BOOKING_WORKERS = 16
# Number of times the movie page is fetched while it keeps answering 429.
BOOKING_RETRIES_ON_429 = 2
# Seconds to sleep after a 429 before fetching the movie page again.
BOOKING_RETRY_SLEEP = 5

# Screening IDs (the `idproj` token) that are exempt from the DATE_BLACKLIST
# time-slot check: a whitelisted screening is kept even if it overlaps a
# blacklisted slot.
# NOTE(review): "e%2BphcoRJftQ=" is written percent-encoded, whereas IDs
# extracted from listing links with parse_qs are percent-decoded — verify
# that the comparison site accounts for both forms.
WHITELIST: set[str] = {
    "e%2BphcoRJftQ=",  # VESNA
    "zaToVEQJXwQ=",  # Full Fill
    "IulaCwl7slM=",  # Papertiger
}

# Screening IDs that are never dispatched for booking, even when listed.
BLACKLIST: set[str] = {
    # Full Fill - bad dates
    "PIuErCLSYDE=",
    "f37DhI1GWZA=",
    # Papertiger - bad dates
    "ToK8woDWBTA=",
    "U6eIxxUZ13g=",
    "CmzQEk0EGos=",
    # Fjord
    "I41agJEDnoE=",
}

# Busy time windows per day. Keys are the day strings used for listing
# (e.g. "18"); values are sets of (start, end) datetimes. A non-whitelisted
# screening whose time range intersects one of these windows is skipped.
DATE_BLACKLIST: dict[str, set[tuple[datetime, datetime]]] = {
    "18": {
        (
            datetime.strptime("2024-05-18 16:00", "%Y-%m-%d %H:%M"),
            datetime.strptime("2024-05-18 17:45", "%Y-%m-%d %H:%M"),
        ),
    },
}

# --- ENDPOINTS ---
LOGIN_URL = "https://ticketonline.festival-cannes.com/login"

# The URL templates below assume the ID / day is passed as a query parameter;
# adjust the paths if the site's exact route format differs.
MOVIE_PAGE_URL_TEMPLATE = "https://ticketonline3.festival-cannes.com/fiche?idproj={movie_id}"
BOOKING_ACTION_URL = "https://ticketonline3.festival-cannes.com/fiche?action=reserver"
SEANCES_URL_TEMPLATE = "https://ticketonline3.festival-cannes.com/seances?jour={date}&action=changeDate"

def _parse_screening_time(date: str, clock_text: str) -> datetime:
    """Parse a listing clock label ("4:00 PM" or "16:00") on day `date` into a datetime."""
    # NOTE: year and month are fixed to 2024-05 here, so DATE_BLACKLIST entries
    # must use the same year/month to ever intersect.
    stamp = f"2024-05-{date.zfill(2)} {clock_text}"
    try:
        return datetime.strptime(stamp, "%Y-%m-%d %I:%M %p")
    except ValueError:
        return datetime.strptime(stamp, "%Y-%m-%d %H:%M")


def _screening_is_blacklisted(a_tag, date: str, movie_id: str, slots) -> bool:
    """Return True when the screening in `a_tag` overlaps one of the blacklisted `slots`."""
    heure_div = a_tag.find('div', class_='Heure')
    if not heure_div:
        return False

    labels = heure_div.find_all('label')
    if len(labels) < 3:
        return False

    try:
        # The first label is read as the start time and the third as the end time.
        start_dt = _parse_screening_time(date, labels[0].text.strip())
        end_dt = _parse_screening_time(date, labels[2].text.strip())
    except ValueError as e:
        # Unparseable times: keep the screening rather than silently dropping it.
        logging.error(f"Error parsing times for {movie_id}: {e}")
        return False

    # A screening that runs past midnight shows an end clock time earlier than
    # its start; roll the end over to the next day so the overlap test works.
    if end_dt < start_dt:
        end_dt += timedelta(days=1)

    return any(start_dt < bl_end and end_dt > bl_start for bl_start, bl_end in slots)


def list_all_available(session: requests.Session, date: str, timeout: float = 10.0) -> set[str]:
    """
    Request the list of available screenings for one day and return their IDs.

    The listing page is first fetched with GET to pick up the
    `__RequestVerificationToken` (when present), then POSTed with the
    `voirbilletsdispos` filter switched on. Every link to a `fiche` page that
    carries an `idproj` query parameter yields one ID, except:

    * links showing the "demande-satisfaite" image (already booked), and
    * screenings overlapping a DATE_BLACKLIST slot for `date`, unless the ID
      is in WHITELIST.

    Args:
        session: Logged-in session used for both requests.
        date: Day string as used in the listing URL and DATE_BLACKLIST keys.
        timeout: Seconds to wait on each HTTP request before giving up, so a
            stalled connection cannot hang the polling loop.

    Returns:
        The set of extracted `idproj` strings (percent-decoded by parse_qs).
        An empty or partial set is returned on a non-200 response or on any
        error, which is logged rather than raised.
    """
    logging.info("Listing all available movies for date: %s", date)
    available_ids: set[str] = set()
    try:
        url = SEANCES_URL_TEMPLATE.format(date=date)

        # First GET to retrieve the RequestVerificationToken
        get_response = session.get(url, timeout=timeout)
        soup_get = BeautifulSoup(get_response.text, 'lxml')
        token_input = soup_get.find('input', {'name': '__RequestVerificationToken'})

        post_data = {'voirbilletsdispos': 'on'}
        if token_input:
            post_data['__RequestVerificationToken'] = token_input.get('value')

        # Now POST to apply the 'voirbilletsdispos' filter and switch the date
        response = session.post(url, data=post_data, timeout=timeout)

        if response.status_code != 200:
            logging.warning(f"Non-200 response when listing available tickets: {response.status_code}")
            return available_ids

        soup = BeautifulSoup(response.text, 'lxml')

        blacklisted_slots = DATE_BLACKLIST.get(date, ())
        # parse_qs percent-decodes the IDs, while WHITELIST entries may be
        # written percent-encoded (e.g. "%2B" for "+"); accept both spellings.
        whitelist = WHITELIST | {unquote(entry) for entry in WHITELIST}

        # Look at every link rather than a specific container: any anchor
        # pointing at 'fiche?idproj=' is treated as a screening.
        for a_tag in soup.find_all('a', href=True):
            parsed_href = urlparse(a_tag['href'])
            if "fiche" not in parsed_href.path:
                continue

            # Skip tickets that we already successfully booked
            if a_tag.find('img', {'src': '/content/images/demande-satisfaite.svg'}):
                continue

            id_values = parse_qs(parsed_href.query).get('idproj')
            if not id_values:
                continue
            movie_id = id_values[0]

            # Whitelisted IDs bypass the time-slot check entirely.
            if (
                blacklisted_slots
                and movie_id not in whitelist
                and _screening_is_blacklisted(a_tag, date, movie_id, blacklisted_slots)
            ):
                continue

            available_ids.add(movie_id)

    except Exception as e:
        # Best-effort polling: log and return whatever was collected so far.
        logging.error(f"Error while listing available tickets for {date}: {e}")

    return available_ids

def login_to_cannes(
    session: requests.Session,
    username: str,
    password: str,
    timeout: float = 15.0,
) -> tuple[bool, str | None]:
    """
    Log `session` in through the login form.

    The login page is fetched first so that its `__RequestVerificationToken`
    (when present) can be sent back together with the credentials.

    Args:
        session: Session that will hold the authentication cookies.
        username: Account user name, posted as `UserName`.
        password: Account password, posted as `Password`.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        `(True, key)` when the final URL after redirects is a `welcome` page
        with a `key` query parameter, `(True, None)` when the final response
        is a 200 without such a key, and `(False, None)` on missing
        credentials, any other status, or any exception (which is logged).
    """
    logging.info("Initiating login sequence...")

    # Without this guard, None credentials would be dropped from the form by
    # requests and a 200 answer would then be reported as a successful login.
    if not username or not password:
        logging.error("Login aborted: username and password are both required.")
        return False, None

    try:
        # First GET request to grab potential anti-CSRF token on the login page itself
        response = session.get(LOGIN_URL, timeout=timeout)
        soup = BeautifulSoup(response.text, 'lxml')

        login_payload = {
            'UserName': username,
            'Password': password,
        }

        # If there's a RequestVerificationToken on the login page, include it
        token_input = soup.find('input', {'name': '__RequestVerificationToken'})
        if token_input:
            login_payload['__RequestVerificationToken'] = token_input.get('value')

        login_response = session.post(
            LOGIN_URL,
            data=login_payload,
            allow_redirects=True,
            timeout=timeout,
        )

        # A successful login is expected to end on welcome?key=...
        parsed_url = urlparse(login_response.url)
        query = parse_qs(parsed_url.query)
        if "welcome" in parsed_url.path and "key" in query:
            welcome_key = query['key'][0]
            logging.info(f"Successfully logged in. Extracted welcome key: {welcome_key}")
            return True, welcome_key

        if login_response.status_code == 200:
            # NOTE(review): a rejected login may also come back as a plain 200
            # (the form re-rendered) — confirm against the site before
            # trusting this branch as proof of authentication.
            logging.info("Successfully logged in, but no welcome key found in the URL.")
            return True, None

        logging.error(f"Login might have failed. Current URL after POST: {login_response.url}")
        return False, None

    except Exception as e:
        logging.error(f"An error occurred during login: {e}")
        return False, None

def try_book_movie(session: requests.Session, movie_id: str, timeout: float = 10.0) -> bool:
    """
    Fetch a movie page and, if a booking button is present, POST the booking.

    The page is fetched up to BOOKING_RETRIES_ON_429 times while the server
    answers 429, sleeping BOOKING_RETRY_SLEEP seconds between attempts (but
    not after the last one). When the page shows a booking button and a
    non-empty `__RequestVerificationToken`, that token is POSTed to
    BOOKING_ACTION_URL.

    Args:
        session: Logged-in session used for the page fetch and the booking POST.
        movie_id: `idproj` token, either percent-decoded (as extracted from the
            listing by parse_qs) or already percent-encoded.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        True when the booking POST was answered without an error status and
        without the "no longer available" message; False in every other case,
        including any exception (which is logged, not raised).
    """
    try:
        # Normalize to the decoded form, then percent-encode it for the URL:
        # a raw "+" in a query string would be read as a space by the server.
        encoded_id = quote(unquote(movie_id), safe='')
        movie_page_url = MOVIE_PAGE_URL_TEMPLATE.format(movie_id=encoded_id)

        response = None
        for attempt in range(1, BOOKING_RETRIES_ON_429 + 1):
            response = session.get(movie_page_url, timeout=timeout)
            # Stop on any non-429 answer, and do not sleep after the final
            # attempt since no further fetch would follow it.
            if response.status_code != 429 or attempt == BOOKING_RETRIES_ON_429:
                break
            logging.warning(f"[{movie_id}] 429 on fiche fetch (attempt {attempt}/{BOOKING_RETRIES_ON_429}), backing off {BOOKING_RETRY_SLEEP}s")
            time.sleep(BOOKING_RETRY_SLEEP)

        if response is None:
            # Only reachable when BOOKING_RETRIES_ON_429 is zero or negative.
            logging.warning(f"[{movie_id}] returned: no response")
            return False
        if response.status_code != 200:
            logging.warning(f"[{movie_id}] returned: {response.status_code}")
            return False

        soup = BeautifulSoup(response.text, 'lxml')

        # Use the page title in log messages when there is one.
        movie_title = movie_id
        title_tag = soup.find('title')
        if title_tag and title_tag.text:
            movie_title = title_tag.text.strip()

        # Look for the booking button by its explicit 'formaction', falling
        # back to its CSS class.
        book_button = soup.find('input', {'formaction': '/fiche?action=reserver'})
        if not book_button:
            book_button = soup.find('input', {'class': 'ButtonFullBigRounded', 'type': 'submit'})

        if not book_button:
            logging.info(f"[{movie_title}] Book button not found. No tickets available yet. Skipping.")
            return False

        token_input = soup.find('input', {'name': '__RequestVerificationToken'})
        token_value = token_input.get('value') if token_input else None

        # An input without a value would make requests drop the field and
        # send a token-less POST, so treat it like a missing token.
        if not token_value:
            logging.warning(f"[{movie_title}] Book button found, but RequestVerificationToken is missing. Skipping.")
            return False

        booking_payload = {
            '__RequestVerificationToken': token_value
            # Depending on platform there might be 'id': movie_id needed here, add it if the request fails without it
        }

        # NOTE(review): the POST carries no movie identifier, so the server
        # presumably books whichever page this session viewed last; with
        # several workers sharing one session that could race — confirm.
        booking_response = session.post(
            BOOKING_ACTION_URL,
            data=booking_payload,
            allow_redirects=True,
            timeout=timeout,
        )

        # An error status (e.g. 429 or 5xx) is never a successful booking.
        if booking_response.status_code >= 400:
            logging.warning(f"[{movie_title}] Booking failed: server answered {booking_response.status_code}.")
            return False

        if "This ticket is no longer available" in booking_response.text:
            logging.warning(f"[{movie_title}] Booking failed: Ticket grabbed by someone else or no longer available.")
            return False

        logging.warning(f"[{movie_title}] Booking seemingly SUCCESSFUL! Check your account to confirm.")
        return True

    except Exception as e:
        logging.error(f"[{movie_id}] Exception occurred while booking: {e}")
        return False

def main():
    """
    Command-line entry point: log in, then poll the listings forever.

    For each requested day the listing is fetched once per pass; IDs that were
    not present in that day's previous listing and are not in BLACKLIST are
    handed to the booking thread pool. The loop sleeps LISTING_INTERVAL
    seconds between passes and runs until interrupted with Ctrl-C.

    Exits with status 1 when no date or no credentials were given, or when
    the login fails.
    """
    parser = argparse.ArgumentParser(description="Cannes Ticket Scraper")
    parser.add_argument("--date", type=int, nargs='+', help="Dates to use for 'available' booking sweeps (e.g. 14 15 16)")
    parser.add_argument("--username", type=str, help="Username to login")
    parser.add_argument("--password", type=str, help="Password to login")
    args = parser.parse_args()

    # Validate the arguments before touching the network, so that a bad
    # invocation does not cost a login round-trip.
    if not args.date:
        logging.error("No dates provided. Exiting.")
        sys.exit(1)
    if not args.username or not args.password:
        logging.error("Both --username and --password are required. Exiting.")
        sys.exit(1)

    # Setup persistent session to keep cookies alive
    session = requests.Session()
    # Mask scraper with standard browser user-agent
    session.headers.update({
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36',
    })

    login_result, _welcome_key = login_to_cannes(session, args.username, args.password)
    if not login_result:
        logging.critical("Cannot proceed without a successful login. Exiting.")
        sys.exit(1)

    # Day strings in the order given, with duplicates removed so that no day
    # is polled twice per pass.
    date_keys = list(dict.fromkeys(str(d) for d in args.date))

    # Per-date set of IDs seen in the previous listing sweep (so we can spot new ones).
    lastly_seen_per_date: dict[str, set[str]] = {d_str: set() for d_str in date_keys}

    executor = ThreadPoolExecutor(max_workers=MAX_BOOKING_WORKERS)
    try:
        while True:
            for d_str in date_keys:
                current_ids = list_all_available(session, d_str)
                new_ids = current_ids - lastly_seen_per_date[d_str] - BLACKLIST

                if new_ids:
                    logging.warning(f"[date {d_str}] {len(new_ids)} new movie(s) detected, dispatching booking workers: {new_ids}")
                    for mid in new_ids:
                        executor.submit(try_book_movie, session, mid)

                # Remember this round's listing so that an ID is only retried
                # once it has disappeared from the listing and come back.
                # NOTE: list_all_available also returns an empty set when the
                # request fails, so after a failed poll every listed ID counts
                # as new again on the next successful one.
                lastly_seen_per_date[d_str] = current_ids

            time.sleep(LISTING_INTERVAL)
    except KeyboardInterrupt:
        logging.warning("Interrupted, shutting down workers...")
    finally:
        # Runs on Ctrl-C and on any unexpected error alike: wait for the
        # in-flight bookings, then release the session's connections.
        executor.shutdown(wait=True)
        session.close()


if __name__ == "__main__":
    main()
