from datetime import datetime

import requests
from bs4 import BeautifulSoup
from urllib.parse import urlparse, parse_qs
import time
import logging
import sys
import argparse
from concurrent.futures import ThreadPoolExecutor

# Configure root logging. The level is WARNING, so the logging.info() calls in
# this file are suppressed; messages the operator must see are emitted at
# WARNING or above.
logging.basicConfig(level=logging.WARNING, format='%(asctime)s - %(levelname)s - %(message)s')

# --- TOP LEVEL VARIABLES ---
LISTING_INTERVAL = 1  # seconds slept after each full sweep over all monitored dates
MAX_BOOKING_WORKERS = 16  # max_workers of the thread pool that runs try_book_movie
BOOKING_RETRIES_ON_429 = 3  # number of GET attempts on the movie page while it answers 429
BOOKING_RETRY_SLEEP = 5  # seconds between booking retries on 429

# Screening IDs (values of the `idproj` query parameter) that must never be
# booked. list_all_available() drops them from its result and main() subtracts
# them again before dispatching booking workers.
BLACKLIST : set[str] = {
    # Full Fill - bad dates
    "PIuErCLSYDE=",
    "f37DhI1GWZA=",
    # Papertiger - bad dates
    "ToK8woDWBTA=",
    "U6eIxxUZ13g=",
    "CmzQEk0EGos=",
    # Fjord
    "I41agJEDnoE=",
    # noise
    "WzerQ+4FLxc=",
    "b0yY+v7SJpA=",
    "faRg/ZNcX+0=",
}

# --- ENDPOINTS ---
# Login form: fetched with GET (to pick up the anti-CSRF token) and then POSTed to.
# NOTE(review): this host (ticketonline) differs from the ticketonline3 host used
# below - presumably the login redirect carries the session over; confirm.
LOGIN_URL = "https://ticketonline.festival-cannes.com/login"

# Detail page of one screening; {movie_id} is replaced with an `idproj` value
# taken from the listing page.
MOVIE_PAGE_URL_TEMPLATE = "https://ticketonline3.festival-cannes.com/fiche?idproj={movie_id}"
# Target of the booking POST issued by try_book_movie().
BOOKING_ACTION_URL = "https://ticketonline3.festival-cannes.com/fiche?action=reserver"
# Listing of screenings for one day; {date} is replaced with the day given via --date.
SEANCES_URL_TEMPLATE = "https://ticketonline3.festival-cannes.com/seances?jour={date}&action=changeDate"

def _parse_screening_window(date: str, start_time_str: str, end_time_str: str) -> tuple[datetime, datetime]:
    """
    Turn a screening's displayed start/end times into datetimes on day `date`.

    Both strings are parsed as 12h "HH:MM AM/PM" if either one ends with an
    AM/PM suffix, otherwise as 24h "HH:MM". An end time earlier than the start
    time is taken to be on the following day (screening running past midnight).

    Raises:
        ValueError: if either string does not match the selected format.
    """
    # Local import: the file header only imports `datetime` from the datetime module.
    from datetime import timedelta

    uses_meridiem = any(
        text.lower().endswith(('am', 'pm')) for text in (start_time_str, end_time_str)
    )
    time_format = "%Y-%m-%d %I:%M %p" if uses_meridiem else "%Y-%m-%d %H:%M"

    # Anchor on the same fixed 2024-05-DD date main() uses when it builds the
    # limits, so only the day and the time of day take part in comparisons.
    day_prefix = f"2024-05-{date.zfill(2)}"
    start_dt = datetime.strptime(f"{day_prefix} {start_time_str}", time_format)
    end_dt = datetime.strptime(f"{day_prefix} {end_time_str}", time_format)

    # Without this, a 23:30 -> 01:30 screening would appear to end at 01:30 on
    # the same day and wrongly satisfy any end limit.
    if end_dt < start_dt:
        end_dt += timedelta(days=1)
    return start_dt, end_dt


def list_all_available(session: requests.Session, date: str, time_constraints: tuple[datetime | None, datetime | None] = (None, None), timeout: float = 10.0) -> set[str]:
    """
    Fetch the listing page for `date` with the 'voirbilletsdispos' filter
    applied and return the set of movie IDs (idproj values) found in it.

    Args:
        session: logged-in requests session.
        date: day value substituted into SEANCES_URL_TEMPLATE.
        time_constraints: (earliest start, latest end). A None bound is not
            enforced. When at least one bound is set, entries whose times
            cannot be read or parsed are skipped.
        timeout: seconds to wait on each HTTP request, so that a stalled
            connection cannot block the polling loop forever.

    Returns:
        The matching IDs, excluding BLACKLIST entries and entries showing the
        "demande-satisfaite" icon. An empty set is also returned on a non-200
        response or on any exception, so callers cannot tell a failed poll
        from "nothing available".
    """
    logging.info(f"Listing all available movies for date: {date}")
    available_ids: set[str] = set()
    start_limit, end_limit = time_constraints
    has_time_filter = start_limit is not None or end_limit is not None

    try:
        url = SEANCES_URL_TEMPLATE.format(date=date)

        # GET first: the page may carry the anti-CSRF token the POST needs.
        get_response = session.get(url, timeout=timeout)
        soup_get = BeautifulSoup(get_response.text, 'lxml')
        token_input = soup_get.find('input', {'name': '__RequestVerificationToken'})

        post_data = {'voirbilletsdispos': 'on'}
        if token_input:
            post_data['__RequestVerificationToken'] = token_input.get('value')

        # POST to the same URL to apply the 'voirbilletsdispos' filter.
        response = session.post(url, data=post_data, timeout=timeout)

        if response.status_code != 200:
            logging.warning(f"Non-200 response when listing available tickets: {response.status_code}")
            return available_ids

        soup = BeautifulSoup(response.text, 'lxml')

        for a_tag in soup.find_all('a', href=True):
            href = a_tag['href']

            # Cheap substring checks before the heavier URL parsing.
            if "fiche" not in href or "idproj=" not in href:
                continue

            # Entries showing this icon are skipped as already booked.
            if a_tag.find('img', {'src': '/content/images/demande-satisfaite.svg'}):
                continue

            qs = parse_qs(urlparse(href).query)
            movie_id = qs.get('idproj', [None])[0]

            if not movie_id or movie_id in BLACKLIST:
                continue

            if has_time_filter:
                heure_div = a_tag.find('div', class_='Heure')
                if not heure_div:
                    continue

                # The first and third labels are read as start and end time.
                labels = heure_div.find_all('label')
                if len(labels) < 3:
                    continue

                try:
                    start_dt, end_dt = _parse_screening_window(
                        date, labels[0].text.strip(), labels[2].text.strip()
                    )
                except ValueError as e:
                    logging.error(f"Error parsing times for {movie_id}: {e}")
                    continue

                if start_limit is not None and start_dt < start_limit:
                    continue
                if end_limit is not None and end_dt > end_limit:
                    continue

            available_ids.add(movie_id)

    except Exception as e:
        # Best-effort boundary: a failed poll must not kill the monitoring loop.
        logging.error(f"Error while listing available tickets for {date}: {e}")

    return available_ids

def login_to_cannes(session: requests.Session, username: str, password: str, timeout: float = 10.0) -> tuple[bool, str | None]:
    """
    Log `session` in through the LOGIN_URL form.

    Args:
        session: requests session that will hold the login cookies.
        username: value posted as the 'UserName' form field.
        password: value posted as the 'Password' form field.
        timeout: seconds to wait on each HTTP request, so that a stalled
            connection cannot hang the script at startup.

    Returns:
        (True, key) when the final URL after redirects has "welcome" in its
        path and a `key` query parameter; (True, None) when the final response
        is a 200 without such a key; (False, None) otherwise or on any
        exception.
    """
    logging.info("Initiating login sequence...")
    try:
        # GET the login page first to pick up a potential anti-CSRF token.
        response = session.get(LOGIN_URL, timeout=timeout)
        soup = BeautifulSoup(response.text, 'lxml')

        login_payload = {
            'UserName': username,
            'Password': password
        }

        token_input = soup.find('input', {'name': '__RequestVerificationToken'})
        if token_input:
            login_payload['__RequestVerificationToken'] = token_input.get('value')

        login_response = session.post(LOGIN_URL, data=login_payload, allow_redirects=True, timeout=timeout)

        # A successful login is expected to end on .../welcome?key=...
        parsed_url = urlparse(login_response.url)
        query = parse_qs(parsed_url.query)
        if "welcome" in parsed_url.path and "key" in query:
            welcome_key = query['key'][0]
            logging.info(f"Successfully logged in. Extracted welcome key: {welcome_key}")
            return True, welcome_key

        if login_response.status_code == 200:
            # NOTE(review): a rejected login that re-renders the form would
            # presumably also end here with a 200 - confirm against the site
            # before trusting this branch as proof of authentication.
            logging.info("Successfully logged in, but no welcome key found in the URL.")
            return True, None

        logging.error(f"Login might have failed. Current URL after POST: {login_response.url}")
        return False, None

    except Exception as e:
        logging.error(f"An error occurred during login: {e}")
        return False, None

def try_book_movie(session: requests.Session, movie_id: str, timeout: float = 10.0) -> bool:
    """
    Open the movie page for `movie_id` and, if a book button and an anti-CSRF
    token are present, POST the booking request.

    Args:
        session: logged-in requests session.
        movie_id: idproj value as decoded from the listing page.
        timeout: seconds to wait on each HTTP request, so that a stalled
            connection cannot occupy a worker thread forever.

    Returns:
        True if the booking POST answered 200 without the "no longer
        available" message, False in every other case (including exceptions).
        A True result is not a confirmation; the account must be checked.
    """
    # Local import: the file header only imports urlparse/parse_qs from urllib.parse.
    from urllib.parse import quote

    # IDs are base64-like and may contain '+', '/' and '='. A raw '+' in a
    # query string is decoded as a space by the server, so percent-encode the
    # value before substituting it into the URL.
    movie_page_url = MOVIE_PAGE_URL_TEMPLATE.format(movie_id=quote(movie_id, safe=''))

    try:
        response = None
        for attempt in range(1, BOOKING_RETRIES_ON_429 + 1):
            response = session.get(movie_page_url, timeout=timeout)
            # Stop on any non-429 answer, and do not sleep after the last
            # attempt: nothing follows it, so the wait would be wasted.
            if response.status_code != 429 or attempt == BOOKING_RETRIES_ON_429:
                break
            logging.warning(f"[{movie_id}] 429 on fiche fetch (attempt {attempt}/{BOOKING_RETRIES_ON_429}), backing off {BOOKING_RETRY_SLEEP}s")
            time.sleep(BOOKING_RETRY_SLEEP)

        # Only reachable with BOOKING_RETRIES_ON_429 <= 0 (loop never ran).
        if response is None:
            logging.warning(f"[{movie_id}] returned: no response")
            return False
        if response.status_code != 200:
            logging.warning(f"[{movie_id}] returned: {response.status_code}")
            return False

        soup = BeautifulSoup(response.text, 'lxml')

        # Use the page title in log messages when there is one.
        movie_title = movie_id
        title_tag = soup.find('title')
        if title_tag and title_tag.text:
            movie_title = title_tag.text.strip()

        # Look for the book button by its formaction, then by class as a fallback.
        book_button = soup.find('input', {'formaction': '/fiche?action=reserver'})
        if not book_button:
            book_button = soup.find('input', {'class': 'ButtonFullBigRounded', 'type': 'submit'})

        if not book_button:
            logging.info(f"[{movie_title}] Book button not found. No tickets available yet. Skipping.")
            return False

        token_input = soup.find('input', {'name': '__RequestVerificationToken'})
        if not token_input:
            logging.warning(f"[{movie_title}] Book button found, but RequestVerificationToken is missing. Skipping.")
            return False

        booking_payload = {
            '__RequestVerificationToken': token_input.get('value')
            # Depending on platform there might be 'id': movie_id needed here, add it if the request fails without it
        }

        # NOTE(review): the POST carries no idproj, so the server presumably
        # books whichever fiche this session viewed last; with several workers
        # sharing one session that could race - confirm.
        booking_response = session.post(BOOKING_ACTION_URL, data=booking_payload, allow_redirects=True, timeout=timeout)

        # An error status (429, 5xx, ...) must not be reported as a success
        # just because the "no longer available" text is absent.
        if booking_response.status_code != 200:
            logging.warning(f"[{movie_title}] Booking failed: POST returned {booking_response.status_code}.")
            return False

        if "This ticket is no longer available" in booking_response.text:
            logging.warning(f"[{movie_title}] Booking failed: Ticket grabbed by someone else or no longer available.")
            return False

        logging.warning(f"[{movie_title}] Booking seemingly SUCCESSFUL! Check your account to confirm.")
        return True

    except Exception as e:
        logging.error(f"[{movie_id}] Exception occurred while booking: {e}")
        return False

def main():
    """
    Command-line entry point: validate arguments, log in, then poll the
    listing of every requested date forever and dispatch a booking worker for
    each ID that was not present in the previous sweep of that date.

    Exits with status 1 on missing or malformed arguments or on login failure.
    Ctrl-C stops the loop and waits for running booking workers to finish.
    """
    parser = argparse.ArgumentParser(description="Cannes Ticket Scraper")
    parser.add_argument("--date", type=str, nargs='+', help="Dates to use for 'available' booking sweeps. Formats: DD or DD,START,END (e.g. 14 or 15,10:00,16:00 or 16,None,12:00)")
    parser.add_argument("--username", type=str, help="Username to login")
    parser.add_argument("--password", type=str, help="Password to login")
    args = parser.parse_args()

    # Validate everything locally before spending a login round-trip.
    if not args.date:
        logging.error("No dates provided. Exiting.")
        sys.exit(1)
    if not args.username or not args.password:
        # requests drops None form fields, so a missing credential would
        # otherwise be posted as an incomplete login form.
        logging.error("Both --username and --password are required. Exiting.")
        sys.exit(1)

    # date string -> (earliest start or None, latest end or None)
    parsed_dates: dict[str, tuple[datetime | None, datetime | None]] = {}
    for d_arg in args.date:
        parts = d_arg.split(',')
        date_str = parts[0]
        # Only the two documented shapes are accepted; a two-part value would
        # otherwise silently lose its time window.
        if not date_str or len(parts) not in (1, 3):
            logging.error(f"Invalid --date value '{d_arg}': expected DD or DD,START,END. Exiting.")
            sys.exit(1)

        start_dt = None
        end_dt = None
        if len(parts) == 3:
            start_str, end_str = parts[1], parts[2]
            try:
                # Anchored on a fixed 2024-05-DD date; list_all_available builds
                # the screening times the same way, so the two are comparable.
                if start_str and start_str.lower() != 'none':
                    start_dt = datetime.strptime(f"2024-05-{date_str.zfill(2)} {start_str}", "%Y-%m-%d %H:%M")
                if end_str and end_str.lower() != 'none':
                    end_dt = datetime.strptime(f"2024-05-{date_str.zfill(2)} {end_str}", "%Y-%m-%d %H:%M")
            except ValueError as e:
                logging.error(f"Invalid --date value '{d_arg}': {e}. Exiting.")
                sys.exit(1)

        parsed_dates[date_str] = (start_dt, end_dt)

    # Setup persistent session to keep cookies alive
    session = requests.Session()
    # Mask scraper with standard browser user-agent
    session.headers.update({
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36',
    })

    # The welcome key returned alongside the flag is not needed afterwards.
    login_result, _ = login_to_cannes(session, args.username, args.password)

    if not login_result:
        logging.critical("Cannot proceed without a successful login. Exiting.")
        sys.exit(1)

    # Per-date set of IDs seen in the previous listing sweep (so we can spot new ones).
    lastly_seen_per_date: dict[str, set[str]] = {d: set() for d in parsed_dates}

    # Logged at CRITICAL so the banner shows regardless of the configured level.
    logging.critical("Monitoring the following dates with constraints:")
    for d_str, (start, end) in parsed_dates.items():
        logging.critical(f"  - {d_str}: start={start.time() if start else 'None'}, end={end.time() if end else 'None'}")

    executor = ThreadPoolExecutor(max_workers=MAX_BOOKING_WORKERS)
    try:
        while True:
            for d_str, constraints in parsed_dates.items():
                current_ids = list_all_available(session, d_str, constraints)
                new_ids = current_ids - lastly_seen_per_date[d_str] - BLACKLIST

                if new_ids:
                    logging.warning(f"[date {d_str}] {len(new_ids)} new movie(s) detected, dispatching booking workers: {new_ids}")
                    for mid in new_ids:
                        executor.submit(try_book_movie, session, mid)

                # Remember only this sweep: an ID gets one booking attempt per
                # appearance, and is attempted again if it vanishes from the
                # listing and later comes back.
                # NOTE(review): list_all_available also returns an empty set
                # when the poll itself fails, so after a failed poll every
                # listed ID looks new again on the next sweep.
                lastly_seen_per_date[d_str] = current_ids

            time.sleep(LISTING_INTERVAL)
    except KeyboardInterrupt:
        logging.warning("Interrupted, shutting down workers...")
    finally:
        # Also runs when the loop dies on an unexpected error, so in-flight
        # booking attempts are always allowed to finish.
        executor.shutdown(wait=True)

if __name__ == "__main__":
    main()