#!/usr/bin/env python3
"""
British Gas Tariff Scraper

This script uses Playwright to navigate the British Gas website and extract
current tariff information for different UK regions.

Usage:
    python british_gas_scraper.py                    # Scrape all regions
    python british_gas_scraper.py --postcode SW1A1AA # Scrape single postcode
    python british_gas_scraper.py --health-check     # Run health check only

Requirements:
    pip install playwright
    playwright install chromium
"""

import json
import logging
import argparse
import sys
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional

try:
    from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeout
    PLAYWRIGHT_AVAILABLE = True
except ImportError:
    PLAYWRIGHT_AVAILABLE = False
    print("Playwright not installed. Run: pip install playwright && playwright install chromium")


# Logging: send INFO-and-above records to stdout with a timestamped format
logging.basicConfig(
    stream=sys.stdout,
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)


class BritishGasScraper:
    """Scrapes tariff information from the British Gas website."""

    def __init__(self, config_path: str = "config.json", headless: bool = True, debug: bool = False):
        """Initialise the scraper with configuration."""
        self.config = self._load_config(config_path)
        self.base_url = self.config["british_gas"]["base_url"]
        self.quote_url = self.config["british_gas"]["quote_url"]
        self.timeout = self.config["british_gas"]["timeout_ms"]
        self.headless = headless
        self.debug = debug
        self.results = []

    def _load_config(self, config_path: str) -> dict:
        """Load configuration from JSON file."""
        config_file = Path(__file__).parent / config_path
        with open(config_file, 'r') as f:
            return json.load(f)

    def _save_results(self, data: dict, filename: str):
        """
        Write ``data`` as indented JSON next to this script.

        Args:
            data: JSON-serialisable payload.
            filename: Target path, resolved against this script's directory.
                Missing parent folders are created.
        """
        destination = Path(__file__).parent / filename

        # The target may sit in a sub-folder that does not exist yet
        destination.parent.mkdir(parents=True, exist_ok=True)

        with open(destination, 'w') as handle:
            json.dump(data, handle, indent=2)

        logger.info(f"Results saved to {destination}")

    def _clear_debug_screenshots(self):
        """Clear old debug screenshots to prevent storage buildup."""
        output_dir = Path(__file__).parent / "output"
        if not output_dir.exists():
            return

        count = 0
        for screenshot in output_dir.glob("debug_*.png"):
            try:
                screenshot.unlink()
                count += 1
            except Exception as e:
                logger.debug(f"Could not delete {screenshot}: {e}")

        if count > 0:
            logger.info(f"Cleared {count} old debug screenshot(s)")

    def scrape_postcode(self, postcode: str, region: str = "Unknown") -> Optional[dict]:
        """
        Scrape tariff information for a specific postcode.

        Launches Chromium through Playwright, opens the configured quote page,
        dismisses any cookie banner and walks the quote journey. The browser
        is closed even when the journey raises.

        Args:
            postcode: UK postcode (e.g., "SW1A 1AA"). Spaces are removed and
                the value is upper-cased before it is typed into the form;
                the original string is kept in the result.
            region: Region name for labelling

        Returns:
            A result dictionary with the keys ``postcode``, ``region``,
            ``scraped_at`` (UTC, ISO-8601), ``success``, ``tariffs``,
            ``ev_tariff`` and ``error``. Scrape failures are reported through
            ``success=False`` plus ``error`` rather than raised. ``None`` is
            returned only when Playwright is not installed.
        """
        if not PLAYWRIGHT_AVAILABLE:
            logger.error("Playwright not available")
            return None

        # Normalise postcode (remove spaces)
        postcode_clean = postcode.replace(" ", "").upper()

        logger.info(f"Scraping tariffs for {postcode} ({region})")

        result = {
            "postcode": postcode,
            "region": region,
            "scraped_at": datetime.now(timezone.utc).isoformat(),
            "success": False,
            "tariffs": [],
            "ev_tariff": None,
            "error": None
        }

        try:
            with sync_playwright() as p:
                # Launch browser with anti-detection settings
                browser = p.chromium.launch(
                    headless=self.headless,
                    slow_mo=500 if not self.headless else 0,
                    args=[
                        '--disable-blink-features=AutomationControlled',
                        '--disable-dev-shm-usage',
                        '--no-sandbox'
                    ]
                )
                try:
                    context = browser.new_context(
                        user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
                        viewport={"width": 1920, "height": 1080},
                        locale="en-GB",
                        timezone_id="Europe/London",
                        geolocation={"latitude": 51.5074, "longitude": -0.1278},
                        permissions=["geolocation"]
                    )

                    # Add script to mask automation detection
                    context.add_init_script("""
                    Object.defineProperty(navigator, 'webdriver', {
                        get: () => undefined
                    });
                """)

                    page = context.new_page()

                    # Set default timeout
                    page.set_default_timeout(self.timeout)

                    # Navigate to the quote page
                    # Use "domcontentloaded" instead of "networkidle" as tracking scripts
                    # can prevent networkidle from ever triggering
                    logger.info(f"Navigating to {self.quote_url}")
                    page.goto(self.quote_url, wait_until="domcontentloaded")

                    # Give the page time to fully render (React app needs time to hydrate)
                    page.wait_for_timeout(5000)

                    # Handle cookie consent if present
                    self._handle_cookie_consent(page)

                    # Additional wait after cookies for any re-renders
                    page.wait_for_timeout(2000)

                    # Enter postcode and start quote journey
                    journey_result = self._navigate_quote_journey(page, postcode_clean)

                    if isinstance(journey_result, dict):
                        standard_tariffs = journey_result.get("standard_tariffs") or []
                        ev_tariff = journey_result.get("ev_tariff")
                        # Optional flag; an absent key is treated as False
                        meter_details_required = bool(journey_result.get("meter_details_required"))
                    else:
                        # Defensive: a bare (typically empty) list instead of the
                        # documented dict is treated as the "meter details
                        # required" early exit rather than crashing on .get().
                        standard_tariffs = list(journey_result or [])
                        ev_tariff = None
                        meter_details_required = not standard_tariffs

                    # Filter out debug entries like PAGE_CONTENT_SAMPLE
                    real_tariffs = [t for t in standard_tariffs if t.get("name") != "PAGE_CONTENT_SAMPLE"]

                    if real_tariffs:
                        result["tariffs"] = real_tariffs
                        result["ev_tariff"] = ev_tariff
                        result["success"] = True
                        ev_status = "with EV tariff" if ev_tariff else "no EV tariff"
                        logger.info(f"Found {len(real_tariffs)} tariffs for {postcode} ({ev_status})")
                    elif standard_tariffs or meter_details_required:
                        # Only debug placeholders came back, or the journey
                        # stopped on the meter-details page
                        result["error"] = "Meter details required - address needs MPAN/MPRN"
                        logger.warning(f"Meter details required for {postcode}")
                    else:
                        result["error"] = "No tariffs found - address may have limited options"
                        logger.warning(f"No tariffs found for {postcode}")
                finally:
                    # Close the browser on the failure path too
                    browser.close()

        except PlaywrightTimeout as e:
            result["error"] = f"Timeout: {str(e)}"
            logger.error(f"Timeout scraping {postcode}: {e}")
        except Exception as e:
            result["error"] = f"Error: {str(e)}"
            logger.error(f"Error scraping {postcode}: {e}")

        return result

    def _handle_cookie_consent(self, page):
        """Handle cookie consent popup if present."""
        try:
            # Look for common cookie consent button patterns
            cookie_selectors = [
                "button:has-text('Accept all')",
                "button:has-text('Accept All')",
                "button:has-text('Accept cookies')",
                "#onetrust-accept-btn-handler",
                "[data-testid='cookie-accept']",
                ".cookie-accept-button"
            ]

            for selector in cookie_selectors:
                try:
                    button = page.locator(selector).first
                    if button.is_visible(timeout=2000):
                        button.click()
                        logger.info("Accepted cookies")
                        page.wait_for_timeout(1000)
                        return
                except:
                    continue

        except Exception as e:
            logger.debug(f"Cookie handling: {e}")

    def _navigate_quote_journey(self, page, postcode: str) -> dict:
        """
        Navigate through the British Gas quote journey to extract tariffs.

        Flow (in the order the code performs it):
        1. Enter postcode → Click "Find address"
        2. Select address (dropdown, radio button or single address) →
           Click "Choose this address"
        3. On the energy needs form pick "Medium" usage and EV=Yes →
           Click "Continue"
        4. Extract the EV tariff from the EV page
        5. Go back, select EV=No → Click "Continue"
        6. Extract standard tariffs from the results page

        When the EV option cannot be selected, steps 4-5 are skipped and only
        the standard tariffs are extracted.

        Any exception raised during the journey is logged, an "error_state"
        debug screenshot is requested, and whatever was collected so far is
        returned.

        Args:
            page: Playwright page on which the quote form is expected to be
                loaded.
            postcode: Postcode typed into the form as-is.

        Returns:
            dict with:
            - 'standard_tariffs': list of tariff dicts (empty on failure)
            - 'ev_tariff': dict, or None when not extracted
            - 'meter_details_required': True when the site asked for
              MPAN/MPRN meter details instead of offering the form
        """
        result = {
            "standard_tariffs": [],
            "ev_tariff": None,
            "meter_details_required": False
        }

        # NOTE(review): Playwright documents the `timeout` argument of
        # is_visible() as ignored (the check returns immediately), so the
        # fixed wait_for_timeout() pauses below are what actually give the
        # page time to render - confirm against the installed version.

        try:
            # ============================================================
            # STEP 1: Enter postcode and click "Find address"
            # ============================================================
            logger.info("Step 1: Entering postcode")

            postcode_input = page.locator("input[name='postcode']")
            if not postcode_input.is_visible(timeout=5000):
                self._save_debug_screenshot(page, "no_postcode_input")
                logger.error("Could not find postcode input field")
                return result

            postcode_input.fill(postcode)
            logger.info(f"Entered postcode: {postcode}")

            # Click "Find address" button (custom web component)
            find_address_selectors = [
                "#find-address-button",
                "address-selector-cta[type='submit']",
                "address-selector-cta:has-text('Find address')",
                "button:has-text('Find address')",
                "ns-cta:has-text('Find address')",
                "[type='submit']:has-text('Find address')"
            ]

            find_clicked = False
            for selector in find_address_selectors:
                try:
                    btn = page.locator(selector).first
                    if btn.is_visible(timeout=2000):
                        btn.click()
                        logger.info(f"Clicked 'Find address' button ({selector})")
                        find_clicked = True
                        # Wait for address lookup API to complete
                        page.wait_for_timeout(4000)
                        break
                except Exception as e:
                    logger.debug(f"Find address selector {selector} failed: {e}")
                    continue

            if not find_clicked:
                self._save_debug_screenshot(page, "no_find_address_button")
                logger.error("Could not find 'Find address' button")
                return result

            # ============================================================
            # STEP 2: Select address (dropdown, radio buttons, or single)
            # ============================================================
            logger.info("Step 2: Selecting address")

            # Take screenshot to see current state
            self._save_debug_screenshot(page, "after_find_address")

            # Wait for address options to appear (API call may take time)
            page.wait_for_timeout(3000)

            address_selected = False

            # OPTION A: Try dropdown first (3+ addresses)
            dropdown_selectors = [
                "#address-select",
                "select[name='address']",
                "address-selector-inputter select",
                "select"
            ]

            for selector in dropdown_selectors:
                try:
                    dropdown = page.locator(selector).first
                    if dropdown.is_visible(timeout=2000):
                        # Get available options, skipping placeholders
                        # (options whose value attribute is missing or empty)
                        options = dropdown.locator("option").all()
                        valid_options = [opt for opt in options if opt.get_attribute("value")]

                        logger.info(f"Found dropdown with {len(valid_options)} valid address options")

                        if valid_options:
                            # Select the first valid address option
                            first_value = valid_options[0].get_attribute("value")
                            dropdown.select_option(value=first_value)
                            selected_text = dropdown.evaluate("el => el.options[el.selectedIndex].text")
                            logger.info(f"Selected address from dropdown: {selected_text}")
                            address_selected = True
                            break
                except Exception as e:
                    logger.debug(f"Dropdown selector {selector} failed: {e}")
                    continue

            # OPTION B: Try radio buttons (2-4 addresses)
            if not address_selected:
                try:
                    # Look for radio buttons for address selection
                    # Based on actual markup: <input type="radio" name="address" id="id0" value="0">
                    radio_selectors = [
                        "address-selector-inputter input[type='radio']",
                        "input[type='radio'][name='address']",
                        "input[type='radio'][id^='id']",
                        "input[type='radio']"
                    ]

                    for selector in radio_selectors:
                        radios = page.locator(selector).all()
                        if radios:
                            # Click the first radio button
                            radios[0].click()
                            logger.info(f"Selected address via radio button (found {len(radios)} options)")
                            address_selected = True
                            page.wait_for_timeout(500)  # Wait for selection to register
                            break
                except Exception as e:
                    logger.debug(f"Radio button selection failed: {e}")

            # OPTION C: Single address - no selection needed
            if not address_selected:
                logger.info("No dropdown or radio buttons found - assuming single address")
                # Just proceed to click "Choose this address"

            page.wait_for_timeout(1000)

            # Click "Choose this address" button (may be custom component)
            choose_address_selectors = [
                "address-selector-cta:has-text('Choose this address')",
                "[type='submit']:has-text('Choose this address')",
                "button:has-text('Choose this address')",
                "ns-cta:has-text('Choose this address')",
                # Fallback to any submit-type element with similar text
                ":has-text('Choose this address')"
            ]

            choose_clicked = False
            for selector in choose_address_selectors:
                try:
                    btn = page.locator(selector).first
                    if btn.is_visible(timeout=2000):
                        btn.click()
                        logger.info(f"Clicked 'Choose this address' button ({selector})")
                        choose_clicked = True
                        page.wait_for_timeout(3000)
                        break
                except Exception as e:
                    logger.debug(f"Choose address selector {selector} failed: {e}")
                    continue

            if not choose_clicked:
                # Try alternative button texts
                alt_buttons = ["Continue", "Next", "Confirm"]
                for btn_text in alt_buttons:
                    try:
                        btn = page.locator(f"address-selector-cta:has-text('{btn_text}'), button:has-text('{btn_text}'), ns-cta:has-text('{btn_text}')").first
                        if btn.is_visible(timeout=1000):
                            btn.click()
                            logger.info(f"Clicked '{btn_text}' button")
                            page.wait_for_timeout(3000)
                            break
                    except Exception as e:
                        logger.debug(f"Alternative button '{btn_text}' failed: {e}")
                        continue

            # ============================================================
            # STEP 3: Fill energy needs form
            # ============================================================
            logger.info("Step 3: Filling energy needs form")

            self._save_debug_screenshot(page, "energy_needs_form")

            # Check if we're stuck on the "meter details required" page
            # This happens when British Gas can't auto-detect the meter
            page_text = page.locator("body").text_content() or ""
            page_text_lower = page_text.lower()

            meter_required_phrases = [
                "We need details about your meters",
                "we need your meter reference numbers",
                "MPAN for electricity",
                "MPRN for gas"
            ]

            if any(phrase.lower() in page_text_lower for phrase in meter_required_phrases):
                logger.warning("Meter details required - British Gas cannot auto-detect meter for this address")
                # Keep the documented dict shape (a bare list here would break
                # callers that use .get()) and flag the reason explicitly.
                result["meter_details_required"] = True
                return result

            # The form should have these already selected by default:
            # - Gas & Electricity (selected)
            # - Actual usage (selected)
            # - No EV tariff (selected)

            # For simplicity, we'll use "Medium" usage instead of "Actual"
            # to avoid needing to enter specific kWh values
            try:
                medium_radio = page.locator("input[value='Medium'], label:has-text('Medium')").first
                if medium_radio.is_visible(timeout=2000):
                    medium_radio.click()
                    logger.info("Selected 'Medium' usage option")
                    page.wait_for_timeout(500)
            except Exception as e:
                logger.debug(f"Could not select Medium usage: {e}")

            # ============================================================
            # STEP 3b: First get EV tariff (select Yes, then go back for standard)
            # ============================================================
            # Select "Yes" for EV tariff question first
            # Markup: <input id="electricVehicle-true" type="radio" value="true" name="electricVehicle">
            #         <label for="electricVehicle-true">Yes</label>

            # Take screenshot to see the EV options
            self._save_debug_screenshot(page, "before_ev_selection")

            ev_selectors = [
                # Try clicking the label (more reliable for custom components)
                "label[for='electricVehicle-true']",
                # Try the radio input directly
                "input#electricVehicle-true",
                "input[name='electricVehicle'][value='true']",
                "ns-inputter[name='electricVehicle'] input[value='true']",
                # Try clicking text
                "text=Yes"
            ]

            ev_selected = False
            for selector in ev_selectors:
                try:
                    element = page.locator(selector).first
                    if element.is_visible(timeout=2000):
                        element.click()
                        logger.info(f"Selected 'Yes' for EV tariff using: {selector}")
                        ev_selected = True
                        page.wait_for_timeout(500)
                        break
                except Exception as e:
                    logger.debug(f"EV selector {selector} failed: {e}")
                    continue

            # Fallback: try JavaScript click
            if not ev_selected:
                try:
                    result_js = page.evaluate("""
                        () => {
                            // Try to find and click the EV Yes radio
                            const radio = document.querySelector('input#electricVehicle-true') ||
                                         document.querySelector('input[name="electricVehicle"][value="true"]');
                            if (radio) {
                                radio.click();
                                return 'clicked radio';
                            }
                            const label = document.querySelector('label[for="electricVehicle-true"]');
                            if (label) {
                                label.click();
                                return 'clicked label';
                            }
                            // List what we found for debugging
                            const inputs = document.querySelectorAll('input[type="radio"]');
                            return 'found ' + inputs.length + ' radio buttons: ' +
                                   Array.from(inputs).map(i => i.name + '=' + i.value).join(', ');
                        }
                    """)
                    logger.info(f"JavaScript EV selection result: {result_js}")
                    # Success strings start with 'clicked'; the diagnostic
                    # string starts with 'found' and may contain anything
                    if isinstance(result_js, str) and result_js.startswith('clicked'):
                        ev_selected = True
                        page.wait_for_timeout(500)
                except Exception as e:
                    logger.debug(f"JavaScript EV selection failed: {e}")

            if ev_selected:
                # Click Continue to get EV tariff page
                continue_btn = page.locator("button:has-text('Continue'), ns-cta:has-text('Continue')")
                if continue_btn.first.is_visible(timeout=3000):
                    continue_btn.first.click()
                    logger.info("Clicked 'Continue' for EV tariff")
                    page.wait_for_timeout(10000)  # Wait for EV tariff page to load

                    # Extract EV tariff
                    logger.info("Step 4: Extracting EV tariff")
                    self._save_debug_screenshot(page, f"ev_tariff_{postcode}")
                    result["ev_tariff"] = self._extract_ev_tariff_from_page(page)

                    if result["ev_tariff"]:
                        logger.info("Successfully extracted EV tariff")
                    else:
                        logger.warning("Could not extract EV tariff details")

                    # Go back to get standard tariffs
                    logger.info("Step 5: Going back for standard tariffs")

                    # Click the Back button on the page (not browser back - this is a SPA)
                    back_clicked = False
                    back_selectors = [
                        "a:has-text('Back')",
                        "button:has-text('Back')",
                        "ns-cta:has-text('Back')",
                        "[class*='back']",
                        "text=Back"
                    ]

                    for selector in back_selectors:
                        try:
                            back_btn = page.locator(selector).first
                            if back_btn.is_visible(timeout=2000):
                                back_btn.click()
                                logger.info(f"Clicked Back button using: {selector}")
                                back_clicked = True
                                page.wait_for_timeout(3000)
                                break
                        except Exception as e:
                            logger.debug(f"Back selector {selector} failed: {e}")
                            continue

                    if not back_clicked:
                        # Fallback to browser back
                        logger.info("No Back button found, trying browser back")
                        page.go_back()
                        page.wait_for_timeout(3000)

                    self._save_debug_screenshot(page, "after_ev_goback")

                    # Select "No" for EV tariff (to get standard tariffs)
                    ev_no_selectors = [
                        # Try clicking the label first (this worked for Yes)
                        "label[for='electricVehicle-false']",
                        "input#electricVehicle-false",
                        "input[name='electricVehicle'][value='false']",
                        "ns-inputter[name='electricVehicle'] input[value='false']"
                    ]

                    ev_no_selected = False
                    for selector in ev_no_selectors:
                        try:
                            ev_no_radio = page.locator(selector).first
                            if ev_no_radio.is_visible(timeout=2000):
                                ev_no_radio.click()
                                logger.info(f"Selected 'No' for EV tariff using: {selector}")
                                ev_no_selected = True
                                page.wait_for_timeout(500)
                                break
                        except Exception as e:
                            logger.debug(f"EV No selector {selector} failed: {e}")
                            continue

                    if not ev_no_selected:
                        logger.warning("Could not select 'No' for EV - standard tariffs may show EV page again")

                    # Click Continue for standard tariffs
                    continue_btn = page.locator("button:has-text('Continue'), ns-cta:has-text('Continue')")
                    if continue_btn.first.is_visible(timeout=3000):
                        continue_btn.first.click()
                        logger.info("Clicked 'Continue' for standard tariffs")
                        page.wait_for_timeout(10000)
            else:
                logger.info("No EV option found - getting standard tariffs only")
                # Click Continue button for standard tariffs
                continue_btn = page.locator("button:has-text('Continue'), ns-cta:has-text('Continue')")
                if continue_btn.first.is_visible(timeout=3000):
                    continue_btn.first.click()
                    logger.info("Clicked 'Continue' button on energy needs form")
                    page.wait_for_timeout(10000)
                else:
                    self._save_debug_screenshot(page, "no_continue_button")
                    logger.error("Could not find 'Continue' button")
                    return result

            # ============================================================
            # STEP 6: Extract standard tariffs from results page
            # ============================================================
            logger.info("Step 6: Extracting standard tariffs")

            self._save_debug_screenshot(page, f"tariff_results_{postcode}")

            result["standard_tariffs"] = self._extract_tariffs_from_page(page)

        except Exception as e:
            logger.error(f"Error in quote journey: {e}")
            self._save_debug_screenshot(page, "error_state")

        return result

    def _extract_tariffs_from_page(self, page) -> list:
        """
        Collect tariffs from the British Gas results page.

        Card selectors are tried in order; the first one that matches any
        elements is used and each matched card is handed to
        ``_parse_british_gas_card``. The cards are expected to carry:
        - H2: Tariff name
        - div[slot="price"]: Monthly and annual costs
        - Expandable details with unit rates and standing charges

        When no card yields a tariff, the result of
        ``_extract_tariffs_from_text`` is returned instead.
        """
        # British Gas uses custom web components, most specific selector first
        card_selectors = (
            "ns-labs-product-card",
            "ns-product-card",
            "[class*='product-card']",
            "[class*='tariff-card']",
        )

        found = []
        for css in card_selectors:
            try:
                matched_cards = page.locator(css).all()
                if not matched_cards:
                    continue

                logger.info(f"Found {len(matched_cards)} tariff cards with selector: {css}")
                for position, element in enumerate(matched_cards):
                    parsed = self._parse_british_gas_card(element, position)
                    if parsed:
                        found.append(parsed)
                # The first selector that matches wins
                break
            except Exception as exc:
                logger.debug(f"Selector {css} failed: {exc}")
                continue

        if found:
            return found

        # Nothing usable came from the cards, so fall back to the page text
        logger.info("No tariff cards found, attempting text extraction")
        return self._extract_tariffs_from_text(page)

    def _parse_british_gas_card(self, card, index: int) -> Optional[dict]:
        """
        Parse a British Gas ns-labs-product-card element.

        Expected structure:
        - H2: Tariff name
        - div[slot="price"]: Contains monthly and annual costs
        - Expandable section with unit rates and standing charges
        """
        import re

        tariff = {
            "index": index,
            "name": None,
            "type": None,
            "monthly_cost": None,
            "annual_cost": None,
            "electricity": {
                "unit_rate_pence": None,
                "standing_charge_pence": None
            },
            "gas": {
                "unit_rate_pence": None,
                "standing_charge_pence": None
            },
            "features": []
        }

        try:
            # Extract tariff name from H2
            try:
                h2 = card.locator("h2").first
                if h2.is_visible():
                    tariff["name"] = h2.text_content().strip()
                    logger.info(f"  Tariff {index}: {tariff['name']}")
            except:
                pass

            # Extract price from div[slot="price"]
            try:
                price_div = card.locator("div[slot='price']").first
                if price_div.is_visible():
                    price_text = price_div.text_content()

                    # Extract annual cost (e.g., "£632.27 / year")
                    annual_match = re.search(r'£([\d,]+\.?\d*)\s*/\s*year', price_text)
                    if annual_match:
                        tariff["annual_cost"] = float(annual_match.group(1).replace(',', ''))

                    # Extract monthly cost (e.g., "£52.69 / month")
                    monthly_match = re.search(r'£([\d,]+\.?\d*)\s*/\s*month', price_text)
                    if monthly_match:
                        tariff["monthly_cost"] = float(monthly_match.group(1).replace(',', ''))
            except Exception as e:
                logger.debug(f"Could not extract price: {e}")

            # Try to expand details and extract unit rates
            try:
                # Click the "See tariff details" button to expand
                details_btn = card.locator("[aria-label*='tariff details'], .cta:has-text('tariff details')")
                if details_btn.first.is_visible(timeout=1000):
                    details_btn.first.click()
                    card.page.wait_for_timeout(500)
            except:
                pass

            # Extract unit rates and standing charges from expanded details
            try:
                details_content = card.locator("ns-content[slot='details'], [slot='details']").first
                if details_content.is_visible(timeout=1000):
                    details_text = details_content.text_content()

                    # Extract electricity rates
                    # Pattern: "Unit rate" followed by rate like "25.9p per kWh"
                    elec_unit_match = re.search(r'Electricity.*?Unit rate.*?([\d.]+)p\s*per\s*kWh', details_text, re.IGNORECASE | re.DOTALL)
                    if elec_unit_match:
                        tariff["electricity"]["unit_rate_pence"] = float(elec_unit_match.group(1))

                    # Pattern: "Standing charge" followed by rate like "45.374p per day"
                    elec_standing_match = re.search(r'Electricity.*?Standing charge.*?([\d.]+)p\s*per\s*day', details_text, re.IGNORECASE | re.DOTALL)
                    if elec_standing_match:
                        tariff["electricity"]["standing_charge_pence"] = float(elec_standing_match.group(1))

                    # Extract gas rates
                    gas_unit_match = re.search(r'Gas.*?Unit rate.*?([\d.]+)p\s*per\s*kWh', details_text, re.IGNORECASE | re.DOTALL)
                    if gas_unit_match:
                        tariff["gas"]["unit_rate_pence"] = float(gas_unit_match.group(1))

                    gas_standing_match = re.search(r'Gas.*?Standing charge.*?([\d.]+)p\s*per\s*day', details_text, re.IGNORECASE | re.DOTALL)
                    if gas_standing_match:
                        tariff["gas"]["standing_charge_pence"] = float(gas_standing_match.group(1))

            except Exception as e:
                logger.debug(f"Could not extract details: {e}")

            # Determine tariff type from name
            if tariff["name"]:
                name_lower = tariff["name"].lower()
                if "fixed" in name_lower:
                    tariff["type"] = "Fixed"
                elif "variable" in name_lower or "flexible" in name_lower:
                    tariff["type"] = "Variable"
                elif "ev" in name_lower or "electric vehicle" in name_lower:
                    tariff["type"] = "EV"
                elif "prepay" in name_lower:
                    tariff["type"] = "Prepayment"

            # Only return if we got meaningful data
            if tariff["name"] or tariff["annual_cost"]:
                return tariff
            return None

        except Exception as e:
            logger.debug(f"Error parsing tariff card: {e}")
            return None

    def _extract_ev_tariff_from_page(self, page) -> Optional[dict]:
        """
        Extract EV Power tariff information from the British Gas EV tariff page.

        The EV page uses ns-card elements with:
        - h3[slot="heading"]: "Electricity" or "Gas"
        - dl.dl-row with dt/dd pairs for rates
        """
        import re

        ev_tariff = {
            "name": "EV Power Tariff",
            "type": "EV",
            "electricity": {
                "peak_unit_rate_pence": None,
                "off_peak_unit_rate_pence": None,
                "standing_charge_pence": None
            },
            "gas": {
                "unit_rate_pence": None,
                "standing_charge_pence": None
            }
        }

        try:
            # Find all ns-card elements on the page
            cards = page.locator("ns-card").all()
            logger.info(f"Found {len(cards)} ns-card elements on EV page")

            for card in cards:
                try:
                    # Get the heading (Electricity or Gas)
                    heading = card.locator("h3[slot='heading'], h3").first
                    if not heading.is_visible(timeout=1000):
                        continue

                    heading_text = heading.text_content().strip().lower()

                    # Get all dt/dd pairs from the description list
                    dl_items = card.locator("dl.dl-row div, dl div").all()

                    for item in dl_items:
                        try:
                            dt = item.locator("dt").first
                            dd = item.locator("dd").first

                            if not dt.is_visible(timeout=500) or not dd.is_visible(timeout=500):
                                continue

                            label = dt.text_content().strip().lower()
                            value_text = dd.text_content().strip()

                            # Extract numeric value (e.g., "30.560p per kWh" -> 30.560)
                            rate_match = re.search(r'([\d.]+)p', value_text)
                            if not rate_match:
                                continue

                            rate = float(rate_match.group(1))

                            if "electricity" in heading_text:
                                if "peak" in label and "off" not in label:
                                    ev_tariff["electricity"]["peak_unit_rate_pence"] = rate
                                    logger.info(f"  EV Electricity peak rate: {rate}p/kWh")
                                elif "off-peak" in label or "off peak" in label:
                                    ev_tariff["electricity"]["off_peak_unit_rate_pence"] = rate
                                    logger.info(f"  EV Electricity off-peak rate: {rate}p/kWh")
                                elif "standing" in label:
                                    ev_tariff["electricity"]["standing_charge_pence"] = rate
                                    logger.info(f"  EV Electricity standing charge: {rate}p/day")

                            elif "gas" in heading_text:
                                if "unit" in label and "standing" not in label:
                                    ev_tariff["gas"]["unit_rate_pence"] = rate
                                    logger.info(f"  EV Gas unit rate: {rate}p/kWh")
                                elif "standing" in label:
                                    ev_tariff["gas"]["standing_charge_pence"] = rate
                                    logger.info(f"  EV Gas standing charge: {rate}p/day")

                        except Exception as e:
                            logger.debug(f"Error parsing rate item: {e}")
                            continue

                except Exception as e:
                    logger.debug(f"Error parsing card: {e}")
                    continue

            # Check if we got meaningful EV data
            if (ev_tariff["electricity"]["peak_unit_rate_pence"] or
                ev_tariff["electricity"]["off_peak_unit_rate_pence"]):
                return ev_tariff

        except Exception as e:
            logger.error(f"Error extracting EV tariff: {e}")

        return None

    def _extract_tariffs_from_text(self, page) -> list:
        """
        Fallback method to extract tariff info from page text content.
        Useful for understanding the page structure.
        """
        tariffs = []

        try:
            # Get all text from the page body
            body_text = page.locator("body").text_content()

            # Log a sample for debugging
            if body_text:
                sample = body_text[:2000].replace('\n', ' ').strip()
                logger.info(f"Page text sample: {sample[:500]}...")

                # Store page info for analysis
                tariffs.append({
                    "name": "PAGE_CONTENT_SAMPLE",
                    "raw_text": sample,
                    "note": "This is raw page content for structure analysis"
                })

        except Exception as e:
            logger.error(f"Error extracting page text: {e}")

        return tariffs

    def _save_debug_screenshot(self, page, name: str):
        """Save a screenshot for debugging (only when debug mode is enabled)."""
        if not self.debug:
            return
        try:
            output_path = Path(__file__).parent / "output" / f"debug_{name}.png"
            output_path.parent.mkdir(parents=True, exist_ok=True)
            page.screenshot(path=str(output_path))
            logger.info(f"Debug screenshot saved: {output_path}")
        except Exception as e:
            logger.debug(f"Could not save screenshot: {e}")

    def scrape_all_regions(self) -> dict:
        """
        Scrape tariffs for all configured regions.

        Tries multiple postcodes per region until one succeeds.
        This handles cases where some addresses require meter details.
        """
        results = {
            "scraped_at": datetime.now(timezone.utc).isoformat(),
            "regions": []
        }

        for address_config in self.config["test_addresses"]:
            region = address_config["region"]

            # Support both old format (single postcode) and new format (list of postcodes)
            postcodes = address_config.get("postcodes", [])
            if not postcodes and "postcode" in address_config:
                postcodes = [address_config["postcode"]]

            logger.info(f"=== Processing {region} ({len(postcodes)} postcodes to try) ===")

            # Try each postcode until one succeeds
            region_result = None
            for i, postcode in enumerate(postcodes):
                logger.info(f"Trying postcode {i+1}/{len(postcodes)}: {postcode}")

                result = self.scrape_postcode(postcode=postcode, region=region)

                if result and result.get("success"):
                    logger.info(f"✓ Success with {postcode} for {region}")
                    result["postcodes_tried"] = i + 1
                    result["total_postcodes"] = len(postcodes)
                    region_result = result
                    break
                else:
                    error = result.get("error", "Unknown error") if result else "No result"
                    logger.info(f"✗ Failed: {error}")

            # If no postcode succeeded, use the last result with additional info
            if not region_result:
                if result:
                    result["postcodes_tried"] = len(postcodes)
                    result["total_postcodes"] = len(postcodes)
                    result["error"] = f"All {len(postcodes)} postcodes failed - {result.get('error', 'Unknown error')}"
                    region_result = result
                else:
                    region_result = {
                        "postcode": postcodes[0] if postcodes else "Unknown",
                        "region": region,
                        "scraped_at": datetime.now(timezone.utc).isoformat(),
                        "success": False,
                        "tariffs": [],
                        "error": f"All {len(postcodes)} postcodes failed",
                        "postcodes_tried": len(postcodes),
                        "total_postcodes": len(postcodes)
                    }
                logger.warning(f"✗ No working postcode found for {region}")

            results["regions"].append(region_result)

        # Summary
        successful = sum(1 for r in results["regions"] if r.get("success"))
        logger.info(f"\n=== Summary: {successful}/{len(results['regions'])} regions successful ===")

        # Save results
        self._save_results(results, self.config["output"]["tariffs_file"])

        return results

    def run_health_check(self) -> dict:
        """
        Run a health check against a subset of test regions.
        Tries first postcode from each of the first 3 regions.
        Returns pass/fail status for monitoring.
        """
        logger.info("Running health check...")

        # Use first 3 regions for quick health check
        test_regions = self.config["test_addresses"][:3]

        results = {
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "status": "PASS",
            "checks": [],
            "summary": {
                "total": len(test_regions),
                "passed": 0,
                "failed": 0
            }
        }

        for region_config in test_regions:
            region = region_config["region"]
            # Get first postcode from the list
            postcodes = region_config.get("postcodes", [])
            if not postcodes and "postcode" in region_config:
                postcodes = [region_config["postcode"]]
            postcode = postcodes[0] if postcodes else "Unknown"

            check_result = {
                "region": region,
                "postcode": postcode,
                "status": "FAIL",
                "error": None
            }

            result = self.scrape_postcode(postcode=postcode, region=region)

            if result and result.get("success"):
                check_result["status"] = "PASS"
                check_result["postcode"] = result.get("postcode", postcode)
                results["summary"]["passed"] += 1
            else:
                check_result["status"] = "FAIL"
                check_result["error"] = result.get("error") if result else "No result"
                results["summary"]["failed"] += 1

            results["checks"].append(check_result)

        # Overall status - pass if at least one region succeeded
        if results["summary"]["passed"] == 0:
            results["status"] = "FAIL"
        elif results["summary"]["failed"] > 0:
            results["status"] = "PARTIAL"
        else:
            results["status"] = "PASS"

        # Save health check results
        self._save_results(results, self.config["output"]["health_check_file"])

        return results


def main():
    """
    Command-line entry point for the scraper.

    Modes, checked in this order:
    - ``--health-check``: run the health check and exit with status 0 only
      when the overall result is "PASS" (1 otherwise);
    - ``--postcode CODE``: scrape one postcode and print the result as JSON;
    - ``--all``: scrape every configured region and print a short summary;
    - no mode flag: scrape the built-in test postcode "HP20 1HP".

    ``--visible`` shows the browser window and ``--debug`` enables debug
    screenshots. Exits with status 1 when Playwright is not installed.
    """
    parser = argparse.ArgumentParser(
        description="Scrape British Gas tariff information"
    )
    parser.add_argument(
        "--postcode",
        type=str,
        help="Scrape a single postcode (e.g., SW1A1AA)"
    )

    # Boolean switches, registered in the order they appear in --help
    switches = (
        ("--health-check", "Run health check only"),
        ("--all", "Scrape all configured regions"),
        ("--visible", "Show browser window (for debugging)"),
        ("--debug", "Enable debug mode (saves screenshots for troubleshooting)"),
    )
    for flag, description in switches:
        parser.add_argument(flag, action="store_true", help=description)

    options = parser.parse_args()

    if not PLAYWRIGHT_AVAILABLE:
        print("\nPlaywright is not installed. To install:")
        print("  pip install playwright")
        print("  playwright install chromium")
        sys.exit(1)

    # The browser runs headless unless --visible was requested
    scraper = BritishGasScraper(headless=not options.visible, debug=options.debug)

    if options.debug:
        print("Debug mode enabled - screenshots will be saved")
        scraper._clear_debug_screenshots()

    if options.health_check:
        report = scraper.run_health_check()
        print(f"\nHealth Check: {report['status']}")
        print(f"Passed: {report['summary']['passed']}/{report['summary']['total']}")
        # Anything other than a full PASS is reported as a failing exit code
        sys.exit(0 if report['status'] == 'PASS' else 1)

    if options.postcode:
        single = scraper.scrape_postcode(options.postcode)
        print(json.dumps(single, indent=2))
        return

    if options.all:
        everything = scraper.scrape_all_regions()
        region_count = len(everything['regions'])
        print(f"\nScraped {region_count} regions")
        ok_count = sum(1 for r in everything['regions'] if r.get('success'))
        print(f"Successful: {ok_count}/{len(everything['regions'])}")
        return

    # Default: run a single test
    print("Running single postcode test (HP20 1HP)...")
    sample = scraper.scrape_postcode("HP20 1HP", "South East")
    print(json.dumps(sample, indent=2))


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
