import os
import requests
import json
import time
import logging
import sys
from typing import Dict, Optional
from config import APIFY_API_TOKEN, is_facebook_scraping_enabled, validate_config

# Set up logging
# Module-wide logging config: INFO level, timestamped records tagged with the
# logger name and severity. Note basicConfig is a no-op if the root logger was
# already configured by an importing application.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Module-level logger named after this module, per logging convention.
logger = logging.getLogger(__name__)

class FacebookPageScraper:
    """
    Scrapes public Facebook page data via Apify's Facebook Pages Scraper actor.

    Workflow: start an actor run over the Apify v2 REST API, poll the run until
    it reaches a terminal status, fetch the run's default dataset, and
    optionally format the first dataset item into a plain-text brief for the
    downstream text orchestrator.
    """

    def __init__(self):
        # Credentials / endpoints for the Apify v2 API.
        self.api_token = APIFY_API_TOKEN
        # Apify actor IDs use '~' between the account and the actor name.
        self.actor_id = 'apify~facebook-pages-scraper'
        self.base_url = 'https://api.apify.com/v2'

        # Validate configuration once at construction time so a bad config is
        # logged early; availability is still re-checked on every scrape call.
        is_valid, message = validate_config()
        if not is_valid:
            logger.warning(message)

    def scrape_facebook_page(self, facebook_url: str) -> Optional[Dict]:
        """
        Scrapes a Facebook page using Apify's Facebook Pages Scraper.

        Args:
            facebook_url: Full URL of the Facebook page to scrape.

        Returns:
            The first item of the actor run's dataset (a dict of page data),
            or None on any failure: scraping disabled, HTTP error, network
            error, or run timeout/failure.
        """
        if not is_facebook_scraping_enabled():
            logger.error("Facebook scraping is disabled - APIFY_API_TOKEN not set")
            return None

        try:
            # Prepare the input for the actor
            actor_input = {
                "startUrls": [{"url": facebook_url}],
                "maxPosts": 5,
                "maxReviews": 10,
                "maxReviewDate": 6,  # months
                "scrapeAbout": True,
                "scrapeReviews": True,
                "scrapePosts": True,
                "scrapeServices": True,
                "language": "en-US",
                "proxy": {
                    "useApifyProxy": True
                }
            }

            # Start the actor run
            run_url = f"{self.base_url}/acts/{self.actor_id}/runs"
            headers = {
                "Authorization": f"Bearer {self.api_token}",
                "Content-Type": "application/json"
            }

            logger.info(f"🔄 Starting Apify Facebook page scrape for: {facebook_url}")
            print(f"🔄 Starting Apify Facebook page scrape for: {facebook_url}")

            # Start the run
            response = requests.post(
                run_url,
                headers=headers,
                json=actor_input,
                timeout=30
            )

            # Apify returns 201 Created when a run has been queued successfully.
            if response.status_code != 201:
                logger.error(f"❌ Error starting Apify actor: {response.status_code} - {response.text}")
                print(f"❌ Error starting Apify actor: {response.status_code} - {response.text}")
                return None

            run_id = response.json()['data']['id']
            logger.info(f"📘 Apify actor run started with ID: {run_id}")
            print(f"📘 Apify actor run started with ID: {run_id}")

            # Block until the run finishes (or times out) and return its data.
            return self._wait_for_run_completion(run_id)

        except requests.RequestException as e:
            logger.error(f"Network error during Facebook scrape: {e}")
            return None
        except Exception as e:
            logger.error(f"Unexpected error during Facebook scrape: {e}")
            return None

    def _wait_for_run_completion(self, run_id: str, timeout: int = 300) -> Optional[Dict]:
        """
        Waits for the actor run to complete and returns the results.

        Polls the run status every 5 seconds until the run reaches a terminal
        state or `timeout` seconds have elapsed.

        Args:
            run_id: Apify run ID returned when the actor was started.
            timeout: Maximum number of seconds to wait for completion.

        Returns:
            The first item of the run's default dataset, or None on failure,
            empty dataset, or timeout.
        """
        run_status_url = f"{self.base_url}/actor-runs/{run_id}"

        headers = {
            "Authorization": f"Bearer {self.api_token}"
        }

        start_time = time.time()

        while time.time() - start_time < timeout:
            try:
                # Check run status
                response = requests.get(run_status_url, headers=headers, timeout=10)

                if response.status_code != 200:
                    logger.error(f"Error checking run status: {response.status_code} - {response.text}")
                    return None

                run_data = response.json()['data']
                status = run_data['status']

                logger.info(f"⏳ Apify run status: {status}")
                print(f"⏳ Apify run status: {status}")

                if status == 'SUCCEEDED':
                    # Get the results from the default dataset
                    dataset_id = run_data['defaultDatasetId']
                    dataset_url = f"{self.base_url}/datasets/{dataset_id}/items"

                    response = requests.get(dataset_url, headers=headers, timeout=30)

                    if response.status_code == 200:
                        items = response.json()
                        if items:
                            logger.info("✅ Successfully retrieved Facebook page data from Apify")
                            print("✅ Successfully retrieved Facebook page data from Apify")
                            return items[0]  # Return first result
                        else:
                            logger.warning("⚠️ No results found in Apify dataset")
                            print("⚠️ No results found in Apify dataset")
                            return None
                    else:
                        logger.error(f"❌ Error fetching results from Apify: {response.status_code} - {response.text}")
                        print(f"❌ Error fetching results from Apify: {response.status_code} - {response.text}")
                        return None

                elif status in ['FAILED', 'ABORTED', 'TIMED-OUT']:
                    logger.error(f"❌ Apify run failed with status: {status}")
                    print(f"❌ Apify run failed with status: {status}")
                    return None

                # Wait before checking again
                time.sleep(5)

            except requests.RequestException as e:
                # A transient network error aborts the wait rather than
                # retrying — deliberate best-effort behavior.
                logger.error(f"Network error while waiting for completion: {e}")
                return None

        logger.error("Timeout waiting for run completion")
        return None

    def format_for_text_orchestrator(self, facebook_data: Dict) -> Optional[str]:
        """
        Formats Facebook page data into a text description for the orchestrator.

        Args:
            facebook_data: A dict of page data as returned by the Apify actor
                (keys such as 'name', 'categories', 'about', 'address',
                'hours', 'reviews' — all optional).

        Returns:
            A multi-line text brief describing the business, or None when
            `facebook_data` is falsy. (Return type was previously annotated
            `str` despite the None path.)
        """
        if not facebook_data:
            return None

        # Extract key information
        name = facebook_data.get('name', 'Business')
        categories = facebook_data.get('categories', [])
        if isinstance(categories, list):
            categories = ', '.join(categories)
        about = facebook_data.get('about', '')
        address = facebook_data.get('address', {})

        # Build location string
        location_parts = []
        if isinstance(address, dict):
            if address.get('street'):
                location_parts.append(address['street'])
            if address.get('city'):
                location_parts.append(address['city'])
            if address.get('state'):
                location_parts.append(address['state'])
        location = ', '.join(location_parts)

        # Extract business details
        phone = facebook_data.get('phone', '')
        email = facebook_data.get('email', '')
        website = facebook_data.get('website', '')
        price_range = facebook_data.get('priceRange', '')

        # Extract opening hours; show at most three days to keep it short.
        hours = facebook_data.get('hours', {})
        hours_text = []
        if isinstance(hours, dict):
            # Loop variable renamed from `time` to `day_hours` so it no longer
            # shadows the imported `time` module.
            for day, day_hours in hours.items():
                hours_text.append(f"{day}: {day_hours}")
        hours_string = ', '.join(hours_text[:3]) + '...' if len(hours_text) > 3 else ', '.join(hours_text)

        # Extract social proof
        likes = facebook_data.get('likes', 0)
        followers = facebook_data.get('followers', 0)

        # Build the text description
        text_parts = [
            f"Create a modern website for {name}",
            f"Business Type: {categories}" if categories else "",
            f"About: {about}" if about else "",
            f"Location: {location}" if location else "",
            f"Phone: {phone}" if phone else "",
            f"Email: {email}" if email else "",
            f"Current Website: {website}" if website else "No existing website",
            f"Price Range: {price_range}" if price_range else "",
            f"Hours: {hours_string}" if hours_text else "",
            f"Social Proof: {likes:,} likes, {followers:,} followers" if likes or followers else "",
        ]

        # Filter out empty parts and join
        text_description = "\n".join([part for part in text_parts if part])

        # Add extracted posts or reviews for context if available
        if 'reviews' in facebook_data and facebook_data['reviews']:
            text_description += "\n\nRecent Customer Reviews:"
            for review in facebook_data['reviews'][:3]:
                if isinstance(review, dict) and review.get('text'):
                    # Truncate each quoted review to 100 characters.
                    text_description += f"\n- \"{review['text'][:100]}...\""

        return text_description

def main():
    """CLI entry point: scrape one Facebook page URL and print the results."""
    if len(sys.argv) < 2:
        print("Usage: python facebook_scraper.py <facebook_url>")
        sys.exit(1)

    facebook_url = sys.argv[1]
    result = FacebookPageScraper().scrape_facebook_page(facebook_url)

    # Guard clause: bail out with a non-zero exit code on scrape failure.
    if not result:
        print("Failed to scrape Facebook page")
        sys.exit(1)

    print("🔄 Formatting Apify data for website generation...")
    formatted = FacebookPageScraper.format_for_text_orchestrator(
        FacebookPageScraper.__new__(FacebookPageScraper), result
    ) if False else None  # placeholder never executed; real call below

    scraper = FacebookPageScraper()
    formatted = scraper.format_for_text_orchestrator(result)
    print("\nFormatted for orchestrator:")
    print(formatted)

    # Also output raw JSON for debugging
    print("\nRaw Facebook data:")
    print(json.dumps(result, indent=2))

if __name__ == "__main__":
    main()