import csv
import time
from datetime import datetime
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup


def scrape_cyberparks():
    """Scrape company information from the CyberParks website and save it to CSV."""
    url = "https://cyberparks.in/companies-at-park/"
    print(f"Fetching data from {url}...")

    try:
        # Send GET request with a browser-like User-Agent
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        # Parse HTML
        soup = BeautifulSoup(response.content, 'html.parser')

        companies = []

        # Look for company information (adjust selectors based on actual HTML structure)
        company_sections = soup.find_all(
            ['div', 'article', 'section'],
            class_=lambda x: x and ('company' in x.lower() or 'list' in x.lower())
        )

        if not company_sections:
            # Fallback: scan all generic text blocks
            company_sections = soup.find_all(['div', 'p', 'li'])

        print(f"Found {len(company_sections)} potential company entries...")

        for section in company_sections:
            # Use a newline separator so the line-based parsing below works
            text = section.get_text(separator='\n', strip=True)

            # Skip empty or very short entries
            if len(text) < 10:
                continue

            # Extract the first link, resolving relative hrefs against the page URL
            links = section.find_all('a', href=True)
            company_url = urljoin(url, links[0]['href']) if links else ''

            # Basic extraction logic (you may need to adjust based on actual structure)
            company_data = {
                'Company Name': '',
                'URL': company_url,
                'CEO/Chairman': ''
            }

            # Try to extract the company name (usually in bold, a heading, or the first line)
            name_tag = section.find(['strong', 'b', 'h1', 'h2', 'h3', 'h4'])
            if name_tag:
                company_data['Company Name'] = name_tag.get_text(strip=True)
            else:
                # Fall back to the first line of text as the company name
                lines = text.split('\n')
                company_data['Company Name'] = lines[0] if lines else text[:50]

            # Look for CEO/Chairman keywords and keep the first matching line
            keywords = ['ceo', 'chairman', 'director', 'founder']
            if any(keyword in text.lower() for keyword in keywords):
                for line in text.split('\n'):
                    if any(keyword in line.lower() for keyword in keywords):
                        company_data['CEO/Chairman'] = line.strip()
                        break

            if company_data['Company Name']:
                companies.append(company_data)

        # Write results to a timestamped CSV file
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"cyberparks_companies_{timestamp}.csv"

        with open(filename, 'w', newline='', encoding='utf-8') as f:
            if companies:
                fieldnames = ['Company Name', 'URL', 'CEO/Chairman']
                writer = csv.DictWriter(f, fieldnames=fieldnames)
                writer.writeheader()
                writer.writerows(companies)
                print(f"\n✓ Successfully scraped {len(companies)} companies!")
                print(f"✓ Data saved to: {filename}")
            else:
                print("\n✗ No companies found. The website structure might have changed.")
                print("Please check the website manually and adjust the scraping logic.")

        return filename

    except requests.exceptions.RequestException as e:
        print(f"\n✗ Error fetching the website: {e}")
        return None
    except Exception as e:
        print(f"\n✗ Error during scraping: {e}")
        return None


if __name__ == "__main__":
    print("=" * 60)
    print("CyberParks Company Information Scraper")
    print("=" * 60)
    scrape_cyberparks()
    print("\nScraping completed!")
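

# ---------------------------------------------------------------------------
# Optional extension (a minimal sketch, not part of the original scraper):
# retry transient network failures with linear backoff, using the `time`
# module imported above. `fetch_with_retries` is a hypothetical helper; to
# use it, move this definition above scrape_cyberparks() and swap it in for
# the bare requests.get() call there.
# ---------------------------------------------------------------------------
def fetch_with_retries(url, headers, retries=3, delay=2, timeout=10):
    """Fetch a URL, retrying on transient network errors.

    Waits progressively longer after each failed attempt and re-raises the
    last exception if every attempt fails.
    """
    last_error = None
    for attempt in range(1, retries + 1):
        try:
            response = requests.get(url, headers=headers, timeout=timeout)
            response.raise_for_status()
            return response
        except requests.exceptions.RequestException as e:
            last_error = e
            print(f"Attempt {attempt}/{retries} failed: {e}")
            if attempt < retries:
                time.sleep(delay * attempt)  # back off a little more each time
    raise last_error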