technopark-scraper/scrape.js

47 lines
1.4 KiB
JavaScript

import { chromium } from 'playwright';
import fs from 'fs';
// Hard cap on scroll passes so a page whose height never settles cannot hang the run.
const MAX_SCROLL_PASSES = 200;

// Pause between scroll passes, giving lazily loaded entries time to render.
const SCROLL_PAUSE_MS = 2000;

/**
 * Quote a single value as a CSV field, doubling any embedded double quotes.
 *
 * @param {unknown} value - Value to serialise; null/undefined become an empty field.
 * @returns {string} The quoted field.
 */
const csvField = (value) => `"${String(value ?? '').replaceAll('"', '""')}"`;

/**
 * Scroll to the bottom of the page repeatedly until the document height stops
 * changing between passes, or until MAX_SCROLL_PASSES is reached.
 *
 * @param {object} page - Playwright page to scroll.
 * @returns {Promise<void>}
 */
async function scrollToEnd(page) {
  let previousHeight = 0;
  for (let pass = 0; pass < MAX_SCROLL_PASSES; pass += 1) {
    const currentHeight = await page.evaluate(() => document.body.scrollHeight);
    if (currentHeight === previousHeight) return;
    previousHeight = currentHeight;
    await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
    await page.waitForTimeout(SCROLL_PAUSE_MS);
  }
  console.warn(`Stopped scrolling after ${MAX_SCROLL_PASSES} passes; the list may be incomplete.`);
}

/**
 * Collect every link whose href contains "company-details", one entry per
 * distinct detail URL.
 *
 * @param {object} page - Playwright page showing the company list.
 * @returns {Promise<Array<{name: string, detailUrl: string}>>}
 */
async function collectCompanies(page) {
  const links = await page.$$eval("a[href*='company-details']", (anchors) =>
    anchors.map((anchor) => ({
      name: anchor.innerText.trim(),
      detailUrl: anchor.href,
    })),
  );

  // Several anchors may point at the same detail page; keep one row per URL
  // and prefer the first anchor that actually carries visible text.
  const byUrl = new Map();
  for (const link of links) {
    const known = byUrl.get(link.detailUrl);
    if (known === undefined) {
      byUrl.set(link.detailUrl, link);
    } else if (known.name === '' && link.name !== '') {
      known.name = link.name;
    }
  }
  return [...byUrl.values()];
}

/**
 * Open a company detail page and return the href of the first absolute link
 * that does not point at another company-details page.
 *
 * NOTE(review): this takes the first matching anchor in document order, which
 * could be a header/navigation link rather than the company's own site —
 * confirm against the real page markup and narrow the selector if needed.
 *
 * @param {object} page - Playwright page reused for navigation.
 * @param {string} detailUrl - URL of the company detail page.
 * @returns {Promise<string>} The link's href, or '' when no link matches.
 */
async function findWebsite(page, detailUrl) {
  await page.goto(detailUrl, { waitUntil: 'networkidle' });
  return page.evaluate(
    (selector) => document.querySelector(selector)?.href ?? '',
    'a[href^="http"]:not([href*="company-details"])',
  );
}

// Entry point: scrape the company list, resolve each website, write companies.csv.
(async () => {
  const browser = await chromium.launch({ headless: true });
  try {
    const page = await browser.newPage();
    await page.goto('https://technopark.in/company-list', { waitUntil: 'networkidle' });

    // Scroll until all companies are loaded
    await scrollToEnd(page);

    // Extract company detail URLs
    const companies = await collectCompanies(page);

    // Visit each company page to get official website. Sequential on purpose:
    // a single page object is reused for every navigation.
    for (const company of companies) {
      try {
        company.website = await findWebsite(page, company.detailUrl);
      } catch (err) {
        // Best effort: one broken detail page must not abort the whole export,
        // but the failure is reported instead of being silently dropped.
        console.warn(`Could not read ${company.detailUrl}: ${err.message}`);
        company.website = '';
      }
    }

    // Save to CSV
    const csvContent = [
      'Company Name,Company Website',
      ...companies.map(({ name, website }) => `${csvField(name)},${csvField(website)}`),
    ].join('\n');
    fs.writeFileSync('companies.csv', csvContent, 'utf8');
  } finally {
    // Always release the browser, otherwise the Node process keeps running.
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});