47 lines
1.4 KiB
JavaScript
47 lines
1.4 KiB
JavaScript
import { chromium } from 'playwright';
|
|
import fs from 'fs';
|
|
|
|
/**
 * Scrapes the Technopark company list and writes `companies.csv`.
 *
 * Steps:
 *   1. Open the listing page and scroll until the document height stops
 *      growing, so lazily loaded entries are present in the DOM.
 *   2. Collect every anchor whose href contains "company-details".
 *   3. Visit each detail page and record one outbound link as the website.
 *   4. Write a two-column CSV (name, website) to the working directory.
 *
 * The browser is always closed, even when a step throws, so no Chromium
 * process is left behind and the Node process can exit.
 */
(async () => {
  const LIST_URL = 'https://technopark.in/company-list';
  const OUTPUT_FILE = 'companies.csv';
  const SCROLL_PAUSE_MS = 2000;
  const DETAIL_LINK_SELECTOR = "a[href*='company-details']";
  // NOTE(review): this picks the FIRST absolute link on the detail page that is
  // not itself a company-details link. Whether that is really the company's
  // official website depends on the page markup — confirm against the site.
  const WEBSITE_LINK_SELECTOR = 'a[href^="http"]:not([href*="company-details"])';

  // Quote one CSV field, doubling embedded quotes; null/undefined become "".
  const csvField = (value) => `"${String(value ?? '').replace(/"/g, '""')}"`;

  const browser = await chromium.launch({ headless: true });
  try {
    const page = await browser.newPage();
    await page.goto(LIST_URL, { waitUntil: 'networkidle' });

    // Scroll until all companies are loaded: stop as soon as a scroll no
    // longer increases the document height.
    let previousHeight = 0;
    while (true) {
      const currentHeight = await page.evaluate(() => document.body.scrollHeight);
      if (currentHeight === previousHeight) break;
      previousHeight = currentHeight;
      await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
      await page.waitForTimeout(SCROLL_PAUSE_MS);
    }

    // Extract company detail URLs (runs inside the page context).
    const links = await page.$$eval(DETAIL_LINK_SELECTOR, (anchors) =>
      anchors.map((anchor) => ({
        name: anchor.innerText.trim(),
        detailUrl: anchor.href,
      })),
    );

    // Several anchors may point at the same detail page; keep one row per URL
    // and prefer the first non-empty link text as the company name.
    const companiesByUrl = new Map();
    for (const { name, detailUrl } of links) {
      const existing = companiesByUrl.get(detailUrl);
      if (existing === undefined) {
        companiesByUrl.set(detailUrl, { name, detailUrl, website: '' });
      } else if (existing.name === '' && name !== '') {
        existing.name = name;
      }
    }
    const companies = [...companiesByUrl.values()];

    // Visit each company page to get the official website. Pages are visited
    // sequentially because they share a single Page object.
    for (const company of companies) {
      try {
        await page.goto(company.detailUrl, { waitUntil: 'networkidle' });
        // $eval rejects when nothing matches; a missing link is an expected
        // case, so it maps to an empty website rather than an error.
        company.website = await page
          .$eval(WEBSITE_LINK_SELECTOR, (el) => el.href)
          .catch(() => '');
      } catch (err) {
        // Best effort: one failing page must not abort the whole run, but the
        // failure is reported instead of being silently dropped.
        console.warn(`Failed to load ${company.detailUrl}: ${err.message}`);
        company.website = '';
      }
    }

    // Save to CSV
    const csvContent = [
      'Company Name,Company Website',
      ...companies.map((c) => `${csvField(c.name)},${csvField(c.website)}`),
    ].join('\n');

    fs.writeFileSync(OUTPUT_FILE, csvContent, 'utf8');
  } finally {
    await browser.close();
  }
})().catch((err) => {
  // Surface fatal errors and signal failure to the calling shell.
  console.error(err);
  process.exitCode = 1;
});
|