You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
60 lines
1.7 KiB
60 lines
1.7 KiB
#!/usr/bin/env node
|
|
import { chromium } from 'playwright';
|
|
import fs from 'fs';
|
|
import path from 'path';
|
|
|
|
/**
 * Load a web page in headless Chromium, extract its visible text, and print
 * the page title, a short summary and the text length to stdout.
 *
 * The "summary" is currently just the first 500 characters of the
 * whitespace-normalized page text; it is a placeholder that could be swapped
 * for a real LLM call.
 *
 * @param {string} url - Address of the page to visit.
 * @returns {Promise<{title: string, summary: string, length: number}>}
 *   The page title, the printed summary, and the length of the full
 *   extracted text (in UTF-16 code units).
 * @throws Rejects with the underlying error if the browser cannot be
 *   launched or the page cannot be loaded/evaluated. The browser is closed
 *   before the rejection propagates.
 */
async function summarizeWebPage(url) {
  const SUMMARY_LIMIT = 500;
  const TMP_DIR = '/tmp';
  const TMP_PREFIX = 'playwright_';

  console.log(`正在访问: ${url}`);

  const browser = await chromium.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  let content;
  try {
    const page = await browser.newPage();
    await page.goto(url, {
      waitUntil: 'domcontentloaded',
      timeout: 30000,
    });

    // Runs inside the page: must not reference anything from this module.
    content = await page.evaluate(() => {
      // Strip elements that are not part of the main content.
      document
        .querySelectorAll('script, style, nav, footer, aside, .advertisement, .ad, .popup')
        .forEach((el) => el.remove());

      // Collapse every whitespace run (newlines included) into one space.
      // `document.body` can be null for documents without a body element.
      const text = (document.body?.innerText ?? '').replace(/\s+/g, ' ').trim();

      return { title: document.title, text };
    });
  } finally {
    // Always release the browser, even when navigation or extraction fails;
    // otherwise the Chromium process would be leaked.
    await browser.close();
  }

  // Naive summary: leading characters plus an ellipsis when truncated.
  const summary =
    content.text.slice(0, SUMMARY_LIMIT) + (content.text.length > SUMMARY_LIMIT ? '...' : '');

  console.log(`\n📄 页面标题: ${content.title}`);
  console.log(`\n📝 内容摘要:\n${summary}`);
  console.log(`\n📊 原文长度: ${content.text.length} 字符`);

  // Best-effort removal of leftover temp entries. A cleanup failure must not
  // turn an already-successful summary into a rejected promise.
  // NOTE(review): this removes every `playwright_*` entry in /tmp, including
  // ones that may belong to other Playwright processes running concurrently.
  let leftovers = [];
  try {
    leftovers = fs.readdirSync(TMP_DIR).filter((name) => name.startsWith(TMP_PREFIX));
  } catch (err) {
    // A missing temp directory simply means there is nothing to clean.
    if (err.code !== 'ENOENT') {
      console.warn(`无法读取临时目录 ${TMP_DIR}: ${err.message}`);
    }
  }

  for (const name of leftovers) {
    const fullPath = path.join(TMP_DIR, name);
    try {
      // `recursive` + `force` handles files and directories alike and
      // tolerates entries that disappeared since the directory listing.
      fs.rmSync(fullPath, { recursive: true, force: true });
    } catch (err) {
      console.warn(`清理临时文件失败 ${fullPath}: ${err.message}`);
    }
  }

  return { title: content.title, summary, length: content.text.length };
}
|
|
|
|
// Smoke test: summarize the Baidu home page.
// The rejection is handled explicitly so that a launch or navigation failure
// is reported clearly and produces a non-zero exit status instead of an
// unhandled promise rejection.
summarizeWebPage('https://www.baidu.com').catch((err) => {
  console.error('页面摘要失败:', err);
  process.exitCode = 1;
});
|
|
|