You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

60 lines
1.7 KiB

#!/usr/bin/env node
import { chromium } from 'playwright';
import fs from 'fs';
import path from 'path';
/**
 * Removes entries in /tmp whose names start with `playwright_`.
 *
 * Best-effort: every failure is logged as a warning and never thrown, so this
 * is safe to call from a `finally` block without masking the error that is
 * already propagating.
 *
 * NOTE(review): entries are matched by name prefix only, so this may also
 * remove files that belong to another Playwright process on the same host.
 */
function removePlaywrightTempEntries() {
  const tmpDir = '/tmp';
  let entries;
  try {
    entries = fs.readdirSync(tmpDir);
  } catch (err) {
    console.warn(`无法读取临时目录 ${tmpDir}: ${err.message}`);
    return;
  }
  for (const entry of entries) {
    if (!entry.startsWith('playwright_')) {
      continue;
    }
    const fullPath = path.join(tmpDir, entry);
    try {
      // recursive + force removes files and directories alike and tolerates
      // entries that disappeared between readdir and removal.
      fs.rmSync(fullPath, { recursive: true, force: true });
    } catch (err) {
      console.warn(`清理临时文件失败 ${fullPath}: ${err.message}`);
    }
  }
}

/**
 * Opens `url` in headless Chromium and extracts the page title and its
 * whitespace-normalized visible text.
 *
 * The browser is always closed, even when navigation or extraction fails.
 *
 * @param {string} url - Address of the page to load.
 * @returns {Promise<{title: string, text: string}>} Title and body text.
 */
async function fetchPageContent(url) {
  const browser = await chromium.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });
  try {
    const page = await browser.newPage();
    await page.goto(url, {
      waitUntil: 'domcontentloaded',
      timeout: 30000,
    });
    // This callback runs inside the page, not in Node.
    return await page.evaluate(() => {
      // Drop elements that do not belong to the main content.
      document
        .querySelectorAll('script, style, nav, footer, aside, .advertisement, .ad, .popup')
        .forEach((el) => el.remove());
      // document.body can be null for non-HTML documents.
      const rawText = document.body ? document.body.innerText : '';
      // Collapsing every whitespace run (newlines included) into one space;
      // a separate newline-collapsing pass would never match afterwards.
      const text = rawText.replace(/\s+/g, ' ').trim();
      return { title: document.title, text };
    });
  } finally {
    await browser.close();
  }
}

/**
 * Loads a web page and prints its title, a short summary and the length of
 * its extracted text to the console.
 *
 * The "summary" is simply the first 500 characters of the extracted text
 * (followed by `...` when truncated); it is a placeholder that could be
 * replaced by a real LLM call. After the run, whether it succeeded or failed,
 * `playwright_*` entries in /tmp are removed on a best-effort basis.
 *
 * @param {string} url - Address of the page to summarize.
 * @returns {Promise<void>} Resolves once the summary has been printed.
 * @throws Rejects with the underlying error if launching the browser,
 *   navigating, or extracting the content fails.
 */
async function summarizeWebPage(url) {
  console.log(`正在访问: ${url}`);
  try {
    const { title, text } = await fetchPageContent(url);
    const summary = text.length > 500 ? `${text.slice(0, 500)}...` : text;
    console.log(`\n📄 页面标题: ${title}`);
    console.log(`\n📝 内容摘要:\n${summary}`);
    console.log(`\n📊 原文长度: ${text.length} 字符`);
  } finally {
    removePlaywrightTempEntries();
  }
}
// Smoke test: summarize the Baidu home page. A failure is reported and
// reflected in the exit code rather than left as an unhandled rejection.
summarizeWebPage('https://www.baidu.com').catch((err) => {
  console.error(err);
  process.exitCode = 1;
});