You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

125 lines
3.6 KiB

#!/usr/bin/env node
import { chromium } from 'playwright';
import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';
// ES modules do not provide the CommonJS `__filename` / `__dirname` globals,
// so both are derived here from this module's own URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
/** Maximum summary size, in characters, for each supported `length` preset. */
const SUMMARY_LENGTHS = new Map([
  ['short', 300],
  ['medium', 800],
  ['long', 2000],
  ['xl', 5000],
]);

/** Directory scanned for leftover `playwright_*` entries after each run. */
const PLAYWRIGHT_TMP_DIR = '/tmp';

/**
 * Truncates `text` to the character budget of the given length preset.
 * Unknown presets fall back to `medium`. A Map is used instead of a plain
 * object so that inherited keys such as "constructor" are never mistaken
 * for a preset.
 */
function buildSummary(text, length) {
  const maxLength = SUMMARY_LENGTHS.get(length) ?? SUMMARY_LENGTHS.get('medium');
  return text.length > maxLength ? `${text.slice(0, maxLength)}...` : text;
}

/** Prints the human-readable (non-JSON) report for a summary result. */
function printTextReport(result) {
  console.log(`\n📄 Title: ${result.title}`);
  console.log(`\n📝 Summary:`);
  console.log(result.summary);
  console.log(`\n📊 Stats:`);
  console.log(` - Total characters: ${result.stats.totalCharacters}`);
  console.log(` - Summary length: ${result.stats.summaryLength}`);
  console.log(` - Links found: ${result.stats.linksFound}`);
  console.log(` - Images found: ${result.stats.imagesFound}`);
  if (result.topLinks.length > 0) {
    console.log(`\n🔗 Top links:`);
    result.topLinks.forEach((link, i) => {
      console.log(` ${i + 1}. ${link.text} - ${link.href}`);
    });
  }
}

/**
 * Best-effort removal of every `playwright_*` entry in PLAYWRIGHT_TMP_DIR.
 * Failures are reported on stderr and never thrown, so a cleanup problem
 * cannot discard an already computed summary.
 *
 * NOTE(review): this removes ALL matching entries, not only those created by
 * this run, so it may affect other Playwright processes sharing the same
 * directory. Confirm that this is intended.
 */
function removePlaywrightTempFiles() {
  let entries;
  try {
    entries = fs.readdirSync(PLAYWRIGHT_TMP_DIR);
  } catch (err) {
    // A missing temp directory (e.g. on Windows) simply means nothing to clean.
    if (err.code !== 'ENOENT') {
      console.warn(`⚠️ Could not read ${PLAYWRIGHT_TMP_DIR}: ${err.message}`);
    }
    return;
  }
  for (const name of entries) {
    if (!name.startsWith('playwright_')) {
      continue;
    }
    const fullPath = path.join(PLAYWRIGHT_TMP_DIR, name);
    try {
      // rmSync handles both files and directories; `force` ignores entries
      // that disappeared since the directory was listed.
      fs.rmSync(fullPath, { recursive: true, force: true });
    } catch (err) {
      console.warn(`⚠️ Could not remove ${fullPath}: ${err.message}`);
    }
  }
}

/**
 * Loads `url` in headless Chromium, extracts the page's visible text, and
 * produces a truncated-text "summary" plus basic statistics.
 *
 * The result is printed to stdout (as JSON when `options.output === 'json'`,
 * otherwise as a formatted text report) and also returned.
 *
 * @param {string} url - Address of the page to summarize.
 * @param {object} [options]
 * @param {string} [options.length='medium'] - One of `short` (300 chars),
 *   `medium` (800), `long` (2000) or `xl` (5000); anything else means `medium`.
 * @param {string} [options.output='text'] - `'json'` for JSON output; any
 *   other value prints the text report.
 * @returns {Promise<object>} `{ url, title, summary, stats, topLinks, topImages }`,
 *   with at most 10 links and 5 images.
 * @throws Rejects with the underlying error when the browser cannot be
 *   launched or the page cannot be loaded or evaluated. The browser is closed
 *   and temp files are cleaned up in that case as well.
 */
async function summarize(url, options = {}) {
  // `?? {}` also tolerates an explicit `null` options argument.
  const { length = 'medium', output = 'text' } = options ?? {};
  console.log(`✨ Summarizing: ${url}`);
  console.log(`📏 Length: ${length}`);

  const browser = await chromium.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  let content;
  try {
    const page = await browser.newPage();
    await page.goto(url, {
      waitUntil: 'domcontentloaded',
      timeout: 30000,
    });

    // This callback runs inside the page, so it must stay self-contained and
    // cannot reference anything from this module.
    content = await page.evaluate(() => {
      // Strip elements that only add noise to the extracted text.
      const selectors = 'script, style, nav, footer, aside, .ad, .advertisement, .popup, .cookie-banner';
      document.querySelectorAll(selectors).forEach((el) => el.remove());

      const title = document.title;
      // Collapse every whitespace run (newlines included) into one space.
      // `document.body` can be null for non-HTML documents.
      const text = (document.body?.innerText ?? '')
        .replace(/\s+/g, ' ')
        .trim();

      // First 10 absolute http(s) links that have visible text.
      const links = Array.from(document.querySelectorAll('a[href]'))
        .map((a) => ({ text: a.textContent.trim(), href: a.href }))
        .filter((link) => link.text && link.href.startsWith('http'))
        .slice(0, 10);

      // First 5 images with an absolute http(s) source.
      const images = Array.from(document.querySelectorAll('img[src]'))
        .map((img) => ({ alt: img.alt.trim(), src: img.src }))
        .filter((img) => img.src.startsWith('http'))
        .slice(0, 5);

      return { title, text, links, images };
    });
  } finally {
    // Always release the browser, even when navigation or extraction failed,
    // and only then sweep the temp directory.
    try {
      await browser.close();
    } finally {
      removePlaywrightTempFiles();
    }
  }

  const summary = buildSummary(content.text, length);
  const result = {
    url,
    title: content.title,
    summary,
    stats: {
      totalCharacters: content.text.length,
      summaryLength: summary.length,
      linksFound: content.links.length,
      imagesFound: content.images.length,
    },
    topLinks: content.links,
    topImages: content.images,
  };

  if (output === 'json') {
    console.log(JSON.stringify(result, null, 2));
  } else {
    printTextReport(result);
  }
  return result;
}
/**
 * Reports whether this module is the script Node was started with.
 * Compares the raw entry path first, then its symlink-resolved form, so that
 * launching through a symlink (e.g. an npm bin link) is also detected.
 */
function isRunDirectly() {
  const entry = process.argv[1];
  if (!entry) {
    return false;
  }
  if (entry === __filename) {
    return true;
  }
  try {
    return fs.realpathSync(entry) === __filename;
  } catch {
    // The entry path could not be resolved, so treat this as "imported".
    return false;
  }
}

// When executed directly, run a demo:
//   node <this file> [url] [length]
if (isRunDirectly()) {
  const url = process.argv[2] || 'https://www.baidu.com';
  const length = process.argv[3] || 'medium';
  summarize(url, { length }).catch((err) => {
    // Report the failure and signal it through the exit code rather than
    // leaving the rejection unhandled.
    console.error(`❌ Failed to summarize ${url}: ${err?.message ?? err}`);
    process.exitCode = 1;
  });
}
export default summarize;