You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

187 lines
4.6 KiB

#!/usr/bin/env node
/**
* Playwright 浏览器工具集
* 提供便捷的网页访问、截图、内容提取等功能
*/
import fs from 'fs';
import path from 'path';
import { pathToFileURL } from 'url';
import { chromium } from 'playwright';
/**
 * Open a URL in a fresh Chromium instance and collect the page title, the
 * serialized HTML, the body text and (optionally) a screenshot.
 *
 * The browser is always closed before this function settles. Any error raised
 * after the browser has been launched is logged and reported through the
 * returned object instead of being thrown.
 *
 * @param {string} url - URL to visit.
 * @param {Object} [options] - Settings.
 * @param {boolean} [options.headless=true] - Passed to `chromium.launch`.
 * @param {string} [options.waitUntil='domcontentloaded'] - Passed to `page.goto`.
 * @param {number} [options.timeout=30000] - Passed to `page.goto`.
 * @param {boolean} [options.screenshot=true] - Whether to capture a screenshot.
 * @param {?string} [options.screenshotPath=null] - If set (and `screenshot` is
 *   true), the screenshot is also written to this file; missing parent
 *   directories are created.
 * @param {boolean} [options.fullPage=false] - Passed to `page.screenshot`.
 * @returns {Promise<Object>} `{ success: true, url, title, content, text, screenshot }`
 *   on success (`screenshot` is the image buffer or `null`), or
 *   `{ success: false, url, error }` on failure.
 */
export async function visitPage(url, options = {}) {
  const {
    headless = true,
    waitUntil = 'domcontentloaded',
    timeout = 30000,
    screenshot = true,
    screenshotPath = null,
    fullPage = false
  } = options;

  const browser = await chromium.launch({ headless });
  try {
    // Context/page creation lives inside the try so that a failure here still
    // reaches the finally block and the launched browser is not leaked.
    const context = await browser.newContext({
      userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
    });
    const page = await context.newPage();

    console.log(`🌐 正在访问: ${url}`);
    await page.goto(url, { waitUntil, timeout });

    // Page title
    const title = await page.title();
    console.log(`📄 页面标题: ${title}`);

    // Serialized page HTML
    const content = await page.content();

    // Body text; documents without a <body> yield an empty string instead of
    // throwing on a null `document.body`.
    const text = await page.evaluate(() => document.body?.innerText ?? '');

    let screenshotBuffer = null;
    if (screenshot) {
      screenshotBuffer = await page.screenshot({ fullPage });
      if (screenshotPath) {
        // Non-blocking write; create the target directory if it is missing.
        await fs.promises.mkdir(path.dirname(screenshotPath), { recursive: true });
        await fs.promises.writeFile(screenshotPath, screenshotBuffer);
        console.log(`📸 截图已保存: ${screenshotPath}`);
      }
    }

    return {
      success: true,
      url,
      title,
      content,
      text,
      screenshot: screenshotBuffer
    };
  } catch (error) {
    console.error(`❌ 访问失败: ${error.message}`);
    return {
      success: false,
      url,
      error: error.message
    };
  } finally {
    await browser.close();
  }
}
/**
 * Capture a screenshot of a web page and save it to a file.
 *
 * Delegates to `visitPage`, forcing `screenshot: true` and using `outputPath`
 * as `screenshotPath`; these two settings override the same keys in `options`.
 *
 * @param {string} url - Page URL.
 * @param {string} outputPath - File path the screenshot is written to.
 * @param {Object} [options] - Extra settings forwarded to `visitPage`.
 * @returns {Promise<Object>} Whatever `visitPage` resolves to.
 */
export async function screenshot(url, outputPath, options = {}) {
  const forced = { screenshot: true, screenshotPath: outputPath };
  const visitOptions = Object.assign({}, options, forced);
  return await visitPage(url, visitOptions);
}
/**
 * Fetch a web page without taking a screenshot.
 *
 * Delegates to `visitPage` with `screenshot: false`, which overrides any
 * `screenshot` value in `options`. The page text is available on the
 * resolved object as returned by `visitPage`.
 *
 * @param {string} url - Page URL.
 * @param {Object} [options] - Extra settings forwarded to `visitPage`.
 * @returns {Promise<Object>} Whatever `visitPage` resolves to.
 */
export async function extractText(url, options = {}) {
  const textOnlyOptions = { ...options };
  textOnlyOptions.screenshot = false;
  return await visitPage(url, textOnlyOptions);
}
/**
 * Open a URL in a fresh Chromium instance and run a caller-supplied action
 * against the loaded page.
 *
 * The browser is always closed before this function settles. Any error raised
 * after the browser has been launched (including one thrown by `action`) is
 * logged and reported through the returned object instead of being thrown.
 *
 * @param {string} url - Page URL.
 * @param {Function} action - Called as `action(page)` after navigation; its
 *   awaited return value becomes `result`.
 * @param {Object} [options] - Settings.
 * @param {boolean} [options.headless=true] - Passed to `chromium.launch`.
 * @param {string} [options.waitUntil='domcontentloaded'] - Passed to `page.goto`.
 * @param {number} [options.timeout=30000] - Passed to `page.goto`.
 * @returns {Promise<Object>} `{ success: true, url, result }` on success, or
 *   `{ success: false, url, error }` on failure.
 */
export async function customAction(url, action, options = {}) {
  const {
    headless = true,
    waitUntil = 'domcontentloaded',
    timeout = 30000
  } = options;

  const browser = await chromium.launch({ headless });
  try {
    // Context/page creation lives inside the try so that a failure here still
    // reaches the finally block and the launched browser is not leaked.
    const context = await browser.newContext({
      userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
    });
    const page = await context.newPage();

    console.log(`🌐 正在访问: ${url}`);
    await page.goto(url, { waitUntil, timeout });

    // Run the caller's action against the loaded page
    const result = await action(page);

    return {
      success: true,
      url,
      result
    };
  } catch (error) {
    console.error(`❌ 操作失败: ${error.message}`);
    return {
      success: false,
      url,
      error: error.message
    };
  } finally {
    await browser.close();
  }
}
// Demo entry point: runs only when this file is executed directly.
// The script path is converted with pathToFileURL so the comparison also holds
// on Windows and for paths that need percent-encoding (spaces, non-ASCII),
// where a hand-built `file://${path}` string never matches import.meta.url.
if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
  // Example: visit a page (first CLI argument, or a default URL)
  const url = process.argv[2] || 'https://www.baidu.com';

  // Screenshots go to ./screenshots under the current working directory
  const outputDir = path.join(process.cwd(), 'screenshots');
  if (!fs.existsSync(outputDir)) {
    fs.mkdirSync(outputDir, { recursive: true });
  }

  // ':' and '.' are replaced so the timestamp is safe to use in a file name
  const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
  const screenshotPath = path.join(outputDir, `screenshot-${timestamp}.png`);

  visitPage(url, { screenshotPath })
    .then(result => {
      if (result.success) {
        console.log('\n✅ 访问成功!');
        console.log(`标题: ${result.title}`);
        console.log(`文本长度: ${result.text.length} 字符`);
      } else {
        console.log('\n❌ 访问失败');
        console.log(`错误: ${result.error}`);
      }
      // Exit code reflects whether the visit succeeded
      process.exit(result.success ? 0 : 1);
    })
    .catch(error => {
      console.error('Error:', error);
      process.exit(1);
    });
}
// Default export: the same four helpers that are also available as named exports.
const playwrightTools = { visitPage, screenshot, extractText, customAction };

export default playwrightTools;