- Add src/university_scraper module with scraper, analyzer, and CLI
- Add backend FastAPI service with API endpoints and database models
- Add frontend React app with university management pages
- Add configs for Harvard, Manchester, and UCL universities
- Add artifacts with various scraper implementations
- Add Docker compose configuration for deployment
- Update .gitignore to exclude generated files

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
84 lines · 3.0 KiB · Python
#!/usr/bin/env python3
"""
Debug the Computer Science Faculty pages.
"""

import asyncio

from playwright.async_api import async_playwright


async def debug_cs():
    async with async_playwright() as p:
        # Headed browser so the pages can be watched while debugging
        browser = await p.chromium.launch(headless=False)
        page = await browser.new_page()

        # Visit the Computer Science GSAS page
        gsas_url = "https://gsas.harvard.edu/program/computer-science"
        print(f"Visiting: {gsas_url}")

        await page.goto(gsas_url, wait_until="domcontentloaded", timeout=30000)
        await page.wait_for_timeout(3000)

        await page.screenshot(path="cs_gsas_page.png", full_page=True)
        print("Screenshot saved: cs_gsas_page.png")

        # Collect every link on the page
        links = await page.evaluate('''() => {
            const links = [];
            document.querySelectorAll('a[href]').forEach(a => {
                const text = a.innerText.trim();
                const href = a.href;
                if (text && text.length > 2 && text.length < 100) {
                    links.push({text: text, href: href});
                }
            });
            return links;
        }''')

        print(f"\nAll links on the page ({len(links)} total):")
        for link in links:
            print(f" - {link['text'][:60]} -> {link['href']}")

        # Look for likely Faculty or People links
        print("\n\nSearching for Faculty/People-related links:")
        for link in links:
            text_lower = link['text'].lower()
            href_lower = link['href'].lower()
            if 'faculty' in text_lower or 'people' in href_lower or 'faculty' in href_lower or 'website' in text_lower:
                print(f" * {link['text']} -> {link['href']}")

        # Try the SEAS (School of Engineering) page
        print("\n\nTrying the SEAS Computer Science page...")
        seas_url = "https://seas.harvard.edu/computer-science"
        await page.goto(seas_url, wait_until="domcontentloaded", timeout=30000)
        await page.wait_for_timeout(2000)

        await page.screenshot(path="seas_cs_page.png", full_page=True)
        print("Screenshot saved: seas_cs_page.png")

        # Collect only Faculty/People links on the SEAS page
        seas_links = await page.evaluate('''() => {
            const links = [];
            document.querySelectorAll('a[href]').forEach(a => {
                const text = a.innerText.trim();
                const href = a.href;
                const lowerText = text.toLowerCase();
                const lowerHref = href.toLowerCase();
                if ((lowerText.includes('faculty') || lowerText.includes('people') ||
                     lowerHref.includes('faculty') || lowerHref.includes('people')) &&
                    text.length > 2) {
                    links.push({text: text, href: href});
                }
            });
            return links;
        }''')

        print("\nFaculty/People links on the SEAS page:")
        for link in seas_links:
            print(f" * {link['text']} -> {link['href']}")

        await browser.close()


if __name__ == "__main__":
    asyncio.run(debug_cs())
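The two page.evaluate calls above duplicate the link-collection JavaScript. A minimal sketch of a shared helper that could replace both (the collect_links name and its keywords parameter are assumptions for illustration, not part of the original script; the original filters also differ slightly, e.g. the GSAS pass additionally matches 'website'):

# Hypothetical refactoring sketch; collect_links and its keyword filter
# are illustrative, not part of the original script.
async def collect_links(page, keywords=None):
    """Return {text, href} pairs for anchors, optionally filtered by keywords."""
    links = await page.evaluate('''() => {
        const links = [];
        document.querySelectorAll('a[href]').forEach(a => {
            const text = a.innerText.trim();
            if (text && text.length > 2 && text.length < 100) {
                links.push({text: text, href: a.href});
            }
        });
        return links;
    }''')
    if keywords:
        # Keep links whose text or href mentions any keyword (case-insensitive)
        links = [
            l for l in links
            if any(k in l['text'].lower() or k in l['href'].lower() for k in keywords)
        ]
    return links

With a helper like this, the GSAS pass would become links = await collect_links(page) and the SEAS pass seas_links = await collect_links(page, keywords=['faculty', 'people']), keeping the collection logic in one place.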