---
# Configuration template describing one university: where its schools,
# taught programmes and staff listings live, and how each page is requested.
# The university name is a placeholder and every URL points at example.ac.uk;
# replace them with real values before use.

university:
  name: "REPLACE_UNIVERSITY_NAME"
  url: "https://www.example.ac.uk/"
  country: "United Kingdom"

schools:
  # Schools are listed by hand under `static_list` below.
  discovery_method: "static_list"
  request:
    timeout_ms: 45000
    max_retries: 3
    retry_backoff_ms: 3000
  static_list:
    # Example schools backed by a Research Explorer (Pure Portal) site.
    # NOTE(review): `requires_scroll`, `request` and `research_explorer` are
    # nested under each `faculty_pages` entry here; confirm against the
    # consumer's schema that they do not belong on the school entry instead.
    - name: "School of Engineering"
      url: "https://research.example.ac.uk/en/organisations/school-of-engineering/persons/"
      keywords:
        - "engineering"
        - "mechanical"
        - "civil"
        - "materials"
      faculty_pages:
        - url: "https://research.example.ac.uk/en/organisations/school-of-engineering/persons/"
          extract_method: "research_explorer"
          requires_scroll: true
          request:
            timeout_ms: 120000
            wait_until: "networkidle"
            post_wait_ms: 5000
          research_explorer:
            # Same slug as the organisation segment of the URL above.
            org_slug: "school-of-engineering"
            page_size: 400
    - name: "Faculty of Humanities"
      url: "https://research.example.ac.uk/en/organisations/faculty-of-humanities/persons/"
      keywords:
        - "arts"
        - "languages"
        - "history"
        - "philosophy"
      faculty_pages:
        - url: "https://research.example.ac.uk/en/organisations/faculty-of-humanities/persons/"
          extract_method: "research_explorer"
          requires_scroll: true
          request:
            timeout_ms: 120000
            wait_until: "networkidle"
            post_wait_ms: 4500
          research_explorer:
            # Same slug as the organisation segment of the URL above.
            org_slug: "faculty-of-humanities"
            page_size: 300

programs:
  # Candidate paths for the programme listing.
  # NOTE(review): presumably resolved against `university.url` - confirm.
  paths_to_try:
    - "/study/masters/courses/list/"
    - "/study/postgraduate/courses/list/"
  link_patterns:
    - text_contains: ["masters", "postgraduate", "graduate"]
      href_contains: ["/courses/", "/study/", "/programmes/"]
  # CSS selectors; a comma separates alternative selectors within one value.
  selectors:
    program_item: "li.course-item, article.course-card, a.course-link"
    program_name: ".course-title, h3, .title"
    program_url: "a[href]"
    degree_type: ".course-award, .badge"
  request:
    timeout_ms: 40000
    wait_until: "domcontentloaded"
    post_wait_ms: 2500

faculty:
  # Three strategy entries, each carrying its own request settings.
  # NOTE(review): whether they are tried in listed order, or all applied, is
  # decided by the consumer - confirm before reordering.
  discovery_strategies:
    - type: "link_in_page"
      patterns:
        - text_contains: ["faculty", "people", "staff", "directory"]
          href_contains: ["/faculty", "/people", "/staff"]
      request:
        timeout_ms: 30000
        wait_until: "domcontentloaded"
        post_wait_ms: 1500
    - type: "url_pattern"
      # NOTE(review): `{program_url}` / `{school_url}` look like placeholders
      # filled in by the consumer; keep the braces and names exactly as is.
      patterns:
        - "{program_url}/people"
        - "{program_url}/faculty"
        - "{school_url}/people"
        - "{school_url}/staff"
      request:
        timeout_ms: 30000
        wait_until: "domcontentloaded"
        post_wait_ms: 1500
    - type: "school_directory"
      assign_to_all: false
      match_by_school_keywords: true
      # Names the `keywords` field carried by each `schools.static_list` entry.
      metadata_keyword_field: "keywords"
      request:
        timeout_ms: 120000
        wait_for_selector: "a.link.person"
        post_wait_ms: 4000

filters:
  program_degree_types:
    include: ["MSc", "MA", "MBA", "MEng", "LLM", "MRes"]
    exclude: ["PhD", "Bachelor", "BSc", "BA"]
  # Explicit empty list rather than a bare key, which would load as null.
  exclude_schools: []