---
# Scraper configuration for University College London.
#
# Sections:
#   university - identity of the institution
#   schools    - statically listed faculties and their people/staff pages
#   programs   - where and how to locate taught graduate programmes
#   faculty    - ordered strategies for finding staff pages
#   filters    - degree types / schools to include or exclude
#
# All *_ms values are milliseconds (per the key suffix).

university:
  name: "University College London"
  url: "https://www.ucl.ac.uk/"
  country: "United Kingdom"

schools:
  # Schools are enumerated explicitly under `static_list` below.
  discovery_method: "static_list"
  # Request settings for the schools section.
  request:
    timeout_ms: 45000
    max_retries: 3
    retry_backoff_ms: 3000
  static_list:
    - name: "Faculty of Engineering Sciences"
      url: "https://www.ucl.ac.uk/engineering/people"
      keywords:
        - "engineering"
        - "mechanical"
        - "civil"
        - "materials"
        - "electronic"
        - "computer"
      faculty_pages:
        - url: "https://www.ucl.ac.uk/engineering/people"
          extract_method: "links"
          requires_scroll: true
          scroll_times: 8
          scroll_delay_ms: 600
          blocked_resources: ["image", "font", "media"]
        - url: "https://www.ucl.ac.uk/electronic-electrical-engineering/people/academic-staff"
          extract_method: "table"
          # NOTE(review): nesting reconstructed from a flattened source; this
          # `request` override is assumed to belong to this page entry rather
          # than to the enclosing school - confirm against the consumer schema.
          request:
            timeout_ms: 45000
            wait_until: "domcontentloaded"
            post_wait_ms: 2000

    - name: "Faculty of Mathematical & Physical Sciences"
      url: "https://www.ucl.ac.uk/mathematical-physical-sciences/people"
      keywords:
        - "mathematics"
        - "physics"
        - "chemistry"
        - "earth sciences"
        - "astronomy"
      faculty_pages:
        - url: "https://www.ucl.ac.uk/mathematical-physical-sciences/people"
          extract_method: "links"
          requires_scroll: true
          scroll_times: 6
          scroll_delay_ms: 600
        - url: "https://www.ucl.ac.uk/physics-astronomy/people/academic-staff"
          extract_method: "links"

    - name: "Faculty of Arts & Humanities"
      url: "https://www.ucl.ac.uk/arts-humanities/people/academic-staff"
      # NOTE(review): "history" is also a keyword of the Faculty of Social &
      # Historical Sciences; if schools are matched by keyword this overlap
      # may be ambiguous - confirm it is intended.
      keywords:
        - "arts"
        - "languages"
        - "culture"
        - "history"
        - "philosophy"
        - "translation"
      faculty_pages:
        - url: "https://www.ucl.ac.uk/arts-humanities/people/academic-staff"
          extract_method: "links"
          requires_scroll: true
          scroll_times: 6
          scroll_delay_ms: 600

    - name: "Faculty of Laws"
      url: "https://www.ucl.ac.uk/laws/people/academic-staff"
      keywords:
        - "law"
        - "legal"
        - "llm"
      faculty_pages:
        - url: "https://www.ucl.ac.uk/laws/people/academic-staff"
          extract_method: "links"
          requires_scroll: true
          scroll_times: 5
          scroll_delay_ms: 600

    - name: "Faculty of Social & Historical Sciences"
      url: "https://www.ucl.ac.uk/social-historical-sciences/people"
      keywords:
        - "social"
        - "economics"
        - "geography"
        - "anthropology"
        - "politics"
        - "history"
      faculty_pages:
        - url: "https://www.ucl.ac.uk/social-historical-sciences/people"
          extract_method: "links"
          requires_scroll: true
          scroll_times: 6
          scroll_delay_ms: 600

    - name: "Faculty of Brain Sciences"
      url: "https://www.ucl.ac.uk/brain-sciences/people"
      keywords:
        - "neuroscience"
        - "psychology"
        - "cognitive"
        - "biomedical"
      faculty_pages:
        - url: "https://www.ucl.ac.uk/brain-sciences/people"
          extract_method: "links"
          requires_scroll: true
          scroll_times: 6
          scroll_delay_ms: 600

    - name: "Faculty of the Built Environment (The Bartlett)"
      url: "https://www.ucl.ac.uk/bartlett/people/all"
      keywords:
        - "architecture"
        - "planning"
        - "urban"
        - "built environment"
      faculty_pages:
        - url: "https://www.ucl.ac.uk/bartlett/people/all"
          extract_method: "links"
          requires_scroll: true
          scroll_times: 10
          scroll_delay_ms: 600

programs:
  # Site-relative paths to try when looking for the programme listing.
  paths_to_try:
    - "/prospective-students/graduate/taught-degrees/"
  # Link text / href fragments used to recognise programme links.
  link_patterns:
    - text_contains: ["graduate", "taught", "masters", "postgraduate"]
      href_contains: ["/prospective-students/graduate", "/study/graduate", "/courses/"]
  # CSS selectors; comma-separated alternatives within each value.
  selectors:
    program_item: ".view-content .view-row, li.listing__item, article.prog-card"
    program_name: ".listing__title, h3, .title"
    program_url: "a[href]"
    degree_type: ".listing__award, .award"
  request:
    timeout_ms: 40000
    wait_until: "domcontentloaded"
    post_wait_ms: 2500

faculty:
  # Each strategy carries its own `request` settings.
  # NOTE(review): presumably strategies are attempted in the order listed -
  # confirm against the consumer.
  discovery_strategies:
    - type: "link_in_page"
      patterns:
        - text_contains: ["people", "faculty", "staff", "team"]
          href_contains: ["/people", "/faculty", "/staff", "/team"]
      request:
        timeout_ms: 30000
        wait_until: "domcontentloaded"
        post_wait_ms: 1500

    - type: "url_pattern"
      # {program_url} / {school_url} are placeholders left for the consumer
      # to substitute; they are kept quoted so YAML does not read the leading
      # "{" as a flow mapping.
      patterns:
        - "{program_url}/people"
        - "{program_url}/staff"
        - "{school_url}/people"
        - "{school_url}/staff"
      request:
        timeout_ms: 30000
        wait_until: "domcontentloaded"
        post_wait_ms: 1500

    - type: "school_directory"
      assign_to_all: false
      match_by_school_keywords: true
      # Names the per-school `keywords` list defined under schools.static_list.
      metadata_keyword_field: "keywords"
      request:
        timeout_ms: 60000
        wait_for_selector: "a[href*='/people/'], .person, .profile-card"
        post_wait_ms: 2500

filters:
  program_degree_types:
    include: ["MSc", "MSci", "MA", "MBA", "MEng", "LLM", "MRes"]
    exclude: ["PhD", "Bachelor", "BSc", "BA", "PGCert"]
  # Explicit empty list (not null): no schools are excluded.
  exclude_schools: []