Add university scraper system with backend, frontend, and configs

- Add src/university_scraper module with scraper, analyzer, and CLI
- Add backend FastAPI service with API endpoints and database models
- Add frontend React app with university management pages
- Add configs for Harvard, Manchester, and UCL universities
- Add artifacts with various scraper implementations
- Add Docker compose configuration for deployment
- Update .gitignore to exclude generated files

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
yangxiaoyu-crypto
2025-12-22 15:25:08 +08:00
parent 2714c8ad5c
commit 426cf4d2cd
75 changed files with 13527 additions and 2 deletions

View File

@@ -0,0 +1,165 @@
"""大学管理API"""
from typing import List, Optional
from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy.orm import Session
from ..database import get_db
from ..models import University, ScrapeResult
from ..schemas.university import (
UniversityCreate,
UniversityUpdate,
UniversityResponse,
UniversityListResponse
)
# Router collecting the university CRUD endpoints defined in this module.
# NOTE(review): the routes use "" and "/{university_id}" as paths, so this
# router is presumably included by the application under a prefix — confirm.
router = APIRouter()
@router.get("", response_model=UniversityListResponse)
def list_universities(
    skip: int = Query(0, ge=0),
    limit: int = Query(20, ge=1, le=100),
    search: Optional[str] = None,
    db: Session = Depends(get_db)
):
    """Return one page of universities, newest first, with summary statistics.

    Args:
        skip: Number of matching rows to skip (>= 0).
        limit: Maximum number of rows to return (1-100).
        search: Optional text. When given, only universities whose name
            contains it (case-insensitive) are returned. ``%`` and ``_``
            in the text are matched literally, not as wildcards.
        db: Database session provided by the ``get_db`` dependency.

    Returns:
        A ``UniversityListResponse`` with ``total`` (number of matching
        universities, ignoring pagination) and ``items`` (the requested page).
    """
    query = db.query(University)

    if search:
        # Escape LIKE metacharacters so user input cannot act as a wildcard
        # (e.g. a search for "%" must not match every row). "/" is the escape
        # character; it is doubled first so the escapes added next stay intact.
        escaped = (
            search.replace("/", "//").replace("%", "/%").replace("_", "/_")
        )
        query = query.filter(
            University.name.ilike(f"%{escaped}%", escape="/")
        )

    total = query.count()

    # The id tie-breaker keeps pagination stable when several rows share the
    # same created_at value.
    universities = (
        query
        .order_by(University.created_at.desc(), University.id.desc())
        .offset(skip)
        .limit(limit)
        .all()
    )

    # Attach statistics to every row of the page.
    items = []
    for uni in universities:
        # Most recent scrape result for this university.
        # NOTE: this issues one extra query per row of the page.
        latest_result = (
            db.query(ScrapeResult)
            .filter(ScrapeResult.university_id == uni.id)
            .order_by(ScrapeResult.created_at.desc())
            .first()
        )

        items.append(UniversityResponse(
            id=uni.id,
            name=uni.name,
            url=uni.url,
            country=uni.country,
            description=uni.description,
            status=uni.status,
            created_at=uni.created_at,
            updated_at=uni.updated_at,
            scripts_count=len(uni.scripts),
            jobs_count=len(uni.jobs),
            latest_result={
                "schools_count": latest_result.schools_count,
                "programs_count": latest_result.programs_count,
                "faculty_count": latest_result.faculty_count,
                "created_at": latest_result.created_at.isoformat()
            } if latest_result else None
        ))

    return UniversityListResponse(total=total, items=items)
@router.post("", response_model=UniversityResponse)
def create_university(
    data: UniversityCreate,
    db: Session = Depends(get_db)
):
    """Create a university record and return it.

    Args:
        data: Fields of the university to create.
        db: Database session provided by the ``get_db`` dependency.

    Returns:
        A ``UniversityResponse`` for the stored record, with zero script and
        job counts and no latest result.

    Raises:
        HTTPException: 400 when a university with the same URL already exists.
    """
    # Reject the request when the URL is already registered.
    duplicate = db.query(University).filter(University.url == data.url).first()
    if duplicate:
        raise HTTPException(status_code=400, detail="该大学URL已存在")

    record = University(**data.model_dump())
    db.add(record)
    db.commit()
    # Reload the row after the commit before reading its attributes.
    db.refresh(record)

    copied_fields = (
        "id",
        "name",
        "url",
        "country",
        "description",
        "status",
        "created_at",
        "updated_at",
    )
    payload = {field: getattr(record, field) for field in copied_fields}

    # A freshly created university has no scripts, jobs or results yet.
    return UniversityResponse(
        **payload,
        scripts_count=0,
        jobs_count=0,
        latest_result=None
    )
@router.get("/{university_id}", response_model=UniversityResponse)
def get_university(
    university_id: int,
    db: Session = Depends(get_db)
):
    """Return the details of a single university.

    Args:
        university_id: Primary key of the university to look up.
        db: Database session provided by the ``get_db`` dependency.

    Returns:
        A ``UniversityResponse`` including script/job counts and a summary of
        the most recent scrape result (``None`` when there is no result).

    Raises:
        HTTPException: 404 when no university has the given id.
    """
    record = db.query(University).filter(University.id == university_id).first()
    if not record:
        raise HTTPException(status_code=404, detail="大学不存在")

    # Most recent scrape result for this university, if any.
    newest = (
        db.query(ScrapeResult)
        .filter(ScrapeResult.university_id == record.id)
        .order_by(ScrapeResult.created_at.desc())
        .first()
    )

    if newest:
        summary = {
            "schools_count": newest.schools_count,
            "programs_count": newest.programs_count,
            "faculty_count": newest.faculty_count,
            "created_at": newest.created_at.isoformat(),
        }
    else:
        summary = None

    return UniversityResponse(
        id=record.id,
        name=record.name,
        url=record.url,
        country=record.country,
        description=record.description,
        status=record.status,
        created_at=record.created_at,
        updated_at=record.updated_at,
        scripts_count=len(record.scripts),
        jobs_count=len(record.jobs),
        latest_result=summary,
    )
@router.put("/{university_id}", response_model=UniversityResponse)
def update_university(
    university_id: int,
    data: UniversityUpdate,
    db: Session = Depends(get_db)
):
    """Update the fields of an existing university that the request sets.

    Args:
        university_id: Primary key of the university to update.
        data: Fields to change; fields not set in the request are left as is.
        db: Database session provided by the ``get_db`` dependency.

    Returns:
        The updated university, in the same shape ``get_university`` returns.

    Raises:
        HTTPException: 404 when no university has the given id; 400 when the
            new URL already belongs to a different university.
    """
    university = db.query(University).filter(University.id == university_id).first()
    if not university:
        raise HTTPException(status_code=404, detail="大学不存在")

    # Only fields explicitly present in the request are applied.
    update_data = data.model_dump(exclude_unset=True)

    # Enforce the same URL-uniqueness rule as create_university, ignoring the
    # record being updated so that re-submitting its own URL is allowed.
    new_url = update_data.get("url")
    if new_url is not None:
        conflict = db.query(University).filter(
            University.url == new_url,
            University.id != university_id
        ).first()
        if conflict:
            raise HTTPException(status_code=400, detail="该大学URL已存在")

    for field, value in update_data.items():
        setattr(university, field, value)

    db.commit()
    db.refresh(university)

    # Reuse the detail endpoint so the response includes counts and the
    # latest result summary.
    return get_university(university_id, db)
@router.delete("/{university_id}")
def delete_university(
    university_id: int,
    db: Session = Depends(get_db)
):
    """Delete a university by id.

    Args:
        university_id: Primary key of the university to delete.
        db: Database session provided by the ``get_db`` dependency.

    Returns:
        A dict with a single ``message`` key confirming the deletion.

    Raises:
        HTTPException: 404 when no university has the given id.
    """
    lookup = db.query(University).filter(University.id == university_id)
    target = lookup.first()
    if not target:
        raise HTTPException(status_code=404, detail="大学不存在")

    # NOTE(review): what happens to related rows depends on the model's
    # cascade configuration, which is not visible here — confirm.
    db.delete(target)
    db.commit()

    confirmation = {"message": "删除成功"}
    return confirmation