Add university scraper system with backend, frontend, and configs
- Add src/university_scraper module with scraper, analyzer, and CLI - Add backend FastAPI service with API endpoints and database models - Add frontend React app with university management pages - Add configs for Harvard, Manchester, and UCL universities - Add artifacts with various scraper implementations - Add Docker compose configuration for deployment - Update .gitignore to exclude generated files 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
33
backend/app/schemas/__init__.py
Normal file
33
backend/app/schemas/__init__.py
Normal file
@ -0,0 +1,33 @@
|
||||
"""Pydantic schemas for API"""
|
||||
|
||||
# Re-export the public schema classes so callers can import them straight
# from the ``schemas`` package instead of from the individual submodules.
from .university import (
    UniversityCreate,
    UniversityUpdate,
    UniversityResponse,
    UniversityListResponse,
)
from .script import (
    ScriptCreate,
    ScriptResponse,
    GenerateScriptRequest,
    GenerateScriptResponse,
)
from .job import (
    JobCreate,
    JobResponse,
    JobStatusResponse,
    LogResponse,
)
from .result import (
    ResultResponse,
    SchoolData,
    ProgramData,
    FacultyData,
)

# Public API of the package, grouped by the submodule each name comes from.
__all__ = [
    # university
    "UniversityCreate",
    "UniversityUpdate",
    "UniversityResponse",
    "UniversityListResponse",
    # script
    "ScriptCreate",
    "ScriptResponse",
    "GenerateScriptRequest",
    "GenerateScriptResponse",
    # job
    "JobCreate",
    "JobResponse",
    "JobStatusResponse",
    "LogResponse",
    # result
    "ResultResponse",
    "SchoolData",
    "ProgramData",
    "FacultyData",
]
|
||||
52
backend/app/schemas/job.py
Normal file
52
backend/app/schemas/job.py
Normal file
@ -0,0 +1,52 @@
|
||||
"""爬取任务相关的Pydantic模型"""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Optional, List
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class JobCreate(BaseModel):
    """Request body for creating a scrape job."""

    # Required: ID of the university the job refers to.
    university_id: int

    # Optional: ID of a script; None when the caller does not supply one.
    script_id: Optional[int] = None
|
||||
|
||||
|
||||
class JobResponse(BaseModel):
    """Response model describing a scrape job."""

    id: int
    university_id: int
    script_id: Optional[int] = None
    status: str
    progress: int
    current_step: Optional[str] = None
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None
    error_message: Optional[str] = None
    created_at: datetime

    # Pydantic v2 configuration. Replaces the deprecated nested ``class Config``
    # (which still works but emits a deprecation warning). ``from_attributes``
    # allows the model to be populated from attribute-bearing objects.
    # A plain dict is used so no extra import is required.
    model_config = {"from_attributes": True}
|
||||
|
||||
|
||||
class JobStatusResponse(BaseModel):
    """Response model for a job's status."""

    id: int
    status: str
    progress: int

    # None when no current step is set.
    current_step: Optional[str] = None

    # "LogResponse" is a string forward reference: that class is declared
    # further down in this module.
    logs: List["LogResponse"] = []
|
||||
|
||||
|
||||
class LogResponse(BaseModel):
    """Response model for a single log entry."""

    id: int
    level: str
    message: str
    created_at: datetime

    # Pydantic v2 configuration. Replaces the deprecated nested ``class Config``
    # (which still works but emits a deprecation warning). ``from_attributes``
    # allows the model to be populated from attribute-bearing objects.
    # A plain dict is used so no extra import is required.
    model_config = {"from_attributes": True}
|
||||
|
||||
|
||||
# Resolve the "LogResponse" forward reference used by JobStatusResponse.logs
# now that LogResponse has been defined.
JobStatusResponse.model_rebuild()
|
||||
67
backend/app/schemas/result.py
Normal file
67
backend/app/schemas/result.py
Normal file
@ -0,0 +1,67 @@
|
||||
"""爬取结果相关的Pydantic模型"""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Optional, List, Dict, Any
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class FacultyData(BaseModel):
    """Data for a faculty member (advisor)."""

    # Required fields.
    name: str
    url: str

    # Optional details; each defaults to None when absent.
    title: Optional[str] = None
    email: Optional[str] = None
    department: Optional[str] = None
|
||||
|
||||
|
||||
class ProgramData(BaseModel):
    """Data for a program."""

    # Required fields.
    name: str
    url: str

    # Optional details; each defaults to None when absent.
    degree_type: Optional[str] = None
    description: Optional[str] = None
    faculty_page_url: Optional[str] = None

    # Declared independently of ``faculty``; nothing in this model keeps the
    # count in sync with the list length.
    faculty_count: int = 0
    faculty: List[FacultyData] = []
|
||||
|
||||
|
||||
class SchoolData(BaseModel):
    """Data for a school (college)."""

    # Required fields.
    name: str
    url: str

    # None when no description is available.
    description: Optional[str] = None

    # Declared independently of ``programs``; nothing in this model keeps the
    # count in sync with the list length.
    program_count: int = 0
    programs: List[ProgramData] = []
|
||||
|
||||
|
||||
class ResultResponse(BaseModel):
    """Full result response, including the complete result payload."""

    id: int
    university_id: int
    job_id: Optional[int] = None

    # Statistics
    schools_count: int
    programs_count: int
    faculty_count: int

    # Full data
    result_data: Dict[str, Any]

    created_at: datetime

    # Pydantic v2 configuration. Replaces the deprecated nested ``class Config``
    # (which still works but emits a deprecation warning). ``from_attributes``
    # allows the model to be populated from attribute-bearing objects.
    # A plain dict is used so no extra import is required.
    model_config = {"from_attributes": True}
|
||||
|
||||
|
||||
class ResultSummary(BaseModel):
    """Result summary: identifiers, counts and creation time only."""

    id: int
    university_id: int
    schools_count: int
    programs_count: int
    faculty_count: int
    created_at: datetime

    # Pydantic v2 configuration. Replaces the deprecated nested ``class Config``
    # (which still works but emits a deprecation warning). ``from_attributes``
    # allows the model to be populated from attribute-bearing objects.
    # A plain dict is used so no extra import is required.
    model_config = {"from_attributes": True}
|
||||
46
backend/app/schemas/script.py
Normal file
46
backend/app/schemas/script.py
Normal file
@ -0,0 +1,46 @@
|
||||
"""爬虫脚本相关的Pydantic模型"""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Optional, Dict, Any
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class ScriptBase(BaseModel):
    """Base fields shared by the script schemas."""

    # Required fields.
    script_name: str
    script_content: str

    # Optional free-form mapping; None when no config is provided.
    config_content: Optional[Dict[str, Any]] = None
|
||||
|
||||
|
||||
class ScriptCreate(ScriptBase):
    """Request body for creating a script."""

    # Required in addition to the fields inherited from ScriptBase.
    university_id: int
|
||||
|
||||
|
||||
class ScriptResponse(ScriptBase):
    """Response model for a script; extends the ScriptBase fields."""

    id: int
    university_id: int
    version: int
    status: str
    error_message: Optional[str] = None
    created_at: datetime
    updated_at: datetime

    # Pydantic v2 configuration. Replaces the deprecated nested ``class Config``
    # (which still works but emits a deprecation warning). ``from_attributes``
    # allows the model to be populated from attribute-bearing objects.
    # A plain dict is used so no extra import is required.
    model_config = {"from_attributes": True}
|
||||
|
||||
|
||||
class GenerateScriptRequest(BaseModel):
    """Request body for generating a script."""

    # Required; declared as a plain string, so this schema does not validate
    # the URL format.
    university_url: str

    # Optional; None when the caller does not supply a name.
    university_name: Optional[str] = None
|
||||
|
||||
|
||||
class GenerateScriptResponse(BaseModel):
    """Response model for a script-generation request."""

    success: bool
    university_id: int

    # None when no script ID is available.
    script_id: Optional[int] = None

    message: str

    # Values listed by the original author: analyzing, completed, failed.
    # Declared as a plain string, so the schema does not enforce them.
    status: str
|
||||
48
backend/app/schemas/university.py
Normal file
48
backend/app/schemas/university.py
Normal file
@ -0,0 +1,48 @@
|
||||
"""大学相关的Pydantic模型"""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Optional, List
|
||||
from pydantic import BaseModel, HttpUrl
|
||||
|
||||
|
||||
class UniversityBase(BaseModel):
    """Base fields shared by the university schemas."""

    # Required fields. ``url`` is declared as a plain string, so this schema
    # does not validate the URL format.
    name: str
    url: str

    # Optional details; each defaults to None when absent.
    country: Optional[str] = None
    description: Optional[str] = None
|
||||
|
||||
|
||||
class UniversityCreate(UniversityBase):
    """Request body for creating a university.

    Adds no fields of its own; it carries exactly the UniversityBase fields.
    """
|
||||
|
||||
|
||||
class UniversityUpdate(BaseModel):
    """Request body for updating a university.

    Every field is optional and defaults to None.
    """

    name: Optional[str] = None
    url: Optional[str] = None
    country: Optional[str] = None
    description: Optional[str] = None
|
||||
|
||||
|
||||
class UniversityResponse(UniversityBase):
    """Response model for a university; extends the UniversityBase fields."""

    id: int
    status: str
    created_at: datetime
    updated_at: datetime

    # Statistics
    scripts_count: int = 0
    jobs_count: int = 0
    latest_result: Optional[dict] = None

    # Pydantic v2 configuration. Replaces the deprecated nested ``class Config``
    # (which still works but emits a deprecation warning). ``from_attributes``
    # allows the model to be populated from attribute-bearing objects.
    # A plain dict is used so no extra import is required.
    model_config = {"from_attributes": True}
|
||||
|
||||
|
||||
class UniversityListResponse(BaseModel):
    """Response model for a list of universities."""

    # Declared separately from ``items``; this schema does not tie the value
    # to the length of the list.
    total: int

    items: List[UniversityResponse]
|
||||
Reference in New Issue
Block a user