Add university scraper system with backend, frontend, and configs

- Add src/university_scraper module with scraper, analyzer, and CLI
- Add backend FastAPI service with API endpoints and database models
- Add frontend React app with university management pages
- Add configs for Harvard, Manchester, and UCL universities
- Add artifacts with various scraper implementations
- Add Docker compose configuration for deployment
- Update .gitignore to exclude generated files

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Author: yangxiaoyu-crypto
Date: 2025-12-22 15:25:08 +08:00
Parent: 2714c8ad5c
Commit: 426cf4d2cd
75 changed files with 13527 additions and 2 deletions
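
The diff below reproduces one of the new backend files: the ScrapeResult SQLAlchemy model. The commit message also mentions API endpoints built on these database models; purely as an illustration, an endpoint returning the latest result for a university might look like the following sketch (the route path, the `get_db` session dependency, and the response fields are assumptions, not code from this commit):

```python
# Hypothetical sketch only; route path, get_db dependency, and response
# fields are assumptions, not code taken from this commit.
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy.orm import Session

from ..database import get_db                     # assumed session dependency
from ..models.scrape_result import ScrapeResult   # assumed module path

router = APIRouter(prefix="/universities", tags=["results"])


@router.get("/{university_id}/results/latest")
def latest_result(university_id: int, db: Session = Depends(get_db)):
    """Return the most recent scrape result stored for a university."""
    result = (
        db.query(ScrapeResult)
        .filter(ScrapeResult.university_id == university_id)
        .order_by(ScrapeResult.created_at.desc())
        .first()
    )
    if result is None:
        raise HTTPException(status_code=404, detail="No scrape results found")
    return {
        "id": result.id,
        "schools_count": result.schools_count,
        "programs_count": result.programs_count,
        "faculty_count": result.faculty_count,
        "data": result.result_data,
    }
```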

@@ -0,0 +1,34 @@
"""爬取结果模型"""
from datetime import datetime
from sqlalchemy import Column, Integer, DateTime, ForeignKey, JSON
from sqlalchemy.orm import relationship
from ..database import Base
class ScrapeResult(Base):
"""爬取结果表"""
__tablename__ = "scrape_results"
id = Column(Integer, primary_key=True, index=True)
job_id = Column(Integer, ForeignKey("scrape_jobs.id"))
university_id = Column(Integer, ForeignKey("universities.id"), nullable=False)
# JSON数据: 学院 → 项目 → 导师 层级结构
result_data = Column(JSON, nullable=False)
# 统计信息
schools_count = Column(Integer, default=0)
programs_count = Column(Integer, default=0)
faculty_count = Column(Integer, default=0)
created_at = Column(DateTime, default=datetime.utcnow)
# 关联
job = relationship("ScrapeJob", back_populates="results")
university = relationship("University", back_populates="results")
def __repr__(self):
return f"<ScrapeResult(id={self.id}, programs={self.programs_count}, faculty={self.faculty_count})>"