- Add src/university_scraper module with scraper, analyzer, and CLI - Add backend FastAPI service with API endpoints and database models - Add frontend React app with university management pages - Add configs for Harvard, Manchester, and UCL universities - Add artifacts with various scraper implementations - Add Docker compose configuration for deployment - Update .gitignore to exclude generated files 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
57 lines
1.8 KiB
Python
57 lines
1.8 KiB
Python
"""Scrape job models: scrape jobs and their log entries."""
|
|
|
|
from datetime import datetime, timezone

from sqlalchemy import Column, Integer, String, DateTime, Text, ForeignKey
from sqlalchemy.orm import relationship

from ..database import Base
|
|
|
|
|
|
class ScrapeJob(Base):
    """ORM model for the ``scrape_jobs`` table (one row per scrape job).

    A job references a university (required) and optionally a scraper
    script, and owns its log entries and results: both collections use
    the ``all, delete-orphan`` cascade.
    """

    __tablename__ = "scrape_jobs"

    id = Column(Integer, primary_key=True, index=True)
    university_id = Column(Integer, ForeignKey("universities.id"), nullable=False)
    script_id = Column(Integer, ForeignKey("scraper_scripts.id"))

    # One of: pending, running, completed, failed, cancelled
    status = Column(String(50), default="pending")
    # Progress percentage, 0-100
    progress = Column(Integer, default=0)
    # Description of the step currently being executed
    current_step = Column(String(255))

    started_at = Column(DateTime)
    completed_at = Column(DateTime)
    error_message = Column(Text)

    # Naive UTC timestamp. ``datetime.utcnow`` is deprecated since
    # Python 3.12; this callable yields the same naive-UTC value, so the
    # stored data keeps its existing meaning.
    created_at = Column(
        DateTime,
        default=lambda: datetime.now(timezone.utc).replace(tzinfo=None),
    )

    # Relationships
    university = relationship("University", back_populates="jobs")
    script = relationship("ScraperScript", back_populates="jobs")
    logs = relationship(
        "ScrapeLog", back_populates="job", cascade="all, delete-orphan"
    )
    results = relationship(
        "ScrapeResult", back_populates="job", cascade="all, delete-orphan"
    )

    def __repr__(self):
        """Return a short debug representation showing id and status."""
        return f"<ScrapeJob(id={self.id}, status='{self.status}')>"
|
|
|
|
|
|
class ScrapeLog(Base):
    """ORM model for the ``scrape_logs`` table (one row per log entry).

    Every entry belongs to exactly one scrape job via ``job_id``.
    """

    __tablename__ = "scrape_logs"

    id = Column(Integer, primary_key=True, index=True)
    job_id = Column(Integer, ForeignKey("scrape_jobs.id"), nullable=False)

    # One of: debug, info, warning, error
    level = Column(String(20), default="info")
    message = Column(Text, nullable=False)

    # Naive UTC timestamp. ``datetime.utcnow`` is deprecated since
    # Python 3.12; this callable yields the same naive-UTC value, so the
    # stored data keeps its existing meaning.
    created_at = Column(
        DateTime,
        default=lambda: datetime.now(timezone.utc).replace(tzinfo=None),
    )

    # Relationships
    job = relationship("ScrapeJob", back_populates="logs")

    def __repr__(self):
        """Return a short debug representation showing id and level."""
        return f"<ScrapeLog(id={self.id}, level='{self.level}')>"
|