update
This commit is contained in:
90
news_crawler/config.py
Normal file
90
news_crawler/config.py
Normal file
@@ -0,0 +1,90 @@
|
||||
"""
|
||||
Configuration management for news crawler
|
||||
"""
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from pathlib import Path
|
||||
|
||||
# Locate backend/.env: two directory levels up from this file, then into
# the 'backend' folder.
backend_dir = Path(__file__).parent.parent / 'backend'
env_path = backend_dir / '.env'

# Report the missing-file case first; otherwise load the variables so the
# os.getenv() lookups further down can see them.
if not env_path.exists():
    print(f"⚠ Warning: .env file not found at {env_path}")
else:
    load_dotenv(dotenv_path=env_path)
    print(f"✓ Loaded configuration from: {env_path}")
|
||||
|
||||
|
||||
def _env_int(name, default):
    """Read environment variable ``name`` as an integer.

    Returns ``default`` when the variable is unset. When the variable is set
    but cannot be parsed as an integer, a warning is printed and ``default``
    is returned, so a malformed setting does not abort the import of this
    module with a ValueError.
    """
    raw = os.getenv(name)
    if raw is None:
        return default
    try:
        return int(raw)
    except ValueError:
        print(f"⚠ Warning: {name}={raw!r} is not a valid integer, using {default}")
        return default


def _mask_uri_credentials(uri):
    """Return ``uri`` with the password of a ``user:password@`` part masked.

    Everything between ``://`` and the last ``@`` is treated as user info, so
    the function errs on the side of hiding too much rather than too little.
    URIs without a scheme separator, without ``@``, or without a password are
    returned unchanged.
    """
    if not uri:
        return uri
    scheme, scheme_sep, rest = uri.partition('://')
    userinfo, at_sign, remainder = rest.rpartition('@')
    if not scheme_sep or not at_sign:
        return uri
    user, colon, _password = userinfo.partition(':')
    if not colon:
        # Username only: there is no password to hide.
        return uri
    return f"{scheme}://{user}:***@{remainder}"


class Config:
    """Centralized configuration for news crawler.

    All settings are class attributes. The environment-backed ones are read
    once, when this class body is executed.
    """

    # MongoDB Configuration
    MONGODB_URI = os.getenv('MONGODB_URI', 'mongodb://localhost:27017/')
    DB_NAME = 'munich_news'

    # Ollama Configuration
    OLLAMA_BASE_URL = os.getenv('OLLAMA_BASE_URL', 'http://localhost:11434')
    OLLAMA_MODEL = os.getenv('OLLAMA_MODEL', 'phi3:latest')
    OLLAMA_API_KEY = os.getenv('OLLAMA_API_KEY', '')
    # Only the literal string "true" (any letter case) enables Ollama.
    OLLAMA_ENABLED = os.getenv('OLLAMA_ENABLED', 'false').lower() == 'true'
    # Malformed values fall back to 30 instead of raising at import time.
    OLLAMA_TIMEOUT = _env_int('OLLAMA_TIMEOUT', 30)

    # Crawler Configuration
    RATE_LIMIT_DELAY = 1  # seconds between requests
    MAX_CONTENT_LENGTH = 50000  # characters
    SUMMARY_MAX_WORDS = 150  # maximum words in AI summary

    @classmethod
    def print_config(cls):
        """Print current configuration (without sensitive data).

        The API key is reported only as present/absent, and any password
        embedded in the MongoDB URI is masked before printing.
        """
        print("\n" + "="*60)
        print("News Crawler Configuration")
        print("="*60)
        print(f"MongoDB URI: {_mask_uri_credentials(cls.MONGODB_URI)}")
        print(f"Database: {cls.DB_NAME}")
        print("\nOllama Configuration:")
        print(f" Base URL: {cls.OLLAMA_BASE_URL}")
        print(f" Model: {cls.OLLAMA_MODEL}")
        print(f" Enabled: {cls.OLLAMA_ENABLED}")
        print(f" Timeout: {cls.OLLAMA_TIMEOUT}s")
        print(f" Has API Key: {bool(cls.OLLAMA_API_KEY)}")
        print("\nCrawler Settings:")
        print(f" Rate Limit: {cls.RATE_LIMIT_DELAY}s between requests")
        print(f" Max Content: {cls.MAX_CONTENT_LENGTH} chars")
        print(f" Summary Length: {cls.SUMMARY_MAX_WORDS} words")
        print("="*60 + "\n")

    @classmethod
    def validate(cls):
        """Validate configuration and return list of issues.

        Returns:
            A list of human-readable problem descriptions; empty when no
            problem was found.
        """
        issues = []

        # Check MongoDB
        if not cls.MONGODB_URI:
            issues.append("MONGODB_URI is not set")

        # Check Ollama if enabled
        if cls.OLLAMA_ENABLED:
            if not cls.OLLAMA_BASE_URL:
                issues.append("OLLAMA_BASE_URL is not set but Ollama is enabled")
            if not cls.OLLAMA_MODEL:
                issues.append("OLLAMA_MODEL is not set but Ollama is enabled")
            if cls.OLLAMA_TIMEOUT < 5:
                issues.append(f"OLLAMA_TIMEOUT ({cls.OLLAMA_TIMEOUT}s) is too low, recommend at least 5s")

        return issues
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test: dump the active settings, then report whatever
    # Config.validate() returns.
    Config.print_config()

    problems = Config.validate()
    if not problems:
        print("✓ Configuration is valid")
    else:
        print("⚠ Configuration Issues:")
        for problem in problems:
            print(f" - {problem}")
|
||||
Reference in New Issue
Block a user