102 lines
3.4 KiB
Python
102 lines
3.4 KiB
Python
"""
Configuration management for news crawler
"""
|
|
import os
|
|
from dotenv import load_dotenv
|
|
from pathlib import Path
|
|
|
|
# Locate and load the .env file that backs this configuration.
# Candidates are probed in priority order (Docker first, then local layouts);
# only the first one that exists is loaded.
env_locations = [
    Path('/app/.env'),  # Docker location
    Path(__file__).parent.parent / 'backend' / '.env',  # Local location
    Path(__file__).parent / '.env',  # Current directory
]

env_loaded = False
for env_path in env_locations:
    if not env_path.exists():
        continue
    load_dotenv(dotenv_path=env_path)
    print(f"✓ Loaded configuration from: {env_path}")
    env_loaded = True
    break
else:
    # Reached only when no candidate existed (the loop never hit `break`),
    # i.e. exactly when env_loaded is still False.
    print("⚠ Warning: .env file not found in any of these locations:")
    for loc in env_locations:
        print(f" - {loc}")
|
|
|
|
|
|
class Config:
    """Centralized configuration for news crawler.

    Every setting is a class attribute evaluated once, when the class body
    runs. Environment-backed values are read with ``os.getenv`` using the
    defaults shown below; the remaining values are fixed constants.
    """

    # MongoDB Configuration
    MONGODB_URI = os.getenv('MONGODB_URI', 'mongodb://localhost:27017/')
    DB_NAME = 'munich_news'

    # Ollama Configuration
    OLLAMA_BASE_URL = os.getenv('OLLAMA_BASE_URL', 'http://localhost:11434')
    OLLAMA_MODEL = os.getenv('OLLAMA_MODEL', 'phi3:latest')
    OLLAMA_API_KEY = os.getenv('OLLAMA_API_KEY', '')
    # Only the literal string "true" (in any letter case) enables Ollama.
    OLLAMA_ENABLED = os.getenv('OLLAMA_ENABLED', 'false').lower() == 'true'
    # int() raises ValueError here if OLLAMA_TIMEOUT is set to a non-integer.
    OLLAMA_TIMEOUT = int(os.getenv('OLLAMA_TIMEOUT', '30'))

    # Crawler Configuration
    RATE_LIMIT_DELAY = 1  # seconds between requests
    MAX_CONTENT_LENGTH = 50000  # characters
    SUMMARY_MAX_WORDS = 150  # maximum words in AI summary

    @staticmethod
    def _mask_uri_password(uri):
        """Return ``uri`` with the password in its userinfo replaced by ``***``.

        URIs without a ``user:password@`` section, and non-string values,
        are returned unchanged.
        """
        if not isinstance(uri, str):
            return uri
        scheme, sep, remainder = uri.partition('://')
        if not sep:
            return uri
        # Set the query aside first so an '@' inside an option value is not
        # mistaken for the userinfo delimiter.
        location, qmark, query = remainder.partition('?')
        # Split on the LAST '@' of everything before the query. This errs on
        # the side of over-masking if a password contains an unescaped '@'
        # or '/'.
        userinfo, at, host_part = location.rpartition('@')
        user, colon, _password = userinfo.partition(':')
        if not at or not colon:
            return uri
        return f"{scheme}://{user}:***@{host_part}{qmark}{query}"

    @classmethod
    def print_config(cls):
        """Print current configuration (without sensitive data).

        The MongoDB URI is printed with any embedded password masked, and
        the Ollama API key is reported only as present/absent.
        """
        print("\n" + "="*60)
        print("News Crawler Configuration")
        print("="*60)
        print(f"MongoDB URI: {cls._mask_uri_password(cls.MONGODB_URI)}")
        print(f"Database: {cls.DB_NAME}")
        print("\nOllama Configuration:")
        print(f" Base URL: {cls.OLLAMA_BASE_URL}")
        print(f" Model: {cls.OLLAMA_MODEL}")
        print(f" Enabled: {cls.OLLAMA_ENABLED}")
        print(f" Timeout: {cls.OLLAMA_TIMEOUT}s")
        print(f" Has API Key: {bool(cls.OLLAMA_API_KEY)}")
        print("\nCrawler Settings:")
        print(f" Rate Limit: {cls.RATE_LIMIT_DELAY}s between requests")
        print(f" Max Content: {cls.MAX_CONTENT_LENGTH} chars")
        print(f" Summary Length: {cls.SUMMARY_MAX_WORDS} words")
        print("="*60 + "\n")

    @classmethod
    def validate(cls):
        """Validate configuration and return list of issues.

        Returns:
            A list of human-readable problem descriptions; empty when no
            problem was found. Ollama settings are checked only when
            ``OLLAMA_ENABLED`` is true.
        """
        issues = []

        # Check MongoDB
        if not cls.MONGODB_URI:
            issues.append("MONGODB_URI is not set")

        # Check Ollama if enabled
        if cls.OLLAMA_ENABLED:
            if not cls.OLLAMA_BASE_URL:
                issues.append("OLLAMA_BASE_URL is not set but Ollama is enabled")
            if not cls.OLLAMA_MODEL:
                issues.append("OLLAMA_MODEL is not set but Ollama is enabled")
            if cls.OLLAMA_TIMEOUT < 5:
                issues.append(f"OLLAMA_TIMEOUT ({cls.OLLAMA_TIMEOUT}s) is too low, recommend at least 5s")

        return issues
|
|
|
|
|
|
if __name__ == '__main__':
    # Manual smoke test: dump the active settings, then report the
    # outcome of validation.
    Config.print_config()

    problems = Config.validate()
    if not problems:
        print("✓ Configuration is valid")
    else:
        print("⚠ Configuration Issues:")
        for problem in problems:
            print(f" - {problem}")
|