Files
Munich-news/news_crawler/config.py
2025-11-11 16:58:03 +01:00

102 lines
3.4 KiB
Python

"""
Configuration management for news crawler
"""
import os
from dotenv import load_dotenv
from pathlib import Path
# Locate and load the shared .env file (it lives with the backend).
# Candidates are probed in priority order; the first one that exists wins.
_crawler_dir = Path(__file__).parent
env_locations = [
    Path('/app/.env'),  # Docker location
    _crawler_dir.parent / 'backend' / '.env',  # Local location
    _crawler_dir / '.env',  # Current directory
]

env_loaded = False
for env_path in env_locations:
    if not env_path.exists():
        continue
    load_dotenv(dotenv_path=env_path)
    print(f"✓ Loaded configuration from: {env_path}")
    env_loaded = True
    break
else:
    # Loop finished without a break: no candidate existed, so list them all.
    print(f"⚠ Warning: .env file not found in any of these locations:")
    for loc in env_locations:
        print(f" - {loc}")
def _env_int(name, default):
    """Read an integer environment variable, falling back to *default*.

    A missing or blank value yields *default* silently. A value that is not
    a valid integer yields *default* after printing a warning, so a typo or
    an empty assignment in .env does not crash the module at import time.
    """
    raw = os.getenv(name, '').strip()
    if not raw:
        return default
    try:
        return int(raw)
    except ValueError:
        print(f"⚠ Warning: {name}={raw!r} is not an integer, using default {default}")
        return default


class Config:
    """Centralized configuration for news crawler.

    All values are class attributes resolved once, when the class body is
    executed, from the process environment (with the defaults shown below).
    """

    # MongoDB Configuration
    MONGODB_URI = os.getenv('MONGODB_URI', 'mongodb://localhost:27017/')
    DB_NAME = 'munich_news'  # fixed; not overridable through the environment

    # Ollama Configuration
    OLLAMA_BASE_URL = os.getenv('OLLAMA_BASE_URL', 'http://localhost:11434')
    OLLAMA_MODEL = os.getenv('OLLAMA_MODEL', 'phi3:latest')
    OLLAMA_API_KEY = os.getenv('OLLAMA_API_KEY', '')
    # Only the literal value "true" (any letter case) enables Ollama.
    OLLAMA_ENABLED = os.getenv('OLLAMA_ENABLED', 'false').lower() == 'true'
    OLLAMA_TIMEOUT = _env_int('OLLAMA_TIMEOUT', 30)  # seconds

    # Crawler Configuration
    RATE_LIMIT_DELAY = 1  # seconds between requests
    MAX_CONTENT_LENGTH = 50000  # characters
    SUMMARY_MAX_WORDS = 150  # maximum words in AI summary

    @staticmethod
    def _redact_uri(uri):
        """Return *uri* with the password of a ``user:password@`` part masked.

        URIs without a scheme separator, without userinfo, or with a user
        name but no password are returned unchanged.
        """
        if not isinstance(uri, str):
            return uri
        scheme, sep, rest = uri.partition('://')
        if not sep:
            return uri
        # The authority section ends at the first '/'; the userinfo ends at
        # the last '@' inside it (a password may itself contain '@').
        authority, slash, tail = rest.partition('/')
        userinfo, at, hosts = authority.rpartition('@')
        user, colon, _password = userinfo.partition(':')
        if not at or not colon:
            return uri
        return f"{scheme}://{user}:***@{hosts}{slash}{tail}"

    @classmethod
    def print_config(cls):
        """Print current configuration (without sensitive data).

        The password embedded in the MongoDB URI, if any, is masked, and the
        Ollama API key is reported only as present/absent.
        """
        print("\n" + "="*60)
        print("News Crawler Configuration")
        print("="*60)
        print(f"MongoDB URI: {cls._redact_uri(cls.MONGODB_URI)}")
        print(f"Database: {cls.DB_NAME}")
        print("\nOllama Configuration:")
        print(f" Base URL: {cls.OLLAMA_BASE_URL}")
        print(f" Model: {cls.OLLAMA_MODEL}")
        print(f" Enabled: {cls.OLLAMA_ENABLED}")
        print(f" Timeout: {cls.OLLAMA_TIMEOUT}s")
        print(f" Has API Key: {bool(cls.OLLAMA_API_KEY)}")
        print("\nCrawler Settings:")
        print(f" Rate Limit: {cls.RATE_LIMIT_DELAY}s between requests")
        print(f" Max Content: {cls.MAX_CONTENT_LENGTH} chars")
        print(f" Summary Length: {cls.SUMMARY_MAX_WORDS} words")
        print("="*60 + "\n")

    @classmethod
    def validate(cls):
        """Validate configuration and return list of issues.

        Returns:
            A list of human-readable problem descriptions; empty when the
            configuration passes every check. Ollama settings are checked
            only when OLLAMA_ENABLED is true.
        """
        issues = []

        # Check MongoDB
        if not cls.MONGODB_URI:
            issues.append("MONGODB_URI is not set")

        # Check Ollama if enabled
        if cls.OLLAMA_ENABLED:
            if not cls.OLLAMA_BASE_URL:
                issues.append("OLLAMA_BASE_URL is not set but Ollama is enabled")
            if not cls.OLLAMA_MODEL:
                issues.append("OLLAMA_MODEL is not set but Ollama is enabled")
            if cls.OLLAMA_TIMEOUT < 5:
                issues.append(f"OLLAMA_TIMEOUT ({cls.OLLAMA_TIMEOUT}s) is too low, recommend at least 5s")

        return issues
if __name__ == '__main__':
    # Manual smoke test: dump the effective settings, then report on them.
    Config.print_config()

    problems = Config.validate()
    if not problems:
        print("✓ Configuration is valid")
    else:
        print("⚠ Configuration Issues:")
        for problem in problems:
            print(f" - {problem}")