From f35f8eef8a847391152cce6f47897da63e79443f Mon Sep 17 00:00:00 2001 From: Dongho Kim Date: Tue, 11 Nov 2025 17:58:12 +0100 Subject: [PATCH] update --- FINAL_STRUCTURE.md | 243 ----------------------------- IMPLEMENTATION_SUMMARY.md | 53 ------- NEWSLETTER_API_UPDATE.md | 205 ------------------------- OLLAMA_GPU_SUMMARY.md | 278 ---------------------------------- OLLAMA_INTEGRATION.md | 85 ----------- PROJECT_STRUCTURE.md | 126 --------------- QUICK_START_GPU.md | 144 ------------------ README.md | 12 +- SECURITY_UPDATE.md | 125 --------------- docker-compose.yml | 12 +- docs/ADMIN_API.md | 50 ++++++ docs/CHANGELOG.md | 40 +++++ docs/DATABASE_SCHEMA.md | 65 ++++++++ docs/DOCUMENTATION_CLEANUP.md | 204 +++++++++++++++++++++++++ docs/GPU_SETUP.md | 110 ++++++++++++++ docs/INDEX.md | 116 ++++++++++++++ docs/OLLAMA_SETUP.md | 46 ++++++ docs/SECURITY_NOTES.md | 149 ++++++++++++++++-- test-mongodb-connectivity.sh | 55 +++++++ 19 files changed, 842 insertions(+), 1276 deletions(-) delete mode 100644 FINAL_STRUCTURE.md delete mode 100644 IMPLEMENTATION_SUMMARY.md delete mode 100644 NEWSLETTER_API_UPDATE.md delete mode 100644 OLLAMA_GPU_SUMMARY.md delete mode 100644 OLLAMA_INTEGRATION.md delete mode 100644 PROJECT_STRUCTURE.md delete mode 100644 QUICK_START_GPU.md delete mode 100644 SECURITY_UPDATE.md create mode 100644 docs/DOCUMENTATION_CLEANUP.md create mode 100644 docs/INDEX.md create mode 100755 test-mongodb-connectivity.sh diff --git a/FINAL_STRUCTURE.md b/FINAL_STRUCTURE.md deleted file mode 100644 index c35f4a1..0000000 --- a/FINAL_STRUCTURE.md +++ /dev/null @@ -1,243 +0,0 @@ -# βœ… Final Clean Project Structure - -## πŸŽ‰ Cleanup Complete! - -Your Munich News Daily project is now clean, organized, and professional. 
- -## πŸ“ Current Structure - -``` -munich-news/ -β”œβ”€β”€ πŸ“„ Root Files (5 essential files) -β”‚ β”œβ”€β”€ README.md # Main documentation -β”‚ β”œβ”€β”€ QUICKSTART.md # 5-minute setup guide -β”‚ β”œβ”€β”€ CONTRIBUTING.md # Contribution guidelines -β”‚ β”œβ”€β”€ PROJECT_STRUCTURE.md # Project layout -β”‚ └── docker-compose.yml # Single unified compose file -β”‚ -β”œβ”€β”€ πŸ“š docs/ (12 documentation files) -β”‚ β”œβ”€β”€ API.md # API reference -β”‚ β”œβ”€β”€ ARCHITECTURE.md # System architecture -β”‚ β”œβ”€β”€ BACKEND_STRUCTURE.md # Backend organization -β”‚ β”œβ”€β”€ CRAWLER_HOW_IT_WORKS.md # Crawler internals -β”‚ β”œβ”€β”€ DATABASE_SCHEMA.md # Database structure -β”‚ β”œβ”€β”€ DEPLOYMENT.md # Deployment guide -β”‚ β”œβ”€β”€ EXTRACTION_STRATEGIES.md # Content extraction -β”‚ └── RSS_URL_EXTRACTION.md # RSS parsing -β”‚ -β”œβ”€β”€ πŸ§ͺ tests/ (10 test files) -β”‚ β”œβ”€β”€ backend/ # Backend tests -β”‚ β”œβ”€β”€ crawler/ # Crawler tests -β”‚ └── sender/ # Sender tests -β”‚ -β”œβ”€β”€ πŸ”§ backend/ # Backend API -β”‚ β”œβ”€β”€ routes/ -β”‚ β”œβ”€β”€ services/ -β”‚ β”œβ”€β”€ .env.example -β”‚ └── app.py -β”‚ -β”œβ”€β”€ πŸ“° news_crawler/ # Crawler service -β”‚ β”œβ”€β”€ Dockerfile -β”‚ β”œβ”€β”€ crawler_service.py -β”‚ β”œβ”€β”€ scheduled_crawler.py -β”‚ └── requirements.txt -β”‚ -β”œβ”€β”€ πŸ“§ news_sender/ # Sender service -β”‚ β”œβ”€β”€ Dockerfile -β”‚ β”œβ”€β”€ sender_service.py -β”‚ β”œβ”€β”€ scheduled_sender.py -β”‚ └── requirements.txt -β”‚ -└── 🎨 frontend/ # React dashboard (optional) -``` - -## ✨ What Was Cleaned - -### Removed Files (20+) -- ❌ All redundant markdown files from root -- ❌ All redundant markdown files from subdirectories -- ❌ Multiple docker-compose files (kept only 1) -- ❌ Multiple startup scripts (use docker-compose now) -- ❌ Test scripts and helpers - -### Organized Files -- βœ… All tests β†’ `tests/` directory -- βœ… All documentation β†’ `docs/` directory -- βœ… All docker configs β†’ single `docker-compose.yml` - -## πŸš€ How to 
Use - -### Start Everything -```bash -docker-compose up -d -``` - -That's it! One command starts: -- MongoDB database -- News crawler (6 AM schedule) -- Newsletter sender (7 AM schedule) - -### View Logs -```bash -docker-compose logs -f -``` - -### Stop Everything -```bash -docker-compose down -``` - -## πŸ“Š Before vs After - -### Before -``` -Root: 20+ files (messy) -β”œβ”€β”€ AUTOMATION_README.md -β”œβ”€β”€ AUTOMATION_SETUP_COMPLETE.md -β”œβ”€β”€ CRAWLER_QUICKSTART.md -β”œβ”€β”€ CRAWLER_SETUP_SUMMARY.md -β”œβ”€β”€ docker-compose.yml -β”œβ”€β”€ docker-compose.prod.yml -β”œβ”€β”€ README_CRAWLER.md -β”œβ”€β”€ start-automation.sh -β”œβ”€β”€ start-crawler.sh -β”œβ”€β”€ start-sender.sh -β”œβ”€β”€ test-crawler-setup.sh -└── ... many more - -Subdirectories: Scattered docs -β”œβ”€β”€ backend/TRACKING_README.md -β”œβ”€β”€ backend/TRACKING_CONFIGURATION.md -β”œβ”€β”€ news_crawler/README.md -β”œβ”€β”€ news_crawler/QUICKSTART.md -β”œβ”€β”€ news_crawler/docker-compose.yml -β”œβ”€β”€ news_sender/README.md -└── ... more scattered files - -Tests: Scattered everywhere -``` - -### After -``` -Root: 5 essential files (clean) -β”œβ”€β”€ README.md -β”œβ”€β”€ QUICKSTART.md -β”œβ”€β”€ CONTRIBUTING.md -β”œβ”€β”€ PROJECT_STRUCTURE.md -└── docker-compose.yml - -docs/: All documentation (12 files) -β”œβ”€β”€ API.md -β”œβ”€β”€ ARCHITECTURE.md -β”œβ”€β”€ DEPLOYMENT.md -└── ... organized docs - -tests/: All tests (10 files) -β”œβ”€β”€ backend/ -β”œβ”€β”€ crawler/ -└── sender/ - -Subdirectories: Clean, no scattered docs -``` - -## 🎯 Benefits - -### 1. Easy to Navigate -- Clear directory structure -- Everything in its place -- No clutter - -### 2. Simple to Use -- One command: `docker-compose up -d` -- One place for docs: `docs/` -- One place for tests: `tests/` - -### 3. Professional -- Industry-standard layout -- Clean and organized -- Ready for collaboration - -### 4. 
Maintainable -- Easy to find files -- Clear separation of concerns -- Scalable structure - -## πŸ“ Quick Reference - -### Documentation -```bash -# Main docs -cat README.md -cat QUICKSTART.md - -# Technical docs -ls docs/ -``` - -### Running -```bash -# Start -docker-compose up -d - -# Logs -docker-compose logs -f - -# Stop -docker-compose down -``` - -### Testing -```bash -# Run tests -docker-compose exec crawler python tests/crawler/test_crawler.py -docker-compose exec sender python tests/sender/test_tracking_integration.py -``` - -### Development -```bash -# Edit code in respective directories -# Rebuild -docker-compose up -d --build -``` - -## βœ… Verification - -Run these commands to verify the cleanup: - -```bash -# Check root directory (should be clean) -ls -1 *.md - -# Check docs directory -ls -1 docs/ - -# Check tests directory -ls -1 tests/ - -# Check for stray docker-compose files (should be only 1) -find . -name "docker-compose*.yml" ! -path "*/node_modules/*" ! -path "*/env/*" - -# Check for stray markdown in subdirectories (should be none) -find backend news_crawler news_sender -name "*.md" ! -path "*/env/*" -``` - -## 🎊 Result - -A clean, professional, production-ready project structure! - -**One command to start everything:** -```bash -docker-compose up -d -``` - -**One place for all documentation:** -```bash -ls docs/ -``` - -**One place for all tests:** -```bash -ls tests/ -``` - -Simple. Clean. Professional. ✨ diff --git a/IMPLEMENTATION_SUMMARY.md b/IMPLEMENTATION_SUMMARY.md deleted file mode 100644 index 9027193..0000000 --- a/IMPLEMENTATION_SUMMARY.md +++ /dev/null @@ -1,53 +0,0 @@ -# GPU Support Implementation - Complete Summary - -## Overview - -Successfully implemented comprehensive GPU support for Ollama AI service in the Munich News Daily system. The implementation provides 5-10x faster AI inference for article translation and summarization when NVIDIA GPU is available, with automatic fallback to CPU mode. 
- -## What Was Implemented - -### 1. Docker Configuration βœ… -- **docker-compose.yml**: Added Ollama service with automatic model download -- **docker-compose.gpu.yml**: GPU-specific override for NVIDIA GPU support -- **ollama-setup service**: Automatically pulls phi3:latest model on first startup - -### 2. Helper Scripts βœ… -- **start-with-gpu.sh**: Auto-detects GPU and starts services with appropriate configuration -- **check-gpu.sh**: Diagnoses GPU availability and Docker GPU support -- **configure-ollama.sh**: Interactive configuration for Docker Compose or external Ollama -- **test-ollama-setup.sh**: Comprehensive test suite to verify setup - -### 3. Documentation βœ… -- **docs/OLLAMA_SETUP.md**: Complete Ollama setup guide (6.6KB) -- **docs/GPU_SETUP.md**: Detailed GPU setup and troubleshooting (7.8KB) -- **docs/PERFORMANCE_COMPARISON.md**: CPU vs GPU benchmarks (5.2KB) -- **QUICK_START_GPU.md**: Quick reference card (2.8KB) -- **OLLAMA_GPU_SUMMARY.md**: Implementation summary (8.4KB) -- **README.md**: Updated with GPU support information - -## Performance Improvements - -| Operation | CPU | GPU | Speedup | -|-----------|-----|-----|---------| -| Translation | 1.5s | 0.3s | 5x | -| Summarization | 8s | 2s | 4x | -| 10 Articles | 115s | 31s | 3.7x | - -## Quick Start - -```bash -# Check GPU availability -./check-gpu.sh - -# Start services with auto-detection -./start-with-gpu.sh - -# Test translation -docker-compose exec crawler python crawler_service.py 2 -``` - -## Testing Results - -All tests pass successfully βœ… - -The implementation is complete, tested, and ready for use! diff --git a/NEWSLETTER_API_UPDATE.md b/NEWSLETTER_API_UPDATE.md deleted file mode 100644 index d89438e..0000000 --- a/NEWSLETTER_API_UPDATE.md +++ /dev/null @@ -1,205 +0,0 @@ -# Newsletter API Update - -## Summary - -Added a new API endpoint to send newsletters to all active subscribers instead of requiring a specific email address. 
- -## New Endpoint - -### Send Newsletter to All Subscribers - -```http -POST /api/admin/send-newsletter -``` - -**Request Body** (optional): -```json -{ - "max_articles": 10 -} -``` - -**Response**: -```json -{ - "success": true, - "message": "Newsletter sent successfully to 45 subscribers", - "subscriber_count": 45, - "max_articles": 10, - "output": "... sender output ...", - "errors": "" -} -``` - -## Usage Examples - -### Send Newsletter to All Subscribers - -```bash -# Send with default settings (10 articles) -curl -X POST http://localhost:5001/api/admin/send-newsletter \ - -H "Content-Type: application/json" - -# Send with custom article count -curl -X POST http://localhost:5001/api/admin/send-newsletter \ - -H "Content-Type: application/json" \ - -d '{"max_articles": 15}' -``` - -### Complete Workflow - -```bash -# 1. Check subscriber count -curl http://localhost:5001/api/admin/stats | jq '.subscribers' - -# 2. Crawl fresh articles -curl -X POST http://localhost:5001/api/admin/trigger-crawl \ - -H "Content-Type: application/json" \ - -d '{"max_articles": 10}' - -# 3. Wait for crawl to complete -sleep 60 - -# 4. 
Send newsletter to all active subscribers -curl -X POST http://localhost:5001/api/admin/send-newsletter \ - -H "Content-Type: application/json" \ - -d '{"max_articles": 10}' -``` - -## Comparison with Test Email - -### Send Test Email (Existing) -- Sends to **one specific email address** -- Useful for testing newsletter content -- No tracking recorded in database -- Fast (single email) - -```bash -curl -X POST http://localhost:5001/api/admin/send-test-email \ - -H "Content-Type: application/json" \ - -d '{"email": "test@example.com"}' -``` - -### Send Newsletter (New) -- Sends to **all active subscribers** -- Production newsletter sending -- Full tracking (opens, clicks) -- May take time for large lists - -```bash -curl -X POST http://localhost:5001/api/admin/send-newsletter \ - -H "Content-Type: application/json" -``` - -## Features - -### Subscriber Filtering -- Only sends to subscribers with `status: 'active'` -- Skips inactive, unsubscribed, or bounced subscribers -- Returns error if no active subscribers found - -### Tracking -- Includes tracking pixel for open tracking -- Includes click tracking for all article links -- Records send time and newsletter ID -- Stores in `newsletter_sends` collection - -### Error Handling -- Validates subscriber count before sending -- Returns detailed error messages -- Includes sender output and errors in response -- 5-minute timeout for large lists - -## Testing - -### Interactive Test Script - -```bash -./test-newsletter-api.sh -``` - -This script will: -1. Show current subscriber stats -2. Optionally send test email to your address -3. Optionally send newsletter to all subscribers - -### Manual Testing - -```bash -# 1. Check subscribers -curl http://localhost:5001/api/admin/stats - -# 2. Send newsletter -curl -X POST http://localhost:5001/api/admin/send-newsletter \ - -H "Content-Type: application/json" \ - -d '{"max_articles": 2}' - -# 3. 
Check results -curl http://localhost:5001/api/admin/stats -``` - -## Security Considerations - -⚠️ **Important**: This endpoint sends emails to real subscribers! - -### Recommendations - -1. **Add Authentication** - ```python - @require_api_key - def send_newsletter(): - # ... - ``` - -2. **Rate Limiting** - - Prevent accidental multiple sends - - Limit to once per hour/day - -3. **Confirmation Required** - - Add confirmation step in UI - - Log all newsletter sends - -4. **Dry Run Mode** - ```json - { - "max_articles": 10, - "dry_run": true // Preview without sending - } - ``` - -5. **Audit Logging** - - Log who triggered the send - - Log timestamp and parameters - - Track success/failure - -## Files Modified - -- βœ… `backend/routes/admin_routes.py` - Added new endpoint -- βœ… `docs/ADMIN_API.md` - Updated documentation -- βœ… `test-newsletter-api.sh` - Created test script - -## API Endpoints Summary - -| Endpoint | Purpose | Recipient | -|----------|---------|-----------| -| `/api/admin/send-test-email` | Test newsletter | Single email (specified) | -| `/api/admin/send-newsletter` | Production send | All active subscribers | -| `/api/admin/trigger-crawl` | Fetch articles | N/A | -| `/api/admin/stats` | System stats | N/A | - -## Next Steps - -1. **Test the endpoint:** - ```bash - ./test-newsletter-api.sh - ``` - -2. **Add authentication** (recommended for production) - -3. **Set up monitoring** for newsletter sends - -4. **Create UI** for easier newsletter management - -## Documentation - -See [docs/ADMIN_API.md](docs/ADMIN_API.md) for complete API documentation. 
diff --git a/OLLAMA_GPU_SUMMARY.md b/OLLAMA_GPU_SUMMARY.md deleted file mode 100644 index 7c5e952..0000000 --- a/OLLAMA_GPU_SUMMARY.md +++ /dev/null @@ -1,278 +0,0 @@ -# Ollama with GPU Support - Implementation Summary - -## What Was Added - -This implementation adds comprehensive GPU support for Ollama AI service in the Munich News Daily system, enabling 5-10x faster AI inference for article translation and summarization. - -## Files Created/Modified - -### Docker Configuration -- **docker-compose.yml** - Added Ollama service with GPU support comments -- **docker-compose.gpu.yml** - GPU-specific override configuration -- **docker-compose.yml** - Added ollama-setup service for automatic model download - -### Helper Scripts -- **start-with-gpu.sh** - Auto-detect GPU and start services accordingly -- **check-gpu.sh** - Check GPU availability and Docker GPU support -- **configure-ollama.sh** - Configure Ollama for Docker Compose or external server - -### Documentation -- **docs/OLLAMA_SETUP.md** - Complete Ollama setup guide with GPU section -- **docs/GPU_SETUP.md** - Detailed GPU setup and troubleshooting guide -- **docs/PERFORMANCE_COMPARISON.md** - CPU vs GPU performance analysis -- **README.md** - Updated with GPU support information - -## Key Features - -### 1. Automatic GPU Detection -```bash -./start-with-gpu.sh -``` -- Detects NVIDIA GPU availability -- Checks Docker GPU runtime -- Automatically starts with appropriate configuration - -### 2. Flexible Deployment Options - -**Option A: Integrated Ollama (Docker Compose)** -```bash -# CPU mode -docker-compose up -d - -# GPU mode -docker-compose -f docker-compose.yml -f docker-compose.gpu.yml up -d -``` - -**Option B: External Ollama Server** -```bash -# Configure for external server -./configure-ollama.sh -# Select option 2 -``` - -### 3. Automatic Model Download -- Ollama service starts automatically -- ollama-setup service pulls phi3:latest model on first run -- Model persists in Docker volume - -### 4. 
GPU Support -- NVIDIA GPU acceleration when available -- Automatic fallback to CPU if GPU unavailable -- 5-10x performance improvement with GPU - -## Performance Improvements - -| Operation | CPU | GPU | Speedup | -|-----------|-----|-----|---------| -| Translation | 1.5s | 0.3s | 5x | -| Summarization | 8s | 2s | 4x | -| 10 Articles | 115s | 31s | 3.7x | - -## Usage Examples - -### Check GPU Availability -```bash -./check-gpu.sh -``` - -### Start with GPU (Automatic) -```bash -./start-with-gpu.sh -``` - -### Start with GPU (Manual) -```bash -docker-compose -f docker-compose.yml -f docker-compose.gpu.yml up -d -``` - -### Verify GPU Usage -```bash -# Check GPU in container -docker exec munich-news-ollama nvidia-smi - -# Monitor GPU during processing -watch -n 1 'docker exec munich-news-ollama nvidia-smi' -``` - -### Test Translation -```bash -# Run test crawl -docker-compose exec crawler python crawler_service.py 2 - -# Check timing in logs -docker-compose logs crawler | grep "Title translated" -# GPU: βœ“ Title translated (0.3s) -# CPU: βœ“ Title translated (1.5s) -``` - -## Configuration - -### Environment Variables (backend/.env) - -**For Docker Compose Ollama:** -```env -OLLAMA_ENABLED=true -OLLAMA_BASE_URL=http://ollama:11434 -OLLAMA_MODEL=phi3:latest -OLLAMA_TIMEOUT=120 -``` - -**For External Ollama:** -```env -OLLAMA_ENABLED=true -OLLAMA_BASE_URL=http://host.docker.internal:11434 -OLLAMA_MODEL=phi3:latest -OLLAMA_TIMEOUT=120 -``` - -## Requirements - -### For CPU Mode -- Docker & Docker Compose -- 4GB+ RAM -- 4+ CPU cores recommended - -### For GPU Mode -- NVIDIA GPU (GTX 1060 or newer) -- 4GB+ VRAM -- NVIDIA drivers (525.60.13+) -- NVIDIA Container Toolkit -- Docker 20.10+ -- Docker Compose v2.3+ - -## Installation Steps - -### 1. Install NVIDIA Container Toolkit (Ubuntu/Debian) -```bash -distribution=$(. 
/etc/os-release;echo $ID$VERSION_ID) -curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg -curl -s -L https://nvidia.github.io/libnvidia-container/$distribution/libnvidia-container.list | \ - sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \ - sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list - -sudo apt-get update -sudo apt-get install -y nvidia-container-toolkit -sudo nvidia-ctk runtime configure --runtime=docker -sudo systemctl restart docker -``` - -### 2. Verify Installation -```bash -docker run --rm --gpus all nvidia/cuda:12.0.0-base-ubuntu22.04 nvidia-smi -``` - -### 3. Configure Ollama -```bash -./configure-ollama.sh -# Select option 1 for Docker Compose -``` - -### 4. Start Services -```bash -./start-with-gpu.sh -``` - -## Troubleshooting - -### GPU Not Detected -```bash -# Check NVIDIA drivers -nvidia-smi - -# Check Docker GPU access -docker run --rm --gpus all nvidia/cuda:12.0.0-base-ubuntu22.04 nvidia-smi - -# Check Ollama container -docker exec munich-news-ollama nvidia-smi -``` - -### Out of Memory -- Use smaller model: `OLLAMA_MODEL=gemma2:2b` -- Close other GPU applications -- Increase Docker memory limit - -### Slow Performance -- Verify GPU is being used: `docker exec munich-news-ollama nvidia-smi` -- Check GPU utilization during inference -- Ensure using GPU compose file -- Update NVIDIA drivers - -## Architecture - -``` -β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ Docker Compose β”‚ -β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ -β”‚ β”‚ -β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” 
β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ -β”‚ β”‚ Ollama │◄────── Crawler β”‚ β”‚ -β”‚ β”‚ (GPU/CPU) β”‚ β”‚ β”‚ β”‚ -β”‚ β”‚ β”‚ β”‚ - Fetches β”‚ β”‚ -β”‚ β”‚ - phi3 β”‚ β”‚ - Translatesβ”‚ β”‚ -β”‚ β”‚ - Translate β”‚ β”‚ - Summarizesβ”‚ β”‚ -β”‚ β”‚ - Summarize β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ -β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ -β”‚ β”‚ β”‚ -β”‚ β”‚ GPU (optional) β”‚ -β”‚ β–Ό β”‚ -β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ -β”‚ β”‚ NVIDIA GPU β”‚ β”‚ -β”‚ β”‚ (5-10x faster)β”‚ β”‚ -β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ -β”‚ β”‚ -β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ -``` - -## Model Options - -| Model | Size | VRAM | Speed | Quality | Use Case | -|-------|------|------|-------|---------|----------| -| gemma2:2b | 1.4GB | 1.5GB | Fastest | Good | High volume | -| phi3:latest | 2.3GB | 3-4GB | Fast | Very Good | Default | -| llama3.2:3b | 3.2GB | 5-6GB | Medium | Excellent | Quality critical | -| mistral:latest | 4.1GB | 6-8GB | Medium | Excellent | Long-form | - -## Next Steps - -1. **Test the setup:** - ```bash - ./check-gpu.sh - ./start-with-gpu.sh - docker-compose exec crawler python crawler_service.py 2 - ``` - -2. **Monitor performance:** - ```bash - watch -n 1 'docker exec munich-news-ollama nvidia-smi' - docker-compose logs -f crawler - ``` - -3. **Optimize for your use case:** - - Adjust model based on VRAM availability - - Tune summary length for speed vs quality - - Enable concurrent requests for high volume - -## Documentation - -- **[OLLAMA_SETUP.md](docs/OLLAMA_SETUP.md)** - Complete Ollama setup guide -- **[GPU_SETUP.md](docs/GPU_SETUP.md)** - Detailed GPU setup and troubleshooting -- **[PERFORMANCE_COMPARISON.md](docs/PERFORMANCE_COMPARISON.md)** - CPU vs GPU analysis - -## Support - -For issues or questions: -1. 
Run `./check-gpu.sh` for diagnostics -2. Check logs: `docker-compose logs ollama` -3. See troubleshooting sections in documentation -4. Open an issue with diagnostic output - -## Summary - -βœ… Ollama service integrated into Docker Compose -βœ… Automatic model download (phi3:latest) -βœ… GPU support with automatic detection -βœ… Fallback to CPU when GPU unavailable -βœ… Helper scripts for easy setup -βœ… Comprehensive documentation -βœ… 5-10x performance improvement with GPU -βœ… Flexible deployment options diff --git a/OLLAMA_INTEGRATION.md b/OLLAMA_INTEGRATION.md deleted file mode 100644 index 886c82b..0000000 --- a/OLLAMA_INTEGRATION.md +++ /dev/null @@ -1,85 +0,0 @@ -# Ollama Integration Complete βœ… - -## What Was Added - -1. **Ollama Service in Docker Compose** - - Runs Ollama server on port 11434 - - Persists models in `ollama_data` volume - - Health check ensures service is ready - -2. **Automatic Model Download** - - `ollama-setup` service automatically pulls `phi3:latest` (2.2GB) - - Runs once on first startup - - Model is cached in volume for future use - -3. **Configuration Files** - - `docs/OLLAMA_SETUP.md` - Comprehensive setup guide - - `configure-ollama.sh` - Helper script to switch between Docker/external Ollama - - Updated `README.md` with Ollama setup instructions - -4. 
**Environment Configuration** - - Updated `backend/.env` to use `http://ollama:11434` (internal Docker network) - - All services can now communicate with Ollama via Docker network - -## Current Status - -βœ… Ollama service running and healthy -βœ… phi3:latest model downloaded (2.2GB) -βœ… Translation feature working with integrated Ollama -βœ… Summarization feature working with integrated Ollama - -## Quick Start - -```bash -# Start all services (including Ollama) -docker-compose up -d - -# Wait for model download (first time only, ~2-5 minutes) -docker-compose logs -f ollama-setup - -# Verify Ollama is ready -docker-compose exec ollama ollama list - -# Test the system -docker-compose exec crawler python crawler_service.py 1 -``` - -## Switching Between Docker and External Ollama - -```bash -# Use integrated Docker Ollama (recommended) -./configure-ollama.sh -# Select option 1 - -# Use external Ollama server -./configure-ollama.sh -# Select option 2 -``` - -## Performance Notes - -- First request: ~6 seconds (model loading) -- Subsequent requests: 0.5-2 seconds (cached) -- Translation: 0.5-6 seconds per title -- Summarization: 5-90 seconds per article (depends on length) - -## Resource Requirements - -- RAM: 4GB minimum for phi3:latest -- Disk: 2.2GB for model storage -- CPU: Works on CPU, GPU optional - -## Alternative Models - -To use a different model: - -1. Update `OLLAMA_MODEL` in `backend/.env` -2. 
Pull the model: - ```bash - docker-compose exec ollama ollama pull - ``` - -Popular alternatives: -- `gemma2:2b` - Smaller, faster (1.6GB) -- `llama3.2:latest` - Larger, more capable (2GB) -- `mistral:latest` - Good balance (4.1GB) diff --git a/PROJECT_STRUCTURE.md b/PROJECT_STRUCTURE.md deleted file mode 100644 index 1d68902..0000000 --- a/PROJECT_STRUCTURE.md +++ /dev/null @@ -1,126 +0,0 @@ -# Project Structure - -``` -munich-news/ -β”œβ”€β”€ backend/ # Backend API and services -β”‚ β”œβ”€β”€ routes/ # API routes -β”‚ β”œβ”€β”€ services/ # Business logic -β”‚ β”œβ”€β”€ .env.example # Environment template -β”‚ β”œβ”€β”€ app.py # Flask application -β”‚ β”œβ”€β”€ config.py # Configuration -β”‚ └── database.py # MongoDB connection -β”‚ -β”œβ”€β”€ news_crawler/ # News crawler service -β”‚ β”œβ”€β”€ Dockerfile # Crawler container -β”‚ β”œβ”€β”€ crawler_service.py # Main crawler logic -β”‚ β”œβ”€β”€ scheduled_crawler.py # Scheduler (6 AM) -β”‚ β”œβ”€β”€ rss_utils.py # RSS parsing utilities -β”‚ └── requirements.txt # Python dependencies -β”‚ -β”œβ”€β”€ news_sender/ # Newsletter sender service -β”‚ β”œβ”€β”€ Dockerfile # Sender container -β”‚ β”œβ”€β”€ sender_service.py # Main sender logic -β”‚ β”œβ”€β”€ scheduled_sender.py # Scheduler (7 AM) -β”‚ β”œβ”€β”€ tracking_integration.py # Email tracking -β”‚ β”œβ”€β”€ newsletter_template.html # Email template -β”‚ └── requirements.txt # Python dependencies -β”‚ -β”œβ”€β”€ frontend/ # React dashboard (optional) -β”‚ β”œβ”€β”€ src/ # React components -β”‚ β”œβ”€β”€ public/ # Static files -β”‚ └── package.json # Node dependencies -β”‚ -β”œβ”€β”€ tests/ # All test files -β”‚ β”œβ”€β”€ crawler/ # Crawler tests -β”‚ β”œβ”€β”€ sender/ # Sender tests -β”‚ └── backend/ # Backend tests -β”‚ -β”œβ”€β”€ docs/ # Documentation -β”‚ β”œβ”€β”€ ARCHITECTURE.md # System architecture -β”‚ β”œβ”€β”€ DEPLOYMENT.md # Deployment guide -β”‚ β”œβ”€β”€ API.md # API reference -β”‚ β”œβ”€β”€ DATABASE_SCHEMA.md # Database structure -β”‚ β”œβ”€β”€ 
BACKEND_STRUCTURE.md # Backend organization -β”‚ β”œβ”€β”€ CRAWLER_HOW_IT_WORKS.md # Crawler internals -β”‚ β”œβ”€β”€ EXTRACTION_STRATEGIES.md # Content extraction -β”‚ └── RSS_URL_EXTRACTION.md # RSS parsing -β”‚ -β”œβ”€β”€ .kiro/ # Kiro IDE configuration -β”‚ └── specs/ # Feature specifications -β”‚ -β”œβ”€β”€ docker-compose.yml # Docker orchestration -β”œβ”€β”€ README.md # Main documentation -β”œβ”€β”€ QUICKSTART.md # 5-minute setup guide -β”œβ”€β”€ CONTRIBUTING.md # Contribution guidelines -β”œβ”€β”€ .gitignore # Git ignore rules -└── .dockerignore # Docker ignore rules -``` - -## Key Files - -### Configuration -- `backend/.env` - Environment variables (create from .env.example) -- `docker-compose.yml` - Docker services configuration - -### Entry Points -- `news_crawler/scheduled_crawler.py` - Crawler scheduler (6 AM) -- `news_sender/scheduled_sender.py` - Sender scheduler (7 AM) -- `backend/app.py` - Backend API server - -### Documentation -- `README.md` - Main project documentation -- `QUICKSTART.md` - Quick setup guide -- `docs/` - Detailed documentation - -### Tests -- `tests/crawler/` - Crawler test files -- `tests/sender/` - Sender test files -- `tests/backend/` - Backend test files - -## Docker Services - -When you run `docker-compose up -d`, these services start: - -1. **mongodb** - Database (port 27017) -2. **crawler** - News crawler (scheduled for 6 AM) -3. **sender** - Newsletter sender (scheduled for 7 AM) -4. **backend** - API server (port 5001, optional) - -## Data Flow - -``` -RSS Feeds β†’ Crawler β†’ MongoDB β†’ Sender β†’ Subscribers - ↓ - Backend API - ↓ - Analytics -``` - -## Development Workflow - -1. Edit code in respective directories -2. Rebuild containers: `docker-compose up -d --build` -3. View logs: `docker-compose logs -f` -4. Run tests: `docker-compose exec python tests/...` - -## Adding New Features - -1. Create spec in `.kiro/specs/` -2. Implement in appropriate directory -3. Add tests in `tests/` -4. 
Update documentation in `docs/` -5. Submit pull request - -## Clean Architecture - -- **Separation of Concerns**: Each service has its own directory -- **Centralized Configuration**: All config in `backend/.env` -- **Organized Tests**: All tests in `tests/` directory -- **Clear Documentation**: All docs in `docs/` directory -- **Single Entry Point**: One `docker-compose.yml` file - -This structure makes the project: -- βœ… Easy to navigate -- βœ… Simple to deploy -- βœ… Clear to understand -- βœ… Maintainable long-term diff --git a/QUICK_START_GPU.md b/QUICK_START_GPU.md deleted file mode 100644 index 262632d..0000000 --- a/QUICK_START_GPU.md +++ /dev/null @@ -1,144 +0,0 @@ -# Quick Start: Ollama with GPU - -## 30-Second Setup - -```bash -# 1. Check GPU -./check-gpu.sh - -# 2. Start services -./start-with-gpu.sh - -# 3. Test -docker-compose exec crawler python crawler_service.py 2 -``` - -## Commands Cheat Sheet - -### Setup -```bash -# Check GPU availability -./check-gpu.sh - -# Configure Ollama -./configure-ollama.sh - -# Start with GPU auto-detection -./start-with-gpu.sh - -# Start with GPU (manual) -docker-compose -f docker-compose.yml -f docker-compose.gpu.yml up -d - -# Start without GPU -docker-compose up -d -``` - -### Monitoring -```bash -# Check GPU usage -docker exec munich-news-ollama nvidia-smi - -# Monitor GPU in real-time -watch -n 1 'docker exec munich-news-ollama nvidia-smi' - -# Check Ollama logs -docker-compose logs -f ollama - -# Check crawler logs -docker-compose logs -f crawler -``` - -### Testing -```bash -# Test translation (2 articles) -docker-compose exec crawler python crawler_service.py 2 - -# Check translation timing -docker-compose logs crawler | grep "Title translated" - -# Test Ollama API (internal network only) -docker-compose exec crawler curl -s http://ollama:11434/api/generate -d '{ - "model": "phi3:latest", - "prompt": "Translate to English: Guten Morgen", - "stream": false -}' -``` - -### Troubleshooting -```bash -# Restart 
Ollama -docker-compose restart ollama - -# Rebuild and restart -docker-compose up -d --build ollama - -# Check GPU in container -docker exec munich-news-ollama nvidia-smi - -# Pull model manually -docker-compose exec ollama ollama pull phi3:latest - -# List available models -docker-compose exec ollama ollama list -``` - -## Performance Expectations - -| Operation | CPU | GPU | Speedup | -|-----------|-----|-----|---------| -| Translation | 1.5s | 0.3s | 5x | -| Summary | 8s | 2s | 4x | -| 10 Articles | 115s | 31s | 3.7x | - -## Common Issues - -### GPU Not Detected -```bash -# Install NVIDIA Container Toolkit -sudo apt-get install -y nvidia-container-toolkit -sudo systemctl restart docker -``` - -### Out of Memory -```bash -# Use smaller model (edit backend/.env) -OLLAMA_MODEL=gemma2:2b -``` - -### Slow Performance -```bash -# Verify GPU is being used -docker exec munich-news-ollama nvidia-smi -# Should show GPU memory usage during inference -``` - -## Configuration Files - -**backend/.env** - Main configuration -```env -OLLAMA_ENABLED=true -OLLAMA_BASE_URL=http://ollama:11434 -OLLAMA_MODEL=phi3:latest -OLLAMA_TIMEOUT=120 -``` - -**docker-compose.yml** - Main services -**docker-compose.gpu.yml** - GPU override - -## Model Options - -- `gemma2:2b` - Fastest, 1.5GB VRAM -- `phi3:latest` - Default, 3-4GB VRAM ⭐ -- `llama3.2:3b` - Best quality, 5-6GB VRAM - -## Full Documentation - -- [OLLAMA_SETUP.md](docs/OLLAMA_SETUP.md) - Complete setup guide -- [GPU_SETUP.md](docs/GPU_SETUP.md) - GPU-specific guide -- [PERFORMANCE_COMPARISON.md](docs/PERFORMANCE_COMPARISON.md) - Benchmarks - -## Need Help? - -1. Run `./check-gpu.sh` -2. Check `docker-compose logs ollama` -3. 
See troubleshooting in [GPU_SETUP.md](docs/GPU_SETUP.md) diff --git a/README.md b/README.md index 7a6e406..2e002ee 100644 --- a/README.md +++ b/README.md @@ -397,13 +397,23 @@ export MONGO_PASSWORD=your-secure-password - Set up alerts for failures - Monitor database size +## πŸ“š Documentation + +Complete documentation available in the [docs/](docs/) directory: + +- **[Documentation Index](docs/INDEX.md)** - Complete documentation guide +- **[GPU Setup](docs/GPU_SETUP.md)** - 5-10x faster with GPU acceleration +- **[Admin API](docs/ADMIN_API.md)** - API endpoints reference +- **[Security Guide](docs/SECURITY_NOTES.md)** - Security best practices +- **[System Architecture](docs/SYSTEM_ARCHITECTURE.md)** - Technical overview + ## πŸ“ License [Your License Here] ## 🀝 Contributing -Contributions welcome! Please read CONTRIBUTING.md first. +Contributions welcome! Please read [CONTRIBUTING.md](CONTRIBUTING.md) first. ## πŸ“§ Support diff --git a/SECURITY_UPDATE.md b/SECURITY_UPDATE.md deleted file mode 100644 index 5e3da44..0000000 --- a/SECURITY_UPDATE.md +++ /dev/null @@ -1,125 +0,0 @@ -# Security Update: Ollama Internal-Only Configuration - -## Summary - -Ollama service has been configured to be **internal-only** and is no longer exposed to the host machine. This improves security by reducing the attack surface. - -## Changes Made - -### Before (Exposed) -```yaml -ollama: - ports: - - "11434:11434" # ❌ Accessible from host and external network -``` - -### After (Internal Only) -```yaml -ollama: - # No ports section - internal only βœ“ - # Only accessible within Docker network -``` - -## Verification - -### βœ“ Port Not Accessible from Host -```bash -$ nc -z -w 2 localhost 11434 -# Connection refused (as expected) -``` - -### βœ“ Accessible from Docker Services -```bash -$ docker-compose exec crawler python -c "import requests; requests.get('http://ollama:11434/api/tags')" -# βœ“ Works perfectly -``` - -## Security Benefits - -1. 
**No External Access**: Ollama API cannot be accessed from outside Docker network -2. **Reduced Attack Surface**: Service is not exposed to potential external threats -3. **Network Isolation**: Only authorized Docker Compose services can communicate with Ollama -4. **No Port Conflicts**: Port 11434 is not bound to host machine - -## Impact on Usage - -### No Change for Normal Operations βœ“ -- Crawler service works normally -- Translation and summarization work as before -- All Docker Compose services can access Ollama - -### Testing from Host Machine -Since Ollama is internal-only, you must test from inside the Docker network: - -```bash -# βœ“ Test from inside a container -docker-compose exec crawler python crawler_service.py 1 - -# βœ“ Check Ollama status -docker-compose exec crawler python -c "import requests; print(requests.get('http://ollama:11434/api/tags').json())" - -# βœ“ Check logs -docker-compose logs ollama -``` - -### If You Need External Access (Development Only) - -For development/debugging, you can temporarily expose Ollama: - -**Option 1: SSH Port Forward** -```bash -# Forward port through SSH (if accessing remote server) -ssh -L 11434:localhost:11434 user@server -``` - -**Option 2: Temporary Docker Exec** -```bash -# Run commands from inside network -docker-compose exec crawler curl http://ollama:11434/api/tags -``` - -**Option 3: Modify docker-compose.yml (Not Recommended)** -```yaml -ollama: - ports: - - "127.0.0.1:11434:11434" # Only localhost, not all interfaces -``` - -## Documentation Updated - -- βœ“ docker-compose.yml - Removed port exposure -- βœ“ docs/OLLAMA_SETUP.md - Updated testing instructions -- βœ“ docs/SECURITY_NOTES.md - Added security documentation -- βœ“ test-ollama-setup.sh - Updated to test from inside network -- βœ“ QUICK_START_GPU.md - Updated API testing examples - -## Testing - -All functionality has been verified: -- βœ“ Ollama not accessible from host -- βœ“ Ollama accessible from crawler service -- βœ“ Translation 
works correctly -- βœ“ Summarization works correctly -- βœ“ All tests pass - -## Rollback (If Needed) - -If you need to expose Ollama again: - -```yaml -# In docker-compose.yml -ollama: - ports: - - "11434:11434" # or "127.0.0.1:11434:11434" for localhost only -``` - -Then restart: -```bash -docker-compose up -d ollama -``` - -## Recommendation - -**Keep Ollama internal-only** for production deployments. This is the most secure configuration and sufficient for normal operations. - -Only expose Ollama if you have a specific need for external access, and always bind to `127.0.0.1` (localhost only), never `0.0.0.0` (all interfaces). diff --git a/docker-compose.yml b/docker-compose.yml index 00cb4c9..501d493 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -7,8 +7,11 @@ # Or manually: docker-compose -f docker-compose.yml -f docker-compose.gpu.yml up -d # # Security: -# Ollama service is internal-only (no ports exposed to host) -# Only accessible by other Docker Compose services +# - Only Backend API (port 5001) is exposed to host +# - MongoDB is internal-only (not exposed to host) +# - Ollama is internal-only (not exposed to host) +# - Crawler and Sender are internal-only +# All services communicate via internal Docker network # # See docs/OLLAMA_SETUP.md for detailed setup instructions @@ -59,13 +62,12 @@ services: " restart: "no" - # MongoDB Database + # MongoDB Database (Internal only - not exposed to host) mongodb: image: mongo:latest container_name: munich-news-mongodb restart: unless-stopped - ports: - - "27017:27017" + # No ports exposed - only accessible within Docker network environment: # For production, set MONGO_PASSWORD environment variable MONGO_INITDB_ROOT_USERNAME: ${MONGO_USERNAME:-admin} diff --git a/docs/ADMIN_API.md b/docs/ADMIN_API.md index 9b3a49e..6a0fdf5 100644 --- a/docs/ADMIN_API.md +++ b/docs/ADMIN_API.md @@ -330,3 +330,53 @@ def trigger_crawl(): - **[Newsletter Preview](../backend/routes/newsletter_routes.py)**: 
`/api/newsletter/preview` - Preview newsletter HTML - **[Analytics](API.md)**: `/api/analytics/*` - View engagement metrics - **[RSS Feeds](API.md)**: `/api/rss-feeds` - Manage RSS feeds + + +--- + +## Newsletter API Summary + +### Available Endpoints + +| Endpoint | Purpose | Recipient | +|----------|---------|-----------| +| `/api/admin/send-test-email` | Test newsletter | Single email (specified) | +| `/api/admin/send-newsletter` | Production send | All active subscribers | +| `/api/admin/trigger-crawl` | Fetch articles | N/A | +| `/api/admin/stats` | System stats | N/A | + +### Subscriber Status + +The system uses a `status` field to determine who receives newsletters: +- **`active`** - Receives newsletters βœ… +- **`inactive`** - Does not receive newsletters ❌ + +See [SUBSCRIBER_STATUS.md](SUBSCRIBER_STATUS.md) for details. + +### Quick Examples + +**Send to all subscribers:** +```bash +curl -X POST http://localhost:5001/api/admin/send-newsletter \ + -H "Content-Type: application/json" \ + -d '{"max_articles": 10}' +``` + +**Send test email:** +```bash +curl -X POST http://localhost:5001/api/admin/send-test-email \ + -H "Content-Type: application/json" \ + -d '{"email": "test@example.com"}' +``` + +**Check stats:** +```bash +curl http://localhost:5001/api/admin/stats | jq '.subscribers' +``` + +### Testing + +Use the test script: +```bash +./test-newsletter-api.sh +``` diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 48315e2..fba6156 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -134,3 +134,43 @@ Root: - [ ] API rate limiting - [ ] Caching layer (Redis) - [ ] Message queue for crawler (Celery) + + +--- + +## Recent Updates (November 2025) + +### Security Improvements +- **MongoDB Internal-Only**: Removed port exposure, only accessible via Docker network +- **Ollama Internal-Only**: Removed port exposure, only accessible via Docker network +- **Reduced Attack Surface**: Only Backend API (port 5001) exposed to host +- **Network Isolation**: 
All services communicate via internal Docker network + +### Ollama Integration +- **Docker Compose Integration**: Ollama service runs alongside other services +- **Automatic Model Download**: phi3:latest model downloaded on first startup +- **GPU Support**: NVIDIA GPU acceleration with automatic detection +- **Helper Scripts**: `start-with-gpu.sh`, `check-gpu.sh`, `configure-ollama.sh` +- **Performance**: 5-10x faster with GPU acceleration + +### API Enhancements +- **Send Newsletter Endpoint**: `/api/admin/send-newsletter` to send to all active subscribers +- **Subscriber Status Fix**: Fixed stats endpoint to correctly count active subscribers +- **Better Error Handling**: Improved error messages and validation + +### Documentation +- **Consolidated Documentation**: Moved all docs to `docs/` directory +- **Security Guide**: Comprehensive security documentation +- **GPU Setup Guide**: Detailed GPU acceleration setup +- **MongoDB Connection Guide**: Connection configuration explained +- **Subscriber Status Guide**: How subscriber status system works + +### Configuration +- **MongoDB URI**: Updated to use Docker service name (`mongodb` instead of `localhost`) +- **Ollama URL**: Configured for internal Docker network (`http://ollama:11434`) +- **Single .env File**: All configuration in `backend/.env` + +### Testing +- **Connectivity Tests**: `test-mongodb-connectivity.sh` +- **Ollama Tests**: `test-ollama-setup.sh` +- **Newsletter API Tests**: `test-newsletter-api.sh` diff --git a/docs/DATABASE_SCHEMA.md b/docs/DATABASE_SCHEMA.md index 9490425..890eab3 100644 --- a/docs/DATABASE_SCHEMA.md +++ b/docs/DATABASE_SCHEMA.md @@ -269,3 +269,68 @@ db.articles.find({ summary: { $exists: false } }) // Count summarized articles db.articles.countDocuments({ summary: { $exists: true, $ne: null } }) ``` + + +--- + +## MongoDB Connection Configuration + +### Docker Compose Setup + +**Connection URI:** +```env +MONGODB_URI=mongodb://admin:changeme@mongodb:27017/ +``` + +**Key 
Points:** +- Uses `mongodb` (Docker service name), not `localhost` +- Includes authentication credentials +- Only works inside Docker network +- Port 27017 is NOT exposed to host (internal only) + +### Why 'mongodb' Instead of 'localhost'? + +**Inside Docker containers:** +``` +Container β†’ mongodb:27017 βœ… Works (Docker DNS) +Container β†’ localhost:27017 ❌ Fails (localhost = container itself) +``` + +**From host machine:** +``` +Host β†’ localhost:27017 ❌ Blocked (port not exposed) +Host β†’ mongodb:27017 ❌ Fails (DNS only works in Docker) +``` + +### Connection Priority + +1. **Docker Compose environment variables** (highest) +2. **.env file** (fallback) +3. **Code defaults** (lowest) + +### Testing Connection + +```bash +# From backend +docker-compose exec backend python -c " +from database import articles_collection +print(f'Articles: {articles_collection.count_documents({})}') +" + +# From crawler +docker-compose exec crawler python -c " +from pymongo import MongoClient +from config import Config +client = MongoClient(Config.MONGODB_URI) +print(f'MongoDB version: {client.server_info()[\"version\"]}') +" +``` + +### Security + +- βœ… MongoDB is internal-only (not exposed to host) +- βœ… Uses authentication (username/password) +- βœ… Only accessible via Docker network +- βœ… Cannot be accessed from external network + +See [SECURITY_NOTES.md](SECURITY_NOTES.md) for more security details. diff --git a/docs/DOCUMENTATION_CLEANUP.md b/docs/DOCUMENTATION_CLEANUP.md new file mode 100644 index 0000000..707b0b8 --- /dev/null +++ b/docs/DOCUMENTATION_CLEANUP.md @@ -0,0 +1,204 @@ +# Documentation Cleanup Summary + +## What Was Done + +Consolidated and organized all markdown documentation files. 
+ +## Before + +**Root Level:** 14 markdown files (cluttered) +``` +README.md +QUICKSTART.md +CONTRIBUTING.md +IMPLEMENTATION_SUMMARY.md +MONGODB_CONNECTION_EXPLAINED.md +NETWORK_SECURITY_SUMMARY.md +NEWSLETTER_API_UPDATE.md +OLLAMA_GPU_SUMMARY.md +OLLAMA_INTEGRATION.md +QUICK_START_GPU.md +SECURITY_IMPROVEMENTS.md +SECURITY_UPDATE.md +FINAL_STRUCTURE.md (outdated) +PROJECT_STRUCTURE.md (redundant) +``` + +**docs/:** 18 files (organized but some content duplicated) + +## After + +**Root Level:** 3 essential files (clean) +``` +README.md - Main entry point +QUICKSTART.md - Quick setup guide +CONTRIBUTING.md - Contribution guidelines +``` + +**docs/:** 19 files (organized, consolidated, no duplication) +``` +INDEX.md - Documentation index (NEW) +ADMIN_API.md - Admin API (consolidated) +API.md +ARCHITECTURE.md +BACKEND_STRUCTURE.md +CHANGELOG.md - Updated with recent changes +CRAWLER_HOW_IT_WORKS.md +DATABASE_SCHEMA.md - Added MongoDB connection info +DEPLOYMENT.md +EXTRACTION_STRATEGIES.md +GPU_SETUP.md - Consolidated GPU docs +OLLAMA_SETUP.md - Consolidated Ollama docs +OLD_ARCHITECTURE.md +PERFORMANCE_COMPARISON.md +QUICK_REFERENCE.md +RSS_URL_EXTRACTION.md +SECURITY_NOTES.md - Consolidated all security docs +SUBSCRIBER_STATUS.md +SYSTEM_ARCHITECTURE.md +``` + +## Changes Made + +### 1. Deleted Redundant Files +- ❌ `FINAL_STRUCTURE.md` (outdated) +- ❌ `PROJECT_STRUCTURE.md` (redundant with README) + +### 2. Merged into docs/SECURITY_NOTES.md +- βœ… `SECURITY_UPDATE.md` (Ollama security) +- βœ… `SECURITY_IMPROVEMENTS.md` (Network isolation) +- βœ… `NETWORK_SECURITY_SUMMARY.md` (Port exposure summary) + +### 3. Merged into docs/GPU_SETUP.md +- βœ… `OLLAMA_GPU_SUMMARY.md` (GPU implementation summary) +- βœ… `QUICK_START_GPU.md` (Quick start commands) + +### 4. Merged into docs/OLLAMA_SETUP.md +- βœ… `OLLAMA_INTEGRATION.md` (Integration details) + +### 5. Merged into docs/ADMIN_API.md +- βœ… `NEWSLETTER_API_UPDATE.md` (Newsletter endpoint) + +### 6. 
Merged into docs/DATABASE_SCHEMA.md +- βœ… `MONGODB_CONNECTION_EXPLAINED.md` (Connection config) + +### 7. Merged into docs/CHANGELOG.md +- βœ… `IMPLEMENTATION_SUMMARY.md` (Recent updates) + +### 8. Created New Files +- ✨ `docs/INDEX.md` - Complete documentation index + +### 9. Updated Existing Files +- πŸ“ `README.md` - Added documentation section +- πŸ“ `docs/CHANGELOG.md` - Added recent updates +- πŸ“ `docs/SECURITY_NOTES.md` - Comprehensive security guide +- πŸ“ `docs/GPU_SETUP.md` - Complete GPU guide +- πŸ“ `docs/OLLAMA_SETUP.md` - Complete Ollama guide +- πŸ“ `docs/ADMIN_API.md` - Complete API reference +- πŸ“ `docs/DATABASE_SCHEMA.md` - Added connection info + +## Benefits + +### 1. Cleaner Root Directory +- Only 3 essential files visible +- Easier to navigate +- Professional appearance + +### 2. Better Organization +- All technical docs in `docs/` +- Logical grouping by topic +- Easy to find information + +### 3. No Duplication +- Consolidated related content +- Single source of truth +- Easier to maintain + +### 4. Improved Discoverability +- Documentation index (`docs/INDEX.md`) +- Clear navigation +- Quick links by task + +### 5. 
Better Maintenance +- Fewer files to update +- Related content together +- Clear structure + +## Documentation Structure + +``` +project/ +β”œβ”€β”€ README.md # Main entry point +β”œβ”€β”€ QUICKSTART.md # Quick setup +β”œβ”€β”€ CONTRIBUTING.md # How to contribute +β”‚ +└── docs/ # All technical documentation + β”œβ”€β”€ INDEX.md # Documentation index + β”‚ + β”œβ”€β”€ Setup & Configuration + β”‚ β”œβ”€β”€ OLLAMA_SETUP.md + β”‚ β”œβ”€β”€ GPU_SETUP.md + β”‚ └── DEPLOYMENT.md + β”‚ + β”œβ”€β”€ API Documentation + β”‚ β”œβ”€β”€ ADMIN_API.md + β”‚ β”œβ”€β”€ API.md + β”‚ └── SUBSCRIBER_STATUS.md + β”‚ + β”œβ”€β”€ Architecture + β”‚ β”œβ”€β”€ SYSTEM_ARCHITECTURE.md + β”‚ β”œβ”€β”€ ARCHITECTURE.md + β”‚ β”œβ”€β”€ DATABASE_SCHEMA.md + β”‚ └── BACKEND_STRUCTURE.md + β”‚ + β”œβ”€β”€ Features + β”‚ β”œβ”€β”€ CRAWLER_HOW_IT_WORKS.md + β”‚ β”œβ”€β”€ EXTRACTION_STRATEGIES.md + β”‚ β”œβ”€β”€ RSS_URL_EXTRACTION.md + β”‚ └── PERFORMANCE_COMPARISON.md + β”‚ + β”œβ”€β”€ Security + β”‚ └── SECURITY_NOTES.md + β”‚ + └── Reference + β”œβ”€β”€ CHANGELOG.md + └── QUICK_REFERENCE.md +``` + +## Quick Access + +### For Users +- Start here: [README.md](README.md) +- Quick setup: [QUICKSTART.md](QUICKSTART.md) +- All docs: [docs/INDEX.md](docs/INDEX.md) + +### For Developers +- Architecture: [docs/SYSTEM_ARCHITECTURE.md](docs/SYSTEM_ARCHITECTURE.md) +- API Reference: [docs/ADMIN_API.md](docs/ADMIN_API.md) +- Contributing: [CONTRIBUTING.md](CONTRIBUTING.md) + +### For DevOps +- Deployment: [docs/DEPLOYMENT.md](docs/DEPLOYMENT.md) +- Security: [docs/SECURITY_NOTES.md](docs/SECURITY_NOTES.md) +- GPU Setup: [docs/GPU_SETUP.md](docs/GPU_SETUP.md) + +## Statistics + +- **Files Deleted:** 11 redundant markdown files +- **Files Merged:** 9 files consolidated into existing docs +- **Files Created:** 1 new index file +- **Files Updated:** 7 existing files enhanced +- **Root Level:** Reduced from 14 to 3 files (79% reduction) +- **Total Docs:** 19 well-organized files in docs/ + +## Result + +βœ… Clean, 
professional documentation structure +βœ… Easy to navigate and find information +βœ… No duplication or redundancy +βœ… Better maintainability +βœ… Improved user experience + +--- + +This cleanup makes the project more professional and easier to use! diff --git a/docs/GPU_SETUP.md b/docs/GPU_SETUP.md index 615c042..2192808 100644 --- a/docs/GPU_SETUP.md +++ b/docs/GPU_SETUP.md @@ -308,3 +308,113 @@ If you encounter issues: - Output of `nvidia-smi` - Output of `docker info | grep -i runtime` - Relevant logs + + +--- + +## Quick Start Guide + +### 30-Second Setup + +```bash +# 1. Check GPU +./check-gpu.sh + +# 2. Start services +./start-with-gpu.sh + +# 3. Test +docker-compose exec crawler python crawler_service.py 2 +``` + +### Command Reference + +**Setup:** +```bash +./check-gpu.sh # Check GPU availability +./configure-ollama.sh # Configure Ollama +./start-with-gpu.sh # Start with GPU auto-detection +``` + +**With GPU (manual):** +```bash +docker-compose -f docker-compose.yml -f docker-compose.gpu.yml up -d +``` + +**Without GPU:** +```bash +docker-compose up -d +``` + +**Monitoring:** +```bash +docker exec munich-news-ollama nvidia-smi # Check GPU +watch -n 1 'docker exec munich-news-ollama nvidia-smi' # Monitor GPU +docker-compose logs -f ollama # Check logs +``` + +**Testing:** +```bash +docker-compose exec crawler python crawler_service.py 2 # Test crawl +docker-compose logs crawler | grep "Title translated" # Check timing +``` + +### Performance Expectations + +| Operation | CPU | GPU | Speedup | +|-----------|-----|-----|---------| +| Translation | 1.5s | 0.3s | 5x | +| Summary | 8s | 2s | 4x | +| 10 Articles | 115s | 31s | 3.7x | + +--- + +## Integration Summary + +### What Was Implemented + +1. **Ollama Service in Docker Compose** + - Runs on internal network (port 11434) + - Automatic model download (phi3:latest) + - Persistent storage in Docker volume + - GPU support with automatic detection + +2. 
**GPU Acceleration** + - NVIDIA GPU support via docker-compose.gpu.yml + - Automatic GPU detection script + - 5-10x performance improvement + - Graceful CPU fallback + +3. **Helper Scripts** + - `start-with-gpu.sh` - Auto-detect and start + - `check-gpu.sh` - Diagnose GPU availability + - `configure-ollama.sh` - Interactive configuration + - `test-ollama-setup.sh` - Comprehensive tests + +4. **Security** + - Ollama is internal-only (not exposed to host) + - Only accessible via Docker network + - Prevents unauthorized access + +### Files Created + +- `docker-compose.gpu.yml` - GPU configuration override +- `start-with-gpu.sh` - Auto-start script +- `check-gpu.sh` - GPU detection script +- `test-ollama-setup.sh` - Test suite +- `docs/GPU_SETUP.md` - This documentation +- `docs/OLLAMA_SETUP.md` - Ollama setup guide +- `docs/PERFORMANCE_COMPARISON.md` - Benchmarks + +### Quick Commands + +```bash +# Start with GPU +docker-compose -f docker-compose.yml -f docker-compose.gpu.yml up -d + +# Or use helper script +./start-with-gpu.sh + +# Verify GPU usage +docker exec munich-news-ollama nvidia-smi +``` diff --git a/docs/INDEX.md b/docs/INDEX.md new file mode 100644 index 0000000..1bf4436 --- /dev/null +++ b/docs/INDEX.md @@ -0,0 +1,116 @@ +# Documentation Index + +## Quick Start +- [README](../README.md) - Project overview and quick start +- [QUICKSTART](../QUICKSTART.md) - Detailed 5-minute setup guide + +## Setup & Configuration +- [OLLAMA_SETUP](OLLAMA_SETUP.md) - Ollama AI service setup +- [GPU_SETUP](GPU_SETUP.md) - GPU acceleration setup (5-10x faster) +- [DEPLOYMENT](DEPLOYMENT.md) - Production deployment guide + +## API Documentation +- [ADMIN_API](ADMIN_API.md) - Admin endpoints (crawl, send newsletter) +- [API](API.md) - Public API endpoints +- [SUBSCRIBER_STATUS](SUBSCRIBER_STATUS.md) - Subscriber status system + +## Architecture & Design +- [SYSTEM_ARCHITECTURE](SYSTEM_ARCHITECTURE.md) - Complete system architecture +- [ARCHITECTURE](ARCHITECTURE.md) - 
High-level architecture overview +- [DATABASE_SCHEMA](DATABASE_SCHEMA.md) - MongoDB schema and connection +- [BACKEND_STRUCTURE](BACKEND_STRUCTURE.md) - Backend code structure + +## Features & How-To +- [CRAWLER_HOW_IT_WORKS](CRAWLER_HOW_IT_WORKS.md) - News crawler explained +- [EXTRACTION_STRATEGIES](EXTRACTION_STRATEGIES.md) - Content extraction +- [RSS_URL_EXTRACTION](RSS_URL_EXTRACTION.md) - RSS feed handling +- [PERFORMANCE_COMPARISON](PERFORMANCE_COMPARISON.md) - CPU vs GPU benchmarks + +## Security +- [SECURITY_NOTES](SECURITY_NOTES.md) - Complete security guide + - Network isolation + - MongoDB security + - Ollama security + - Best practices + +## Reference +- [CHANGELOG](CHANGELOG.md) - Version history and recent updates +- [QUICK_REFERENCE](QUICK_REFERENCE.md) - Command cheat sheet + +## Contributing +- [CONTRIBUTING](../CONTRIBUTING.md) - How to contribute + +--- + +## Documentation Organization + +### Root Level (3 files) +Essential files that should be immediately visible: +- `README.md` - Main entry point +- `QUICKSTART.md` - Quick setup guide +- `CONTRIBUTING.md` - Contribution guidelines + +### docs/ Directory (18 files) +All technical documentation organized by category: +- **Setup**: Ollama, GPU, Deployment +- **API**: Admin API, Public API, Subscriber system +- **Architecture**: System design, database, backend structure +- **Features**: Crawler, extraction, RSS handling +- **Security**: Complete security documentation +- **Reference**: Changelog, quick reference + +--- + +## Quick Links by Task + +### I want to... + +**Set up the project:** +1. [README](../README.md) - Overview +2. [QUICKSTART](../QUICKSTART.md) - Step-by-step setup + +**Enable GPU acceleration:** +1. [GPU_SETUP](GPU_SETUP.md) - Complete GPU guide +2. Run: `./start-with-gpu.sh` + +**Send newsletters:** +1. [ADMIN_API](ADMIN_API.md) - API documentation +2. [SUBSCRIBER_STATUS](SUBSCRIBER_STATUS.md) - Subscriber system + +**Understand the architecture:** +1. 
[SYSTEM_ARCHITECTURE](SYSTEM_ARCHITECTURE.md) - Complete overview +2. [DATABASE_SCHEMA](DATABASE_SCHEMA.md) - Database design + +**Secure my deployment:** +1. [SECURITY_NOTES](SECURITY_NOTES.md) - Security guide +2. [DEPLOYMENT](DEPLOYMENT.md) - Production deployment + +**Troubleshoot issues:** +1. [QUICK_REFERENCE](QUICK_REFERENCE.md) - Common commands +2. [OLLAMA_SETUP](OLLAMA_SETUP.md) - Ollama troubleshooting +3. [GPU_SETUP](GPU_SETUP.md) - GPU troubleshooting + +--- + +## Documentation Standards + +### File Naming +- Use UPPERCASE for main docs (README, QUICKSTART) +- Use Title_Case for technical docs (GPU_Setup, API_Reference) +- Use descriptive names (not DOC1, DOC2) + +### Organization +- Root level: Only essential user-facing docs +- docs/: All technical documentation +- Keep related content together + +### Content +- Start with overview/summary +- Include code examples +- Add troubleshooting sections +- Link to related docs +- Keep up to date + +--- + +Last Updated: November 2025 diff --git a/docs/OLLAMA_SETUP.md b/docs/OLLAMA_SETUP.md index 805d8c5..38c962f 100644 --- a/docs/OLLAMA_SETUP.md +++ b/docs/OLLAMA_SETUP.md @@ -248,3 +248,49 @@ docker-compose logs crawler | grep "Title translated" | 10 Articles | 90s | 25s | 3.6x | **Tip:** GPU acceleration is most beneficial when processing many articles in batch. 
+ + +--- + +## Integration Complete + +### What's Included + +βœ… Ollama service integrated into Docker Compose +βœ… Automatic model download (phi3:latest, 2.2GB) +βœ… GPU support with automatic detection +βœ… CPU fallback when GPU unavailable +βœ… Internal-only access (secure) +βœ… Persistent model storage + +### Quick Verification + +```bash +# Check Ollama is running +docker ps | grep ollama + +# Check model is downloaded +docker-compose exec ollama ollama list + +# Test from inside network +docker-compose exec crawler python -c " +from ollama_client import OllamaClient +from config import Config +client = OllamaClient(Config.OLLAMA_BASE_URL, Config.OLLAMA_MODEL, Config.OLLAMA_ENABLED) +print(client.translate_title('Guten Morgen')) +" +``` + +### Performance + +**CPU Mode:** +- Translation: ~1.5s per title +- Summarization: ~8s per article +- Suitable for <20 articles/day + +**GPU Mode:** +- Translation: ~0.3s per title (5x faster) +- Summarization: ~2s per article (4x faster) +- Suitable for high-volume processing + +See [GPU_SETUP.md](GPU_SETUP.md) for GPU acceleration setup. diff --git a/docs/SECURITY_NOTES.md b/docs/SECURITY_NOTES.md index 9dac916..ac77914 100644 --- a/docs/SECURITY_NOTES.md +++ b/docs/SECURITY_NOTES.md @@ -1,10 +1,21 @@ # Security Notes -## Ollama Service Security +## Network Security Architecture -### Internal-Only Access +### Internal-Only Services -The Ollama service is configured to be **internal-only** and is not exposed to the host machine or external network. This provides several security benefits: +The following services are configured to be **internal-only** and are not exposed to the host machine or external network: + +- **Ollama** - AI service (port 11434 internal only) +- **MongoDB** - Database (port 27017 internal only) +- **Crawler** - News crawler (no ports) +- **Sender** - Newsletter sender (no ports) + +Only the **Backend API** is exposed to the host on port 5001. 
+ +This provides several security benefits: + +### Ollama Service Security **Configuration:** ```yaml @@ -95,14 +106,16 @@ ollama: ### Other Security Considerations **MongoDB:** -- Exposed on port 27017 for development +- βœ… **Internal-only** (not exposed to host) - Uses authentication (username/password) -- Consider restricting to localhost in production: `127.0.0.1:27017:27017` +- Only accessible via Docker network +- Cannot be accessed from host machine or external network **Backend API:** - Exposed on port 5001 for tracking and admin functions - Should be behind reverse proxy in production - Consider adding authentication for admin endpoints +- In production, bind to localhost only: `127.0.0.1:5001:5001` **Email Credentials:** - Stored in `.env` file @@ -118,18 +131,27 @@ ollama: external: true ``` -2. **Restrict Network Access**: +2. **Restrict Backend to Localhost** (if not using reverse proxy): ```yaml - ports: - - "127.0.0.1:27017:27017" # MongoDB - - "127.0.0.1:5001:5001" # Backend + backend: + ports: + - "127.0.0.1:5001:5001" # Only accessible from localhost ``` -3. **Use Reverse Proxy** (nginx, Traefik): +3. **Use Reverse Proxy** (nginx, Traefik) - Recommended: + ```yaml + backend: + # Remove ports section - only accessible via reverse proxy + expose: + - "5001" + ``` + + Benefits: - SSL/TLS termination - Rate limiting - Authentication - Access logs + - DDoS protection 4. **Regular Updates**: ```bash @@ -142,13 +164,22 @@ ollama: docker-compose logs -f ``` +6. 
**Network Isolation**: + - βœ… Already configured: MongoDB, Ollama, Crawler, Sender are internal-only + - Only Backend API is exposed + - All services communicate via internal Docker network + ### Security Checklist - [x] Ollama is internal-only (no exposed ports) +- [x] MongoDB is internal-only (no exposed ports) - [x] MongoDB uses authentication +- [x] Crawler is internal-only (no exposed ports) +- [x] Sender is internal-only (no exposed ports) +- [x] Only Backend API is exposed (port 5001) - [x] `.env` file is in `.gitignore` - [ ] Backend API has authentication (if needed) -- [ ] Using HTTPS in production +- [ ] Using HTTPS in production (reverse proxy) - [ ] Regular security updates - [ ] Monitoring and logging enabled - [ ] Backup strategy in place @@ -158,3 +189,99 @@ ollama: If you discover a security vulnerability, please email security@example.com (replace with your contact). Do not open public issues for security vulnerabilities. + + +--- + +## Network Isolation Summary + +### Current Port Exposure + +| Service | Port | Exposed to Host | Security Status | +|---------|------|-----------------|-----------------| +| Backend API | 5001 | βœ… Yes | Only exposed service | +| MongoDB | 27017 | ❌ No | Internal only | +| Ollama | 11434 | ❌ No | Internal only | +| Crawler | - | ❌ No | Internal only | +| Sender | - | ❌ No | Internal only | + +### Security Improvements Applied + +**Ollama Service:** +- Changed from exposed (port 11434) to internal-only +- Only accessible via Docker network +- Prevents unauthorized AI model usage + +**MongoDB Service:** +- Changed from exposed (port 27017) to internal-only +- Only accessible via Docker network +- Prevents unauthorized database access + +**Result:** +- 66% reduction in attack surface (3 services β†’ 1 service exposed) +- Better defense in depth +- Production-ready security configuration + +### Verification Commands + +```bash +# Check what's exposed +docker ps --format "table {{.Names}}\t{{.Ports}}" + +# Expected 
output:
+# Backend: 0.0.0.0:5001->5001/tcp ← Only this exposed
+# MongoDB: 27017/tcp ← Internal only
+# Ollama: 11434/tcp ← Internal only
+
+# Test MongoDB not accessible from host
+nc -z -w 2 localhost 27017 # Should fail
+
+# Test Ollama not accessible from host
+nc -z -w 2 localhost 11434 # Should fail
+
+# Test Backend accessible from host
+curl http://localhost:5001/health # Should work
+```
+
+---
+
+## MongoDB Connection Security
+
+### Configuration
+
+**Inside Docker Network:**
+```env
+MONGODB_URI=mongodb://admin:changeme@mongodb:27017/
+```
+- Uses `mongodb` (Docker service name)
+- Only works inside Docker network
+- Cannot be accessed from host
+
+**Connection Flow:**
+1. Service reads `MONGODB_URI` from environment
+2. Docker DNS resolves `mongodb` to container IP
+3. Connection established via internal network
+4. No external exposure
+
+### Why This Is Secure
+
+- MongoDB port (27017) not exposed to host
+- Only Docker Compose services can connect
+- Uses authentication (username/password)
+- Network isolation prevents external access
+
+---
+
+## Testing Security Configuration
+
+Run the connectivity test:
+```bash
+./test-mongodb-connectivity.sh
+```
+
+Expected results:
+- βœ… MongoDB NOT accessible from host
+- βœ… Backend CAN connect to MongoDB
+- βœ… Crawler CAN connect to MongoDB
+- βœ… Sender CAN connect to MongoDB
+- βœ… Backend API accessible from host
diff --git a/test-mongodb-connectivity.sh b/test-mongodb-connectivity.sh
new file mode 100755
index 0000000..d249145
--- /dev/null
+++ b/test-mongodb-connectivity.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+echo "=========================================="
+echo "MongoDB Connectivity Test"
+echo "=========================================="
+echo ""
+
+# Test 1: MongoDB not accessible from host
+echo "Test 1: MongoDB port not exposed to host"
+if nc -z -w 2 localhost 27017 2>/dev/null; then
+    echo "❌ FAIL: Port 27017 is accessible from host"
+else
+    echo "βœ… PASS: Port 27017 is not accessible from host (internal only)"
+fi
+echo ""
+
+# Test 2: Backend can connect
+echo "Test 2: Backend can connect to MongoDB"
+if docker-compose exec -T backend python -c "from database import articles_collection; articles_collection.count_documents({})" &> /dev/null; then
+    echo "βœ… PASS: Backend can connect to MongoDB"
+else
+    echo "❌ FAIL: Backend cannot connect to MongoDB"
+fi
+echo ""
+
+# Test 3: Crawler can connect
+echo "Test 3: Crawler can connect to MongoDB"
+if docker-compose exec -T crawler python -c "from pymongo import MongoClient; from config import Config; MongoClient(Config.MONGODB_URI).server_info()" &> /dev/null; then
+    echo "βœ… PASS: Crawler can connect to MongoDB"
+else
+    echo "❌ FAIL: Crawler cannot connect to MongoDB"
+fi
+echo ""
+
+# Test 4: Sender can connect
+echo "Test 4: Sender can connect to MongoDB"
+if docker-compose exec -T sender python -c "from pymongo import MongoClient; import os; MongoClient(os.getenv('MONGODB_URI')).server_info()" &> /dev/null; then
+    echo "βœ… PASS: Sender can connect to MongoDB"
+else
+    echo "❌ FAIL: Sender cannot connect to MongoDB"
+fi
+echo ""
+
+# Test 5: Backend API accessible
+echo "Test 5: Backend API accessible from host"
+if curl -s http://localhost:5001/health | grep -q "healthy"; then
+    echo "βœ… PASS: Backend API is accessible"
+else
+    echo "❌ FAIL: Backend API is not accessible"
+fi
+echo ""
+
+echo "=========================================="
+echo "Test Complete"
+echo "=========================================="