commit ac5738c29df66f9f3fe4e184b6812e797c8539d5 Author: Dongho Kim Date: Mon Nov 10 19:13:33 2025 +0100 update diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..bf92361 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,40 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +env/ +venv/ +ENV/ +.venv + +# Node +node_modules/ +npm-debug.log* +yarn-debug.log* +yarn-error.log* + +# Environment variables +.env +.env.local + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# OS +.DS_Store +Thumbs.db + +# Git +.git/ +.gitignore + +# Documentation +*.md +!README.md + diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e04f045 --- /dev/null +++ b/.gitignore @@ -0,0 +1,187 @@ +# =================================== +# Python +# =================================== +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python + +# Virtual Environments +env/ +venv/ +ENV/ +.venv +.virtualenv +backend/env/ +backend/venv/ +news_crawler/env/ +news_crawler/venv/ +news_sender/env/ +news_sender/venv/ + +# Python Distribution / Packaging +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +*.manifest +*.spec + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# =================================== +# Node.js +# =================================== +node_modules/ +npm-debug.log* +yarn-debug.log* +yarn-error.log* +.npm +.yarn-integrity +package-lock.json +yarn.lock +.pnp +.pnp.js + +# =================================== +# Environment Variables & Secrets +# =================================== +.env +.env.local +.env.development.local +.env.test.local +.env.production.local +*.env + +# =================================== +# Database +# =================================== +*.db +*.sqlite +*.sqlite3 +*.db-journal + +# MongoDB +data/ +mongodb_data/ + +# =================================== +# IDE & Editors +# =================================== +# VSCode +.vscode/ +.vscode-test/ +*.code-workspace + +# PyCharm / IntelliJ +.idea/ +*.iml +*.iws +*.ipr +out/ + +# Sublime Text +*.sublime-project +*.sublime-workspace + +# Vim +*.swp +*.swo +*~ +.vim/ + +# Emacs +*~ +\#*\# +.\#* + +# =================================== +# OS Files +# =================================== +# macOS +.DS_Store +.AppleDouble +.LSOverride +._* +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Windows +Thumbs.db +Thumbs.db:encryptable +ehthumbs.db +ehthumbs_vista.db +*.stackdump +[Dd]esktop.ini +$RECYCLE.BIN/ +*.cab +*.msi +*.msix +*.msm +*.msp +*.lnk + +# Linux +.directory +.Trash-* + +# =================================== +# Project Specific +# =================================== +# Generated files +newsletter_preview.html +*.log + +# Temporary files +*.tmp +*.temp +*.bak +*.backup + +# Docker volumes +mongodb_data/ +ollama_data/ + +# Spec artifacts (optional - uncomment if you don't want to track specs) +# .kiro/specs/ + +# Test outputs +test-results/ +coverage/ + diff --git a/.kiro/specs/ai-article-summarization/design.md b/.kiro/specs/ai-article-summarization/design.md new file mode 100644 index 0000000..236c248 --- /dev/null +++ b/.kiro/specs/ai-article-summarization/design.md @@ -0,0 +1,487 @@ +# Design Document - AI Article 
Summarization + +## Overview + +This design integrates Ollama AI into the news crawler workflow to automatically generate concise summaries of articles. The system will extract full article content, send it to Ollama for summarization, and store both the original content and the AI-generated summary in MongoDB. + +## Architecture + +### High-Level Flow + +``` +RSS Feed → Extract Content → Summarize with Ollama → Store in MongoDB + ↓ ↓ ↓ + Full Article Text AI Summary (≤150 words) Both Stored +``` + +### Component Diagram + +``` +┌─────────────────────────────────────────────────────────────┐ +│ News Crawler Service │ +│ │ +│ ┌────────────────┐ ┌──────────────────┐ │ +│ │ RSS Parser │──────→│ Content Extractor│ │ +│ └────────────────┘ └──────────────────┘ │ +│ │ │ +│ ↓ │ +│ ┌──────────────────┐ │ +│ │ Ollama Client │ │ +│ │ (New Component) │ │ +│ └──────────────────┘ │ +│ │ │ +│ ↓ │ +│ ┌──────────────────┐ │ +│ │ Database Writer │ │ +│ └──────────────────┘ │ +└─────────────────────────────────────────────────────────────┘ + │ + ↓ + ┌──────────────────┐ + │ Ollama Server │ + │ (External) │ + └──────────────────┘ + │ + ↓ + ┌──────────────────┐ + │ MongoDB │ + └──────────────────┘ +``` + +## Components and Interfaces + +### 1. Ollama Client Module + +**File:** `news_crawler/ollama_client.py` + +**Purpose:** Handle communication with Ollama server for summarization + +**Interface:** +```python +class OllamaClient: + def __init__(self, base_url, model, api_key=None, enabled=True): + """Initialize Ollama client with configuration""" + + def summarize_article(self, content: str, max_words: int = 150) -> dict: + """ + Summarize article content using Ollama + + Args: + content: Full article text + max_words: Maximum words in summary (default 150) + + Returns: + { + 'summary': str, # AI-generated summary + 'word_count': int, # Summary word count + 'success': bool, # Whether summarization succeeded + 'error': str or None, # Error message if failed + 'duration': float # Time taken in seconds + } + """ + + def is_available(self) -> bool: + """Check if Ollama server is reachable""" + + def test_connection(self) -> dict: + """Test connection and return server info""" +``` + +**Key Methods:** + +1. **summarize_article()** + - Constructs prompt for Ollama + - Sends HTTP POST request + - Handles timeouts and errors + - Validates response + - Returns structured result + +2. **is_available()** + - Quick health check + - Returns True/False + - Used before attempting summarization + +3. **test_connection()** + - Detailed connection test + - Returns server info and model list + - Used for diagnostics + +### 2. Enhanced Crawler Service + +**File:** `news_crawler/crawler_service.py` + +**Changes:** + +```python +# Add Ollama client initialization +from ollama_client import OllamaClient + +# Initialize at module level +ollama_client = OllamaClient( + base_url=os.getenv('OLLAMA_BASE_URL'), + model=os.getenv('OLLAMA_MODEL'), + api_key=os.getenv('OLLAMA_API_KEY'), + enabled=os.getenv('OLLAMA_ENABLED', 'false').lower() == 'true' +) + +# Modify crawl_rss_feed() to include summarization +def crawl_rss_feed(feed_url, feed_name, max_articles=10): + # ... existing code ... 
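    # The omitted code loops over the parsed feed entries, derives each
    # entry's article_url, and applies the rate-limiting delay between
    # requests before the extraction step below.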
    # After extracting content
    article_data = extract_article_content(article_url)

    # NEW: Summarize with Ollama
    summary_result = None
    if ollama_client.enabled and article_data.get('content'):
        print("  🤖 Summarizing with AI...")
        summary_result = ollama_client.summarize_article(
            article_data['content'],
            max_words=150
        )

        if summary_result['success']:
            print(f"  ✓ Summary generated ({summary_result['word_count']} words)")
        else:
            print(f"  ⚠ Summarization failed: {summary_result['error']}")

    # Build article document with summary
    article_doc = {
        'title': article_data.get('title'),
        'author': article_data.get('author'),
        'link': article_url,
        'content': article_data.get('content'),
        'summary': summary_result['summary'] if summary_result and summary_result['success'] else None,
        'word_count': article_data.get('word_count'),
        'summary_word_count': summary_result['word_count'] if summary_result and summary_result['success'] else None,
        'source': feed_name,
        'published_at': extract_published_date(entry),
        'crawled_at': article_data.get('crawled_at'),
        'summarized_at': datetime.utcnow() if summary_result and summary_result['success'] else None,
        'created_at': datetime.utcnow()
    }
```

### 3. Configuration Module

**File:** `news_crawler/config.py` (new file)

**Purpose:** Centralize configuration management

```python
import os
from dotenv import load_dotenv

load_dotenv(dotenv_path='../.env')

class Config:
    # MongoDB
    MONGODB_URI = os.getenv('MONGODB_URI', 'mongodb://localhost:27017/')
    DB_NAME = 'munich_news'

    # Ollama
    OLLAMA_BASE_URL = os.getenv('OLLAMA_BASE_URL', 'http://localhost:11434')
    OLLAMA_MODEL = os.getenv('OLLAMA_MODEL', 'phi3:latest')
    OLLAMA_API_KEY = os.getenv('OLLAMA_API_KEY', '')
    OLLAMA_ENABLED = os.getenv('OLLAMA_ENABLED', 'false').lower() == 'true'
    OLLAMA_TIMEOUT = int(os.getenv('OLLAMA_TIMEOUT', '30'))

    # Crawler
    RATE_LIMIT_DELAY = 1        # seconds between requests
    MAX_CONTENT_LENGTH = 50000  # characters
```

## Data Models

### Updated Article Schema

```javascript
{
  _id: ObjectId,
  title: String,
  author: String,
  link: String,                // Unique index
  content: String,             // Full article content
  summary: String,             // AI-generated summary (≤150 words)
  word_count: Number,          // Original content word count
  summary_word_count: Number,  // Summary word count
  source: String,
  published_at: String,
  crawled_at: DateTime,
  summarized_at: DateTime,     // When AI summary was generated
  created_at: DateTime
}
```

### Ollama Request Format

```json
{
  "model": "phi3:latest",
  "prompt": "Summarize the following article in 150 words or less. Focus on the key points and main message:\n\n[ARTICLE CONTENT]",
  "stream": false,
  "options": {
    "temperature": 0.7,
    "num_predict": 200
  }
}
```
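For illustration, a minimal sketch of how `summarize_article()` might issue this request with the `requests` library — the `/api/generate` endpoint and payload fields are standard Ollama usage, while the return dict mirrors the interface defined above (error handling trimmed to the essentials):

```python
import time
import requests

def summarize_article(content, base_url="http://localhost:11434",
                      model="phi3:latest", max_words=150, timeout=30):
    """Send article content to Ollama and return a structured result."""
    prompt = (
        f"Summarize the following article in {max_words} words or less. "
        f"Focus on the key points and main message:\n\n{content}"
    )
    payload = {
        "model": model,
        "prompt": prompt,
        "stream": False,
        "options": {"temperature": 0.7, "num_predict": 200},
    }
    start = time.monotonic()
    try:
        resp = requests.post(f"{base_url}/api/generate", json=payload,
                             timeout=timeout)
        resp.raise_for_status()
        summary = resp.json().get("response", "").strip()
        return {
            "summary": summary or None,
            "word_count": len(summary.split()),
            "success": bool(summary),  # an empty summary counts as failure
            "error": None if summary else "empty summary returned",
            "duration": time.monotonic() - start,
        }
    except requests.RequestException as exc:
        return {"summary": None, "word_count": 0, "success": False,
                "error": str(exc), "duration": time.monotonic() - start}
```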
### Ollama Response Format

```json
{
  "model": "phi3:latest",
  "created_at": "2024-11-10T16:30:00Z",
  "response": "The AI-generated summary text here...",
  "done": true,
  "total_duration": 5000000000
}
```

## Error Handling

### Error Scenarios and Responses

| Scenario | Handling | User Impact |
|----------|----------|-------------|
| Ollama server down | Log warning, store original content | Article saved without summary |
| Ollama timeout (>30s) | Cancel request, store original | Article saved without summary |
| Empty summary returned | Log error, store original | Article saved without summary |
| Invalid response format | Log error, store original | Article saved without summary |
| Network error | Retry once, then store original | Article saved without summary |
| Model not found | Log error, disable Ollama | All articles saved without summaries |

### Error Logging Format

```python
{
    'timestamp': datetime.utcnow(),
    'article_url': article_url,
    'error_type': 'timeout|connection|invalid_response|empty_summary',
    'error_message': str(error),
    'ollama_config': {
        'base_url': OLLAMA_BASE_URL,
        'model': OLLAMA_MODEL,
        'enabled': OLLAMA_ENABLED
    }
}
```

## Testing Strategy

### Unit Tests

1. **test_ollama_client.py**
   - Test summarization with mock responses
   - Test timeout handling
   - Test error scenarios
   - Test connection checking

2. **test_crawler_with_ollama.py**
   - Test crawler with Ollama enabled
   - Test crawler with Ollama disabled
   - Test fallback when Ollama fails
   - Test rate limiting

### Integration Tests

1. **test_end_to_end.py**
   - Crawl a real RSS feed
   - Summarize with a real Ollama instance
   - Verify database storage
   - Check all fields are populated

### Manual Testing

1. Test with Ollama enabled and working
2. Test with Ollama disabled
3. Test with Ollama unreachable
4. Test with slow Ollama responses
5. Test with various article lengths

## Performance Considerations

### Timing Estimates

- Article extraction: 2-5 seconds
- Ollama summarization: 5-15 seconds (depends on article length and model)
- Database write: <1 second
- **Total per article: 8-21 seconds**

### Optimization Strategies

1. **Sequential Processing**
   - Process one article at a time
   - Prevents overwhelming Ollama
   - Easier to debug

2. **Timeout Management**
   - 30-second timeout per request
   - Prevents hanging on slow responses

3. **Rate Limiting**
   - 1-second delay between articles
   - Respects server resources

4. **Future: Batch Processing**
   - Queue articles for summarization
   - Process in batches
   - Use Celery for async processing

### Resource Usage

- **Memory**: ~100MB per crawler instance
- **Network**: ~1-5KB per article (to Ollama)
- **Storage**: +150 words per article (~1KB)
- **CPU**: Minimal (Ollama does the heavy lifting)

## Security Considerations

1. **API Key Storage**
   - Store in environment variables
   - Never commit to git
   - Use secrets management in production

2. **Content Sanitization**
   - Don't log full article content
   - Sanitize URLs in logs
   - Limit error message detail

3. **Network Security**
   - Support HTTPS for Ollama
   - Validate SSL certificates
   - Use secure connections

4. **Rate Limiting**
   - Prevent abuse of the Ollama server
   - Implement backoff on errors
   - Monitor usage patterns
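As a sketch of the retry-and-backoff behavior described above (the error table specifies one retry on network errors; the delay values here are illustrative):

```python
import time

def summarize_with_backoff(client, content, retries=1, base_delay=2.0):
    """Attempt summarization with retries and exponential backoff.

    Returns the successful result, or None so the caller falls back to
    storing the original content, per the error-handling table above.
    """
    for attempt in range(retries + 1):
        result = client.summarize_article(content)
        if result["success"]:
            return result
        if attempt < retries:
            time.sleep(base_delay * (2 ** attempt))  # 2s, 4s, ...
    return None
```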
## Deployment Considerations

### Environment Variables

```bash
# Required
OLLAMA_BASE_URL=http://localhost:11434
OLLAMA_MODEL=phi3:latest
OLLAMA_ENABLED=true

# Optional
OLLAMA_API_KEY=your-api-key
OLLAMA_TIMEOUT=30
```

### Docker Deployment

```yaml
# docker-compose.yml
services:
  crawler:
    build: ./news_crawler
    environment:
      - OLLAMA_BASE_URL=http://ollama:11434
      - OLLAMA_ENABLED=true
    depends_on:
      - ollama
      - mongodb

  ollama:
    image: ollama/ollama:latest
    ports:
      - "11434:11434"
    volumes:
      - ollama_data:/root/.ollama

volumes:
  ollama_data:
```

### Monitoring

1. **Metrics to Track**
   - Summarization success rate
   - Average summarization time
   - Ollama server uptime
   - Error frequency by type

2. **Logging**
   - Log all summarization attempts
   - Log errors with context
   - Log performance metrics

3. **Alerts**
   - Alert if Ollama is down >5 minutes
   - Alert if success rate <80%
   - Alert if average time >20 seconds

## Migration Plan

### Phase 1: Add Ollama Client (Week 1)
- Create ollama_client.py
- Add configuration
- Write unit tests
- Test with sample articles

### Phase 2: Integrate with Crawler (Week 1)
- Modify crawler_service.py
- Add summarization step
- Update database schema
- Test end-to-end

### Phase 3: Update Backend API (Week 2)
- Update news routes
- Add summary fields to responses
- Update frontend to display summaries
- Deploy to production

### Phase 4: Monitor and Optimize (Ongoing)
- Monitor performance
- Tune prompts for better summaries
- Optimize rate limiting
- Add batch processing if needed

## Rollback Plan

If issues arise:

1. **Immediate**: Set `OLLAMA_ENABLED=false`
2. **Short-term**: Revert crawler code changes
3. **Long-term**: Remove the Ollama integration

The system will continue to work with original content if Ollama is disabled.

## Success Metrics

- ✅ 95%+ of articles successfully summarized
- ✅ Average summarization time <15 seconds
- ✅ Zero data loss (all articles stored even if summarization fails)
- ✅ Ollama uptime >99%
- ✅ Summary quality: readable and accurate (manual review)

## Future Enhancements

1. **Multi-language Support**
   - Detect article language
   - Use an appropriate model
   - Translate summaries

2. **Custom Summary Lengths**
   - Allow configuration per feed
   - Support different lengths for different use cases

3. **Sentiment Analysis**
   - Add a sentiment score
   - Categorize as positive/negative/neutral

4. **Keyword Extraction**
   - Extract key topics
   - Enable better search

5. **Batch Processing**
   - Queue articles
   - Process in parallel
   - Use Celery for async processing

6. **Caching**
   - Cache summaries
   - Avoid re-processing
   - Use Redis for the cache

diff --git a/.kiro/specs/ai-article-summarization/requirements.md b/.kiro/specs/ai-article-summarization/requirements.md
new file mode 100644
index 0000000..50cbfc6
--- /dev/null
+++ b/.kiro/specs/ai-article-summarization/requirements.md
@@ -0,0 +1,164 @@
# Requirements Document

## Introduction

This feature integrates Ollama AI into the news crawler to automatically summarize articles before storing them in the database. Alongside the full article content, the system will store a concise AI-generated summary of at most 150 words, making the content more digestible for newsletter readers.
+ +## Glossary + +- **Crawler Service**: The standalone microservice that fetches and processes article content from RSS feeds +- **Ollama Server**: The AI inference server that provides text summarization capabilities +- **Article Content**: The full text extracted from a news article webpage +- **Summary**: A concise AI-generated version of the article content (max 150 words) +- **MongoDB**: The database where articles and summaries are stored + +## Requirements + +### Requirement 1: Ollama Integration in Crawler + +**User Story:** As a system administrator, I want the crawler to use Ollama for summarization, so that articles are automatically condensed before storage. + +#### Acceptance Criteria + +1. WHEN the crawler extracts article content, THE Crawler Service SHALL send the content to the Ollama Server for summarization +2. WHEN sending content to Ollama, THE Crawler Service SHALL include a prompt requesting a summary of 150 words or less +3. WHEN Ollama returns a summary, THE Crawler Service SHALL validate that the summary is not empty +4. IF the Ollama Server is unavailable, THEN THE Crawler Service SHALL store the original content without summarization and log a warning +5. WHEN summarization fails, THE Crawler Service SHALL continue processing other articles without stopping + +### Requirement 2: Configuration Management + +**User Story:** As a system administrator, I want to configure Ollama settings, so that I can control the summarization behavior. + +#### Acceptance Criteria + +1. THE Crawler Service SHALL read Ollama configuration from environment variables +2. THE Crawler Service SHALL support the following configuration options: + - OLLAMA_BASE_URL (server URL) + - OLLAMA_MODEL (model name) + - OLLAMA_ENABLED (enable/disable flag) + - OLLAMA_API_KEY (optional authentication) +3. WHERE OLLAMA_ENABLED is false, THE Crawler Service SHALL store original content without summarization +4. WHERE OLLAMA_ENABLED is true AND Ollama is unreachable, THE Crawler Service SHALL log an error and store original content + +### Requirement 3: Summary Storage + +**User Story:** As a developer, I want summaries stored in the database, so that the frontend can display concise article previews. + +#### Acceptance Criteria + +1. WHEN a summary is generated, THE Crawler Service SHALL store it in the `summary` field in MongoDB +2. WHEN storing an article, THE Crawler Service SHALL include both the original content and the AI summary +3. THE Crawler Service SHALL store the following fields: + - `content` (original full text) + - `summary` (AI-generated, max 150 words) + - `word_count` (original content word count) + - `summary_word_count` (summary word count) + - `summarized_at` (timestamp when summarized) +4. WHEN an article already has a summary, THE Crawler Service SHALL not re-summarize it + +### Requirement 4: Error Handling and Resilience + +**User Story:** As a system administrator, I want the crawler to handle AI failures gracefully, so that the system remains reliable. + +#### Acceptance Criteria + +1. IF Ollama returns an error, THEN THE Crawler Service SHALL log the error and store the original content +2. IF Ollama times out (>30 seconds), THEN THE Crawler Service SHALL cancel the request and store the original content +3. IF the summary is empty or invalid, THEN THE Crawler Service SHALL store the original content +4. WHEN an error occurs, THE Crawler Service SHALL include an error indicator in the database record +5. 
THE Crawler Service SHALL continue processing remaining articles after any summarization failure

### Requirement 5: Performance and Rate Limiting

**User Story:** As a system administrator, I want the crawler to respect rate limits, so that it doesn't overwhelm the Ollama server.

#### Acceptance Criteria

1. THE Crawler Service SHALL wait at least 1 second between Ollama API calls
2. THE Crawler Service SHALL set a timeout of 30 seconds for each Ollama request
3. WHEN processing multiple articles, THE Crawler Service SHALL process them sequentially to avoid overloading Ollama
4. THE Crawler Service SHALL log the time taken for each summarization
5. THE Crawler Service SHALL display progress indicators showing summarization status

### Requirement 6: Monitoring and Logging

**User Story:** As a system administrator, I want detailed logs of summarization activity, so that I can monitor and troubleshoot the system.

#### Acceptance Criteria

1. THE Crawler Service SHALL log when summarization starts for each article
2. THE Crawler Service SHALL log the original word count and summary word count
3. THE Crawler Service SHALL log any errors or warnings from Ollama
4. THE Crawler Service SHALL display a summary of total articles summarized at the end
5. THE Crawler Service SHALL include summarization statistics in the final report

### Requirement 7: API Endpoint Updates

**User Story:** As a frontend developer, I want API endpoints to return summaries, so that I can display them to users.

#### Acceptance Criteria

1. WHEN fetching articles via GET /api/news, THE Backend API SHALL include the `summary` field if available
2. WHEN fetching a single article via GET /api/news/<article_url>, THE Backend API SHALL include both `content` and `summary`
3. THE Backend API SHALL include a `has_summary` boolean field indicating if AI summarization was performed
4. THE Backend API SHALL include the `summarized_at` timestamp if available
5. WHERE no summary exists, THE Backend API SHALL return a preview of the original content (first 200 chars)

### Requirement 8: Backward Compatibility

**User Story:** As a developer, I want the system to work with existing articles, so that no data migration is required.

#### Acceptance Criteria

1. THE Crawler Service SHALL work with articles that don't have summaries
2. THE Backend API SHALL handle articles with or without summaries gracefully
3. WHERE an article has no summary, THE Backend API SHALL generate a preview from the content field
4. THE Crawler Service SHALL not re-process articles that already have summaries
5.
THE system SHALL continue to function if Ollama is disabled or unavailable + +## Non-Functional Requirements + +### Performance +- Summarization SHALL complete within 30 seconds per article +- The crawler SHALL process at least 10 articles per minute (including summarization) +- Database operations SHALL not be significantly slower with summary storage + +### Reliability +- The system SHALL maintain 99% uptime even if Ollama is unavailable +- Failed summarizations SHALL not prevent article storage +- The crawler SHALL recover from Ollama errors without manual intervention + +### Security +- Ollama API keys SHALL be stored in environment variables, not in code +- Article content SHALL not be logged to prevent sensitive data exposure +- API communication with Ollama SHALL support HTTPS + +### Scalability +- The system SHALL support multiple Ollama servers for load balancing (future) +- The crawler SHALL handle articles of any length (up to 50,000 words) +- The database schema SHALL support future enhancements (tags, categories, etc.) + +## Dependencies + +- Ollama server must be running and accessible +- `requests` Python library for HTTP communication +- Environment variables properly configured +- MongoDB with sufficient storage for both content and summaries + +## Assumptions + +- Ollama server is already set up and configured +- The phi3:latest model (or configured model) supports summarization tasks +- Network connectivity between crawler and Ollama server is reliable +- Articles are in English or the configured Ollama model supports the article language + +## Future Enhancements + +- Support for multiple languages +- Customizable summary length +- Sentiment analysis integration +- Keyword extraction +- Category classification +- Batch summarization for improved performance +- Caching of summaries to avoid re-processing diff --git a/.kiro/specs/ai-article-summarization/tasks.md b/.kiro/specs/ai-article-summarization/tasks.md new file mode 100644 index 0000000..7d18351 --- /dev/null +++ b/.kiro/specs/ai-article-summarization/tasks.md @@ -0,0 +1,92 @@ +# Implementation Plan + +- [x] 1. Create Ollama client module + - Create `news_crawler/ollama_client.py` with OllamaClient class + - Implement `summarize_article()` method with prompt construction and API call + - Implement `is_available()` method for health checks + - Implement `test_connection()` method for diagnostics + - Add timeout handling (30 seconds) + - Add error handling for connection, timeout, and invalid responses + - _Requirements: 1.1, 1.2, 1.3, 1.4, 1.5, 4.1, 4.2, 4.3, 5.2_ + +- [x] 2. Create configuration module for crawler + - Create `news_crawler/config.py` with Config class + - Load environment variables (OLLAMA_BASE_URL, OLLAMA_MODEL, OLLAMA_ENABLED, OLLAMA_API_KEY, OLLAMA_TIMEOUT) + - Add validation for required configuration + - Add default values for optional configuration + - _Requirements: 2.1, 2.2, 2.3, 2.4_ + +- [x] 3. Integrate Ollama client into crawler service + - Import OllamaClient in `news_crawler/crawler_service.py` + - Initialize Ollama client at module level using Config + - Modify `crawl_rss_feed()` to call summarization after content extraction + - Add conditional logic to skip summarization if OLLAMA_ENABLED is false + - Add error handling to continue processing if summarization fails + - Add logging for summarization start, success, and failure + - Add rate limiting delay after summarization + - _Requirements: 1.1, 1.2, 1.3, 1.4, 1.5, 2.3, 2.4, 4.1, 4.5, 5.1, 5.3, 6.1, 6.2, 6.3_ + +- [x] 4. 
Update database schema and storage
  - Modify article document structure in `crawl_rss_feed()` to include:
    - `summary` field (AI-generated summary)
    - `summary_word_count` field
    - `summarized_at` field (timestamp)
  - Update MongoDB upsert logic to handle new fields
  - Add check to skip re-summarization if article already has a summary
  - _Requirements: 3.1, 3.2, 3.3, 3.4, 8.4_

- [x] 5. Update backend API to return summaries
  - Modify `backend/routes/news_routes.py` GET /api/news endpoint
  - Add `summary`, `summary_word_count`, `summarized_at` fields to response
  - Add `has_summary` boolean field to indicate if AI summarization was performed
  - Modify GET /api/news/<article_url> endpoint to include summary fields
  - Add fallback to content preview if no summary exists
  - _Requirements: 7.1, 7.2, 7.3, 7.4, 7.5, 8.1, 8.2, 8.3_

- [x] 6. Update database schema documentation
  - Update `backend/DATABASE_SCHEMA.md` with new summary fields
  - Add example document showing summary fields
  - Document the summarization workflow
  - _Requirements: 3.1, 3.2, 3.3_

- [x] 7. Add environment variable configuration
  - Update `backend/env.template` with Ollama configuration
  - Add comments explaining each Ollama setting
  - Document default values
  - _Requirements: 2.1, 2.2_

- [x] 8. Create test script for Ollama integration
  - Create `news_crawler/test_ollama.py` to test the Ollama connection
  - Test summarization with a sample article
  - Test error handling (timeout, connection failure)
  - Display configuration and connection status
  - _Requirements: 1.1, 1.2, 1.3, 1.4, 2.1, 2.2, 4.1, 4.2_

- [x] 9. Update crawler statistics and logging
  - Add summarization statistics to the final report in `crawl_all_feeds()`
  - Track total articles summarized vs failed
  - Log average summarization time
  - Display progress indicators during summarization
  - _Requirements: 5.4, 6.1, 6.2, 6.3, 6.4, 6.5_

- [x] 10. Create documentation for AI summarization
  - Create `news_crawler/AI_SUMMARIZATION.md` explaining the feature
  - Document configuration options
  - Provide a troubleshooting guide
  - Add usage examples
  - _Requirements: 2.1, 2.2, 2.3, 2.4, 6.1, 6.2, 6.3_

- [x] 11. Update main README with AI summarization info
  - Add a section about the AI summarization feature
  - Document Ollama setup requirements
  - Add configuration examples
  - Update API endpoint documentation
  - _Requirements: 2.1, 2.2, 7.1, 7.2_

- [x] 12.
Test end-to-end workflow + - Run crawler with Ollama enabled + - Verify articles are summarized correctly + - Check database contains all expected fields + - Test API endpoints return summaries + - Verify error handling when Ollama is disabled/unavailable + - _Requirements: 1.1, 1.2, 1.3, 1.4, 1.5, 3.1, 3.2, 3.3, 3.4, 4.1, 4.2, 4.3, 4.4, 4.5, 7.1, 7.2, 7.3, 7.4, 7.5, 8.1, 8.2, 8.3, 8.4, 8.5_ diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md new file mode 100644 index 0000000..cca7583 --- /dev/null +++ b/ARCHITECTURE.md @@ -0,0 +1,209 @@ +# Munich News Daily - Architecture + +## System Overview + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Users / Browsers │ +└────────────────────────┬────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────┐ +│ Frontend (Port 3000) │ +│ Node.js + Express + Vanilla JS │ +│ - Subscription form │ +│ - News display │ +│ - RSS feed management UI (future) │ +└────────────────────────┬────────────────────────────────────┘ + │ HTTP/REST + ▼ +┌─────────────────────────────────────────────────────────────┐ +│ Backend API (Port 5001) │ +│ Flask + Python │ +│ │ +│ ┌──────────────────────────────────────────────────────┐ │ +│ │ Routes (Blueprints) │ │ +│ │ - subscription_routes.py (subscribe/unsubscribe) │ │ +│ │ - news_routes.py (get news, stats) │ │ +│ │ - rss_routes.py (manage RSS feeds) │ │ +│ │ - ollama_routes.py (AI features) │ │ +│ └──────────────────────────────────────────────────────┘ │ +│ │ +│ ┌──────────────────────────────────────────────────────┐ │ +│ │ Services (Business Logic) │ │ +│ │ - news_service.py (fetch & save articles) │ │ +│ │ - email_service.py (send newsletters) │ │ +│ │ - ollama_service.py (AI integration) │ │ +│ └──────────────────────────────────────────────────────┘ │ +│ │ +│ ┌──────────────────────────────────────────────────────┐ │ +│ │ Core │ │ +│ │ - config.py (configuration) │ │ +│ │ - database.py (DB connection) │ │ +│ └──────────────────────────────────────────────────────┘ │ +└────────────────────────┬────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────┐ +│ MongoDB (Port 27017) │ +│ │ +│ Collections: │ +│ - articles (news articles with full content) │ +│ - subscribers (email subscribers) │ +│ - rss_feeds (RSS feed sources) │ +└─────────────────────────┬───────────────────────────────────┘ + │ + │ Read/Write + │ +┌─────────────────────────┴───────────────────────────────────┐ +│ News Crawler Microservice │ +│ (Standalone) │ +│ │ +│ - Fetches RSS feeds from MongoDB │ +│ - Crawls full article content │ +│ - Extracts text, metadata, word count │ +│ - Stores back to MongoDB │ +│ - Can run independently or scheduled │ +└──────────────────────────────────────────────────────────────┘ + + │ + │ (Optional) + ▼ +┌─────────────────────────────────────────────────────────────┐ +│ Ollama AI Server (Port 11434) │ +│ (Optional, External) │ +│ │ +│ - Article summarization │ +│ - Content analysis │ +│ - AI-powered features │ +└──────────────────────────────────────────────────────────────┘ +``` + +## Component Details + +### Frontend (Port 3000) +- **Technology**: Node.js, Express, Vanilla JavaScript +- **Responsibilities**: + - User interface + - Subscription management + - News display + - API proxy to backend +- **Communication**: HTTP REST to Backend + +### Backend API (Port 5001) +- **Technology**: Python, Flask +- **Architecture**: Modular with Blueprints +- **Responsibilities**: + - REST API endpoints + - 
Business logic + - Database operations + - Email sending + - AI integration +- **Communication**: + - HTTP REST from Frontend + - MongoDB driver to Database + - HTTP to Ollama (optional) + +### MongoDB (Port 27017) +- **Technology**: MongoDB 7.0 +- **Responsibilities**: + - Persistent data storage + - Articles, subscribers, RSS feeds +- **Communication**: MongoDB protocol + +### News Crawler (Standalone) +- **Technology**: Python, BeautifulSoup +- **Architecture**: Microservice (can run independently) +- **Responsibilities**: + - Fetch RSS feeds + - Crawl article content + - Extract and clean text + - Store in database +- **Communication**: MongoDB driver to Database +- **Execution**: + - Manual: `python crawler_service.py` + - Scheduled: Cron, systemd, Docker + - On-demand: Via backend API (future) + +### Ollama AI Server (Optional, External) +- **Technology**: Ollama +- **Responsibilities**: + - AI model inference + - Text summarization + - Content analysis +- **Communication**: HTTP REST API + +## Data Flow + +### 1. News Aggregation Flow +``` +RSS Feeds → Backend (news_service) → MongoDB (articles) +``` + +### 2. Content Crawling Flow +``` +MongoDB (rss_feeds) → Crawler → Article URLs → +Web Scraping → MongoDB (articles with full_content) +``` + +### 3. Subscription Flow +``` +User → Frontend → Backend (subscription_routes) → +MongoDB (subscribers) +``` + +### 4. Newsletter Flow (Future) +``` +Scheduler → Backend (email_service) → +MongoDB (articles + subscribers) → SMTP → Users +``` + +### 5. AI Processing Flow (Optional) +``` +MongoDB (articles) → Backend (ollama_service) → +Ollama Server → AI Summary → MongoDB (articles) +``` + +## Deployment Options + +### Development +- All services run locally +- MongoDB via Docker Compose +- Manual crawler execution + +### Production +- Backend: Cloud VM, Container, or PaaS +- Frontend: Static hosting or same server +- MongoDB: MongoDB Atlas or self-hosted +- Crawler: Scheduled job (cron, systemd timer) +- Ollama: Separate GPU server (optional) + +## Scalability Considerations + +### Current Architecture +- Monolithic backend (single Flask instance) +- Standalone crawler (can run multiple instances) +- Shared MongoDB + +### Future Improvements +- Load balancer for backend +- Message queue for crawler jobs (Celery + Redis) +- Caching layer (Redis) +- CDN for frontend +- Read replicas for MongoDB + +## Security + +- CORS enabled for frontend-backend communication +- MongoDB authentication (production) +- Environment variables for secrets +- Input validation on all endpoints +- Rate limiting (future) + +## Monitoring (Future) + +- Application logs +- MongoDB metrics +- Crawler success/failure tracking +- API response times +- Error tracking (Sentry) diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..48315e2 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,136 @@ +# Changelog + +## [Unreleased] - 2024-11-10 + +### Added - Major Refactoring + +#### Backend Modularization +- ✅ Restructured backend into modular architecture +- ✅ Created separate route blueprints: + - `subscription_routes.py` - User subscriptions + - `news_routes.py` - News fetching and stats + - `rss_routes.py` - RSS feed management (CRUD) + - `ollama_routes.py` - AI integration +- ✅ Created service layer: + - `news_service.py` - News fetching logic + - `email_service.py` - Newsletter sending + - `ollama_service.py` - AI communication +- ✅ Centralized configuration in `config.py` +- ✅ Separated database logic in `database.py` +- ✅ Reduced main `app.py` from 700+ 
lines to 27 lines

#### RSS Feed Management
- ✅ Dynamic RSS feed management via API
- ✅ Add/remove/list/toggle RSS feeds without code changes
- ✅ Unique index on RSS feed URLs (prevents duplicates)
- ✅ Default feeds auto-initialized on first run
- ✅ Created `fix_duplicates.py` utility script

#### News Crawler Microservice
- ✅ Created standalone `news_crawler/` microservice
- ✅ Web scraping with BeautifulSoup
- ✅ Smart content extraction using multiple selectors
- ✅ Full article content storage in MongoDB
- ✅ Word count calculation
- ✅ Duplicate prevention (skips already-crawled articles)
- ✅ Rate limiting (1 second between requests)
- ✅ Can run independently or scheduled
- ✅ Docker support for crawler
- ✅ Comprehensive documentation

#### API Endpoints
New endpoints added:
- `GET /api/rss-feeds` - List all RSS feeds
- `POST /api/rss-feeds` - Add new RSS feed
- `DELETE /api/rss-feeds/<feed_id>` - Remove RSS feed
- `PATCH /api/rss-feeds/<feed_id>/toggle` - Toggle feed active status

#### Documentation
- ✅ Created `ARCHITECTURE.md` - System architecture overview
- ✅ Created `backend/STRUCTURE.md` - Backend structure guide
- ✅ Created `news_crawler/README.md` - Crawler documentation
- ✅ Created `news_crawler/QUICKSTART.md` - Quick start guide
- ✅ Created `news_crawler/test_crawler.py` - Test suite
- ✅ Updated main `README.md` with new features
- ✅ Updated `DATABASE_SCHEMA.md` with new fields

#### Configuration
- ✅ Added `FLASK_PORT` environment variable
- ✅ Fixed `OLLAMA_MODEL` typo in `.env`
- ✅ Port 5001 default to avoid macOS AirPlay conflict

### Changed
- Backend structure: Monolithic → Modular
- RSS feeds: Hardcoded → Database-driven
- Article storage: Summary only → Full content support
- Configuration: Scattered → Centralized

### Technical Improvements
- Separation of concerns (routes vs services)
- Better testability
- Easier maintenance
- Scalable architecture
- Independent microservices
- Proper error handling
- Comprehensive logging

### Database Schema Updates
Articles collection now includes:
- `full_content` - Full article text
- `word_count` - Number of words
- `crawled_at` - When content was crawled

RSS Feeds collection added:
- `name` - Feed name
- `url` - Feed URL (unique)
- `active` - Active status
- `created_at` - Creation timestamp

### Files Added
```
backend/
├── config.py
├── database.py
├── fix_duplicates.py
├── STRUCTURE.md
├── routes/
│   ├── __init__.py
│   ├── subscription_routes.py
│   ├── news_routes.py
│   ├── rss_routes.py
│   └── ollama_routes.py
└── services/
    ├── __init__.py
    ├── news_service.py
    ├── email_service.py
    └── ollama_service.py

news_crawler/
├── crawler_service.py
├── test_crawler.py
├── requirements.txt
├── .gitignore
├── Dockerfile
├── docker-compose.yml
├── README.md
└── QUICKSTART.md

Root:
├── ARCHITECTURE.md
└── CHANGELOG.md
```

### Files Removed
- Old monolithic `backend/app.py` (replaced with modular version)

### Next Steps (Future Enhancements)
- [ ] Frontend UI for RSS feed management
- [ ] Automatic article summarization with Ollama
- [ ] Scheduled newsletter sending
- [ ] Article categorization and tagging
- [ ] Search functionality
- [ ] User preferences (categories, frequency)
- [ ] Analytics dashboard
- [ ] API rate limiting
- [ ] Caching layer (Redis)
- [ ] Message queue for crawler (Celery)

diff --git a/QUICK_REFERENCE.md b/QUICK_REFERENCE.md
new file mode 100644
index 0000000..5ad15b4
--- /dev/null
+++ b/QUICK_REFERENCE.md
@@ -0,0 +1,206 @@
# Quick Reference Guide

## Starting the Application

### 1. Start MongoDB
```bash
docker-compose up -d
```

### 2. Start Backend (Port 5001)
```bash
cd backend
source venv/bin/activate  # or: venv\Scripts\activate on Windows
python app.py
```

### 3. Start Frontend (Port 3000)
```bash
cd frontend
npm start
```

### 4. Run Crawler (Optional)
```bash
cd news_crawler
pip install -r requirements.txt
python crawler_service.py 10
```

## Common Commands

### RSS Feed Management

**List all feeds:**
```bash
curl http://localhost:5001/api/rss-feeds
```

**Add a feed:**
```bash
curl -X POST http://localhost:5001/api/rss-feeds \
  -H "Content-Type: application/json" \
  -d '{"name": "Feed Name", "url": "https://example.com/rss"}'
```

**Remove a feed:**
```bash
curl -X DELETE http://localhost:5001/api/rss-feeds/<feed_id>
```

**Toggle feed status:**
```bash
curl -X PATCH http://localhost:5001/api/rss-feeds/<feed_id>/toggle
```

### News & Subscriptions

**Get latest news:**
```bash
curl http://localhost:5001/api/news
```

**Subscribe:**
```bash
curl -X POST http://localhost:5001/api/subscribe \
  -H "Content-Type: application/json" \
  -d '{"email": "user@example.com"}'
```

**Get stats:**
```bash
curl http://localhost:5001/api/stats
```

### Ollama (AI)

**Test connection:**
```bash
curl http://localhost:5001/api/ollama/ping
```

**List models:**
```bash
curl http://localhost:5001/api/ollama/models
```

### Database

**Connect to MongoDB:**
```bash
mongosh
use munich_news
```

**Check articles:**
```javascript
db.articles.find().limit(5)
db.articles.countDocuments()
db.articles.countDocuments({full_content: {$exists: true}})
```

**Check subscribers:**
```javascript
db.subscribers.find()
db.subscribers.countDocuments({status: "active"})
```

**Check RSS feeds:**
```javascript
db.rss_feeds.find()
```

## File Locations

### Configuration
- Backend: `backend/.env`
- Frontend: `frontend/package.json`
- Crawler: Uses the backend's `.env` or its own `.env`

### Logs
- Backend: Terminal output
- Frontend: Terminal output
- Crawler: Terminal output

### Database
- MongoDB data: Docker volume `mongodb_data`
- Database name: `munich_news`

## Ports

| Service | Port | URL |
|---------|------|-----|
| Frontend | 3000 | http://localhost:3000 |
| Backend | 5001 | http://localhost:5001 |
| MongoDB | 27017 | mongodb://localhost:27017 |
| Ollama | 11434 | http://localhost:11434 |

## Troubleshooting

### Backend won't start
- Check if port 5001 is available
- Verify MongoDB is running
- Check that the `.env` file exists

### Frontend can't connect
- Verify the backend is running on port 5001
- Check CORS settings
- Check API_URL in the frontend

### Crawler fails
- Install dependencies: `pip install -r requirements.txt`
- Check the MongoDB connection
- Verify RSS feeds exist in the database

### MongoDB connection error
- Start MongoDB: `docker-compose up -d`
- Check the connection string in `.env`
- Verify port 27017 is not blocked

### Port 5000 conflict (macOS)
- AirPlay uses port 5000
- Use port 5001 instead (set in `.env`)
- Or disable AirPlay Receiver in System Preferences

## Project Structure

```
munich-news/
├── backend/            # Main API (Flask)
├── frontend/           # Web UI (Express + JS)
├── news_crawler/       # Crawler microservice
├── .env                # Environment variables
└── docker-compose.yml  # MongoDB setup
```

## Environment Variables

### Backend (.env)
```env
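# Example values - see backend/env.template for the full annotated list.
# For Gmail, EMAIL_PASSWORD must be an App Password, not your account password.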
+MONGODB_URI=mongodb://localhost:27017/ +FLASK_PORT=5001 +SMTP_SERVER=smtp.gmail.com +SMTP_PORT=587 +EMAIL_USER=your-email@gmail.com +EMAIL_PASSWORD=your-app-password +OLLAMA_BASE_URL=http://127.0.0.1:11434 +OLLAMA_MODEL=phi3:latest +OLLAMA_ENABLED=true +``` + +## Development Workflow + +1. **Add RSS Feed** → Backend API +2. **Run Crawler** → Fetches full content +3. **View News** → Frontend displays articles +4. **Users Subscribe** → Via frontend form +5. **Send Newsletter** → Manual or scheduled + +## Useful Links + +- Frontend: http://localhost:3000 +- Backend API: http://localhost:5001 +- MongoDB: mongodb://localhost:27017 +- Architecture: See `ARCHITECTURE.md` +- Backend Structure: See `backend/STRUCTURE.md` +- Crawler Guide: See `news_crawler/README.md` diff --git a/README.md b/README.md new file mode 100644 index 0000000..a7bf5bd --- /dev/null +++ b/README.md @@ -0,0 +1,327 @@ +# Munich News Daily 📰 + +A TLDR/Morning Brew-style news email platform specifically for Munich. Get the latest Munich news delivered to your inbox every morning. + +## Features + +- 📧 Email newsletter subscription system +- 📰 Aggregated news from multiple Munich news sources +- 🎨 Beautiful, modern web interface +- 📊 Subscription statistics +- 🔄 Real-time news updates + +## Tech Stack + +- **Backend**: Python (Flask) - Modular architecture with blueprints +- **Frontend**: Node.js (Express + Vanilla JavaScript) +- **Database**: MongoDB +- **News Crawler**: Standalone Python microservice +- **News Sources**: RSS feeds from major Munich news outlets + +## Setup Instructions + +### Prerequisites + +- Python 3.8+ +- Node.js 14+ +- npm or yarn +- Docker and Docker Compose (recommended for MongoDB) OR MongoDB (local installation or MongoDB Atlas account) + +### Backend Setup + +1. Navigate to the backend directory: +```bash +cd backend +``` + +2. Create a virtual environment (recommended): +```bash +python3 -m venv venv +source venv/bin/activate # On Windows: venv\Scripts\activate +``` + +3. Install dependencies: +```bash +pip install -r requirements.txt +``` + +4. Set up MongoDB using Docker Compose (recommended): + ```bash + # From the project root directory + docker-compose up -d + ``` + + This will start MongoDB in a Docker container. The database will be available at `mongodb://localhost:27017/` + + **Useful Docker commands:** + ```bash + # Start MongoDB + docker-compose up -d + + # Stop MongoDB + docker-compose down + + # View MongoDB logs + docker-compose logs -f mongodb + + # Restart MongoDB + docker-compose restart mongodb + + # Remove MongoDB and all data (WARNING: deletes all data) + docker-compose down -v + ``` + + **Alternative options:** + - **Local MongoDB**: Install MongoDB locally and make sure it's running + - **MongoDB Atlas** (Cloud): Create a free account at [mongodb.com/cloud/atlas](https://www.mongodb.com/cloud/atlas) and get your connection string + +5. 
Create a `.env` file in the backend directory: + ```bash + # Copy the template file + cp env.template .env + ``` + + Then edit `.env` with your configuration: + ```env + # MongoDB connection (default: mongodb://localhost:27017/) + # For Docker Compose (no authentication): + MONGODB_URI=mongodb://localhost:27017/ + # For Docker Compose with authentication (if you modify docker-compose.yml): + # MONGODB_URI=mongodb://admin:password@localhost:27017/ + # Or for MongoDB Atlas: + # MONGODB_URI=mongodb+srv://username:password@cluster.mongodb.net/ + + # Email configuration (optional for testing) + SMTP_SERVER=smtp.gmail.com + SMTP_PORT=587 + EMAIL_USER=your-email@gmail.com + EMAIL_PASSWORD=your-app-password + + # Ollama Configuration (for AI-powered features) + # Remote Ollama server URL + OLLAMA_BASE_URL=http://your-remote-server-ip:11434 + # Optional: API key if your Ollama server requires authentication + # OLLAMA_API_KEY=your-api-key-here + # Model name to use (e.g., llama2, mistral, codellama, llama3) + OLLAMA_MODEL=llama2 + # Enable/disable Ollama features (true/false) + OLLAMA_ENABLED=false + ``` + +**Notes:** +- For Gmail, you'll need to use an [App Password](https://support.google.com/accounts/answer/185833) instead of your regular password. +- For Ollama, replace `your-remote-server-ip` with your actual server IP or domain. Set `OLLAMA_ENABLED=true` to enable AI features. + +6. Run the backend server: +```bash +python app.py +``` + +The backend will run on `http://localhost:5001` (port 5001 to avoid conflict with AirPlay on macOS) + +### Frontend Setup + +1. Navigate to the frontend directory: +```bash +cd frontend +``` + +2. Install dependencies: +```bash +npm install +``` + +3. Run the frontend server: +```bash +npm start +``` + +The frontend will run on `http://localhost:3000` + +## Usage + +1. Open your browser and go to `http://localhost:3000` +2. Enter your email address to subscribe to the newsletter +3. View the latest Munich news on the homepage +4. The backend will aggregate news from multiple Munich news sources + +## Sending Newsletters + +To send newsletters to all subscribers, you can add a scheduled task or manually trigger the `send_newsletter()` function in `app.py`. 
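For a quick one-off manual send, a minimal sketch (this assumes `send_newsletter()` takes no arguments, as in the cron example below):

```python
# Run from the backend directory with the virtual environment active.
from app import send_newsletter

send_newsletter()  # emails the latest articles to all active subscribers
```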
For production, consider using:

- **Cron jobs** (Linux/Mac)
- **Task Scheduler** (Windows)
- **Celery** with Redis/RabbitMQ for more advanced scheduling
- **Cloud functions** (AWS Lambda, Google Cloud Functions)

Example cron job to send daily at 8 AM:
```
0 8 * * * cd /path/to/munich-news/backend && python -c "from app import send_newsletter; send_newsletter()"
```

## Project Structure

```
munich-news/
├── backend/                     # Main API server
│   ├── app.py                   # Flask application entry point
│   ├── config.py                # Configuration management
│   ├── database.py              # Database connection
│   ├── routes/                  # API endpoints (blueprints)
│   ├── services/                # Business logic
│   ├── templates/               # Email templates
│   └── requirements.txt         # Python dependencies
├── news_crawler/                # Crawler microservice
│   ├── crawler_service.py       # Standalone crawler
│   ├── ollama_client.py         # AI summarization client
│   ├── requirements.txt         # Crawler dependencies
│   └── README.md                # Crawler documentation
├── news_sender/                 # Newsletter sender microservice
│   ├── sender_service.py        # Standalone email sender
│   ├── newsletter_template.html # Email template
│   ├── requirements.txt         # Sender dependencies
│   └── README.md                # Sender documentation
├── frontend/                    # Web interface
│   ├── server.js                # Express server
│   ├── package.json             # Node.js dependencies
│   └── public/
│       ├── index.html           # Main page
│       ├── styles.css           # Styling
│       └── app.js               # Frontend JavaScript
├── docker-compose.yml           # Docker Compose for MongoDB (development)
├── docker-compose.prod.yml      # Docker Compose with authentication (production)
└── README.md
```

## API Endpoints

### `POST /api/subscribe`
Subscribe to the newsletter
- Body: `{ "email": "user@example.com" }`

### `POST /api/unsubscribe`
Unsubscribe from the newsletter
- Body: `{ "email": "user@example.com" }`

### `GET /api/news`
Get the latest Munich news articles

### `GET /api/stats`
Get subscription statistics
- Returns: `{ "subscribers": number, "articles": number, "crawled_articles": number }`

### `GET /api/news/<article_url>`
Get full article content by URL
- Returns: Full article with content, author, word count, etc.

### `GET /api/ollama/ping`
Test the connection to the Ollama server
- Returns: Connection status and Ollama configuration
- Response examples:
  - Success: `{ "status": "success", "message": "...", "response": "...", "ollama_config": {...} }`
  - Disabled: `{ "status": "disabled", "message": "...", "ollama_config": {...} }`
  - Error: `{ "status": "error", "message": "...", "error_details": "...", "troubleshooting": {...}, "ollama_config": {...} }`

### `GET /api/ollama/models`
List available models on the Ollama server
- Returns: List of available models and current configuration
- Response: `{ "status": "success", "models": [...], "current_model": "...", "ollama_config": {...} }`

### `GET /api/rss-feeds`
Get all RSS feeds
- Returns: `{ "feeds": [...] }`

### `POST /api/rss-feeds`
Add a new RSS feed
- Body: `{ "name": "Feed Name", "url": "https://example.com/rss" }`
- Returns: `{ "message": "...", "id": "..." }`

### `DELETE /api/rss-feeds/<feed_id>`
Remove an RSS feed
- Returns: `{ "message": "..." }`

### `PATCH /api/rss-feeds/<feed_id>/toggle`
Toggle RSS feed active status
- Returns: `{ "message": "...", "active": boolean }`
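For a quick smoke test of these endpoints from Python (a sketch — the base URL assumes the default backend port from the setup above, and `requests` must be installed):

```python
import requests

BASE = "http://localhost:5001"

# Subscribe a test address, then fetch the latest articles and stats.
print(requests.post(f"{BASE}/api/subscribe",
                    json={"email": "user@example.com"}).json())
print(requests.get(f"{BASE}/api/news").json())
print(requests.get(f"{BASE}/api/stats").json())
```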
## Database Schema

### Articles Collection
```javascript
{
  _id: ObjectId,
  title: String,
  link: String (unique),
  summary: String,
  source: String,
  published_at: String,
  created_at: DateTime
}
```

### Subscribers Collection
```javascript
{
  _id: ObjectId,
  email: String (unique, lowercase),
  subscribed_at: DateTime,
  status: String ('active' | 'inactive')
}
```

**Indexes:**
- `articles.link` - Unique index to prevent duplicate articles
- `articles.created_at` - For efficient sorting
- `subscribers.email` - Unique index for email lookups
- `subscribers.subscribed_at` - For analytics

## News Crawler Microservice

The project includes a standalone crawler microservice that fetches full article content from RSS feeds.

### Running the Crawler

```bash
cd news_crawler

# Install dependencies
pip install -r requirements.txt

# Run crawler
python crawler_service.py 10
```

See `news_crawler/README.md` for detailed documentation.

### What It Does

- Crawls full article content from RSS feed links
- Extracts text, word count, and metadata
- Stores in MongoDB for AI processing
- Skips already-crawled articles
- Rate-limited (1 second between requests)

## Customization

### Adding News Sources

Use the API to add RSS feeds dynamically:

```bash
curl -X POST http://localhost:5001/api/rss-feeds \
  -H "Content-Type: application/json" \
  -d '{"name": "Your Source Name", "url": "https://example.com/rss"}'
```

### Styling

Modify `frontend/public/styles.css` to customize the appearance.

## License

MIT

## Contributing

Feel free to submit issues and enhancement requests!

diff --git a/TEST_INSTRUCTIONS.md b/TEST_INSTRUCTIONS.md
new file mode 100644
index 0000000..5e19df5
--- /dev/null
+++ b/TEST_INSTRUCTIONS.md
@@ -0,0 +1,132 @@
# Testing RSS Feed URL Extraction

## Quick Test (Recommended)

Run this from the project root with the backend virtual environment activated:

```bash
# 1. Activate backend virtual environment
cd backend
source venv/bin/activate  # On Windows: venv\Scripts\activate

# 2. Go back to project root
cd ..

# 3. Run the test
python test_feeds_quick.py
```

This will:
- ✓ Check what RSS feeds are in your database
- ✓ Fetch each feed
- ✓ Test URL extraction on the first 3 articles
- ✓ Show what fields are available
- ✓ Verify summary and date extraction

## Expected Output

```
================================================================================
RSS Feed Test - Checking Database Feeds
================================================================================

✓ Found 3 feed(s) in database

================================================================================
Feed: Süddeutsche Zeitung München
URL: https://www.sueddeutsche.de/muenchen/rss
Active: True
================================================================================
Fetching RSS feed...
✓ Found 20 entries

--- Entry 1 ---
Title: New U-Bahn Line Opens in Munich
✓ URL extracted: https://www.sueddeutsche.de/muenchen/article-123
✓ Summary: The new U-Bahn line connecting the city center...
✓ Date: Mon, 10 Nov 2024 10:00:00 +0100

--- Entry 2 ---
Title: Munich Weather Update
✓ URL extracted: https://www.sueddeutsche.de/muenchen/article-124
✓ Summary: Weather forecast for the week...
✓ Date: Mon, 10 Nov 2024 09:30:00 +0100

...
+``` + +## If No Feeds Found + +Add a feed first: + +```bash +curl -X POST http://localhost:5001/api/rss-feeds \ + -H "Content-Type: application/json" \ + -d '{"name": "Süddeutsche Politik", "url": "https://rss.sueddeutsche.de/rss/Politik"}' +``` + +## Testing News Crawler + +Once feeds are verified, test the crawler: + +```bash +# 1. Install crawler dependencies +cd news_crawler +pip install -r requirements.txt + +# 2. Run the test +python test_rss_feeds.py + +# 3. Or run the actual crawler +python crawler_service.py 5 +``` + +## Troubleshooting + +### "No module named 'pymongo'" +- Activate the backend virtual environment first +- Or install dependencies: `pip install -r backend/requirements.txt` + +### "No RSS feeds in database" +- Make sure backend is running +- Add feeds via API (see above) +- Or check if MongoDB is running: `docker-compose ps` + +### "Could not extract URL" +- The test will show available fields +- Check if the feed uses `guid`, `id`, or `links` instead of `link` +- Our utility should handle most cases automatically + +### "No entries found" +- The RSS feed URL might be invalid +- Try opening the URL in a browser +- Check if it returns valid XML + +## Manual Database Check + +Using mongosh: + +```bash +mongosh +use munich_news +db.rss_feeds.find() +db.articles.find().limit(3) +``` + +## What to Look For + +✅ **Good signs:** +- URLs are extracted successfully +- URLs start with `http://` or `https://` +- Summaries are present +- Dates are extracted + +⚠️ **Warning signs:** +- "Could not extract URL" messages +- Empty summaries (not critical) +- Missing dates (not critical) + +❌ **Problems:** +- No entries found in feed +- All URL extractions fail +- Feed parsing errors diff --git a/backend/DATABASE_SCHEMA.md b/backend/DATABASE_SCHEMA.md new file mode 100644 index 0000000..6af803c --- /dev/null +++ b/backend/DATABASE_SCHEMA.md @@ -0,0 +1,143 @@ +# MongoDB Database Schema + +This document describes the MongoDB collections and their structure for Munich News Daily. + +## Collections + +### 1. Articles Collection (`articles`) + +Stores all news articles aggregated from Munich news sources. + +**Document Structure:** +```javascript +{ + _id: ObjectId, // Auto-generated MongoDB ID + title: String, // Article title (required) + author: String, // Article author (optional, extracted during crawl) + link: String, // Article URL (required, unique) + content: String, // Full article content (no length limit) + summary: String, // AI-generated English summary (≤150 words) + word_count: Number, // Word count of full content + summary_word_count: Number, // Word count of AI summary + source: String, // News source name (e.g., "Süddeutsche Zeitung München") + published_at: String, // Original publication date from RSS feed or crawled + crawled_at: DateTime, // When article content was crawled (UTC) + summarized_at: DateTime, // When AI summary was generated (UTC) + created_at: DateTime // When article was added to database (UTC) +} +``` + +**Indexes:** +- `link` - Unique index to prevent duplicate articles +- `created_at` - Index for efficient sorting by date + +**Example Document:** +```javascript +{ + _id: ObjectId("507f1f77bcf86cd799439011"), + title: "New U-Bahn Line Opens in Munich", + author: "Max Mustermann", + link: "https://www.sueddeutsche.de/muenchen/ubahn-1.123456", + content: "The new U-Bahn line connecting the city center with the airport opened today. Mayor Dieter Reiter attended the opening ceremony... 
[full article text continues]", + summary: "Munich's new U-Bahn line connecting the city center to the airport opened today with Mayor Dieter Reiter in attendance. The line features 10 stations and runs every 10 minutes during peak hours, significantly reducing travel time. Construction took five years and cost approximately 2 billion euros.", + word_count: 1250, + summary_word_count: 48, + source: "Süddeutsche Zeitung München", + published_at: "Mon, 15 Jan 2024 10:00:00 +0100", + crawled_at: ISODate("2024-01-15T09:30:00.000Z"), + summarized_at: ISODate("2024-01-15T09:30:15.000Z"), + created_at: ISODate("2024-01-15T09:00:00.000Z") +} +``` + +### 2. Subscribers Collection (`subscribers`) + +Stores all newsletter subscribers. + +**Document Structure:** +```javascript +{ + _id: ObjectId, // Auto-generated MongoDB ID + email: String, // Subscriber email (required, unique, lowercase) + subscribed_at: DateTime, // When user subscribed (UTC) + status: String // Subscription status: 'active' or 'inactive' +} +``` + +**Indexes:** +- `email` - Unique index for email lookups and preventing duplicates +- `subscribed_at` - Index for analytics and sorting + +**Example Document:** +```javascript +{ + _id: ObjectId("507f1f77bcf86cd799439012"), + email: "user@example.com", + subscribed_at: ISODate("2024-01-15T08:30:00.000Z"), + status: "active" +} +``` + +## Design Decisions + +### Why MongoDB? + +1. **Flexibility**: Easy to add new fields without schema migrations +2. **Scalability**: Handles large volumes of articles and subscribers efficiently +3. **Performance**: Indexes on frequently queried fields (link, email, created_at) +4. **Document Model**: Natural fit for news articles and subscriber data + +### Schema Choices + +1. **Unique Link Index**: Prevents duplicate articles from being stored, even if fetched multiple times +2. **Status Field**: Soft delete for subscribers (set to 'inactive' instead of deleting) - allows for analytics and easy re-subscription +3. **UTC Timestamps**: All dates stored in UTC for consistency across timezones +4. **Lowercase Emails**: Emails stored in lowercase to prevent case-sensitivity issues + +### Future Enhancements + +Potential fields to add in the future: + +**Articles:** +- `category`: String (e.g., "politics", "sports", "culture") +- `tags`: Array of Strings +- `image_url`: String +- `sent_in_newsletter`: Boolean (track if article was sent) +- `sent_at`: DateTime (when article was included in newsletter) + +**Subscribers:** +- `preferences`: Object (newsletter frequency, categories, etc.) +- `last_sent_at`: DateTime (last newsletter sent date) +- `unsubscribed_at`: DateTime (when user unsubscribed) +- `verification_token`: String (for email verification) + + + +## AI Summarization Workflow + +When the crawler processes an article: + +1. **Extract Content**: Full article text is extracted from the webpage +2. **Summarize with Ollama**: If `OLLAMA_ENABLED=true`, the content is sent to Ollama for summarization +3. **Store Both**: Both the original `content` and AI-generated `summary` are stored +4. 
**Fallback**: If Ollama is unavailable or fails, only the original content is stored + +### Summary Field Details + +- **Language**: Always in English, regardless of source article language +- **Length**: Maximum 150 words +- **Format**: Plain text, concise and clear +- **Purpose**: Quick preview for newsletters and frontend display + +### Querying Articles + +```javascript +// Get articles with AI summaries +db.articles.find({ summary: { $exists: true, $ne: null } }) + +// Get articles without summaries +db.articles.find({ summary: { $exists: false } }) + +// Count summarized articles +db.articles.countDocuments({ summary: { $exists: true, $ne: null } }) +``` diff --git a/backend/STRUCTURE.md b/backend/STRUCTURE.md new file mode 100644 index 0000000..17dbd11 --- /dev/null +++ b/backend/STRUCTURE.md @@ -0,0 +1,98 @@ +# Backend Structure + +The backend has been modularized for better maintainability and scalability. + +## Directory Structure + +``` +backend/ +├── app.py # Main Flask application entry point +├── config.py # Configuration management +├── database.py # Database connection and initialization +├── requirements.txt # Python dependencies +├── .env # Environment variables +│ +├── routes/ # API route handlers (blueprints) +│ ├── __init__.py +│ ├── subscription_routes.py # /api/subscribe, /api/unsubscribe +│ ├── news_routes.py # /api/news, /api/stats +│ ├── rss_routes.py # /api/rss-feeds (CRUD operations) +│ └── ollama_routes.py # /api/ollama/* (AI features) +│ +└── services/ # Business logic layer + ├── __init__.py + ├── news_service.py # News fetching and storage logic + ├── email_service.py # Newsletter email sending + └── ollama_service.py # Ollama AI integration +``` + +## Key Components + +### app.py +- Main Flask application +- Registers all blueprints +- Minimal code, just wiring things together + +### config.py +- Centralized configuration +- Loads environment variables +- Single source of truth for all settings + +### database.py +- MongoDB connection setup +- Collection definitions +- Database initialization with indexes + +### routes/ +Each route file is a Flask Blueprint handling specific API endpoints: +- **subscription_routes.py**: User subscription management +- **news_routes.py**: News fetching and statistics +- **rss_routes.py**: RSS feed management (add/remove/list/toggle) +- **ollama_routes.py**: AI/Ollama integration endpoints + +### services/ +Business logic separated from route handlers: +- **news_service.py**: Fetches news from RSS feeds, saves to database +- **email_service.py**: Sends newsletter emails to subscribers +- **ollama_service.py**: Communicates with Ollama AI server + +## Benefits of This Structure + +1. **Separation of Concerns**: Routes handle HTTP, services handle business logic +2. **Testability**: Each module can be tested independently +3. **Maintainability**: Easy to find and modify specific functionality +4. **Scalability**: Easy to add new routes or services +5. **Reusability**: Services can be used by multiple routes + +## Adding New Features + +### To add a new API endpoint: +1. Create a new route file in `routes/` or add to existing one +2. Create a Blueprint and define routes +3. Register the blueprint in `app.py` + +### To add new business logic: +1. Create a new service file in `services/` +2. 
Import and use in your route handlers + +### Example: +```python +# services/my_service.py +def my_business_logic(): + return "Hello" + +# routes/my_routes.py +from flask import Blueprint +from services.my_service import my_business_logic + +my_bp = Blueprint('my', __name__) + +@my_bp.route('/api/my-endpoint') +def my_endpoint(): + result = my_business_logic() + return {'message': result} + +# app.py +from routes.my_routes import my_bp +app.register_blueprint(my_bp) +``` diff --git a/backend/app.py b/backend/app.py new file mode 100644 index 0000000..10f8e88 --- /dev/null +++ b/backend/app.py @@ -0,0 +1,29 @@ +from flask import Flask +from flask_cors import CORS +from config import Config +from database import init_db +from routes.subscription_routes import subscription_bp +from routes.news_routes import news_bp +from routes.rss_routes import rss_bp +from routes.ollama_routes import ollama_bp +from routes.newsletter_routes import newsletter_bp + +# Initialize Flask app +app = Flask(__name__) +CORS(app) + +# Initialize database +init_db() + +# Register blueprints +app.register_blueprint(subscription_bp) +app.register_blueprint(news_bp) +app.register_blueprint(rss_bp) +app.register_blueprint(ollama_bp) +app.register_blueprint(newsletter_bp) + +# Print configuration +Config.print_config() + +if __name__ == '__main__': + app.run(debug=True, port=Config.FLASK_PORT, host='127.0.0.1') diff --git a/backend/config.py b/backend/config.py new file mode 100644 index 0000000..e206865 --- /dev/null +++ b/backend/config.py @@ -0,0 +1,52 @@ +import os +from dotenv import load_dotenv +from pathlib import Path + +# Get the directory where this script is located +backend_dir = Path(__file__).parent +env_path = backend_dir / '.env' + +# Load .env file +load_dotenv(dotenv_path=env_path) + +# Debug: Print if .env file exists (for troubleshooting) +if env_path.exists(): + print(f"✓ Loading .env file from: {env_path}") +else: + print(f"⚠ Warning: .env file not found at {env_path}") + print(f" Current working directory: {os.getcwd()}") + print(f" Looking for .env in: {env_path}") + + +class Config: + """Application configuration""" + + # MongoDB + MONGODB_URI = os.getenv('MONGODB_URI', 'mongodb://localhost:27017/') + DB_NAME = 'munich_news' + + # Email + SMTP_SERVER = os.getenv('SMTP_SERVER', 'smtp.gmail.com') + SMTP_PORT = int(os.getenv('SMTP_PORT', '587')) + EMAIL_USER = os.getenv('EMAIL_USER', '') + EMAIL_PASSWORD = os.getenv('EMAIL_PASSWORD', '') + + # Ollama + OLLAMA_BASE_URL = os.getenv('OLLAMA_BASE_URL', 'http://localhost:11434') + OLLAMA_MODEL = os.getenv('OLLAMA_MODEL', 'llama2') + OLLAMA_API_KEY = os.getenv('OLLAMA_API_KEY', '') + OLLAMA_ENABLED = os.getenv('OLLAMA_ENABLED', 'false').lower() == 'true' + + # Flask + FLASK_PORT = int(os.getenv('FLASK_PORT', '5000')) + + @classmethod + def print_config(cls): + """Print configuration (without sensitive data)""" + print("\nApplication Configuration:") + print(f" MongoDB URI: {cls.MONGODB_URI}") + print(f" Database: {cls.DB_NAME}") + print(f" Flask Port: {cls.FLASK_PORT}") + print(f" Ollama Base URL: {cls.OLLAMA_BASE_URL}") + print(f" Ollama Model: {cls.OLLAMA_MODEL}") + print(f" Ollama Enabled: {cls.OLLAMA_ENABLED}") diff --git a/backend/database.py b/backend/database.py new file mode 100644 index 0000000..0adbe9f --- /dev/null +++ b/backend/database.py @@ -0,0 +1,53 @@ +from pymongo import MongoClient +from datetime import datetime +from config import Config + +# MongoDB setup +client = MongoClient(Config.MONGODB_URI) +db = client[Config.DB_NAME] + +# 
Collections +articles_collection = db['articles'] +subscribers_collection = db['subscribers'] +rss_feeds_collection = db['rss_feeds'] + + +def init_db(): + """Initialize database with indexes""" + # Create unique index on article links to prevent duplicates + articles_collection.create_index('link', unique=True) + # Create index on created_at for faster sorting + articles_collection.create_index('created_at') + # Create unique index on subscriber emails + subscribers_collection.create_index('email', unique=True) + # Create index on subscribed_at + subscribers_collection.create_index('subscribed_at') + # Create unique index on RSS feed URLs + rss_feeds_collection.create_index('url', unique=True) + + # Initialize default RSS feeds if collection is empty + if rss_feeds_collection.count_documents({}) == 0: + default_feeds = [ + { + 'name': 'Süddeutsche Zeitung München', + 'url': 'https://www.sueddeutsche.de/muenchen/rss', + 'active': True, + 'created_at': datetime.utcnow() + }, + { + 'name': 'Münchner Merkur', + 'url': 'https://www.merkur.de/muenchen/rss', + 'active': True, + 'created_at': datetime.utcnow() + }, + { + 'name': 'Abendzeitung München', + 'url': 'https://www.abendzeitung-muenchen.de/rss', + 'active': True, + 'created_at': datetime.utcnow() + } + ] + rss_feeds_collection.insert_many(default_feeds) + print(f"Initialized {len(default_feeds)} default RSS feeds") + + print("Database initialized with indexes") diff --git a/backend/env.template b/backend/env.template new file mode 100644 index 0000000..ddb40be --- /dev/null +++ b/backend/env.template @@ -0,0 +1,32 @@ +# MongoDB Configuration +# For Docker Compose (no authentication): +MONGODB_URI=mongodb://localhost:27017/ +# For Docker Compose with authentication: +# MONGODB_URI=mongodb://admin:password@localhost:27017/ +# For MongoDB Atlas (cloud): +# MONGODB_URI=mongodb+srv://username:password@cluster.mongodb.net/ + +# Email Configuration (for sending newsletters) +SMTP_SERVER=smtp.gmail.com +SMTP_PORT=587 +EMAIL_USER=your-email@gmail.com +EMAIL_PASSWORD=your-app-password +# Note: For Gmail, use an App Password: https://support.google.com/accounts/answer/185833 + +# Ollama Configuration (for AI-powered features) +# Remote Ollama server URL (e.g., http://your-server-ip:11434 or https://your-domain.com) +OLLAMA_BASE_URL=http://localhost:11434 +# Optional: API key if your Ollama server requires authentication +# OLLAMA_API_KEY=your-api-key-here +# Model name to use (e.g., llama2, mistral, codellama, llama3, phi3:latest) +OLLAMA_MODEL=phi3:latest +# Enable/disable Ollama features (true/false) +# When enabled, the crawler will automatically summarize articles in English (≤150 words) +OLLAMA_ENABLED=true +# Timeout for Ollama requests in seconds (default: 30) +OLLAMA_TIMEOUT=30 + +# Flask Server Configuration +# Port for Flask server (default: 5001 to avoid AirPlay conflict on macOS) +FLASK_PORT=5001 + diff --git a/backend/fix_duplicates.py b/backend/fix_duplicates.py new file mode 100644 index 0000000..39af2bd --- /dev/null +++ b/backend/fix_duplicates.py @@ -0,0 +1,61 @@ +""" +Script to fix duplicate RSS feeds and create unique index +Run this once: python fix_duplicates.py +""" +from pymongo import MongoClient +from config import Config + +# Connect to MongoDB +client = MongoClient(Config.MONGODB_URI) +db = client[Config.DB_NAME] +rss_feeds_collection = db['rss_feeds'] + +print("Fixing duplicate RSS feeds...") + +# Get all feeds +all_feeds = list(rss_feeds_collection.find()) +print(f"Total feeds found: {len(all_feeds)}") + +# Find 
duplicates by URL +seen_urls = {} +duplicates_to_remove = [] + +for feed in all_feeds: + url = feed.get('url') + if url in seen_urls: + # This is a duplicate, mark for removal + duplicates_to_remove.append(feed['_id']) + print(f" Duplicate found: {feed['name']} - {url}") + else: + # First occurrence, keep it + seen_urls[url] = feed['_id'] + +# Remove duplicates +if duplicates_to_remove: + result = rss_feeds_collection.delete_many({'_id': {'$in': duplicates_to_remove}}) + print(f"Removed {result.deleted_count} duplicate feeds") +else: + print("No duplicates found") + +# Drop existing indexes (if any) +print("\nDropping existing indexes...") +try: + rss_feeds_collection.drop_indexes() + print("Indexes dropped") +except Exception as e: + print(f"Note: {e}") + +# Create unique index on URL +print("\nCreating unique index on 'url' field...") +rss_feeds_collection.create_index('url', unique=True) +print("✓ Unique index created successfully") + +# Verify +remaining_feeds = list(rss_feeds_collection.find()) +print(f"\nFinal feed count: {len(remaining_feeds)}") +print("\nRemaining feeds:") +for feed in remaining_feeds: + print(f" - {feed['name']}: {feed['url']}") + +print("\n✓ Done! Duplicates removed and unique index created.") +print("You can now restart your Flask app.") diff --git a/backend/requirements.txt b/backend/requirements.txt new file mode 100644 index 0000000..c014e54 --- /dev/null +++ b/backend/requirements.txt @@ -0,0 +1,8 @@ +Flask==3.0.0 +flask-cors==4.0.0 +feedparser==6.0.10 +python-dotenv==1.0.0 +pymongo==4.6.1 +requests==2.31.0 +Jinja2==3.1.2 + diff --git a/backend/routes/__init__.py b/backend/routes/__init__.py new file mode 100644 index 0000000..d212dab --- /dev/null +++ b/backend/routes/__init__.py @@ -0,0 +1 @@ +# Routes package diff --git a/backend/routes/news_routes.py b/backend/routes/news_routes.py new file mode 100644 index 0000000..abbf925 --- /dev/null +++ b/backend/routes/news_routes.py @@ -0,0 +1,123 @@ +from flask import Blueprint, jsonify +from database import articles_collection +from services.news_service import fetch_munich_news, save_articles_to_db + +news_bp = Blueprint('news', __name__) + + +@news_bp.route('/api/news', methods=['GET']) +def get_news(): + """Get latest Munich news""" + try: + # Fetch fresh news and save to database + articles = fetch_munich_news() + save_articles_to_db(articles) + + # Get articles from MongoDB, sorted by created_at (newest first) + cursor = articles_collection.find().sort('created_at', -1).limit(20) + + db_articles = [] + for doc in cursor: + article = { + 'title': doc.get('title', ''), + 'author': doc.get('author'), + 'link': doc.get('link', ''), + 'source': doc.get('source', ''), + 'published': doc.get('published_at', ''), + 'word_count': doc.get('word_count'), + 'has_full_content': bool(doc.get('content')), + 'has_summary': bool(doc.get('summary')) + } + + # Include AI summary if available + if doc.get('summary'): + article['summary'] = doc.get('summary', '') + article['summary_word_count'] = doc.get('summary_word_count') + article['summarized_at'] = doc.get('summarized_at', '').isoformat() if doc.get('summarized_at') else None + # Fallback: Include preview of content if no summary (first 200 chars) + elif doc.get('content'): + article['preview'] = doc.get('content', '')[:200] + '...' 
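+                # 200 characters is an arbitrary preview length; when an AI
+                # summary exists, it is used instead of this fallback.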
+
+            db_articles.append(article)
+
+        # Combine fresh articles with database articles and deduplicate
+        seen_links = set()
+        combined = []
+
+        # Add fresh articles first (they're more recent)
+        for article in articles:
+            link = article.get('link', '')
+            if link and link not in seen_links:
+                seen_links.add(link)
+                combined.append(article)
+
+        # Add database articles
+        for article in db_articles:
+            link = article.get('link', '')
+            if link and link not in seen_links:
+                seen_links.add(link)
+                combined.append(article)
+
+        return jsonify({'articles': combined[:20]}), 200
+    except Exception as e:
+        return jsonify({'error': str(e)}), 500
+
+
+@news_bp.route('/api/news/<path:article_url>', methods=['GET'])
+def get_article_by_url(article_url):
+    """Get full article content by URL"""
+    try:
+        # Decode URL
+        from urllib.parse import unquote
+        decoded_url = unquote(article_url)
+
+        # Find article by link
+        article = articles_collection.find_one({'link': decoded_url})
+
+        if not article:
+            return jsonify({'error': 'Article not found'}), 404
+
+        return jsonify({
+            'title': article.get('title', ''),
+            'author': article.get('author'),
+            'link': article.get('link', ''),
+            'content': article.get('content', ''),
+            'summary': article.get('summary'),
+            'word_count': article.get('word_count', 0),
+            'summary_word_count': article.get('summary_word_count'),
+            'source': article.get('source', ''),
+            'published_at': article.get('published_at', ''),
+            'crawled_at': article.get('crawled_at', '').isoformat() if article.get('crawled_at') else None,
+            'summarized_at': article.get('summarized_at', '').isoformat() if article.get('summarized_at') else None,
+            'created_at': article.get('created_at', '').isoformat() if article.get('created_at') else None
+        }), 200
+    except Exception as e:
+        return jsonify({'error': str(e)}), 500
+
+
+@news_bp.route('/api/stats', methods=['GET'])
+def get_stats():
+    """Get subscription statistics"""
+    try:
+        from database import subscribers_collection
+
+        # Count only active subscribers
+        subscriber_count = subscribers_collection.count_documents({'status': 'active'})
+
+        # Also get total article count
+        article_count = articles_collection.count_documents({})
+
+        # Count crawled articles
+        crawled_count = articles_collection.count_documents({'content': {'$exists': True, '$ne': ''}})
+
+        # Count summarized articles
+        summarized_count = articles_collection.count_documents({'summary': {'$exists': True, '$ne': ''}})
+
+        return jsonify({
+            'subscribers': subscriber_count,
+            'articles': article_count,
+            'crawled_articles': crawled_count,
+            'summarized_articles': summarized_count
+        }), 200
+    except Exception as e:
+        return jsonify({'error': str(e)}), 500
diff --git a/backend/routes/newsletter_routes.py b/backend/routes/newsletter_routes.py
new file mode 100644
index 0000000..6f61faa
--- /dev/null
+++ b/backend/routes/newsletter_routes.py
@@ -0,0 +1,62 @@
+from flask import Blueprint, Response
+from pathlib import Path
+from jinja2 import Template
+from datetime import datetime
+from database import articles_collection
+
+newsletter_bp = Blueprint('newsletter', __name__)
+
+
+@newsletter_bp.route('/api/newsletter/preview', methods=['GET'])
+def preview_newsletter():
+    """Preview the newsletter HTML (for testing)"""
+    try:
+        # Get latest articles with AI summaries
+        cursor = articles_collection.find(
+            {'summary': {'$exists': True, '$ne': None}}
+        ).sort('created_at', -1).limit(10)
+
+        articles = []
+        for doc in cursor:
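+            # Only articles that already have an AI summary are included in
+            # the preview; the crawler fills the 'summary' field via Ollama.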
+            articles.append({
+                'title': doc.get('title', ''),
+                'author': doc.get('author'),
+                'link': doc.get('link', ''),
+                'summary': doc.get('summary', ''),
+                'source': doc.get('source', ''),
+                'published_at': doc.get('published_at', '')
+            })
+
+        if not articles:
+            return Response(
+                "<html><body><h1>No articles with summaries found</h1>"
+                "<p>Run the crawler with Ollama enabled first.</p></body></html>",
+                mimetype='text/html'
+            )
+
+        # Load template
+        template_path = Path(__file__).parent.parent / 'templates' / 'newsletter_template.html'
+        with open(template_path, 'r', encoding='utf-8') as f:
+            template_content = f.read()
+
+        template = Template(template_content)
+
+        # Prepare data
+        now = datetime.now()
+        template_data = {
+            'date': now.strftime('%A, %B %d, %Y'),
+            'year': now.year,
+            'article_count': len(articles),
+            'articles': articles,
+            'unsubscribe_link': 'http://localhost:3000/unsubscribe',
+            'website_link': 'http://localhost:3000'
+        }
+
+        # Render and return HTML
+        html_content = template.render(**template_data)
+        return Response(html_content, mimetype='text/html')
+
+    except Exception as e:
+        return Response(
+            f"<html><body><h1>Error</h1><p>{str(e)}</p></body></html>",
+            mimetype='text/html'
+        ), 500
diff --git a/backend/routes/ollama_routes.py b/backend/routes/ollama_routes.py
new file mode 100644
index 0000000..84628d8
--- /dev/null
+++ b/backend/routes/ollama_routes.py
@@ -0,0 +1,158 @@
+from flask import Blueprint, jsonify
+from config import Config
+from services.ollama_service import call_ollama, list_ollama_models
+import os
+
+ollama_bp = Blueprint('ollama', __name__)
+
+
+@ollama_bp.route('/api/ollama/ping', methods=['GET', 'POST'])
+def ping_ollama():
+    """Test connection to Ollama server"""
+    try:
+        # Check if Ollama is enabled
+        if not Config.OLLAMA_ENABLED:
+            return jsonify({
+                'status': 'disabled',
+                'message': 'Ollama is not enabled. Set OLLAMA_ENABLED=true in your .env file.',
+                'ollama_config': {
+                    'base_url': Config.OLLAMA_BASE_URL,
+                    'model': Config.OLLAMA_MODEL,
+                    'enabled': False
+                }
+            }), 200
+
+        # Send a simple test prompt
+        test_prompt = "Say 'Hello! I am connected and working.' in one sentence."
+        system_prompt = "You are a helpful assistant. Respond briefly and concisely."
+
+        response_text, error_message = call_ollama(test_prompt, system_prompt)
+
+        if response_text:
+            return jsonify({
+                'status': 'success',
+                'message': 'Successfully connected to Ollama',
+                'response': response_text,
+                'ollama_config': {
+                    'base_url': Config.OLLAMA_BASE_URL,
+                    'model': Config.OLLAMA_MODEL,
+                    'enabled': True
+                }
+            }), 200
+        else:
+            # Try to get available models for better error message
+            available_models, _ = list_ollama_models()
+
+            troubleshooting = {
+                'check_server': f'Verify Ollama is running at {Config.OLLAMA_BASE_URL}',
+                'check_model': f'Verify model "{Config.OLLAMA_MODEL}" is available (run: ollama list)',
+                'test_connection': f'Test manually: curl {Config.OLLAMA_BASE_URL}/api/generate -d \'{{"model":"{Config.OLLAMA_MODEL}","prompt":"test"}}\''
+            }
+
+            if available_models:
+                troubleshooting['available_models'] = available_models
+                troubleshooting['suggestion'] = f'Try setting OLLAMA_MODEL to one of: {", ".join(available_models[:5])}'
+
+            return jsonify({
+                'status': 'error',
+                'message': error_message or 'Failed to get response from Ollama',
+                'error_details': error_message,
+                'ollama_config': {
+                    'base_url': Config.OLLAMA_BASE_URL,
+                    'model': Config.OLLAMA_MODEL,
+                    'enabled': True
+                },
+                'troubleshooting': troubleshooting
+            }), 500
+
+    except Exception as e:
+        return jsonify({
+            'status': 'error',
+            'message': f'Error connecting to Ollama: {str(e)}',
+            'ollama_config': {
+                'base_url': Config.OLLAMA_BASE_URL,
+                'model': Config.OLLAMA_MODEL,
+                'enabled': Config.OLLAMA_ENABLED
+            }
+        }), 500
+
+
+@ollama_bp.route('/api/ollama/config', methods=['GET'])
+def get_ollama_config():
+    """Get current Ollama configuration (for debugging)"""
+    try:
+        from pathlib import Path
+        backend_dir = Path(__file__).parent.parent
+        env_path = backend_dir / '.env'
+
+        return jsonify({
+            'ollama_config': {
+                'base_url': Config.OLLAMA_BASE_URL,
+                'model': Config.OLLAMA_MODEL,
+                'enabled': Config.OLLAMA_ENABLED,
+                'has_api_key': bool(Config.OLLAMA_API_KEY)
+            },
+            'env_file_path': str(env_path),
+            'env_file_exists': env_path.exists(),
+            'current_working_directory': os.getcwd()
+        }), 200
+    except Exception as e:
+        return jsonify({
+            'error': str(e),
+            'ollama_config': {
+                'base_url': Config.OLLAMA_BASE_URL,
+                'model': Config.OLLAMA_MODEL,
+                'enabled': Config.OLLAMA_ENABLED
+            }
+        }), 500
+
+
+@ollama_bp.route('/api/ollama/models', methods=['GET'])
+def get_ollama_models():
+    """List available models on Ollama server"""
+    try:
+        if not Config.OLLAMA_ENABLED:
+            return jsonify({
+                'status': 'disabled',
+                'message': 'Ollama is not enabled. Set OLLAMA_ENABLED=true in your .env file.',
+                'ollama_config': {
+                    'base_url': Config.OLLAMA_BASE_URL,
+                    'model': Config.OLLAMA_MODEL,
+                    'enabled': False
+                }
+            }), 200
+
+        models, error_message = list_ollama_models()
+
+        if models is not None:
+            return jsonify({
+                'status': 'success',
+                'models': models,
+                'current_model': Config.OLLAMA_MODEL,
+                'ollama_config': {
+                    'base_url': Config.OLLAMA_BASE_URL,
+                    'model': Config.OLLAMA_MODEL,
+                    'enabled': True
+                }
+            }), 200
+        else:
+            return jsonify({
+                'status': 'error',
+                'message': error_message or 'Failed to list models',
+                'ollama_config': {
+                    'base_url': Config.OLLAMA_BASE_URL,
+                    'model': Config.OLLAMA_MODEL,
+                    'enabled': True
+                }
+            }), 500
+
+    except Exception as e:
+        return jsonify({
+            'status': 'error',
+            'message': f'Error listing models: {str(e)}',
+            'ollama_config': {
+                'base_url': Config.OLLAMA_BASE_URL,
+                'model': Config.OLLAMA_MODEL,
+                'enabled': Config.OLLAMA_ENABLED
+            }
+        }), 500
diff --git a/backend/routes/rss_routes.py b/backend/routes/rss_routes.py
new file mode 100644
index 0000000..8c376a5
--- /dev/null
+++ b/backend/routes/rss_routes.py
@@ -0,0 +1,124 @@
+from flask import Blueprint, request, jsonify
+from datetime import datetime
+from pymongo.errors import DuplicateKeyError
+from bson.objectid import ObjectId
+import feedparser
+from database import rss_feeds_collection
+
+rss_bp = Blueprint('rss', __name__)
+
+
+@rss_bp.route('/api/rss-feeds', methods=['GET'])
+def get_rss_feeds():
+    """Get all RSS feeds"""
+    try:
+        cursor = rss_feeds_collection.find().sort('created_at', -1)
+        feeds = []
+        for feed in cursor:
+            feeds.append({
+                'id': str(feed['_id']),
+                'name': feed.get('name', ''),
+                'url': feed.get('url', ''),
+                'active': feed.get('active', True),
+                'created_at': feed.get('created_at', '').isoformat() if feed.get('created_at') else ''
+            })
+        return jsonify({'feeds': feeds}), 200
+    except Exception as e:
+        return jsonify({'error': str(e)}), 500
+
+
+@rss_bp.route('/api/rss-feeds', methods=['POST'])
+def add_rss_feed():
+    """Add a new RSS feed"""
+    data = request.json
+    name = data.get('name', '').strip()
+    url = data.get('url', '').strip()
+
+    if not name or not url:
+        return jsonify({'error': 'Name and URL are required'}), 400
+
+    if not url.startswith('http://') and not url.startswith('https://'):
+        return jsonify({'error': 'URL must start with http:// or https://'}), 400
+
+    try:
+        # Test if the RSS feed is valid
+        try:
+            feed = feedparser.parse(url)
+            if not feed.entries:
+                return jsonify({'error': 'Invalid RSS feed or no entries found'}), 400
+        except Exception as e:
+            return jsonify({'error': f'Failed to parse RSS feed: {str(e)}'}), 400
+
+        feed_doc = {
+            'name': name,
+            'url': url,
+            'active': True,
+            'created_at': datetime.utcnow()
+        }
+
+        try:
+            result = rss_feeds_collection.insert_one(feed_doc)
+            return jsonify({
+                'message': 'RSS feed added successfully',
+                'id': str(result.inserted_id)
+            }), 201
+        except DuplicateKeyError:
+            return jsonify({'error': 'RSS feed URL already exists'}), 409
+
+    except Exception as e:
+        return jsonify({'error': str(e)}), 500
+
+
+@rss_bp.route('/api/rss-feeds/<feed_id>', methods=['DELETE'])
+def remove_rss_feed(feed_id):
+    """Remove an RSS feed"""
+    try:
+        # Validate ObjectId
+        try:
+            obj_id = ObjectId(feed_id)
+        except Exception:
+            return jsonify({'error': 'Invalid feed ID'}), 400
+
+        result = rss_feeds_collection.delete_one({'_id': obj_id})
+
+        if result.deleted_count > 0:
+            return jsonify({'message': 'RSS feed removed successfully'}), 200
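+        # Unlike subscribers, which are soft-deleted via status='inactive',
+        # feeds are hard-deleted here with delete_one.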
+        else:
+            return jsonify({'error': 'RSS feed not found'}), 404
+
+    except Exception as e:
+        return jsonify({'error': str(e)}), 500
+
+
+@rss_bp.route('/api/rss-feeds/<feed_id>/toggle', methods=['PATCH'])
+def toggle_rss_feed(feed_id):
+    """Toggle RSS feed active status"""
+    try:
+        # Validate ObjectId
+        try:
+            obj_id = ObjectId(feed_id)
+        except Exception:
+            return jsonify({'error': 'Invalid feed ID'}), 400
+
+        # Get current status
+        feed = rss_feeds_collection.find_one({'_id': obj_id})
+        if not feed:
+            return jsonify({'error': 'RSS feed not found'}), 404
+
+        # Toggle status
+        new_status = not feed.get('active', True)
+        result = rss_feeds_collection.update_one(
+            {'_id': obj_id},
+            {'$set': {'active': new_status}}
+        )
+
+        if result.modified_count > 0:
+            return jsonify({
+                'message': f'RSS feed {"activated" if new_status else "deactivated"} successfully',
+                'active': new_status
+            }), 200
+        else:
+            return jsonify({'error': 'Failed to update RSS feed'}), 500
+
+    except Exception as e:
+        return jsonify({'error': str(e)}), 500
diff --git a/backend/routes/subscription_routes.py b/backend/routes/subscription_routes.py
new file mode 100644
index 0000000..344bd85
--- /dev/null
+++ b/backend/routes/subscription_routes.py
@@ -0,0 +1,63 @@
+from flask import Blueprint, request, jsonify
+from datetime import datetime
+from pymongo.errors import DuplicateKeyError
+from database import subscribers_collection
+
+subscription_bp = Blueprint('subscription', __name__)
+
+
+@subscription_bp.route('/api/subscribe', methods=['POST'])
+def subscribe():
+    """Subscribe a user to the newsletter"""
+    data = request.json
+    email = data.get('email', '').strip().lower()
+
+    if not email or '@' not in email:
+        return jsonify({'error': 'Invalid email address'}), 400
+
+    try:
+        subscriber_doc = {
+            'email': email,
+            'subscribed_at': datetime.utcnow(),
+            'status': 'active'
+        }
+
+        # Try to insert; a duplicate key error means the subscriber already exists
+        try:
+            subscribers_collection.insert_one(subscriber_doc)
+            return jsonify({'message': 'Successfully subscribed!'}), 201
+        except DuplicateKeyError:
+            # Check if subscriber is active
+            existing = subscribers_collection.find_one({'email': email})
+            if existing and existing.get('status') == 'active':
+                return jsonify({'message': 'Email already subscribed'}), 200
+            else:
+                # Reactivate if previously unsubscribed
+                subscribers_collection.update_one(
+                    {'email': email},
+                    {'$set': {'status': 'active', 'subscribed_at': datetime.utcnow()}}
+                )
+                return jsonify({'message': 'Successfully re-subscribed!'}), 200
+
+    except Exception as e:
+        return jsonify({'error': str(e)}), 500
+
+
+@subscription_bp.route('/api/unsubscribe', methods=['POST'])
+def unsubscribe():
+    """Unsubscribe a user from the newsletter"""
+    data = request.json
+    email = data.get('email', '').strip().lower()
+
+    try:
+        result = subscribers_collection.update_one(
+            {'email': email},
+            {'$set': {'status': 'inactive'}}
+        )
+
+        if result.matched_count > 0:
+            return jsonify({'message': 'Successfully unsubscribed'}), 200
+        else:
+            return jsonify({'error': 'Email not found in subscribers'}), 404
+    except Exception as e:
+        return jsonify({'error': str(e)}), 500
diff --git a/backend/services/__init__.py b/backend/services/__init__.py
new file mode 100644
index 0000000..a70b302
--- /dev/null
+++ b/backend/services/__init__.py
@@ -0,0 +1 @@
+# Services package
diff --git a/backend/services/email_service.py b/backend/services/email_service.py
new file mode 100644
index 0000000..e24f6b5
--- /dev/null
+++ 
b/backend/services/email_service.py @@ -0,0 +1,88 @@ +import smtplib +from email.mime.text import MIMEText +from email.mime.multipart import MIMEMultipart +from datetime import datetime +from pathlib import Path +from jinja2 import Template +from config import Config +from database import subscribers_collection, articles_collection + + +def send_newsletter(max_articles=10): + """Send newsletter to all subscribers with AI-summarized articles""" + if not Config.EMAIL_USER or not Config.EMAIL_PASSWORD: + print("Email credentials not configured") + return + + # Get latest articles with AI summaries from database + cursor = articles_collection.find( + {'summary': {'$exists': True, '$ne': None}} + ).sort('created_at', -1).limit(max_articles) + + articles = [] + for doc in cursor: + articles.append({ + 'title': doc.get('title', ''), + 'author': doc.get('author'), + 'link': doc.get('link', ''), + 'summary': doc.get('summary', ''), + 'source': doc.get('source', ''), + 'published_at': doc.get('published_at', '') + }) + + if not articles: + print("No articles with summaries to send") + return + + # Load email template + template_path = Path(__file__).parent.parent / 'templates' / 'newsletter_template.html' + with open(template_path, 'r', encoding='utf-8') as f: + template_content = f.read() + + template = Template(template_content) + + # Prepare template data + now = datetime.now() + template_data = { + 'date': now.strftime('%A, %B %d, %Y'), + 'year': now.year, + 'article_count': len(articles), + 'articles': articles, + 'unsubscribe_link': 'http://localhost:3000', # Update with actual unsubscribe link + 'website_link': 'http://localhost:3000' + } + + # Render HTML + html_content = template.render(**template_data) + + # Get all active subscribers + subscribers_cursor = subscribers_collection.find({'status': 'active'}) + subscribers = [doc['email'] for doc in subscribers_cursor] + + # Send emails + for subscriber in subscribers: + try: + msg = MIMEMultipart('alternative') + msg['Subject'] = f'Munich News Daily - {datetime.now().strftime("%B %d, %Y")}' + msg['From'] = f'Munich News Daily <{Config.EMAIL_USER}>' + msg['To'] = subscriber + msg['Date'] = datetime.now().strftime('%a, %d %b %Y %H:%M:%S %z') + msg['Message-ID'] = f'<{datetime.now().timestamp()}.{subscriber}@dongho.kim>' + msg['X-Mailer'] = 'Munich News Daily' + + # Add plain text version as fallback + plain_text = "This email requires HTML support. Please view it in an HTML-capable email client." 
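+            # multipart/alternative parts are ordered from least to most
+            # preferred: the plain-text fallback first, the HTML version last.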
+ msg.attach(MIMEText(plain_text, 'plain', 'utf-8')) + + # Add HTML version + msg.attach(MIMEText(html_content, 'html', 'utf-8')) + + server = smtplib.SMTP(Config.SMTP_SERVER, Config.SMTP_PORT) + server.starttls() + server.login(Config.EMAIL_USER, Config.EMAIL_PASSWORD) + server.send_message(msg) + server.quit() + + print(f"Newsletter sent to {subscriber}") + except Exception as e: + print(f"Error sending to {subscriber}: {e}") diff --git a/backend/services/news_service.py b/backend/services/news_service.py new file mode 100644 index 0000000..e15018a --- /dev/null +++ b/backend/services/news_service.py @@ -0,0 +1,90 @@ +import feedparser +from datetime import datetime +from pymongo.errors import DuplicateKeyError +from database import articles_collection, rss_feeds_collection +from utils.rss_utils import extract_article_url, extract_article_summary, extract_published_date + + +def get_active_rss_feeds(): + """Get all active RSS feeds from database""" + feeds = [] + cursor = rss_feeds_collection.find({'active': True}) + for feed in cursor: + feeds.append({ + 'name': feed.get('name', ''), + 'url': feed.get('url', '') + }) + return feeds + + +def fetch_munich_news(): + """Fetch news from Munich news sources""" + articles = [] + + # Get RSS feeds from database instead of hardcoded list + sources = get_active_rss_feeds() + + for source in sources: + try: + feed = feedparser.parse(source['url']) + for entry in feed.entries[:5]: # Get top 5 from each source + # Extract article URL using utility function + article_url = extract_article_url(entry) + + if not article_url: + print(f" ⚠ No valid URL for: {entry.get('title', 'Unknown')[:50]}") + continue # Skip entries without valid URL + + # Extract summary + summary = extract_article_summary(entry) + if summary: + summary = summary[:200] + '...' 
if len(summary) > 200 else summary + + articles.append({ + 'title': entry.get('title', ''), + 'link': article_url, + 'summary': summary, + 'source': source['name'], + 'published': extract_published_date(entry) + }) + except Exception as e: + print(f"Error fetching from {source['name']}: {e}") + + return articles + + +def save_articles_to_db(articles): + """Save articles to MongoDB, avoiding duplicates""" + saved_count = 0 + + for article in articles: + try: + # Prepare article document + article_doc = { + 'title': article.get('title', ''), + 'link': article.get('link', ''), + 'summary': article.get('summary', ''), + 'source': article.get('source', ''), + 'published_at': article.get('published', ''), + 'created_at': datetime.utcnow() + } + + # Use update_one with upsert to handle duplicates + # This will insert if link doesn't exist, or update if it does + result = articles_collection.update_one( + {'link': article_doc['link']}, + {'$setOnInsert': article_doc}, # Only set on insert, don't update existing + upsert=True + ) + + if result.upserted_id: + saved_count += 1 + + except DuplicateKeyError: + # Link already exists, skip + pass + except Exception as e: + print(f"Error saving article {article.get('link', 'unknown')}: {e}") + + if saved_count > 0: + print(f"Saved {saved_count} new articles to database") diff --git a/backend/services/ollama_service.py b/backend/services/ollama_service.py new file mode 100644 index 0000000..b1b097c --- /dev/null +++ b/backend/services/ollama_service.py @@ -0,0 +1,96 @@ +import requests +from config import Config + + +def list_ollama_models(): + """List available models on Ollama server""" + if not Config.OLLAMA_ENABLED: + return None, "Ollama is not enabled" + + try: + url = f"{Config.OLLAMA_BASE_URL}/api/tags" + headers = {} + if Config.OLLAMA_API_KEY: + headers["Authorization"] = f"Bearer {Config.OLLAMA_API_KEY}" + + response = requests.get(url, headers=headers, timeout=10) + response.raise_for_status() + + result = response.json() + models = result.get('models', []) + model_names = [model.get('name', '') for model in models] + + return model_names, None + except requests.exceptions.RequestException as e: + return None, f"Error listing models: {str(e)}" + except Exception as e: + return None, f"Unexpected error: {str(e)}" + + +def call_ollama(prompt, system_prompt=None): + """Call Ollama API to generate text""" + if not Config.OLLAMA_ENABLED: + return None, "Ollama is not enabled" + + try: + url = f"{Config.OLLAMA_BASE_URL}/api/generate" + payload = { + "model": Config.OLLAMA_MODEL, + "prompt": prompt, + "stream": False + } + + if system_prompt: + payload["system"] = system_prompt + + headers = {} + if Config.OLLAMA_API_KEY: + headers["Authorization"] = f"Bearer {Config.OLLAMA_API_KEY}" + + print(f"Calling Ollama at {url} with model {Config.OLLAMA_MODEL}") + response = requests.post(url, json=payload, headers=headers, timeout=30) + response.raise_for_status() + + result = response.json() + response_text = result.get('response', '').strip() + + if not response_text: + return None, "Ollama returned empty response" + + return response_text, None + except requests.exceptions.ConnectionError as e: + error_msg = f"Cannot connect to Ollama server at {Config.OLLAMA_BASE_URL}. Is Ollama running?" 
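+        # A ConnectionError usually means OLLAMA_BASE_URL is wrong or the
+        # server is down; log a short hint rather than the raw traceback.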
+ print(f"Connection error: {error_msg}") + return None, error_msg + except requests.exceptions.Timeout: + error_msg = "Request to Ollama timed out after 30 seconds" + print(f"Timeout error: {error_msg}") + return None, error_msg + except requests.exceptions.HTTPError as e: + # Check if it's a model not found error + if e.response.status_code == 404: + try: + error_data = e.response.json() + if 'model' in error_data.get('error', '').lower() and 'not found' in error_data.get('error', '').lower(): + # Try to get available models + available_models, _ = list_ollama_models() + if available_models: + error_msg = f"Model '{Config.OLLAMA_MODEL}' not found. Available models: {', '.join(available_models)}" + else: + error_msg = f"Model '{Config.OLLAMA_MODEL}' not found. Use 'ollama list' on the server to see available models." + else: + error_msg = f"HTTP error from Ollama: {e.response.status_code} - {e.response.text}" + except (ValueError, KeyError): + error_msg = f"HTTP error from Ollama: {e.response.status_code} - {e.response.text}" + else: + error_msg = f"HTTP error from Ollama: {e.response.status_code} - {e.response.text}" + print(f"HTTP error: {error_msg}") + return None, error_msg + except requests.exceptions.RequestException as e: + error_msg = f"Request error: {str(e)}" + print(f"Request error: {error_msg}") + return None, error_msg + except Exception as e: + error_msg = f"Unexpected error: {str(e)}" + print(f"Unexpected error: {error_msg}") + return None, error_msg diff --git a/backend/templates/newsletter_template.html b/backend/templates/newsletter_template.html new file mode 100644 index 0000000..134eb81 --- /dev/null +++ b/backend/templates/newsletter_template.html @@ -0,0 +1,162 @@ + + + + + + + Munich News Daily + + + + + + + + +
+<body style="margin:0; padding:0; background-color:#f4f4f4; font-family:Arial, Helvetica, sans-serif;">
+  <table role="presentation" width="100%" cellpadding="0" cellspacing="0">
+    <tr>
+      <td align="center" style="padding:20px 0;">
+        <table role="presentation" width="600" cellpadding="0" cellspacing="0" style="background-color:#ffffff;">
+          <!-- Header -->
+          <tr>
+            <td style="padding:30px 40px; background-color:#1a1a2e;">
+              <span style="font-size:24px; font-weight:bold; color:#ffffff;">Munich News Daily</span>
+              <span style="float:right; font-size:13px; color:#cccccc;">{{ date }}</span>
+            </td>
+          </tr>
+          <!-- Greeting -->
+          <tr>
+            <td style="padding:30px 40px 10px;">
+              <span style="font-size:20px;">Good morning ☀️</span>
+            </td>
+          </tr>
+          <tr>
+            <td style="padding:0 40px 20px; font-size:14px; color:#444444; line-height:1.5;">
+              Here's what's happening in Munich today. We've summarized {{ article_count }} stories using AI so you can stay informed in under 5 minutes.
+            </td>
+          </tr>
+          <!-- Articles -->
+          {% for article in articles %}
+          <tr>
+            <td style="padding:20px 40px;">
+              <table role="presentation" width="100%" cellpadding="0" cellspacing="0">
+                <tr>
+                  <td width="30" valign="top">
+                    <span style="font-weight:bold; color:#e94560;">{{ loop.index }}</span>
+                  </td>
+                  <td>
+                    <div style="font-size:17px; font-weight:bold; color:#1a1a2e;">{{ article.title }}</div>
+                    <div style="font-size:12px; color:#888888; padding:4px 0;">
+                      {{ article.source }}
+                      {% if article.author %} • {{ article.author }}{% endif %}
+                    </div>
+                    <div style="font-size:14px; color:#444444; line-height:1.5;">{{ article.summary }}</div>
+                    <a href="{{ article.link }}" style="font-size:13px; color:#e94560; text-decoration:none;">Read more →</a>
+                  </td>
+                </tr>
+              </table>
+            </td>
+          </tr>
+          {% if not loop.last %}
+          <tr>
+            <td style="padding:0 40px;"><hr style="border:none; border-top:1px solid #eeeeee;"></td>
+          </tr>
+          {% endif %}
+          {% endfor %}
+          <!-- Today's Digest -->
+          <tr>
+            <td style="padding:30px 40px;">
+              <table role="presentation" width="100%" cellpadding="0" cellspacing="0" style="background-color:#f8f8f8;">
+                <tr>
+                  <td align="center" style="padding:20px;">
+                    <div style="font-size:12px; text-transform:uppercase; color:#888888;">Today's Digest</div>
+                    <div style="font-size:28px; font-weight:bold; color:#1a1a2e;">{{ article_count }}</div>
+                    <div style="font-size:12px; color:#888888;">stories • AI-summarized • 5 min read</div>
+                  </td>
+                </tr>
+              </table>
+            </td>
+          </tr>
+          <!-- Footer -->
+          <tr>
+            <td align="center" style="padding:30px 40px; background-color:#1a1a2e; color:#cccccc;">
+              <div style="font-size:16px; font-weight:bold; color:#ffffff;">Munich News Daily</div>
+              <div style="font-size:12px; padding:8px 0;">
+                AI-powered news summaries for busy people.<br>
+                Delivered daily to your inbox.
+              </div>
+              <div style="font-size:12px; padding:8px 0;">
+                <a href="{{ website_link }}" style="color:#e94560;">Visit Website</a>
+                &nbsp;•&nbsp;
+                <a href="{{ unsubscribe_link }}" style="color:#e94560;">Unsubscribe</a>
+              </div>
+              <div style="font-size:11px; color:#888888;">
+                © {{ year }} Munich News Daily. All rights reserved.
+              </div>
+            </td>
+          </tr>
+        </table>
+      </td>
+    </tr>
+  </table>
+</body>
+</html>
+ + + diff --git a/backend/test_rss_extraction.py b/backend/test_rss_extraction.py new file mode 100644 index 0000000..ab72362 --- /dev/null +++ b/backend/test_rss_extraction.py @@ -0,0 +1,128 @@ +#!/usr/bin/env python +""" +Test RSS feed URL extraction +Run from backend directory with venv activated: + cd backend + source venv/bin/activate # or venv\Scripts\activate on Windows + python test_rss_extraction.py +""" +from pymongo import MongoClient +from config import Config +import feedparser +from utils.rss_utils import extract_article_url, extract_article_summary, extract_published_date + +print("\n" + "="*80) +print("RSS Feed URL Extraction Test") +print("="*80) + +# Connect to database +print(f"\nConnecting to MongoDB: {Config.MONGODB_URI}") +client = MongoClient(Config.MONGODB_URI) +db = client[Config.DB_NAME] + +# Get RSS feeds +print("Fetching RSS feeds from database...") +feeds = list(db['rss_feeds'].find()) + +if not feeds: + print("\n❌ No RSS feeds in database!") + print("\nAdd a feed first:") + print(" curl -X POST http://localhost:5001/api/rss-feeds \\") + print(" -H 'Content-Type: application/json' \\") + print(" -d '{\"name\": \"Süddeutsche Politik\", \"url\": \"https://rss.sueddeutsche.de/rss/Politik\"}'") + exit(1) + +print(f"✓ Found {len(feeds)} feed(s)\n") + +# Test each feed +total_success = 0 +total_fail = 0 + +for feed_doc in feeds: + name = feed_doc.get('name', 'Unknown') + url = feed_doc.get('url', '') + active = feed_doc.get('active', True) + + print("\n" + "="*80) + print(f"Feed: {name}") + print(f"URL: {url}") + print(f"Active: {'Yes' if active else 'No'}") + print("="*80) + + if not active: + print("⏭ Skipping (inactive)") + continue + + try: + # Parse RSS + print("\nFetching RSS feed...") + feed = feedparser.parse(url) + + if not feed.entries: + print("❌ No entries found in feed") + continue + + print(f"✓ Found {len(feed.entries)} entries") + + # Test first 3 entries + print(f"\nTesting first 3 entries:") + print("-" * 80) + + for i, entry in enumerate(feed.entries[:3], 1): + print(f"\n📰 Entry {i}:") + + # Title + title = entry.get('title', 'No title') + print(f" Title: {title[:65]}") + + # Test URL extraction + article_url = extract_article_url(entry) + if article_url: + print(f" ✓ URL: {article_url}") + total_success += 1 + else: + print(f" ❌ Could not extract URL") + print(f" Available fields: {list(entry.keys())[:10]}") + print(f" link: {entry.get('link', 'N/A')}") + print(f" guid: {entry.get('guid', 'N/A')}") + print(f" id: {entry.get('id', 'N/A')}") + total_fail += 1 + + # Test summary + summary = extract_article_summary(entry) + if summary: + print(f" ✓ Summary: {summary[:70]}...") + else: + print(f" ⚠ No summary") + + # Test date + pub_date = extract_published_date(entry) + if pub_date: + print(f" ✓ Date: {pub_date}") + else: + print(f" ⚠ No date") + + except Exception as e: + print(f"❌ Error: {e}") + import traceback + traceback.print_exc() + +# Summary +print("\n" + "="*80) +print("SUMMARY") +print("="*80) +print(f"Total URLs tested: {total_success + total_fail}") +print(f"✓ Successfully extracted: {total_success}") +print(f"❌ Failed to extract: {total_fail}") + +if total_fail == 0: + print("\n🎉 All URLs extracted successfully!") + print("\nYou can now run the crawler:") + print(" cd ../news_crawler") + print(" pip install -r requirements.txt") + print(" python crawler_service.py 5") +else: + print(f"\n⚠ {total_fail} URL(s) could not be extracted") + print("Check the output above for details") + +print("="*80 + "\n") diff --git 
a/backend/utils/__init__.py b/backend/utils/__init__.py new file mode 100644 index 0000000..dd7ee44 --- /dev/null +++ b/backend/utils/__init__.py @@ -0,0 +1 @@ +# Utils package diff --git a/backend/utils/rss_utils.py b/backend/utils/rss_utils.py new file mode 100644 index 0000000..4b024c1 --- /dev/null +++ b/backend/utils/rss_utils.py @@ -0,0 +1,98 @@ +""" +Utility functions for RSS feed processing +""" + + +def extract_article_url(entry): + """ + Extract article URL from RSS entry. + Different RSS feeds use different fields for the article URL. + + Args: + entry: feedparser entry object + + Returns: + str: Article URL or None if not found + + Examples: + - Most feeds use 'link' + - Some use 'guid' as the URL + - Some use 'id' as the URL + - Some have guid as a dict with 'href' + """ + # Try 'link' first (most common) + if entry.get('link') and entry.get('link', '').startswith('http'): + return entry.get('link') + + # Try 'guid' if it's a valid URL + if entry.get('guid'): + guid = entry.get('guid') + # guid can be a string + if isinstance(guid, str) and guid.startswith('http'): + return guid + # or a dict with 'href' + elif isinstance(guid, dict) and guid.get('href', '').startswith('http'): + return guid.get('href') + + # Try 'id' if it's a valid URL + if entry.get('id') and entry.get('id', '').startswith('http'): + return entry.get('id') + + # Try 'links' array (some feeds have multiple links) + if entry.get('links'): + for link in entry.get('links', []): + if isinstance(link, dict) and link.get('href', '').startswith('http'): + # Prefer 'alternate' type, but accept any http link + if link.get('type') == 'text/html' or link.get('rel') == 'alternate': + return link.get('href') + # If no alternate found, return first http link + for link in entry.get('links', []): + if isinstance(link, dict) and link.get('href', '').startswith('http'): + return link.get('href') + + return None + + +def extract_article_summary(entry): + """ + Extract article summary/description from RSS entry. + + Args: + entry: feedparser entry object + + Returns: + str: Article summary or empty string + """ + # Try different fields + if entry.get('summary'): + return entry.get('summary', '') + elif entry.get('description'): + return entry.get('description', '') + elif entry.get('content'): + # content is usually a list of dicts + content = entry.get('content', []) + if content and isinstance(content, list) and len(content) > 0: + return content[0].get('value', '') + + return '' + + +def extract_published_date(entry): + """ + Extract published date from RSS entry. 
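+    Fields are tried in order: 'published', then 'updated', then 'created'.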
+ + Args: + entry: feedparser entry object + + Returns: + str: Published date or empty string + """ + # Try different fields + if entry.get('published'): + return entry.get('published', '') + elif entry.get('updated'): + return entry.get('updated', '') + elif entry.get('created'): + return entry.get('created', '') + + return '' diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml new file mode 100644 index 0000000..431dc6f --- /dev/null +++ b/docker-compose.prod.yml @@ -0,0 +1,33 @@ +version: '3.8' + +# Production version with authentication enabled +# Usage: docker-compose -f docker-compose.prod.yml up -d + +services: + mongodb: + image: mongo:7.0 + container_name: munich-news-mongodb + restart: unless-stopped + ports: + - "27017:27017" + environment: + MONGO_INITDB_ROOT_USERNAME: admin + MONGO_INITDB_ROOT_PASSWORD: ${MONGO_PASSWORD:-changeme} + MONGO_INITDB_DATABASE: munich_news + volumes: + - mongodb_data:/data/db + - mongodb_config:/data/configdb + networks: + - munich-news-network + command: mongod --bind_ip_all --auth + +volumes: + mongodb_data: + driver: local + mongodb_config: + driver: local + +networks: + munich-news-network: + driver: bridge + diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..eafab4b --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,32 @@ +version: '3.8' + +services: + mongodb: + image: mongo:7.0 + container_name: munich-news-mongodb + restart: unless-stopped + ports: + - "27017:27017" + # For development: MongoDB runs without authentication + # For production: Uncomment the environment variables below and update MONGODB_URI + # environment: + # MONGO_INITDB_ROOT_USERNAME: admin + # MONGO_INITDB_ROOT_PASSWORD: password + # MONGO_INITDB_DATABASE: munich_news + volumes: + - mongodb_data:/data/db + - mongodb_config:/data/configdb + networks: + - munich-news-network + command: mongod --bind_ip_all + +volumes: + mongodb_data: + driver: local + mongodb_config: + driver: local + +networks: + munich-news-network: + driver: bridge + diff --git a/frontend/package-lock.json b/frontend/package-lock.json new file mode 100644 index 0000000..3b813b1 --- /dev/null +++ b/frontend/package-lock.json @@ -0,0 +1,1320 @@ +{ + "name": "munich-news-frontend", + "version": "1.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "munich-news-frontend", + "version": "1.0.0", + "license": "MIT", + "dependencies": { + "axios": "^1.6.2", + "express": "^4.18.2" + }, + "devDependencies": { + "nodemon": "^3.0.2" + } + }, + "node_modules/accepts": { + "version": "1.3.8", + "resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.8.tgz", + "integrity": "sha512-PYAthTa2m2VKxuvSD3DPC/Gy+U+sOA1LAuT8mkmRuvw+NACSaeXEQ+NHcVF7rONl6qcaxV3Uuemwawk+7+SJLw==", + "license": "MIT", + "dependencies": { + "mime-types": "~2.1.34", + "negotiator": "0.6.3" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/anymatch": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/anymatch/-/anymatch-3.1.3.tgz", + "integrity": "sha512-KMReFUr0B4t+D+OBkjR3KYqvocp2XaSzO55UcB6mgQMd3KbcE+mWTyvVV7D/zsdEbNnV6acZUutkiHQXvTr1Rw==", + "dev": true, + "license": "ISC", + "dependencies": { + "normalize-path": "^3.0.0", + "picomatch": "^2.0.4" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/array-flatten": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-1.1.1.tgz", + "integrity": 
"sha512-PCVAQswWemu6UdxsDFFX/+gVeYqKAod3D3UVm91jHwynguOwAvYPhx8nNlM++NqRcK6CxxpUafjmhIdKiHibqg==", + "license": "MIT" + }, + "node_modules/asynckit": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", + "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==", + "license": "MIT" + }, + "node_modules/axios": { + "version": "1.13.2", + "resolved": "https://registry.npmjs.org/axios/-/axios-1.13.2.tgz", + "integrity": "sha512-VPk9ebNqPcy5lRGuSlKx752IlDatOjT9paPlm8A7yOuW2Fbvp4X3JznJtT4f0GzGLLiWE9W8onz51SqLYwzGaA==", + "license": "MIT", + "dependencies": { + "follow-redirects": "^1.15.6", + "form-data": "^4.0.4", + "proxy-from-env": "^1.1.0" + } + }, + "node_modules/balanced-match": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", + "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", + "dev": true, + "license": "MIT" + }, + "node_modules/binary-extensions": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.3.0.tgz", + "integrity": "sha512-Ceh+7ox5qe7LJuLHoY0feh3pHuUDHAcRUeyL2VYghZwfpkNIy/+8Ocg0a3UuSoYzavmylwuLWQOf3hl0jjMMIw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/body-parser": { + "version": "1.20.3", + "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.20.3.tgz", + "integrity": "sha512-7rAxByjUMqQ3/bHJy7D6OGXvx/MMc4IqBn/X0fcM1QUcAItpZrBEYhWGem+tzXH90c+G01ypMcYJBO9Y30203g==", + "license": "MIT", + "dependencies": { + "bytes": "3.1.2", + "content-type": "~1.0.5", + "debug": "2.6.9", + "depd": "2.0.0", + "destroy": "1.2.0", + "http-errors": "2.0.0", + "iconv-lite": "0.4.24", + "on-finished": "2.4.1", + "qs": "6.13.0", + "raw-body": "2.5.2", + "type-is": "~1.6.18", + "unpipe": "1.0.0" + }, + "engines": { + "node": ">= 0.8", + "npm": "1.2.8000 || >= 1.4.16" + } + }, + "node_modules/brace-expansion": { + "version": "1.1.12", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz", + "integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==", + "dev": true, + "license": "MIT", + "dependencies": { + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, + "node_modules/braces": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz", + "integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==", + "dev": true, + "license": "MIT", + "dependencies": { + "fill-range": "^7.1.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/bytes": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.2.tgz", + "integrity": "sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/call-bind-apply-helpers": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", + "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" 
+ } + }, + "node_modules/call-bound": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/call-bound/-/call-bound-1.0.4.tgz", + "integrity": "sha512-+ys997U96po4Kx/ABpBCqhA9EuxJaQWDQg7295H4hBphv3IZg0boBKuwYpt4YXp6MZ5AmZQnU/tyMTlRpaSejg==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.2", + "get-intrinsic": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/chokidar": { + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.6.0.tgz", + "integrity": "sha512-7VT13fmjotKpGipCW9JEQAusEPE+Ei8nl6/g4FBAmIm0GOOLMua9NDDo/DWp0ZAxCr3cPq5ZpBqmPAQgDda2Pw==", + "dev": true, + "license": "MIT", + "dependencies": { + "anymatch": "~3.1.2", + "braces": "~3.0.2", + "glob-parent": "~5.1.2", + "is-binary-path": "~2.1.0", + "is-glob": "~4.0.1", + "normalize-path": "~3.0.0", + "readdirp": "~3.6.0" + }, + "engines": { + "node": ">= 8.10.0" + }, + "funding": { + "url": "https://paulmillr.com/funding/" + }, + "optionalDependencies": { + "fsevents": "~2.3.2" + } + }, + "node_modules/combined-stream": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", + "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", + "license": "MIT", + "dependencies": { + "delayed-stream": "~1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/concat-map": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", + "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==", + "dev": true, + "license": "MIT" + }, + "node_modules/content-disposition": { + "version": "0.5.4", + "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-0.5.4.tgz", + "integrity": "sha512-FveZTNuGw04cxlAiWbzi6zTAL/lhehaWbTtgluJh4/E95DqMwTmha3KZN1aAWA8cFIhHzMZUvLevkw5Rqk+tSQ==", + "license": "MIT", + "dependencies": { + "safe-buffer": "5.2.1" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/content-type": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/content-type/-/content-type-1.0.5.tgz", + "integrity": "sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/cookie": { + "version": "0.7.1", + "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.7.1.tgz", + "integrity": "sha512-6DnInpx7SJ2AK3+CTUE/ZM0vWTUboZCegxhC2xiIydHR9jNuTAASBrfEpHhiGOZw/nX51bHt6YQl8jsGo4y/0w==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/cookie-signature": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.0.6.tgz", + "integrity": "sha512-QADzlaHc8icV8I7vbaJXJwod9HWYp8uCqf1xa4OfNu1T7JVxQIrUgOWtHdNDtPiywmFbiS12VjotIXLrKM3orQ==", + "license": "MIT" + }, + "node_modules/debug": { + "version": "2.6.9", + "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", + "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", + "license": "MIT", + "dependencies": { + "ms": "2.0.0" + } + }, + "node_modules/delayed-stream": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", + "integrity": 
"sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", + "license": "MIT", + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/depd": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/depd/-/depd-2.0.0.tgz", + "integrity": "sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/destroy": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/destroy/-/destroy-1.2.0.tgz", + "integrity": "sha512-2sJGJTaXIIaR1w4iJSNoN0hnMY7Gpc/n8D4qSCJw8QqFWXf7cuAgnEHxBpweaVcPevC2l3KpjYCx3NypQQgaJg==", + "license": "MIT", + "engines": { + "node": ">= 0.8", + "npm": "1.2.8000 || >= 1.4.16" + } + }, + "node_modules/dunder-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", + "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.1", + "es-errors": "^1.3.0", + "gopd": "^1.2.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/ee-first": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz", + "integrity": "sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow==", + "license": "MIT" + }, + "node_modules/encodeurl": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-2.0.0.tgz", + "integrity": "sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/es-define-property": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", + "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-errors": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", + "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-object-atoms": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz", + "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-set-tostringtag": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz", + "integrity": "sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.6", + "has-tostringtag": "^1.0.2", + "hasown": "^2.0.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/escape-html": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz", + "integrity": "sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow==", + "license": "MIT" + }, + "node_modules/etag": { + 
"version": "1.8.1", + "resolved": "https://registry.npmjs.org/etag/-/etag-1.8.1.tgz", + "integrity": "sha512-aIL5Fx7mawVa300al2BnEE4iNvo1qETxLrPI/o05L7z6go7fCw1J6EQmbK4FmJ2AS7kgVF/KEZWufBfdClMcPg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/express": { + "version": "4.21.2", + "resolved": "https://registry.npmjs.org/express/-/express-4.21.2.tgz", + "integrity": "sha512-28HqgMZAmih1Czt9ny7qr6ek2qddF4FclbMzwhCREB6OFfH+rXAnuNCwo1/wFvrtbgsQDb4kSbX9de9lFbrXnA==", + "license": "MIT", + "dependencies": { + "accepts": "~1.3.8", + "array-flatten": "1.1.1", + "body-parser": "1.20.3", + "content-disposition": "0.5.4", + "content-type": "~1.0.4", + "cookie": "0.7.1", + "cookie-signature": "1.0.6", + "debug": "2.6.9", + "depd": "2.0.0", + "encodeurl": "~2.0.0", + "escape-html": "~1.0.3", + "etag": "~1.8.1", + "finalhandler": "1.3.1", + "fresh": "0.5.2", + "http-errors": "2.0.0", + "merge-descriptors": "1.0.3", + "methods": "~1.1.2", + "on-finished": "2.4.1", + "parseurl": "~1.3.3", + "path-to-regexp": "0.1.12", + "proxy-addr": "~2.0.7", + "qs": "6.13.0", + "range-parser": "~1.2.1", + "safe-buffer": "5.2.1", + "send": "0.19.0", + "serve-static": "1.16.2", + "setprototypeof": "1.2.0", + "statuses": "2.0.1", + "type-is": "~1.6.18", + "utils-merge": "1.0.1", + "vary": "~1.1.2" + }, + "engines": { + "node": ">= 0.10.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/fill-range": { + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz", + "integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==", + "dev": true, + "license": "MIT", + "dependencies": { + "to-regex-range": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/finalhandler": { + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-1.3.1.tgz", + "integrity": "sha512-6BN9trH7bp3qvnrRyzsBz+g3lZxTNZTbVO2EV1CS0WIcDbawYVdYvGflME/9QP0h0pYlCDBCTjYa9nZzMDpyxQ==", + "license": "MIT", + "dependencies": { + "debug": "2.6.9", + "encodeurl": "~2.0.0", + "escape-html": "~1.0.3", + "on-finished": "2.4.1", + "parseurl": "~1.3.3", + "statuses": "2.0.1", + "unpipe": "~1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/follow-redirects": { + "version": "1.15.11", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.11.tgz", + "integrity": "sha512-deG2P0JfjrTxl50XGCDyfI97ZGVCxIpfKYmfyrQ54n5FO/0gfIES8C/Psl6kWVDolizcaaxZJnTS0QSMxvnsBQ==", + "funding": [ + { + "type": "individual", + "url": "https://github.com/sponsors/RubenVerborgh" + } + ], + "license": "MIT", + "engines": { + "node": ">=4.0" + }, + "peerDependenciesMeta": { + "debug": { + "optional": true + } + } + }, + "node_modules/form-data": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.4.tgz", + "integrity": "sha512-KrGhL9Q4zjj0kiUt5OO4Mr/A/jlI2jDYs5eHBpYHPcBEVSiipAvn2Ko2HnPe20rmcuuvMHNdZFp+4IlGTMF0Ow==", + "license": "MIT", + "dependencies": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "es-set-tostringtag": "^2.1.0", + "hasown": "^2.0.2", + "mime-types": "^2.1.12" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/forwarded": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz", + "integrity": "sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow==", + 
"license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/fresh": { + "version": "0.5.2", + "resolved": "https://registry.npmjs.org/fresh/-/fresh-0.5.2.tgz", + "integrity": "sha512-zJ2mQYM18rEFOudeV4GShTGIQ7RbzA7ozbU9I/XBpm7kqgMywgmylMwXHxZJmkVoYkna9d2pVXVXPdYTP9ej8Q==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/fsevents": { + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", + "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, + "node_modules/function-bind": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", + "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-intrinsic": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz", + "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.2", + "es-define-property": "^1.0.1", + "es-errors": "^1.3.0", + "es-object-atoms": "^1.1.1", + "function-bind": "^1.1.2", + "get-proto": "^1.0.1", + "gopd": "^1.2.0", + "has-symbols": "^1.1.0", + "hasown": "^2.0.2", + "math-intrinsics": "^1.1.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz", + "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", + "license": "MIT", + "dependencies": { + "dunder-proto": "^1.0.1", + "es-object-atoms": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/glob-parent": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz", + "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==", + "dev": true, + "license": "ISC", + "dependencies": { + "is-glob": "^4.0.1" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/gopd": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", + "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-flag": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz", + "integrity": "sha512-sKJf1+ceQBr4SMkvQnBDNDtf4TXpVhVGateu0t918bl30FnbE2m4vNLX+VWe/dpjlb+HugGYzW7uQXH98HPEYw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=4" + } + }, + "node_modules/has-symbols": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz", + "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { 
+ "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-tostringtag": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz", + "integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==", + "license": "MIT", + "dependencies": { + "has-symbols": "^1.0.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/hasown": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", + "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", + "license": "MIT", + "dependencies": { + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/http-errors": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-2.0.0.tgz", + "integrity": "sha512-FtwrG/euBzaEjYeRqOgly7G0qviiXoJWnvEH2Z1plBdXgbyjv34pHTSb9zoeHMyDy33+DWy5Wt9Wo+TURtOYSQ==", + "license": "MIT", + "dependencies": { + "depd": "2.0.0", + "inherits": "2.0.4", + "setprototypeof": "1.2.0", + "statuses": "2.0.1", + "toidentifier": "1.0.1" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/iconv-lite": { + "version": "0.4.24", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz", + "integrity": "sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==", + "license": "MIT", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/ignore-by-default": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/ignore-by-default/-/ignore-by-default-1.0.1.tgz", + "integrity": "sha512-Ius2VYcGNk7T90CppJqcIkS5ooHUZyIQK+ClZfMfMNFEF9VSE73Fq+906u/CWu92x4gzZMWOwfFYckPObzdEbA==", + "dev": true, + "license": "ISC" + }, + "node_modules/inherits": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", + "license": "ISC" + }, + "node_modules/ipaddr.js": { + "version": "1.9.1", + "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz", + "integrity": "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==", + "license": "MIT", + "engines": { + "node": ">= 0.10" + } + }, + "node_modules/is-binary-path": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/is-binary-path/-/is-binary-path-2.1.0.tgz", + "integrity": "sha512-ZMERYes6pDydyuGidse7OsHxtbI7WVeUEozgR/g7rd0xUimYNlvZRE/K2MgZTjWy725IfelLeVcEM97mmtRGXw==", + "dev": true, + "license": "MIT", + "dependencies": { + "binary-extensions": "^2.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/is-extglob": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", + "integrity": "sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/is-glob": { + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.3.tgz", + "integrity": "sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg==", + "dev": true, + "license": "MIT", + "dependencies": { + 
"is-extglob": "^2.1.1" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/is-number": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz", + "integrity": "sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.12.0" + } + }, + "node_modules/math-intrinsics": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", + "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/media-typer": { + "version": "0.3.0", + "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz", + "integrity": "sha512-dq+qelQ9akHpcOl/gUVRTxVIOkAJ1wR3QAvb4RsVjS8oVoFjDGTc679wJYmUmknUF5HwMLOgb5O+a3KxfWapPQ==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/merge-descriptors": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.3.tgz", + "integrity": "sha512-gaNvAS7TZ897/rVaZ0nMtAyxNyi/pdbjbAwUpFQpN70GqnVfOiXpeUUMKRBmzXaSQ8DdTX4/0ms62r2K+hE6mQ==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/methods": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/methods/-/methods-1.1.2.tgz", + "integrity": "sha512-iclAHeNqNm68zFtnZ0e+1L2yUIdvzNoauKU4WBA3VvH/vPFieF7qfRlwUZU+DA9P9bPXIS90ulxoUoCH23sV2w==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/mime/-/mime-1.6.0.tgz", + "integrity": "sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg==", + "license": "MIT", + "bin": { + "mime": "cli.js" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/mime-db": { + "version": "1.52.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", + "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "2.1.35", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", + "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "license": "MIT", + "dependencies": { + "mime-db": "1.52.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/minimatch": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", + "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", + "dev": true, + "license": "ISC", + "dependencies": { + "brace-expansion": "^1.1.7" + }, + "engines": { + "node": "*" + } + }, + "node_modules/ms": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", + "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==", + "license": "MIT" + }, + "node_modules/negotiator": { + "version": "0.6.3", + "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.3.tgz", + "integrity": "sha512-+EUsqGPLsM+j/zdChZjsnX51g4XrHFOIXwfnCVPGlQk/k5giakcKsuxCObBRu6DSm9opw/O6slWbJdghQM4bBg==", + "license": "MIT", + 
"engines": { + "node": ">= 0.6" + } + }, + "node_modules/nodemon": { + "version": "3.1.10", + "resolved": "https://registry.npmjs.org/nodemon/-/nodemon-3.1.10.tgz", + "integrity": "sha512-WDjw3pJ0/0jMFmyNDp3gvY2YizjLmmOUQo6DEBY+JgdvW/yQ9mEeSw6H5ythl5Ny2ytb7f9C2nIbjSxMNzbJXw==", + "dev": true, + "license": "MIT", + "dependencies": { + "chokidar": "^3.5.2", + "debug": "^4", + "ignore-by-default": "^1.0.1", + "minimatch": "^3.1.2", + "pstree.remy": "^1.1.8", + "semver": "^7.5.3", + "simple-update-notifier": "^2.0.0", + "supports-color": "^5.5.0", + "touch": "^3.1.0", + "undefsafe": "^2.0.5" + }, + "bin": { + "nodemon": "bin/nodemon.js" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/nodemon" + } + }, + "node_modules/nodemon/node_modules/debug": { + "version": "4.4.3", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", + "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==", + "dev": true, + "license": "MIT", + "dependencies": { + "ms": "^2.1.3" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/nodemon/node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "dev": true, + "license": "MIT" + }, + "node_modules/normalize-path": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz", + "integrity": "sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/object-inspect": { + "version": "1.13.4", + "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.4.tgz", + "integrity": "sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/on-finished": { + "version": "2.4.1", + "resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.4.1.tgz", + "integrity": "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg==", + "license": "MIT", + "dependencies": { + "ee-first": "1.1.1" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/parseurl": { + "version": "1.3.3", + "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz", + "integrity": "sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/path-to-regexp": { + "version": "0.1.12", + "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.12.tgz", + "integrity": "sha512-RA1GjUVMnvYFxuqovrEqZoxxW5NUZqbwKtYz/Tt7nXerk0LbLblQmrsgdeOxV5SFHf0UDggjS/bSeOZwt1pmEQ==", + "license": "MIT" + }, + "node_modules/picomatch": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz", + "integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8.6" + }, + "funding": { + "url": 
"https://github.com/sponsors/jonschlinkert" + } + }, + "node_modules/proxy-addr": { + "version": "2.0.7", + "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.7.tgz", + "integrity": "sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg==", + "license": "MIT", + "dependencies": { + "forwarded": "0.2.0", + "ipaddr.js": "1.9.1" + }, + "engines": { + "node": ">= 0.10" + } + }, + "node_modules/proxy-from-env": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", + "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==", + "license": "MIT" + }, + "node_modules/pstree.remy": { + "version": "1.1.8", + "resolved": "https://registry.npmjs.org/pstree.remy/-/pstree.remy-1.1.8.tgz", + "integrity": "sha512-77DZwxQmxKnu3aR542U+X8FypNzbfJ+C5XQDk3uWjWxn6151aIMGthWYRXTqT1E5oJvg+ljaa2OJi+VfvCOQ8w==", + "dev": true, + "license": "MIT" + }, + "node_modules/qs": { + "version": "6.13.0", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.13.0.tgz", + "integrity": "sha512-+38qI9SOr8tfZ4QmJNplMUxqjbe7LKvvZgWdExBOmd+egZTtjLB67Gu0HRX3u/XOq7UU2Nx6nsjvS16Z9uwfpg==", + "license": "BSD-3-Clause", + "dependencies": { + "side-channel": "^1.0.6" + }, + "engines": { + "node": ">=0.6" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/range-parser": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.1.tgz", + "integrity": "sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/raw-body": { + "version": "2.5.2", + "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-2.5.2.tgz", + "integrity": "sha512-8zGqypfENjCIqGhgXToC8aB2r7YrBX+AQAfIPs/Mlk+BtPTztOvTS01NRW/3Eh60J+a48lt8qsCzirQ6loCVfA==", + "license": "MIT", + "dependencies": { + "bytes": "3.1.2", + "http-errors": "2.0.0", + "iconv-lite": "0.4.24", + "unpipe": "1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/readdirp": { + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-3.6.0.tgz", + "integrity": "sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==", + "dev": true, + "license": "MIT", + "dependencies": { + "picomatch": "^2.2.1" + }, + "engines": { + "node": ">=8.10.0" + } + }, + "node_modules/safe-buffer": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", + "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, + "node_modules/safer-buffer": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", + "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==", + "license": "MIT" + }, + "node_modules/semver": { + "version": "7.7.3", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.3.tgz", + "integrity": "sha512-SdsKMrI9TdgjdweUSR9MweHA4EJ8YxHn8DFaDisvhVlUOe4BF1tLD7GAj0lIqWVl+dPb/rExr0Btby5loQm20Q==", + "dev": true, + 
"license": "ISC", + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/send": { + "version": "0.19.0", + "resolved": "https://registry.npmjs.org/send/-/send-0.19.0.tgz", + "integrity": "sha512-dW41u5VfLXu8SJh5bwRmyYUbAoSB3c9uQh6L8h/KtsFREPWpbX1lrljJo186Jc4nmci/sGUZ9a0a0J2zgfq2hw==", + "license": "MIT", + "dependencies": { + "debug": "2.6.9", + "depd": "2.0.0", + "destroy": "1.2.0", + "encodeurl": "~1.0.2", + "escape-html": "~1.0.3", + "etag": "~1.8.1", + "fresh": "0.5.2", + "http-errors": "2.0.0", + "mime": "1.6.0", + "ms": "2.1.3", + "on-finished": "2.4.1", + "range-parser": "~1.2.1", + "statuses": "2.0.1" + }, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/send/node_modules/encodeurl": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-1.0.2.tgz", + "integrity": "sha512-TPJXq8JqFaVYm2CWmPvnP2Iyo4ZSM7/QKcSmuMLDObfpH5fi7RUGmd/rTDf+rut/saiDiQEeVTNgAmJEdAOx0w==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/send/node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "license": "MIT" + }, + "node_modules/serve-static": { + "version": "1.16.2", + "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.16.2.tgz", + "integrity": "sha512-VqpjJZKadQB/PEbEwvFdO43Ax5dFBZ2UECszz8bQ7pi7wt//PWe1P6MN7eCnjsatYtBT6EuiClbjSWP2WrIoTw==", + "license": "MIT", + "dependencies": { + "encodeurl": "~2.0.0", + "escape-html": "~1.0.3", + "parseurl": "~1.3.3", + "send": "0.19.0" + }, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/setprototypeof": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.2.0.tgz", + "integrity": "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==", + "license": "ISC" + }, + "node_modules/side-channel": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.1.0.tgz", + "integrity": "sha512-ZX99e6tRweoUXqR+VBrslhda51Nh5MTQwou5tnUDgbtyM0dBgmhEDtWGP/xbKn6hqfPRHujUNwz5fy/wbbhnpw==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "object-inspect": "^1.13.3", + "side-channel-list": "^1.0.0", + "side-channel-map": "^1.0.1", + "side-channel-weakmap": "^1.0.2" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-list": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/side-channel-list/-/side-channel-list-1.0.0.tgz", + "integrity": "sha512-FCLHtRD/gnpCiCHEiJLOwdmFP+wzCmDEkc9y7NsYxeF4u7Btsn1ZuwgwJGxImImHicJArLP4R0yX4c2KCrMrTA==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "object-inspect": "^1.13.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-map": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/side-channel-map/-/side-channel-map-1.0.1.tgz", + "integrity": "sha512-VCjCNfgMsby3tTdo02nbjtM/ewra6jPHmpThenkTYh8pG9ucZ/1P8So4u4FGBek/BjpOVsDCMoLA/iuBKIFXRA==", + "license": "MIT", + "dependencies": { + "call-bound": "^1.0.2", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.5", + "object-inspect": "^1.13.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": 
"https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-weakmap": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/side-channel-weakmap/-/side-channel-weakmap-1.0.2.tgz", + "integrity": "sha512-WPS/HvHQTYnHisLo9McqBHOJk2FkHO/tlpvldyrnem4aeQp4hai3gythswg6p01oSoTl58rcpiFAjF2br2Ak2A==", + "license": "MIT", + "dependencies": { + "call-bound": "^1.0.2", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.5", + "object-inspect": "^1.13.3", + "side-channel-map": "^1.0.1" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/simple-update-notifier": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/simple-update-notifier/-/simple-update-notifier-2.0.0.tgz", + "integrity": "sha512-a2B9Y0KlNXl9u/vsW6sTIu9vGEpfKu2wRV6l1H3XEas/0gUIzGzBoP/IouTcUQbm9JWZLH3COxyn03TYlFax6w==", + "dev": true, + "license": "MIT", + "dependencies": { + "semver": "^7.5.3" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/statuses": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/statuses/-/statuses-2.0.1.tgz", + "integrity": "sha512-RwNA9Z/7PrK06rYLIzFMlaF+l73iwpzsqRIFgbMLbTcLD6cOao82TaWefPXQvB2fOC4AjuYSEndS7N/mTCbkdQ==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/supports-color": { + "version": "5.5.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz", + "integrity": "sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==", + "dev": true, + "license": "MIT", + "dependencies": { + "has-flag": "^3.0.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/to-regex-range": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", + "integrity": "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "is-number": "^7.0.0" + }, + "engines": { + "node": ">=8.0" + } + }, + "node_modules/toidentifier": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.1.tgz", + "integrity": "sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA==", + "license": "MIT", + "engines": { + "node": ">=0.6" + } + }, + "node_modules/touch": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/touch/-/touch-3.1.1.tgz", + "integrity": "sha512-r0eojU4bI8MnHr8c5bNo7lJDdI2qXlWWJk6a9EAFG7vbhTjElYhBVS3/miuE0uOuoLdb8Mc/rVfsmm6eo5o9GA==", + "dev": true, + "license": "ISC", + "bin": { + "nodetouch": "bin/nodetouch.js" + } + }, + "node_modules/type-is": { + "version": "1.6.18", + "resolved": "https://registry.npmjs.org/type-is/-/type-is-1.6.18.tgz", + "integrity": "sha512-TkRKr9sUTxEH8MdfuCSP7VizJyzRNMjj2J2do2Jr3Kym598JVdEksuzPQCnlFPW4ky9Q+iA+ma9BGm06XQBy8g==", + "license": "MIT", + "dependencies": { + "media-typer": "0.3.0", + "mime-types": "~2.1.24" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/undefsafe": { + "version": "2.0.5", + "resolved": "https://registry.npmjs.org/undefsafe/-/undefsafe-2.0.5.tgz", + "integrity": "sha512-WxONCrssBM8TSPRqN5EmsjVrsv4A8X12J4ArBiiayv3DyyG3ZlIg6yysuuSYdZsVz3TKcTg2fd//Ujd4CHV1iA==", + "dev": true, + "license": "MIT" + }, + "node_modules/unpipe": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz", + "integrity": 
"sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/utils-merge": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/utils-merge/-/utils-merge-1.0.1.tgz", + "integrity": "sha512-pMZTvIkT1d+TFGvDOqodOclx0QWkkgi6Tdoa8gC8ffGAAqz9pzPTZWAybbsHHoED/ztMtkv/VoYTYyShUn81hA==", + "license": "MIT", + "engines": { + "node": ">= 0.4.0" + } + }, + "node_modules/vary": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz", + "integrity": "sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + } + } +} diff --git a/frontend/package.json b/frontend/package.json new file mode 100644 index 0000000..a8d2998 --- /dev/null +++ b/frontend/package.json @@ -0,0 +1,21 @@ +{ + "name": "munich-news-frontend", + "version": "1.0.0", + "description": "Munich News Email Platform Frontend", + "main": "server.js", + "scripts": { + "start": "node server.js", + "dev": "nodemon server.js" + }, + "keywords": ["news", "munich", "email"], + "author": "", + "license": "MIT", + "dependencies": { + "express": "^4.18.2", + "axios": "^1.6.2" + }, + "devDependencies": { + "nodemon": "^3.0.2" + } +} + diff --git a/frontend/public/app.js b/frontend/public/app.js new file mode 100644 index 0000000..a56c127 --- /dev/null +++ b/frontend/public/app.js @@ -0,0 +1,170 @@ +// Load news on page load +document.addEventListener('DOMContentLoaded', () => { + loadNews(); + loadStats(); +}); + +async function loadNews() { + const newsGrid = document.getElementById('newsGrid'); + newsGrid.innerHTML = '
<div class="loading">Loading news...</div>
'; + + try { + const response = await fetch('/api/news'); + const data = await response.json(); + + if (data.articles && data.articles.length > 0) { + displayNews(data.articles); + } else { + newsGrid.innerHTML = '
<div class="loading">No news available at the moment. Check back later!</div>
'; + } + } catch (error) { + console.error('Error loading news:', error); + newsGrid.innerHTML = '
<div class="loading">Failed to load news. Please try again later.</div>
'; + } +} + +function displayNews(articles) { + const newsGrid = document.getElementById('newsGrid'); + newsGrid.innerHTML = ''; + + articles.forEach(article => { + const card = document.createElement('div'); + card.className = 'news-card'; + card.onclick = () => window.open(article.link, '_blank'); + + card.innerHTML = ` +
<div class="source">${article.source || 'Munich News'}</div>
+            <h3>${article.title}</h3>
+            <p>${article.summary || 'No summary available.'}</p>
+            <a class="read-more">Read more →</a>
+        `;
+
+        newsGrid.appendChild(card);
+    });
+}
+
+async function loadStats() {
+    try {
+        const response = await fetch('/api/stats');
+        const data = await response.json();
+
+        if (data.subscribers !== undefined) {
+            document.getElementById('subscriberCount').textContent = data.subscribers.toLocaleString();
+        }
+    } catch (error) {
+        console.error('Error loading stats:', error);
+    }
+}
+
+async function subscribe() {
+    const emailInput = document.getElementById('emailInput');
+    const subscribeBtn = document.getElementById('subscribeBtn');
+    const formMessage = document.getElementById('formMessage');
+
+    const email = emailInput.value.trim();
+
+    if (!email || !email.includes('@')) {
+        formMessage.textContent = 'Please enter a valid email address';
+        formMessage.className = 'form-message error';
+        return;
+    }
+
+    subscribeBtn.disabled = true;
+    subscribeBtn.textContent = 'Subscribing...';
+    formMessage.textContent = '';
+
+    try {
+        const response = await fetch('/api/subscribe', {
+            method: 'POST',
+            headers: {
+                'Content-Type': 'application/json'
+            },
+            body: JSON.stringify({ email: email })
+        });
+
+        const data = await response.json();
+
+        if (response.ok) {
+            formMessage.textContent = data.message || 'Successfully subscribed! Check your email for confirmation.';
+            formMessage.className = 'form-message success';
+            emailInput.value = '';
+            loadStats(); // Refresh stats
+        } else {
+            formMessage.textContent = data.error || 'Failed to subscribe. Please try again.';
+            formMessage.className = 'form-message error';
+        }
+    } catch (error) {
+        formMessage.textContent = 'Network error. Please try again later.';
+        formMessage.className = 'form-message error';
+    } finally {
+        subscribeBtn.disabled = false;
+        subscribeBtn.textContent = 'Subscribe Free';
+    }
+}
+
+// Allow Enter key to submit
+document.getElementById('emailInput').addEventListener('keypress', (e) => {
+    if (e.key === 'Enter') {
+        subscribe();
+    }
+});
+
+function showUnsubscribe() {
+    document.getElementById('unsubscribeModal').style.display = 'block';
+}
+
+function closeUnsubscribe() {
+    document.getElementById('unsubscribeModal').style.display = 'none';
+    document.getElementById('unsubscribeEmail').value = '';
+    document.getElementById('unsubscribeMessage').textContent = '';
+}
+
+async function unsubscribe() {
+    const emailInput = document.getElementById('unsubscribeEmail');
+    const unsubscribeMessage = document.getElementById('unsubscribeMessage');
+
+    const email = emailInput.value.trim();
+
+    if (!email || !email.includes('@')) {
+        unsubscribeMessage.textContent = 'Please enter a valid email address';
+        unsubscribeMessage.className = 'form-message error';
+        return;
+    }
+
+    try {
+        const response = await fetch('/api/unsubscribe', {
+            method: 'POST',
+            headers: {
+                'Content-Type': 'application/json'
+            },
+            body: JSON.stringify({ email: email })
+        });
+
+        const data = await response.json();
+
+        if (response.ok) {
+            unsubscribeMessage.textContent = data.message || 'Successfully unsubscribed.';
+            unsubscribeMessage.className = 'form-message success';
+            emailInput.value = '';
+            setTimeout(() => {
+                closeUnsubscribe();
+                loadStats();
+            }, 2000);
+        } else {
+            unsubscribeMessage.textContent = data.error || 'Failed to unsubscribe. Please try again.';
+            unsubscribeMessage.className = 'form-message error';
+        }
+    } catch (error) {
+        unsubscribeMessage.textContent = 'Network error. Please try again later.';
+        unsubscribeMessage.className = 'form-message error';
+    }
+}
+
+// Close modal when clicking outside
+window.onclick = function(event) {
+    const modal = document.getElementById('unsubscribeModal');
+    if (event.target === modal) {
+        closeUnsubscribe();
+    }
+}
diff --git a/frontend/public/index.html b/frontend/public/index.html
new file mode 100644
index 0000000..de91c4a
--- /dev/null
+++ b/frontend/public/index.html
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Munich News Daily - Your Daily Dose of Munich News</title>
+    <link rel="stylesheet" href="styles.css">
+</head>
+<body>
+    <div class="container">
+        <header class="hero">
+            <div class="hero-content">
+                <h1>📰 Munich News Daily</h1>
+                <p class="tagline">Get the latest Munich news delivered to your inbox every morning</p>
+                <p class="description">Stay informed about what's happening in Munich with our curated daily newsletter. No fluff, just the news that matters.</p>
+
+                <div class="subscription-form">
+                    <input type="email" id="emailInput" placeholder="Enter your email address">
+                    <button id="subscribeBtn" onclick="subscribe()">Subscribe Free</button>
+                    <div id="formMessage" class="form-message"></div>
+                </div>
+
+                <div class="stats">
+                    <div class="stat-item">
+                        <span class="stat-number" id="subscriberCount">-</span>
+                        <span class="stat-label">Subscribers</span>
+                    </div>
+                </div>
+            </div>
+        </header>
+
+        <section class="news-section">
+            <h2>Latest Munich News</h2>
+            <div class="news-grid" id="newsGrid">
+                <div class="loading">Loading news...</div>
+            </div>
+        </section>
+
+        <footer>
+            <p>© 2024 Munich News Daily. Made with ❤️ for Munich.</p>
+            <p><a onclick="showUnsubscribe()">Unsubscribe</a></p>
+        </footer>
+    </div>
+
+    <div id="unsubscribeModal" class="modal">
+        <div class="modal-content">
+            <span class="close" onclick="closeUnsubscribe()">&times;</span>
+            <h2>Unsubscribe</h2>
+            <input type="email" id="unsubscribeEmail" placeholder="Enter your email address">
+            <button onclick="unsubscribe()">Unsubscribe</button>
+            <div id="unsubscribeMessage" class="form-message"></div>
+        </div>
+    </div>
+
+    <script src="app.js"></script>
+</body>
+</html>
+ + + + + + + + diff --git a/frontend/public/styles.css b/frontend/public/styles.css new file mode 100644 index 0000000..07d6ae9 --- /dev/null +++ b/frontend/public/styles.css @@ -0,0 +1,306 @@ +* { + margin: 0; + padding: 0; + box-sizing: border-box; +} + +body { + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif; + line-height: 1.6; + color: #333; + background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); + min-height: 100vh; +} + +.container { + max-width: 1200px; + margin: 0 auto; + padding: 20px; +} + +.hero { + text-align: center; + padding: 60px 20px; + color: white; +} + +.hero-content { + max-width: 700px; + margin: 0 auto; +} + +.hero h1 { + font-size: 3.5rem; + margin-bottom: 20px; + font-weight: 700; +} + +.tagline { + font-size: 1.5rem; + margin-bottom: 15px; + font-weight: 300; +} + +.description { + font-size: 1.1rem; + margin-bottom: 40px; + opacity: 0.9; +} + +.subscription-form { + display: flex; + flex-direction: column; + gap: 15px; + max-width: 500px; + margin: 0 auto 40px; +} + +.subscription-form input { + padding: 15px 20px; + font-size: 1rem; + border: none; + border-radius: 8px; + outline: none; + box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); +} + +.subscription-form button { + padding: 15px 30px; + font-size: 1.1rem; + font-weight: 600; + background: #ff6b6b; + color: white; + border: none; + border-radius: 8px; + cursor: pointer; + transition: all 0.3s ease; + box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); +} + +.subscription-form button:hover { + background: #ff5252; + transform: translateY(-2px); + box-shadow: 0 6px 12px rgba(0, 0, 0, 0.15); +} + +.subscription-form button:active { + transform: translateY(0); +} + +.form-message { + margin-top: 10px; + font-size: 0.9rem; + min-height: 20px; +} + +.form-message.success { + color: #4caf50; +} + +.form-message.error { + color: #f44336; +} + +.stats { + display: flex; + justify-content: center; + gap: 40px; + margin-top: 40px; +} + +.stat-item { + text-align: center; +} + +.stat-number { + display: block; + font-size: 2.5rem; + font-weight: 700; + margin-bottom: 5px; +} + +.stat-label { + font-size: 0.9rem; + opacity: 0.8; +} + +.news-section { + background: white; + border-radius: 20px; + padding: 40px; + margin: 40px 0; + box-shadow: 0 10px 30px rgba(0, 0, 0, 0.2); +} + +.news-section h2 { + font-size: 2rem; + margin-bottom: 30px; + color: #333; + text-align: center; +} + +.news-grid { + display: grid; + grid-template-columns: repeat(auto-fill, minmax(300px, 1fr)); + gap: 25px; +} + +.news-card { + background: #f8f9fa; + border-radius: 12px; + padding: 20px; + transition: all 0.3s ease; + border-left: 4px solid #667eea; + cursor: pointer; +} + +.news-card:hover { + transform: translateY(-5px); + box-shadow: 0 8px 20px rgba(0, 0, 0, 0.1); + background: white; +} + +.news-card h3 { + font-size: 1.2rem; + margin-bottom: 10px; + color: #333; + line-height: 1.4; +} + +.news-card p { + color: #666; + font-size: 0.95rem; + margin-bottom: 15px; + line-height: 1.5; +} + +.news-card .source { + font-size: 0.85rem; + color: #667eea; + font-weight: 600; + margin-bottom: 10px; +} + +.news-card .read-more { + color: #667eea; + text-decoration: none; + font-weight: 600; + font-size: 0.9rem; + display: inline-block; + margin-top: 10px; +} + +.news-card .read-more:hover { + text-decoration: underline; +} + +.loading { + text-align: center; + padding: 40px; + color: #666; + font-size: 1.1rem; + grid-column: 1 / -1; +} + +footer { + text-align: center; + padding: 40px 20px; + 
color: white; +} + +footer a { + color: white; + text-decoration: underline; + cursor: pointer; +} + +footer a:hover { + opacity: 0.8; +} + +/* Modal Styles */ +.modal { + display: none; + position: fixed; + z-index: 1000; + left: 0; + top: 0; + width: 100%; + height: 100%; + background-color: rgba(0, 0, 0, 0.5); + backdrop-filter: blur(5px); +} + +.modal-content { + background-color: white; + margin: 15% auto; + padding: 30px; + border-radius: 12px; + width: 90%; + max-width: 500px; + box-shadow: 0 10px 40px rgba(0, 0, 0, 0.3); +} + +.close { + color: #aaa; + float: right; + font-size: 28px; + font-weight: bold; + cursor: pointer; +} + +.close:hover { + color: #000; +} + +.modal-content h2 { + margin-bottom: 20px; + color: #333; +} + +.modal-content input { + width: 100%; + padding: 12px; + margin: 15px 0; + border: 2px solid #ddd; + border-radius: 8px; + font-size: 1rem; +} + +.modal-content button { + width: 100%; + padding: 12px; + background: #ff6b6b; + color: white; + border: none; + border-radius: 8px; + font-size: 1rem; + font-weight: 600; + cursor: pointer; + margin-top: 10px; +} + +.modal-content button:hover { + background: #ff5252; +} + +/* Responsive Design */ +@media (max-width: 768px) { + .hero h1 { + font-size: 2.5rem; + } + + .tagline { + font-size: 1.2rem; + } + + .news-grid { + grid-template-columns: 1fr; + } + + .stats { + flex-direction: column; + gap: 20px; + } +} + diff --git a/frontend/server.js b/frontend/server.js new file mode 100644 index 0000000..777bd26 --- /dev/null +++ b/frontend/server.js @@ -0,0 +1,57 @@ +const express = require('express'); +const path = require('path'); +const axios = require('axios'); + +const app = express(); +const PORT = process.env.PORT || 3000; +const API_URL = process.env.API_URL || 'http://localhost:5001'; + +// Serve static files +app.use(express.static('public')); +app.use(express.json()); + +// API proxy +app.get('/api/news', async (req, res) => { + try { + const response = await axios.get(`${API_URL}/api/news`); + res.json(response.data); + } catch (error) { + res.status(500).json({ error: 'Failed to fetch news' }); + } +}); + +app.get('/api/stats', async (req, res) => { + try { + const response = await axios.get(`${API_URL}/api/stats`); + res.json(response.data); + } catch (error) { + res.status(500).json({ error: 'Failed to fetch stats' }); + } +}); + +app.post('/api/subscribe', async (req, res) => { + try { + const response = await axios.post(`${API_URL}/api/subscribe`, req.body); + res.json(response.data); + } catch (error) { + res.status(error.response?.status || 500).json( + error.response?.data || { error: 'Failed to subscribe' } + ); + } +}); + +app.post('/api/unsubscribe', async (req, res) => { + try { + const response = await axios.post(`${API_URL}/api/unsubscribe`, req.body); + res.json(response.data); + } catch (error) { + res.status(error.response?.status || 500).json( + error.response?.data || { error: 'Failed to unsubscribe' } + ); + } +}); + +app.listen(PORT, () => { + console.log(`Frontend server running on http://localhost:${PORT}`); +}); + diff --git a/news_crawler/.gitignore b/news_crawler/.gitignore new file mode 100644 index 0000000..63dcbcd --- /dev/null +++ b/news_crawler/.gitignore @@ -0,0 +1,25 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +env/ +venv/ +ENV/ +.venv + +# Environment variables +.env +.env.local + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# OS +.DS_Store +Thumbs.db diff --git a/news_crawler/CHANGES.md b/news_crawler/CHANGES.md new file mode 100644 index 
0000000..0e48270
--- /dev/null
+++ b/news_crawler/CHANGES.md
@@ -0,0 +1,191 @@
+# Recent Changes - Full Content Storage
+
+## ✅ What Changed
+
+### 1. Removed Content Length Limit
+**Before:**
+```python
+'content': content_text[:10000]  # Limited to 10k chars
+```
+
+**After:**
+```python
+'content': content_text  # Full content, no limit
+```
+
+### 2. Simplified Database Schema
+**Before:**
+```javascript
+{
+  summary: String,       // Short summary
+  full_content: String   // Limited content
+}
+```
+
+**After:**
+```javascript
+{
+  content: String   // Full article content, no limit
+}
+```
+
+### 3. Enhanced API Response
+**Before:**
+```javascript
+{
+  title: "...",
+  link: "...",
+  summary: "..."
+}
+```
+
+**After:**
+```javascript
+{
+  title: "...",
+  author: "...",           // NEW!
+  link: "...",
+  preview: "...",          // First 200 chars
+  word_count: 1250,        // NEW!
+  has_full_content: true   // NEW!
+}
+```
+
+## 📊 Database Structure
+
+### Articles Collection
+```javascript
+{
+  _id: ObjectId,
+  title: String,          // Article title
+  author: String,         // Article author (extracted)
+  link: String,           // Article URL (unique)
+  content: String,        // FULL article content (no limit)
+  word_count: Number,     // Word count
+  source: String,         // RSS feed name
+  published_at: String,   // Publication date
+  crawled_at: DateTime,   // When crawled
+  created_at: DateTime    // When added
+}
+```
+
+## 🆕 New API Endpoint
+
+### GET /api/news/<article_url>
+Get full article content by URL.
+
+**Example:**
+```bash
+# URL encode the article URL
+curl "http://localhost:5001/api/news/https%3A%2F%2Fexample.com%2Farticle"
+```
+
+**Response:**
+```json
+{
+  "title": "New U-Bahn Line Opens in Munich",
+  "author": "Max Mustermann",
+  "link": "https://example.com/article",
+  "content": "The full article text here... (complete, no truncation)",
+  "word_count": 1250,
+  "source": "Süddeutsche Zeitung München",
+  "published_at": "2024-11-10T10:00:00Z",
+  "crawled_at": "2024-11-10T16:30:00Z",
+  "created_at": "2024-11-10T16:00:00Z"
+}
+```
+
+## 📈 Enhanced Stats
+
+### GET /api/stats
+Now includes the crawled article count:
+
+```json
+{
+  "subscribers": 150,
+  "articles": 500,
+  "crawled_articles": 350   // NEW!
+}
+```
+
+## 🎯 Benefits
+
+1. **Complete Content** - No truncation, full articles stored
+2. **Better for AI** - Full context for summarization/analysis
+3. **Cleaner Schema** - Single `content` field instead of `summary` + `full_content`
+4. **More Metadata** - Author, word count, crawl timestamp
+5. **Better API** - Preview in list, full content on demand
+
+## 🔄 Migration
+
+If you have existing articles with a `full_content` field, they will continue to work. New articles will use the `content` field.
+
+To migrate old articles:
+```javascript
+// MongoDB shell
+db.articles.updateMany(
+  { full_content: { $exists: true } },
+  [
+    {
+      $set: {
+        content: "$full_content"
+      }
+    },
+    {
+      $unset: ["full_content", "summary"]
+    }
+  ]
+)
+```
+
+## 🚀 Usage
+
+### Crawl Articles
+```bash
+cd news_crawler
+python crawler_service.py 10
+```
+
+### Get Article List (with previews)
+```bash
+curl http://localhost:5001/api/news
+```
+
+### Get Full Article Content
+```bash
+# Get the article URL from the list, then:
+curl "http://localhost:5001/api/news/<url-encoded-article-link>"
+```
+
+### Check Stats
+```bash
+curl http://localhost:5001/api/stats
+```
+
+## 📝 Example Workflow
+
+1. **Add RSS Feed**
+```bash
+curl -X POST http://localhost:5001/api/rss-feeds \
+  -H "Content-Type: application/json" \
+  -d '{"name": "News Source", "url": "https://example.com/rss"}'
+```
+
+2. **Crawl Articles**
+```bash
+cd news_crawler
+python crawler_service.py 10
+```
+
+3. **View Articles**
+```bash
+curl http://localhost:5001/api/news
+```
+
+4. **Get Full Content**
+```bash
+# Copy article link from above, URL encode it
+curl "http://localhost:5001/api/news/https%3A%2F%2Fexample.com%2Farticle"
+```
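+
+The same workflow can also be scripted end to end. A minimal Python sketch (a hypothetical client against the endpoints above, not shipped code; assumes `requests` is installed and the backend runs on its default port):
+
+```python
+import urllib.parse
+
+import requests
+
+API_BASE = "http://localhost:5001"
+
+def fetch_full_article(article_link: str) -> dict:
+    """GET /api/news/<article_url> with the article link URL-encoded."""
+    encoded = urllib.parse.quote(article_link, safe="")
+    response = requests.get(f"{API_BASE}/api/news/{encoded}", timeout=10)
+    response.raise_for_status()
+    return response.json()
+
+# List articles (previews), then pull the full content for each one
+articles = requests.get(f"{API_BASE}/api/news", timeout=10).json()["articles"]
+for item in articles:
+    full = fetch_full_article(item["link"])
+    print(full["title"], "-", full["word_count"], "words")
+```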
+Now you have complete article content ready for AI processing! 🎉
diff --git a/news_crawler/Dockerfile b/news_crawler/Dockerfile
new file mode 100644
index 0000000..4d14a0a
--- /dev/null
+++ b/news_crawler/Dockerfile
@@ -0,0 +1,13 @@
+FROM python:3.11-slim
+
+WORKDIR /app
+
+# Install dependencies
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy crawler service
+COPY crawler_service.py .
+
+# Run crawler
+CMD ["python", "crawler_service.py"]
diff --git a/news_crawler/EXTRACTION_STRATEGIES.md b/news_crawler/EXTRACTION_STRATEGIES.md
new file mode 100644
index 0000000..45058c9
--- /dev/null
+++ b/news_crawler/EXTRACTION_STRATEGIES.md
@@ -0,0 +1,353 @@
+# Content Extraction Strategies
+
+The crawler uses multiple strategies to dynamically extract article metadata from any website.
+
+## 🎯 What Gets Extracted
+
+1. **Title** - Article headline
+2. **Author** - Article writer/journalist
+3. **Published Date** - When article was published
+4. **Content** - Main article text
+5. **Description** - Meta description/summary
+
+## 📋 Extraction Strategies
+
+### 1. Title Extraction
+
+Tries multiple methods in order of reliability:
+
+#### Strategy 1: H1 Tag
+```html

+<h1>Article Title Here</h1>
+```
+✅ Most reliable - usually the main headline
+
+#### Strategy 2: Open Graph Meta Tag
+```html
+<meta property="og:title" content="Article Title Here">
+```
+✅ Used by Facebook, very reliable
+
+#### Strategy 3: Twitter Card Meta Tag
+```html
+<meta name="twitter:title" content="Article Title Here">
+```
+✅ Used by Twitter, reliable
+
+#### Strategy 4: Title Tag (Fallback)
+```html
+<title>Article Title | Site Name</title>
+```
+⚠️ Often includes site name, needs cleaning
+
+**Cleaning:**
+- Removes " | Site Name"
+- Removes " - Site Name"
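+
+As a rough illustration, the fallback chain might look like this in Python with BeautifulSoup (a simplified sketch, not the crawler's actual code; `extract_title` is a hypothetical helper):
+
+```python
+from bs4 import BeautifulSoup
+
+def extract_title(html: str) -> str | None:
+    """Try each strategy in order of reliability; the first match wins."""
+    soup = BeautifulSoup(html, "html.parser")
+
+    # Strategy 1: <h1> tag
+    h1 = soup.find("h1")
+    if h1 and h1.get_text(strip=True):
+        return h1.get_text(strip=True)
+
+    # Strategies 2-3: Open Graph / Twitter Card meta tags
+    for attrs in ({"property": "og:title"}, {"name": "twitter:title"}):
+        meta = soup.find("meta", attrs=attrs)
+        if meta and meta.get("content"):
+            return meta["content"].strip()
+
+    # Strategy 4: <title> tag, dropping the " | Site Name" suffix
+    if soup.title and soup.title.string:
+        return soup.title.string.split(" | ")[0].split(" - ")[0].strip()
+
+    return None
+```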
+
+---
+
+### 2. Author Extraction
+
+Tries multiple methods:
+
+#### Strategy 1: Meta Author Tag
+```html
+<meta name="author" content="John Doe">
+```
+✅ Standard HTML meta tag
+
+#### Strategy 2: Rel="author" Link
+```html
+<link rel="author" href="/authors/john-doe">
+```
+✅ Semantic HTML
+
+#### Strategy 3: Common Class Names
+```html
+<span class="author-name">John Doe</span>
+
+<div class="byline">John Doe</div>
+```
+✅ Searches for: author-name, author, byline, writer
+
+#### Strategy 4: Schema.org Markup
+```html
+<meta itemprop="author" content="John Doe">
+```
+✅ Structured data
+
+#### Strategy 5: JSON-LD Structured Data
+```html
+<script type="application/ld+json">
+{"@type": "NewsArticle", "author": {"@type": "Person", "name": "John Doe"}}
+</script>
+```
+✅ Most structured, very reliable
+
+**Cleaning:**
+- Removes "By " prefix
+- Validates length (< 100 chars)
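+
+The JSON-LD path is mostly JSON plumbing; a minimal sketch (hypothetical helper, assuming a NewsArticle payload shaped like the example above):
+
+```python
+import json
+
+from bs4 import BeautifulSoup
+
+def extract_author_jsonld(html: str) -> str | None:
+    """Look for an author name in JSON-LD structured data."""
+    soup = BeautifulSoup(html, "html.parser")
+    for script in soup.find_all("script", type="application/ld+json"):
+        try:
+            data = json.loads(script.string or "")
+        except json.JSONDecodeError:
+            continue  # Malformed JSON-LD is common in the wild; skip it
+        author = data.get("author") if isinstance(data, dict) else None
+        if isinstance(author, str) and author.strip():
+            return author.removeprefix("By ").strip()
+        if isinstance(author, dict) and author.get("name"):
+            return str(author["name"]).removeprefix("By ").strip()
+    return None
+```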
+
+---
+
+### 3. Date Extraction
+
+Tries multiple methods:
+
+#### Strategy 1: Time Tag with Datetime
+```html
+<time datetime="2024-11-10T10:00:00Z"></time>
+```
+✅ Most reliable - ISO format
+
+#### Strategy 2: Article Published Time Meta
+```html
+<meta property="article:published_time" content="2024-11-10T10:00:00Z">
+```
+✅ Open Graph standard
+
+#### Strategy 3: OG Published Time
+```html
+<meta property="og:published_time" content="2024-11-10T10:00:00Z">
+```
+✅ Facebook standard
+
+#### Strategy 4: Common Class Names
+```html
+<span class="publish-date">November 10, 2024</span>
+
+<div class="timestamp">10:00 AM, Nov 10</div>
+```
+✅ Searches for: publish-date, published, date, timestamp
+
+#### Strategy 5: Schema.org Markup
+```html
+<meta itemprop="datePublished" content="2024-11-10T10:00:00Z">
+```
+✅ Structured data
+
+#### Strategy 6: JSON-LD Structured Data
+```html
+<script type="application/ld+json">
+{"@type": "NewsArticle", "datePublished": "2024-11-10T10:00:00Z"}
+</script>
+```
+✅ Most structured
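+
+Whichever strategy matches, the raw value still has to be normalized; one way to do that (a sketch, assuming `python-dateutil` is available) is:
+
+```python
+from dateutil import parser as date_parser
+
+def normalize_date(raw: str) -> str | None:
+    """Parse a date string in almost any common format; return ISO 8601."""
+    try:
+        return date_parser.parse(raw).isoformat()
+    except (ValueError, OverflowError):
+        return None  # Unparseable strings are dropped rather than guessed
+```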
+
+---
+
+### 4. Content Extraction
+
+Tries multiple methods:
+
+#### Strategy 1: Semantic HTML Tags
+```html
+<article>
+  <div class="article-body">
+    <p>Article content here...</p>
+  </div>
+</article>
+```
+✅ Best practice HTML5
+
+#### Strategy 2: Common Class Names
+```html
+<div class="article-content">...</div>
+<div class="article-body">...</div>
+<div class="story-body">...</div>
+<div class="entry-content">...</div>
+<div class="post-content">...</div>
+```
+✅ Searches for common patterns
+
+#### Strategy 3: Schema.org Markup
+```html
+<div itemscope itemtype="https://schema.org/NewsArticle">
+  <div itemprop="articleBody">
+    <p>Content here...</p>
+  </div>
+</div>
+```
+✅ Structured data
+
+#### Strategy 4: Main Tag
+```html
+<main>
+  <div class="content">
+    <p>Content here...</p>
+  </div>
+</main>
+```
+✅ Semantic HTML5
+
+#### Strategy 5: Body Tag (Fallback)
+```html

+<body>
+  <p>Content here...</p>
+</body>
+ +``` +⚠️ Last resort, may include navigation + +**Content Filtering:** +- Removes `