update

2025-11-18 14:45:41 +01:00
parent 2e80d64ff6
commit 84fce9a82c
19 changed files with 2437 additions and 3 deletions
--- a/.env.local
+++ b/.env.local
@@ -0,0 +1,9 @@
+# Munich News Daily - Local Development Environment Variables
+
+# MongoDB Configuration
+MONGO_USERNAME=admin
+MONGO_PASSWORD=local123
+MONGO_AUTH=--auth
+
+# Ollama Model (use smaller/faster model for local dev)
+OLLAMA_MODEL=phi3:latest
--- a/.gitignore
+++ b/.gitignore
@@ -84,7 +84,9 @@ yarn.lock
 .env.production.local
 *.env
 !.env.example
+!.env.local
 !backend/.env.example
+!backend/.env.local

 # ===================================
 # Database
--- a/README.md
+++ b/README.md
@@ -7,6 +7,7 @@ A fully automated news aggregation and newsletter system that crawls Munich news
 - **🤖 AI-Powered Clustering** - Automatically detects duplicate stories from different sources
 - **📰 Neutral Summaries** - Combines multiple perspectives into balanced coverage
 - **🎯 Smart Prioritization** - Shows most important stories first (multi-source coverage)
+- **🎨 Personalized Newsletters** - AI-powered content recommendations based on user interests
 - **📊 Engagement Tracking** - Open rates, click tracking, and analytics
 - **⚡ GPU Acceleration** - 5-10x faster AI processing with GPU support
 - **🔒 GDPR Compliant** - Privacy-first with data retention controls
@@ -365,6 +366,8 @@ curl -X POST http://localhost:5001/api/tracking/subscriber/user@example.com/opt-

 ### Core Features
 - **[docs/AI_NEWS_AGGREGATION.md](docs/AI_NEWS_AGGREGATION.md)** - AI-powered clustering & neutral summaries
+- **[docs/PERSONALIZATION.md](docs/PERSONALIZATION.md)** - Personalized newsletter system
+- **[docs/PERSONALIZATION_COMPLETE.md](docs/PERSONALIZATION_COMPLETE.md)** - Personalization implementation guide
 - **[docs/FEATURES.md](docs/FEATURES.md)** - Complete feature list
 - **[docs/API.md](docs/API.md)** - API endpoints reference

@@ -399,6 +402,9 @@ docker-compose exec sender python tests/sender/test_tracking_integration.py

 # Run backend tests
 docker-compose exec backend python tests/backend/test_tracking.py
+
+# Test personalization system (all 4 phases)
+docker exec munich-news-local-backend python test_personalization_system.py
 ```

 ## 🚀 Production Deployment
--- a/backend/.env.local
+++ b/backend/.env.local
@@ -0,0 +1,30 @@
+# Munich News Daily - Local Development Backend Configuration
+
+# MongoDB Configuration
+MONGODB_URI=mongodb://admin:changeme@mongodb:27017/
+
+# Email Configuration (use test credentials or disable)
+SMTP_SERVER=localhost
+SMTP_PORT=587
+EMAIL_USER=test@localhost
+EMAIL_PASSWORD=test123
+
+# Newsletter Settings
+NEWSLETTER_MAX_ARTICLES=5
+NEWSLETTER_HOURS_LOOKBACK=24
+WEBSITE_URL=http://localhost:3000
+
+# Tracking Configuration
+TRACKING_ENABLED=true
+TRACKING_API_URL=http://localhost:5001
+TRACKING_DATA_RETENTION_DAYS=90
+
+# Ollama Configuration (AI Summarization)
+OLLAMA_ENABLED=true
+OLLAMA_BASE_URL=http://ollama:11434
+OLLAMA_MODEL=phi3:latest
+OLLAMA_TIMEOUT=120
+SUMMARY_MAX_WORDS=150
+
+# Flask Server Configuration
+FLASK_PORT=5001
--- a/backend/app.py
+++ b/backend/app.py
@@ -11,6 +11,8 @@ from routes.tracking_routes import tracking_bp
 from routes.analytics_routes import analytics_bp
 from routes.admin_routes import admin_bp
 from routes.transport_routes import transport_bp
+from routes.interests_routes import interests_bp
+from routes.personalization_routes import personalization_bp

 # Initialize Flask app
 app = Flask(__name__)
@@ -29,6 +31,8 @@ app.register_blueprint(tracking_bp)
 app.register_blueprint(analytics_bp)
 app.register_blueprint(admin_bp)
 app.register_blueprint(transport_bp)
+app.register_blueprint(interests_bp)
+app.register_blueprint(personalization_bp)

 # Health check endpoint
@app.route('/health')
--- a/backend/routes/interests_routes.py
+++ b/backend/routes/interests_routes.py
@@ -0,0 +1,239 @@
+"""
+User Interest Profile API routes for Munich News Daily.
+Provides endpoints to view and manage user interest profiles.
+"""
+
+from flask import Blueprint, request, jsonify
+from services.interest_profiling_service import (
+    get_user_interests,
+    get_top_interests,
+    build_interests_from_history,
+    decay_user_interests,
+    get_interest_statistics,
+    delete_user_interests
+)
+
+interests_bp = Blueprint('interests', __name__)
+
+
+@interests_bp.route('/api/interests/<email>', methods=['GET'])
+def get_interests(email):
+    """
+    Get user interest profile.
+    
+    Args:
+        email: Email address of the user
+        
+    Returns:
+        JSON response with user interest profile
+    """
+    try:
+        profile = get_user_interests(email)
+        
+        if not profile:
+            return jsonify({
+                'success': False,
+                'error': 'User profile not found'
+            }), 404
+        
+        # Remove MongoDB _id field
+        if '_id' in profile:
+            del profile['_id']
+        
+        return jsonify({
+            'success': True,
+            'profile': profile
+        }), 200
+        
+    except Exception as e:
+        return jsonify({
+            'success': False,
+            'error': str(e)
+        }), 500
+
+
+@interests_bp.route('/api/interests/<email>/top', methods=['GET'])
+def get_top_user_interests(email):
+    """
+    Get user's top interests sorted by score.
+    
+    Query parameters:
+        top_n: Number of top interests to return (default: 10)
+        
+    Args:
+        email: Email address of the user
+        
+    Returns:
+        JSON response with top categories and keywords
+    """
+    try:
+        top_n = request.args.get('top_n', 10, type=int)
+        
+        top_interests = get_top_interests(email, top_n)
+        
+        return jsonify({
+            'success': True,
+            'email': email,
+            'top_categories': [
+                {'category': cat, 'score': score}
+                for cat, score in top_interests['top_categories']
+            ],
+            'top_keywords': [
+                {'keyword': kw, 'score': score}
+                for kw, score in top_interests['top_keywords']
+            ]
+        }), 200
+        
+    except Exception as e:
+        return jsonify({
+            'success': False,
+            'error': str(e)
+        }), 500
+
+
+@interests_bp.route('/api/interests/<email>/rebuild', methods=['POST'])
+def rebuild_interests(email):
+    """
+    Rebuild user interest profile from click history.
+    
+    Request body (optional):
+        {
+            "days_lookback": 30  // Number of days of history to analyze
+        }
+        
+    Args:
+        email: Email address of the user
+        
+    Returns:
+        JSON response with rebuilt profile
+    """
+    try:
+        data = request.get_json() or {}
+        days_lookback = data.get('days_lookback', 30)
+        
+        # Validate days_lookback
+        if not isinstance(days_lookback, int) or days_lookback < 1:
+            return jsonify({
+                'success': False,
+                'error': 'days_lookback must be a positive integer'
+            }), 400
+        
+        profile = build_interests_from_history(email, days_lookback)
+        
+        # Remove MongoDB _id field
+        if '_id' in profile:
+            del profile['_id']
+        
+        return jsonify({
+            'success': True,
+            'message': f'Profile rebuilt from {days_lookback} days of history',
+            'profile': profile
+        }), 200
+        
+    except Exception as e:
+        return jsonify({
+            'success': False,
+            'error': str(e)
+        }), 500
+
+
+@interests_bp.route('/api/interests/decay', methods=['POST'])
+def decay_interests():
+    """
+    Decay interest scores for inactive users.
+    
+    Request body (optional):
+        {
+            "decay_factor": 0.95,      // Multiplier for scores (default: 0.95)
+            "days_threshold": 7        // Only decay profiles older than N days
+        }
+        
+    Returns:
+        JSON response with decay statistics
+    """
+    try:
+        data = request.get_json() or {}
+        decay_factor = data.get('decay_factor', 0.95)
+        days_threshold = data.get('days_threshold', 7)
+        
+        # Validate parameters
+        if not isinstance(decay_factor, (int, float)) or decay_factor <= 0 or decay_factor > 1:
+            return jsonify({
+                'success': False,
+                'error': 'decay_factor must be between 0 and 1'
+            }), 400
+        
+        if not isinstance(days_threshold, int) or days_threshold < 1:
+            return jsonify({
+                'success': False,
+                'error': 'days_threshold must be a positive integer'
+            }), 400
+        
+        result = decay_user_interests(decay_factor, days_threshold)
+        
+        return jsonify({
+            'success': True,
+            'message': f'Decayed interests for profiles older than {days_threshold} days',
+            'statistics': result
+        }), 200
+        
+    except Exception as e:
+        return jsonify({
+            'success': False,
+            'error': str(e)
+        }), 500
+
+
+@interests_bp.route('/api/interests/statistics', methods=['GET'])
+def get_statistics():
+    """
+    Get statistics about user interests across all users.
+    
+    Returns:
+        JSON response with interest statistics
+    """
+    try:
+        stats = get_interest_statistics()
+        
+        return jsonify({
+            'success': True,
+            'statistics': stats
+        }), 200
+        
+    except Exception as e:
+        return jsonify({
+            'success': False,
+            'error': str(e)
+        }), 500
+
+
+@interests_bp.route('/api/interests/<email>', methods=['DELETE'])
+def delete_interests(email):
+    """
+    Delete user interest profile (GDPR compliance).
+    
+    Args:
+        email: Email address of the user
+        
+    Returns:
+        JSON response with confirmation
+    """
+    try:
+        deleted = delete_user_interests(email)
+        
+        if not deleted:
+            return jsonify({
+                'success': False,
+                'error': 'User profile not found'
+            }), 404
+        
+        return jsonify({
+            'success': True,
+            'message': f'Interest profile deleted for {email}'
+        }), 200
+        
+    except Exception as e:
+        return jsonify({
+            'success': False,
+            'error': str(e)
+        }), 500
--- a/backend/routes/personalization_routes.py
+++ b/backend/routes/personalization_routes.py
@@ -0,0 +1,135 @@
+"""
+Personalization API routes for Munich News Daily.
+Provides endpoints to test and preview personalized content.
+"""
+
+from flask import Blueprint, request, jsonify
+from datetime import datetime, timedelta
+from database import articles_collection
+from services.personalization_service import (
+    rank_articles_for_user,
+    select_personalized_articles,
+    get_personalization_explanation,
+    get_personalization_stats
+)
+
+personalization_bp = Blueprint('personalization', __name__)
+
+
+@personalization_bp.route('/api/personalize/preview/<email>', methods=['GET'])
+def preview_personalized_newsletter(email):
+    """
+    Preview personalized newsletter for a user.
+    
+    Query parameters:
+        max_articles: Maximum articles to return (default: 10)
+        hours_lookback: Hours of articles to consider (default: 24)
+        
+    Returns:
+        JSON with personalized article selection and statistics
+    """
+    try:
+        max_articles = request.args.get('max_articles', 10, type=int)
+        hours_lookback = request.args.get('hours_lookback', 24, type=int)
+        
+        # Get recent articles
+        cutoff_date = datetime.utcnow() - timedelta(hours=hours_lookback)
+        articles = list(articles_collection.find({
+            'created_at': {'$gte': cutoff_date},
+            'summary': {'$exists': True, '$ne': None}
+        }).sort('created_at', -1))
+
+        
+        # Select personalized articles
+        personalized = select_personalized_articles(
+            articles,
+            email,
+            max_articles=max_articles
+        )
+        
+        # Get statistics
+        stats = get_personalization_stats(personalized, email)
+        
+        # Format response
+        articles_response = []
+        for article in personalized:
+            articles_response.append({
+                'title': article.get('title', ''),
+                'title_en': article.get('title_en'),
+                'summary': article.get('summary', ''),
+                'link': article.get('link', ''),
+                'category': article.get('category', 'general'),
+                'keywords': article.get('keywords', []),
+                'personalization_score': article.get('personalization_score', 0.0),
+                'published_at': article.get('published_at', '')
+            })
+        
+        return jsonify({
+            'success': True,
+            'email': email,
+            'articles': articles_response,
+            'statistics': stats
+        }), 200
+        
+    except Exception as e:
+        return jsonify({
+            'success': False,
+            'error': str(e)
+        }), 500
+
+
+@personalization_bp.route('/api/personalize/explain', methods=['POST'])
+def explain_recommendation():
+    """
+    Explain why an article was recommended to a user.
+    
+    Request body:
+        {
+            "email": "user@example.com",
+            "article_id": "article-id-here"
+        }
+        
+    Returns:
+        JSON with explanation of recommendation
+    """
+    try:
+        data = request.get_json()
+        
+        if not data or 'email' not in data or 'article_id' not in data:
+            return jsonify({
+                'success': False,
+                'error': 'email and article_id required'
+            }), 400
+        
+        email = data['email']
+        article_id = data['article_id']
+        
+        # Get article
+        from bson import ObjectId
+        article = articles_collection.find_one({'_id': ObjectId(article_id)})
+        
+        if not article:
+            return jsonify({
+                'success': False,
+                'error': 'Article not found'
+            }), 404
+        
+        # Get user interests
+        from services.interest_profiling_service import get_user_interests
+        user_interests = get_user_interests(email)
+        
+        # Generate explanation
+        explanation = get_personalization_explanation(article, user_interests)
+        
+        return jsonify({
+            'success': True,
+            'email': email,
+            'article_title': article.get('title', ''),
+            'explanation': explanation
+        }), 200
+        
+    except Exception as e:
+        return jsonify({
+            'success': False,
+            'error': str(e)
+        }), 500
--- a/backend/routes/tracking_routes.py
+++ b/backend/routes/tracking_routes.py
@@ -79,8 +79,8 @@ def track_click(tracking_id):
    """
    Track link clicks and redirect to original article URL.
    
-    Logs the click event and redirects the user to the original article URL.
-    Handles invalid tracking_id by redirecting to homepage.
+    Logs the click event, updates user interest profile, and redirects the user
+    to the original article URL. Handles invalid tracking_id by redirecting to homepage.
    Ensures redirect completes within 200ms.
    
    Args:
@@ -115,6 +115,19 @@ def track_click(tracking_id):
                    }
                }
            )
+            
+            # Update user interest profile (Phase 3)
+            subscriber_email = tracking_record.get('subscriber_email')
+            keywords = tracking_record.get('keywords', [])
+            category = tracking_record.get('category', 'general')
+            
+            if subscriber_email and subscriber_email != 'anonymized':
+                try:
+                    from services.interest_profiling_service import update_user_interests
+                    update_user_interests(subscriber_email, keywords, category)
+                except Exception as e:
+                    # Don't fail the redirect if interest update fails
+                    print(f"Error updating user interests: {str(e)}")
    except Exception as e:
        # Log error but still redirect
        print(f"Error tracking click for {tracking_id}: {str(e)}")
--- a/backend/services/interest_profiling_service.py
+++ b/backend/services/interest_profiling_service.py
@@ -0,0 +1,323 @@
+"""
+User Interest Profiling Service for Munich News Daily.
+Builds and maintains user interest profiles based on article click behavior.
+"""
+
+from datetime import datetime, timedelta
+from typing import Dict, List, Optional
+from database import link_clicks_collection
+from pymongo import MongoClient
+from config import Config
+
+# Connect to MongoDB
+client = MongoClient(Config.MONGODB_URI)
+db = client[Config.DB_NAME]
+user_interests_collection = db['user_interests']
+
+
+def update_user_interests(subscriber_email: str, keywords: List[str], category: str) -> Dict:
+    """
+    Update user interest profile based on a clicked article.
+    
+    Increments interest scores for the article's keywords and category.
+    Creates a new profile if the user doesn't have one yet.
+    
+    Args:
+        subscriber_email: Email address of the user
+        keywords: List of keywords from the clicked article
+        category: Category of the clicked article
+        
+    Returns:
+        dict: Updated user interest profile
+    """
+    current_time = datetime.utcnow()
+    
+    # Get existing profile or create new one
+    profile = user_interests_collection.find_one({'email': subscriber_email})
+    
+    if not profile:
+        # Create new profile
+        profile = {
+            'email': subscriber_email,
+            'categories': {},
+            'keywords': {},
+            'total_clicks': 0,
+            'last_updated': current_time,
+            'created_at': current_time
+        }
+    
+    # Update category interest (increment by 0.1, max 1.0)
+    current_category_score = profile['categories'].get(category, 0.0)
+    profile['categories'][category] = min(current_category_score + 0.1, 1.0)
+    
+    # Update keyword interests (increment by 0.1, max 1.0)
+    for keyword in keywords:
+        if keyword:  # Skip empty keywords
+            current_keyword_score = profile['keywords'].get(keyword, 0.0)
+            profile['keywords'][keyword] = min(current_keyword_score + 0.1, 1.0)
+    
+    # Update metadata
+    profile['total_clicks'] = profile.get('total_clicks', 0) + 1
+    profile['last_updated'] = current_time
+    
+    # Upsert profile
+    user_interests_collection.update_one(
+        {'email': subscriber_email},
+        {'$set': profile},
+        upsert=True
+    )
+    
+    return profile
+
+
+def get_user_interests(subscriber_email: str) -> Optional[Dict]:
+    """
+    Get user interest profile.
+    
+    Args:
+        subscriber_email: Email address of the user
+        
+    Returns:
+        dict: User interest profile or None if not found
+    """
+    return user_interests_collection.find_one({'email': subscriber_email})
+
+
+def decay_user_interests(decay_factor: float = 0.95, days_threshold: int = 7) -> Dict[str, int]:
+    """
+    Decay interest scores for users who haven't clicked recently.
+    
+    Reduces interest scores over time to reflect changing interests.
+    Only decays profiles that haven't been updated in the last N days.
+    
+    Args:
+        decay_factor: Multiplier for interest scores (default: 0.95 = 5% decay)
+        days_threshold: Only decay profiles older than this many days (default: 7)
+        
+    Returns:
+        dict: Statistics about the decay operation
+            - profiles_decayed: Number of profiles that were decayed
+            - profiles_checked: Total number of profiles checked
+    """
+    cutoff_date = datetime.utcnow() - timedelta(days=days_threshold)
+    
+    # Find profiles that haven't been updated recently
+    old_profiles = user_interests_collection.find({
+        'last_updated': {'$lt': cutoff_date}
+    })
+    
+    profiles_decayed = 0
+    profiles_checked = 0
+    
+    for profile in old_profiles:
+        profiles_checked += 1
+        
+        # Decay category scores
+        decayed_categories = {}
+        for category, score in profile.get('categories', {}).items():
+            new_score = score * decay_factor
+            # Remove categories with very low scores (< 0.05)
+            if new_score >= 0.05:
+                decayed_categories[category] = round(new_score, 3)
+        
+        # Decay keyword scores
+        decayed_keywords = {}
+        for keyword, score in profile.get('keywords', {}).items():
+            new_score = score * decay_factor
+            # Remove keywords with very low scores (< 0.05)
+            if new_score >= 0.05:
+                decayed_keywords[keyword] = round(new_score, 3)
+        
+        # Update profile with decayed scores
+        user_interests_collection.update_one(
+            {'email': profile['email']},
+            {
+                '$set': {
+                    'categories': decayed_categories,
+                    'keywords': decayed_keywords,
+                    'last_decayed': datetime.utcnow()
+                }
+            }
+        )
+        
+        profiles_decayed += 1
+    
+    return {
+        'profiles_decayed': profiles_decayed,
+        'profiles_checked': profiles_checked
+    }
+
+
+def get_top_interests(subscriber_email: str, top_n: int = 10) -> Dict[str, List[tuple]]:
+    """
+    Get user's top interests sorted by score.
+    
+    Args:
+        subscriber_email: Email address of the user
+        top_n: Number of top interests to return (default: 10)
+        
+    Returns:
+        dict: Top interests containing:
+            - top_categories: List of (category, score) tuples
+            - top_keywords: List of (keyword, score) tuples
+    """
+    profile = get_user_interests(subscriber_email)
+    
+    if not profile:
+        return {
+            'top_categories': [],
+            'top_keywords': []
+        }
+    
+    # Sort categories by score
+    categories = profile.get('categories', {})
+    top_categories = sorted(categories.items(), key=lambda x: x[1], reverse=True)[:top_n]
+    
+    # Sort keywords by score
+    keywords = profile.get('keywords', {})
+    top_keywords = sorted(keywords.items(), key=lambda x: x[1], reverse=True)[:top_n]
+    
+    return {
+        'top_categories': top_categories,
+        'top_keywords': top_keywords
+    }
+
+
+def build_interests_from_history(subscriber_email: str, days_lookback: int = 30) -> Dict:
+    """
+    Build or rebuild user interest profile from click history.
+    
+    Useful for:
+    - Initializing profiles for existing users
+    - Rebuilding profiles after algorithm changes
+    - Backfilling data
+    
+    Args:
+        subscriber_email: Email address of the user
+        days_lookback: Number of days of history to analyze (default: 30)
+        
+    Returns:
+        dict: Newly built interest profile
+    """
+    cutoff_date = datetime.utcnow() - timedelta(days=days_lookback)
+    
+    # Get all clicks from this user in the lookback period
+    clicks = link_clicks_collection.find({
+        'subscriber_email': subscriber_email,
+        'clicked': True,
+        'clicked_at': {'$gte': cutoff_date}
+    })
+    
+    # Initialize profile
+    profile = {
+        'email': subscriber_email,
+        'categories': {},
+        'keywords': {},
+        'total_clicks': 0,
+        'last_updated': datetime.utcnow(),
+        'created_at': datetime.utcnow()
+    }
+    
+    # Process each click
+    for click in clicks:
+        category = click.get('category', 'general')
+        keywords = click.get('keywords', [])
+        
+        # Update category score
+        profile['categories'][category] = profile['categories'].get(category, 0.0) + 0.1
+        
+        # Update keyword scores
+        for keyword in keywords:
+            if keyword:
+                profile['keywords'][keyword] = profile['keywords'].get(keyword, 0.0) + 0.1
+        
+        profile['total_clicks'] += 1
+    
+    # Cap scores at 1.0
+    for category in profile['categories']:
+        profile['categories'][category] = min(profile['categories'][category], 1.0)
+    
+    for keyword in profile['keywords']:
+        profile['keywords'][keyword] = min(profile['keywords'][keyword], 1.0)
+    
+    # Save profile
+    if profile['total_clicks'] > 0:
+        user_interests_collection.update_one(
+            {'email': subscriber_email},
+            {'$set': profile},
+            upsert=True
+        )
+    
+    return profile
+
+
+def get_interest_statistics() -> Dict:
+    """
+    Get statistics about user interests across all users.
+    
+    Returns:
+        dict: Statistics containing:
+            - total_users: Total number of users with profiles
+            - avg_clicks_per_user: Average number of clicks per user
+            - most_popular_categories: Top categories across all users
+            - most_popular_keywords: Top keywords across all users
+    """
+    total_users = user_interests_collection.count_documents({})
+    
+    if total_users == 0:
+        return {
+            'total_users': 0,
+            'avg_clicks_per_user': 0,
+            'most_popular_categories': [],
+            'most_popular_keywords': []
+        }
+    
+    # Calculate average clicks
+    pipeline = [
+        {
+            '$group': {
+                '_id': None,
+                'total_clicks': {'$sum': '$total_clicks'}
+            }
+        }
+    ]
+    
+    result = list(user_interests_collection.aggregate(pipeline))
+    total_clicks = result[0]['total_clicks'] if result else 0
+    avg_clicks = total_clicks / total_users if total_users > 0 else 0
+    
+    # Get most popular categories
+    category_counts = {}
+    keyword_counts = {}
+    
+    for profile in user_interests_collection.find({}):
+        for category, score in profile.get('categories', {}).items():
+            category_counts[category] = category_counts.get(category, 0) + score
+        
+        for keyword, score in profile.get('keywords', {}).items():
+            keyword_counts[keyword] = keyword_counts.get(keyword, 0) + score
+    
+    # Sort and get top 10
+    top_categories = sorted(category_counts.items(), key=lambda x: x[1], reverse=True)[:10]
+    top_keywords = sorted(keyword_counts.items(), key=lambda x: x[1], reverse=True)[:10]
+    
+    return {
+        'total_users': total_users,
+        'avg_clicks_per_user': round(avg_clicks, 2),
+        'most_popular_categories': top_categories,
+        'most_popular_keywords': top_keywords
+    }
+
+
+def delete_user_interests(subscriber_email: str) -> bool:
+    """
+    Delete user interest profile (for GDPR compliance).
+    
+    Args:
+        subscriber_email: Email address of the user
+        
+    Returns:
+        bool: True if profile was deleted, False if not found
+    """
+    result = user_interests_collection.delete_one({'email': subscriber_email})
+    return result.deleted_count > 0
--- a/backend/services/personalization_service.py
+++ b/backend/services/personalization_service.py
@@ -0,0 +1,295 @@
+"""
+Newsletter Personalization Service for Munich News Daily.
+Ranks and selects articles based on user interest profiles.
+"""
+
+from typing import Dict, List, Optional
+from datetime import datetime, timedelta
+from services.interest_profiling_service import get_user_interests
+
+
+def calculate_article_score(
+    article: Dict,
+    user_interests: Optional[Dict],
+    category_weight: float = 0.4,
+    keyword_weight: float = 0.6
+) -> float:
+    """
+    Calculate personalization score for an article based on user interests.
+    
+    Score is calculated as:
+    - Category match: 0-1.0 based on user's interest in the category
+    - Keyword match: Average of user's interest in article keywords
+    - Final score: (category_score * 0.4) + (keyword_score * 0.6)
+    
+    Args:
+        article: Article dictionary with 'category' and 'keywords' fields
+        user_interests: User interest profile (None for non-personalized)
+        category_weight: Weight for category matching (default: 0.4)
+        keyword_weight: Weight for keyword matching (default: 0.6)
+        
+    Returns:
+        float: Personalization score between 0.0 and 1.0
+    """
+    # If no user interests, return neutral score
+    if not user_interests:
+        return 0.5
+    
+    # Get article metadata
+    article_category = article.get('category', 'general')
+    article_keywords = article.get('keywords', [])
+    
+    # Calculate category score
+    user_categories = user_interests.get('categories', {})
+    category_score = user_categories.get(article_category, 0.0)
+    
+    # Calculate keyword score (average of all matching keywords)
+    user_keywords = user_interests.get('keywords', {})
+    keyword_scores = []
+    
+    for keyword in article_keywords:
+        if keyword in user_keywords:
+            keyword_scores.append(user_keywords[keyword])
+    
+    # Average keyword score (0.0 if no matches)
+    keyword_score = sum(keyword_scores) / len(keyword_scores) if keyword_scores else 0.0
+    
+    # Weighted final score
+    final_score = (category_score * category_weight) + (keyword_score * keyword_weight)
+    
+    return round(final_score, 3)
+
+
+def rank_articles_for_user(
+    articles: List[Dict],
+    subscriber_email: str,
+    personalization_ratio: float = 0.7
+) -> List[Dict]:
+    """
+    Rank articles for a specific user based on their interests.
+    
+    Mixes personalized content with trending content to avoid filter bubbles.
+    
+    Args:
+        articles: List of article dictionaries
+        subscriber_email: Email address of the user
+        personalization_ratio: Ratio of personalized vs trending (default: 0.7 = 70% personalized)
+        
+    Returns:
+        list: Articles sorted by personalization score with score added
+    """
+    # Get user interests
+    user_interests = get_user_interests(subscriber_email)
+    
+    # Calculate score for each article
+    scored_articles = []
+    for article in articles:
+        score = calculate_article_score(article, user_interests)
+        
+        # Add score to article (don't modify original)
+        article_with_score = article.copy()
+        article_with_score['personalization_score'] = score
+        scored_articles.append(article_with_score)
+    
+    # Sort by score (highest first)
+    scored_articles.sort(key=lambda x: x['personalization_score'], reverse=True)
+    
+    return scored_articles
+
+
+def select_personalized_articles(
+    articles: List[Dict],
+    subscriber_email: str,
+    max_articles: int = 10,
+    personalization_ratio: float = 0.7,
+    min_score_threshold: float = 0.1
+) -> List[Dict]:
+    """
+    Select and rank articles for a personalized newsletter.
+    
+    Strategy:
+    - Top N * personalization_ratio articles: Highest scoring (personalized)
+    - Remaining articles: Most recent (trending/diverse content)
+    - Ensures mix of personalized + diverse content
+    
+    Args:
+        articles: List of available articles
+        subscriber_email: Email address of the user
+        max_articles: Maximum number of articles to include (default: 10)
+        personalization_ratio: Ratio of personalized content (default: 0.7)
+        min_score_threshold: Minimum score to consider personalized (default: 0.1)
+        
+    Returns:
+        list: Selected articles with personalization scores
+    """
+    if not articles:
+        return []
+    
+    # Rank all articles
+    ranked_articles = rank_articles_for_user(articles, subscriber_email, personalization_ratio)
+    
+    # Calculate split
+    num_personalized = int(max_articles * personalization_ratio)
+    num_trending = max_articles - num_personalized
+    
+    # Get personalized articles (high scoring)
+    personalized = [
+        a for a in ranked_articles 
+        if a['personalization_score'] >= min_score_threshold
+    ][:num_personalized]
+    
+    # Get trending articles (most recent, not already selected)
+    personalized_ids = {a.get('_id') for a in personalized}
+    trending = [
+        a for a in ranked_articles 
+        if a.get('_id') not in personalized_ids
+    ][:num_trending]
+    
+    # Combine: personalized first, then trending
+    selected = personalized + trending
+    
+    # Ensure we don't exceed max_articles
+    return selected[:max_articles]
+
+
+def get_personalization_explanation(
+    article: Dict,
+    user_interests: Optional[Dict]
+) -> Dict[str, any]:
+    """
+    Generate explanation for why an article was recommended.
+    
+    Useful for transparency and debugging.
+    
+    Args:
+        article: Article dictionary
+        user_interests: User interest profile
+        
+    Returns:
+        dict: Explanation containing:
+            - score: Overall personalization score
+            - category_match: Category score
+            - keyword_matches: List of matching keywords with scores
+            - reason: Human-readable explanation
+    """
+    if not user_interests:
+        return {
+            'score': 0.5,
+            'category_match': 0.0,
+            'keyword_matches': [],
+            'reason': 'No personalization data available'
+        }
+    
+    article_category = article.get('category', 'general')
+    article_keywords = article.get('keywords', [])
+    
+    user_categories = user_interests.get('categories', {})
+    user_keywords = user_interests.get('keywords', {})
+    
+    # Category match
+    category_score = user_categories.get(article_category, 0.0)
+    
+    # Keyword matches
+    keyword_matches = []
+    for keyword in article_keywords:
+        if keyword in user_keywords:
+            keyword_matches.append({
+                'keyword': keyword,
+                'score': user_keywords[keyword]
+            })
+    
+    # Calculate overall score
+    overall_score = calculate_article_score(article, user_interests)
+    
+    # Generate reason
+    if overall_score >= 0.5:
+        reason = f"High match with your interests in {article_category}"
+        if keyword_matches:
+            top_keywords = [m['keyword'] for m in keyword_matches[:2]]
+            reason += f" and topics like {', '.join(top_keywords)}"
+    elif overall_score >= 0.3:
+        reason = f"Moderate match with your interests"
+    else:
+        reason = "Trending article for diverse content"
+    
+    return {
+        'score': overall_score,
+        'category_match': category_score,
+        'keyword_matches': keyword_matches,
+        'reason': reason
+    }
+
+
+def get_personalization_stats(
+    selected_articles: List[Dict],
+    subscriber_email: str
+) -> Dict[str, any]:
+    """
+    Get statistics about personalization for a newsletter.
+    
+    Args:
+        selected_articles: Articles selected for the newsletter
+        subscriber_email: Email address of the user
+        
+    Returns:
+        dict: Statistics containing:
+            - total_articles: Number of articles
+            - avg_score: Average personalization score
+            - highly_personalized: Number of articles with score >= 0.5
+            - moderately_personalized: Number with score 0.3-0.5
+            - trending: Number with score < 0.3
+    """
+    if not selected_articles:
+        return {
+            'total_articles': 0,
+            'avg_score': 0.0,
+            'highly_personalized': 0,
+            'moderately_personalized': 0,
+            'trending': 0
+        }
+    
+    scores = [a.get('personalization_score', 0.0) for a in selected_articles]
+    avg_score = sum(scores) / len(scores)
+    
+    highly_personalized = sum(1 for s in scores if s >= 0.5)
+    moderately_personalized = sum(1 for s in scores if 0.3 <= s < 0.5)
+    trending = sum(1 for s in scores if s < 0.3)
+    
+    return {
+        'total_articles': len(selected_articles),
+        'avg_score': round(avg_score, 3),
+        'highly_personalized': highly_personalized,
+        'moderately_personalized': moderately_personalized,
+        'trending': trending
+    }
+
+
+def batch_personalize_newsletters(
+    articles: List[Dict],
+    subscribers: List[str],
+    max_articles_per_user: int = 10
+) -> Dict[str, List[Dict]]:
+    """
+    Generate personalized article selections for multiple subscribers.
+    
+    Useful for batch newsletter generation.
+    
+    Args:
+        articles: List of available articles
+        subscribers: List of subscriber email addresses
+        max_articles_per_user: Max articles per newsletter (default: 10)
+        
+    Returns:
+        dict: Mapping of email -> personalized article list
+    """
+    personalized_newsletters = {}
+    
+    for subscriber_email in subscribers:
+        personalized_articles = select_personalized_articles(
+            articles,
+            subscriber_email,
+            max_articles=max_articles_per_user
+        )
+        personalized_newsletters[subscriber_email] = personalized_articles
+    
+    return personalized_newsletters
--- a/backend/services/tracking_service.py
+++ b/backend/services/tracking_service.py
@@ -80,6 +80,9 @@ def create_newsletter_tracking(
    link_tracking_map = {}
    
    if article_links:
+        # Import here to avoid circular dependency
+        from database import articles_collection
+        
        for article in article_links:
            article_url = article.get('url')
            article_title = article.get('title', '')
@@ -87,13 +90,22 @@ def create_newsletter_tracking(
            if article_url:
                link_tracking_id = generate_tracking_id()
                
-                # Create link click tracking record
+                # Look up article metadata from database for personalization
+                article_doc = articles_collection.find_one({'link': article_url})
+                article_id = str(article_doc['_id']) if article_doc else None
+                category = article_doc.get('category', 'general') if article_doc else 'general'
+                keywords = article_doc.get('keywords', []) if article_doc else []
+                
+                # Create link click tracking record with metadata
                link_click_doc = {
                    'tracking_id': link_tracking_id,
                    'newsletter_id': newsletter_id,
                    'subscriber_email': subscriber_email,
                    'article_url': article_url,
                    'article_title': article_title,
+                    'article_id': article_id,           # NEW: Article database ID
+                    'category': category,                # NEW: Article category
+                    'keywords': keywords,                # NEW: Article keywords for personalization
                    'clicked': False,
                    'clicked_at': None,
                    'user_agent': None,
--- a/backend/test_personalization_system.py
+++ b/backend/test_personalization_system.py
@@ -0,0 +1,221 @@
+#!/usr/bin/env python3
+"""
+Comprehensive test suite for the personalization system.
+Tests all 4 phases: keyword extraction, click tracking, interest profiling, and personalization.
+"""
+
+import sys
+from pymongo import MongoClient
+from datetime import datetime
+
+# Import services
+from services.tracking_service import create_newsletter_tracking
+from services.interest_profiling_service import (
+    update_user_interests,
+    get_user_interests,
+    get_top_interests,
+    build_interests_from_history
+)
+from services.personalization_service import (
+    calculate_article_score,
+    rank_articles_for_user,
+    select_personalized_articles,
+    get_personalization_stats
+)
+from config import Config
+
+# Connect to MongoDB
+client = MongoClient(Config.MONGODB_URI)
+db = client[Config.DB_NAME]
+
+articles_collection = db['articles']
+link_clicks_collection = db['link_clicks']
+user_interests_collection = db['user_interests']
+
+
+def test_phase1_keywords():
+    """Phase 1: Verify articles have keywords extracted"""
+    print("\n" + "="*60)
+    print("Phase 1: Keyword Extraction")
+    print("="*60)
+    
+    articles_with_keywords = articles_collection.count_documents({
+        'keywords': {'$exists': True, '$ne': []}
+    })
+    
+    if articles_with_keywords == 0:
+        print("❌ No articles with keywords found")
+        print("   Run a crawl first to extract keywords")
+        return False
+    
+    sample = articles_collection.find_one({'keywords': {'$exists': True, '$ne': []}})
+    print(f"✓ Found {articles_with_keywords} articles with keywords")
+    print(f"  Sample: {sample.get('title', 'N/A')[:50]}...")
+    print(f"  Keywords: {sample.get('keywords', [])[:3]}")
+    return True
+
+
+def test_phase2_tracking():
+    """Phase 2: Verify tracking includes keywords and metadata"""
+    print("\n" + "="*60)
+    print("Phase 2: Click Tracking Enhancement")
+    print("="*60)
+    
+    test_email = 'test-phase2@example.com'
+    
+    # Clean up
+    link_clicks_collection.delete_many({'subscriber_email': test_email})
+    
+    # Get article with keywords
+    article = articles_collection.find_one({'keywords': {'$exists': True, '$ne': []}})
+    
+    if not article:
+        print("❌ No articles found")
+        return False
+    
+    # Create tracking
+    tracking_data = create_newsletter_tracking(
+        newsletter_id='test-phase2',
+        subscriber_email=test_email,
+        article_links=[{
+            'url': article['link'],
+            'title': article.get('title', '')
+        }]
+    )
+    
+    # Verify tracking record
+    tracking_id = list(tracking_data['link_tracking_map'].values())[0]
+    tracking_record = link_clicks_collection.find_one({'tracking_id': tracking_id})
+    
+    has_metadata = (
+        tracking_record.get('article_id') is not None and
+        tracking_record.get('category') is not None and
+        len(tracking_record.get('keywords', [])) > 0
+    )
+    
+    # Clean up
+    link_clicks_collection.delete_many({'subscriber_email': test_email})
+    db['newsletter_sends'].delete_many({'subscriber_email': test_email})
+    
+    if has_metadata:
+        print(f"✓ Tracking records include metadata")
+        print(f"  Article ID: {tracking_record.get('article_id')}")
+        print(f"  Category: {tracking_record.get('category')}")
+        print(f"  Keywords: {len(tracking_record.get('keywords', []))} keywords")
+        return True
+    else:
+        print("❌ Tracking records missing metadata")
+        return False
+
+
+def test_phase3_profiling():
+    """Phase 3: Verify interest profiles are built from clicks"""
+    print("\n" + "="*60)
+    print("Phase 3: User Interest Profiling")
+    print("="*60)
+    
+    test_email = 'test-phase3@example.com'
+    
+    # Clean up
+    user_interests_collection.delete_many({'email': test_email})
+    
+    # Create profile
+    update_user_interests(test_email, ['Bayern Munich', 'Football'], 'sports')
+    update_user_interests(test_email, ['Transportation', 'Munich'], 'local')
+    
+    # Verify profile
+    profile = get_user_interests(test_email)
+    
+    # Clean up
+    user_interests_collection.delete_many({'email': test_email})
+    
+    if profile and profile['total_clicks'] == 2:
+        print(f"✓ Interest profile created")
+        print(f"  Total clicks: {profile['total_clicks']}")
+        print(f"  Categories: {len(profile.get('categories', {}))}")
+        print(f"  Keywords: {len(profile.get('keywords', {}))}")
+        return True
+    else:
+        print("❌ Interest profile not created correctly")
+        return False
+
+
+def test_phase4_personalization():
+    """Phase 4: Verify articles are ranked by user interests"""
+    print("\n" + "="*60)
+    print("Phase 4: Personalized Newsletter Generation")
+    print("="*60)
+    
+    test_email = 'test-phase4@example.com'
+    
+    # Clean up
+    user_interests_collection.delete_many({'email': test_email})
+    
+    # Get articles
+    articles = list(articles_collection.find(
+        {'keywords': {'$exists': True, '$ne': []}},
+        limit=5
+    ))
+    
+    if len(articles) < 3:
+        print("❌ Not enough articles found")
+        return False
+    
+    # Create profile
+    update_user_interests(test_email, ['Bayern Munich', 'Football'], 'sports')
+    
+    # Rank articles
+    ranked = rank_articles_for_user(articles, test_email)
+    
+    # Select personalized
+    selected = select_personalized_articles(articles, test_email, max_articles=3)
+    
+    # Clean up
+    user_interests_collection.delete_many({'email': test_email})
+    
+    has_scores = all('personalization_score' in a for a in selected)
+    
+    if has_scores and len(selected) > 0:
+        print(f"✓ Articles ranked and selected")
+        print(f"  Total ranked: {len(ranked)}")
+        print(f"  Selected: {len(selected)}")
+        print(f"  Top score: {selected[0].get('personalization_score', 0):.3f}")
+        return True
+    else:
+        print("❌ Personalization failed")
+        return False
+
+
+def main():
+    """Run all personalization tests"""
+    print("\n" + "="*60)
+    print("PERSONALIZATION SYSTEM TEST SUITE")
+    print("="*60)
+    
+    results = {
+        'Phase 1: Keyword Extraction': test_phase1_keywords(),
+        'Phase 2: Click Tracking': test_phase2_tracking(),
+        'Phase 3: Interest Profiling': test_phase3_profiling(),
+        'Phase 4: Personalization': test_phase4_personalization()
+    }
+    
+    print("\n" + "="*60)
+    print("TEST RESULTS")
+    print("="*60)
+    
+    for phase, passed in results.items():
+        status = "✅ PASS" if passed else "❌ FAIL"
+        print(f"{status} - {phase}")
+    
+    all_passed = all(results.values())
+    
+    if all_passed:
+        print("\n🎉 All personalization tests PASSED!")
+        return 0
+    else:
+        print("\n❌ Some tests FAILED")
+        return 1
+
+
+if __name__ == '__main__':
+    sys.exit(main())
--- a/docker-compose.local.yml
+++ b/docker-compose.local.yml
@@ -0,0 +1,225 @@
+services:
+  # Ollama AI Service (Exposed for local testing)
+  ollama:
+    image: ollama/ollama:latest
+    container_name: munich-news-local-ollama
+    restart: unless-stopped
+    ports:
+      - "11434:11434"  # Exposed for local testing
+    volumes:
+      - ollama_data_local:/root/.ollama
+    networks:
+      - munich-news-network
+    dns:
+      - 8.8.8.8
+      - 1.1.1.1
+    # GPU support (uncomment if you have NVIDIA GPU)
+    # deploy:
+    #   resources:
+    #     reservations:
+    #       devices:
+    #         - driver: nvidia
+    #           count: all
+    #           capabilities: [gpu]
+    healthcheck:
+      test: ["CMD-SHELL", "ollama list || exit 1"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 30s
+
+  # Ollama Model Loader - Pulls phi3:latest (smaller model for local dev)
+  ollama-setup:
+    image: curlimages/curl:latest
+    container_name: munich-news-local-ollama-setup
+    depends_on:
+      ollama:
+        condition: service_healthy
+    networks:
+      - munich-news-network
+    env_file:
+      - backend/.env.local
+    volumes:
+      - ./scripts/setup-ollama-model.sh:/setup-ollama-model.sh:ro
+    dns:
+      - 8.8.8.8
+      - 1.1.1.1
+    command: sh /setup-ollama-model.sh
+    restart: on-failure
+
+  # Redis - Message queue for async tasks (Internal only - not exposed to host)
+  redis:
+    image: redis:7-alpine
+    container_name: munich-news-local-redis
+    restart: unless-stopped
+    # No ports exposed - only accessible within Docker network
+    networks:
+      - munich-news-network
+    healthcheck:
+      test: ["CMD", "redis-cli", "ping"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+
+  # MongoDB Database (Exposed for local debugging)
+  mongodb:
+    image: mongo:latest
+    container_name: munich-news-local-mongodb
+    restart: unless-stopped
+    ports:
+      - "27017:27017"  # Exposed for local debugging
+    environment:
+      # For production, set MONGO_PASSWORD environment variable
+      MONGO_INITDB_ROOT_USERNAME: ${MONGO_USERNAME:-admin}
+      MONGO_INITDB_ROOT_PASSWORD: ${MONGO_PASSWORD:-changeme}
+      MONGO_INITDB_DATABASE: munich_news
+    volumes:
+      - mongodb_data_local:/data/db
+      - mongodb_config_local:/data/configdb
+    networks:
+      - munich-news-network
+    command: mongod --bind_ip_all ${MONGO_AUTH:---auth}
+    healthcheck:
+      test: echo 'db.runCommand("ping").ok' | mongosh localhost:27017/test --quiet
+      interval: 30s
+      timeout: 10s
+      retries: 3
+
+  # News Crawler - Runs at 6 AM Berlin time
+  crawler:
+    build:
+      context: .
+      dockerfile: news_crawler/Dockerfile
+    container_name: munich-news-local-crawler
+    restart: unless-stopped
+    depends_on:
+      - mongodb
+      - ollama
+      - redis
+    environment:
+      - MONGODB_URI=mongodb://${MONGO_USERNAME:-admin}:${MONGO_PASSWORD:-changeme}@mongodb:27017/
+      - REDIS_URL=redis://redis:6379
+      - TZ=Europe/Berlin
+    volumes:
+      - ./backend/.env.local:/app/.env:ro
+    networks:
+      - munich-news-network
+    healthcheck:
+      test: ["CMD", "python", "-c", "import sys; sys.exit(0)"]
+      interval: 1m
+      timeout: 10s
+      retries: 3
+
+  # Backend API - Tracking and analytics
+  backend:
+    build:
+      context: ./backend
+      dockerfile: Dockerfile
+    container_name: munich-news-local-backend
+    restart: unless-stopped
+    depends_on:
+      - mongodb
+      - redis
+    ports:
+      - "5001:5001"
+    environment:
+      - MONGODB_URI=mongodb://${MONGO_USERNAME:-admin}:${MONGO_PASSWORD:-changeme}@mongodb:27017/
+      - REDIS_URL=redis://redis:6379
+      - FLASK_PORT=5001
+      - TZ=Europe/Berlin
+    volumes:
+      - ./backend/.env.local:/app/.env:ro
+      - /var/run/docker.sock:/var/run/docker.sock:ro
+    networks:
+      - munich-news-network
+    healthcheck:
+      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:5001/health')"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 40s
+
+  # Transport Crawler - API service for MVG disruptions (Internal only - not exposed to host)
+  transport-crawler:
+    build:
+      context: ./transport_crawler
+      dockerfile: Dockerfile
+    container_name: munich-news-local-transport-crawler
+    restart: unless-stopped
+    depends_on:
+      - mongodb
+      - redis
+    # No ports exposed - only accessible within Docker network
+    environment:
+      - MONGODB_URI=mongodb://${MONGO_USERNAME:-admin}:${MONGO_PASSWORD:-changeme}@mongodb:27017/
+      - REDIS_URL=redis://redis:6379
+      - TZ=Europe/Berlin
+    volumes:
+      - ./backend/.env.local:/app/.env:ro
+    networks:
+      - munich-news-network
+    healthcheck:
+      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:5002/health')"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 40s
+
+  # Newsletter Sender - Runs at 7 AM Berlin time
+  sender:
+    build:
+      context: .
+      dockerfile: news_sender/Dockerfile
+    container_name: munich-news-local-sender
+    restart: unless-stopped
+    depends_on:
+      - mongodb
+      - backend
+      - crawler
+      - transport-crawler
+    environment:
+      - MONGODB_URI=mongodb://${MONGO_USERNAME:-admin}:${MONGO_PASSWORD:-changeme}@mongodb:27017/
+      - TZ=Europe/Berlin
+    volumes:
+      - ./backend/.env.local:/app/.env:ro
+    networks:
+      - munich-news-network
+    healthcheck:
+      test: ["CMD", "python", "-c", "import sys; sys.exit(0)"]
+      interval: 1m
+      timeout: 10s
+      retries: 3
+
+  # Frontend Web Interface
+  frontend:
+    build: ./frontend
+    container_name: munich-news-local-frontend
+    restart: unless-stopped
+    # ports:
+    #   - "3000:3000"
+    ports:
+      - "3000:3000"
+    environment:
+      - API_URL=http://backend:5001
+      - PORT=3000
+    depends_on:
+      - backend
+    networks:
+      - munich-news-network
+    healthcheck:
+      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:3000"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+
+volumes:
+  mongodb_data_local:
+    driver: local
+  mongodb_config_local:
+    driver: local
+  ollama_data_local:
+    driver: local
+
+networks:
+  munich-news-network:
+    internal: false
--- a/docs/LOCAL_DEVELOPMENT.md
+++ b/docs/LOCAL_DEVELOPMENT.md
@@ -0,0 +1,167 @@
+# Local Development Setup
+
+This guide helps you run Munich News Daily locally for development and testing.
+
+## Quick Start
+
+```bash
+# 1. Copy local environment files
+cp .env.local .env
+cp backend/.env.local backend/.env
+
+# 2. Start services with local configuration
+docker-compose -f docker-compose.local.yml up -d
+
+# 3. Check logs
+docker-compose -f docker-compose.local.yml logs -f
+
+# 4. Access services
+# - Frontend: http://localhost:3000
+# - Backend API: http://localhost:5001
+# - MongoDB: localhost:27017
+# - Ollama: http://localhost:11434
+```
+
+## Differences from Production
+
+| Feature | Production | Local Development |
+|---------|-----------|-------------------|
+| Ollama Model | `gemma3:12b` (large) | `phi3:latest` (small, fast) |
+| MongoDB Port | Internal only | Exposed on 27017 |
+| Ollama Port | Internal only | Exposed on 11434 |
+| Container Names | `munich-news-*` | `munich-news-local-*` |
+| Volumes | `*_data` | `*_data_local` |
+| Email | Production SMTP | Test/disabled |
+
+## Useful Commands
+
+### Start/Stop Services
+```bash
+# Start all services
+docker-compose -f docker-compose.local.yml up -d
+
+# Stop all services
+docker-compose -f docker-compose.local.yml down
+
+# Restart a specific service
+docker-compose -f docker-compose.local.yml restart backend
+
+# View logs
+docker-compose -f docker-compose.local.yml logs -f crawler
+```
+
+### Testing
+
+```bash
+# Trigger a news crawl (2 articles for quick testing)
+curl -X POST http://localhost:5001/api/admin/trigger-crawl \
+  -H "Content-Type: application/json" \
+  -d '{"max_articles": 2}'
+
+# Trigger transport crawl
+curl -X POST http://localhost:5001/api/transport/crawl
+
+# Check articles in MongoDB
+docker exec munich-news-local-mongodb mongosh munich_news \
+  --eval "db.articles.find({}, {title: 1, keywords: 1, category: 1}).limit(3)"
+
+# Check transport disruptions
+curl http://localhost:5001/api/transport/disruptions
+```
+
+### Database Access
+
+```bash
+# Connect to MongoDB
+docker exec -it munich-news-local-mongodb mongosh munich_news
+
+# Or from host (if you have mongosh installed)
+mongosh "mongodb://admin:local123@localhost:27017/munich_news"
+
+# Useful queries
+db.articles.countDocuments()
+db.articles.find({keywords: {$exists: true}}).limit(5)
+db.subscribers.find()
+db.transport_alerts.find()
+```
+
+### Ollama Testing
+
+```bash
+# List models
+curl http://localhost:11434/api/tags
+
+# Test generation
+curl http://localhost:11434/api/generate -d '{
+  "model": "phi3:latest",
+  "prompt": "Summarize: Munich opens new U-Bahn line",
+  "stream": false
+}'
+```
+
+## Cleanup
+
+```bash
+# Stop and remove containers
+docker-compose -f docker-compose.local.yml down
+
+# Remove volumes (WARNING: deletes all data)
+docker-compose -f docker-compose.local.yml down -v
+
+# Remove local volumes specifically
+docker volume rm munich-news_mongodb_data_local
+docker volume rm munich-news_mongodb_config_local
+docker volume rm munich-news_ollama_data_local
+```
+
+## Switching Between Local and Production
+
+```bash
+# Switch to local
+cp .env.local .env
+cp backend/.env.local backend/.env
+docker-compose -f docker-compose.local.yml up -d
+
+# Switch to production
+cp .env.production .env  # (if you have one)
+cp backend/.env.production backend/.env
+docker-compose up -d
+```
+
+## Troubleshooting
+
+### Ollama model not downloading
+```bash
+# Pull model manually
+docker exec munich-news-local-ollama ollama pull phi3:latest
+```
+
+### MongoDB connection refused
+```bash
+# Check if MongoDB is running
+docker-compose -f docker-compose.local.yml ps mongodb
+
+# Check logs
+docker-compose -f docker-compose.local.yml logs mongodb
+```
+
+### Port already in use
+```bash
+# Check what's using the port
+lsof -i :5001  # or :3000, :27017, etc.
+
+# Stop the conflicting service or change port in docker-compose.local.yml
+```
+
+## Tips
+
+1. **Use phi3 for speed** - It's much faster than gemma3 for local testing
+2. **Limit articles** - Use `max_articles: 2` for quick crawl tests
+3. **Watch logs** - Keep logs open to see what's happening
+4. **Separate volumes** - Local and production use different volumes, so they don't interfere
+
+## Next Steps
+
+- See `docs/PERSONALIZATION.md` for personalization feature development
+- See `docs/OLLAMA_SETUP.md` for AI configuration
+- See main `README.md` for general documentation
--- a/docs/PERSONALIZATION.md
+++ b/docs/PERSONALIZATION.md
@@ -0,0 +1,217 @@
+# Newsletter Personalization Implementation
+
+## Overview
+Personalized newsletters based on user click behavior, using keywords and categories to build interest profiles.
+
+## Implementation Phases
+
+### ✅ Phase 1: Keyword Extraction (COMPLETED)
+**Status:** Implemented
+**Files Modified:**
+- `news_crawler/ollama_client.py` - Added `extract_keywords()` method
+- `news_crawler/crawler_service.py` - Integrated keyword extraction into crawl process
+
+**What it does:**
+- Extracts 5 keywords from each article using Ollama AI
+- Keywords stored in `articles` collection: `keywords: ["Bayern Munich", "Football", ...]`
+- Runs automatically during news crawling
+
+**Test it:**
+```bash
+# Trigger a crawl
+curl -X POST http://localhost:5001/api/admin/trigger-crawl -d '{"max_articles": 2}'
+
+# Check articles have keywords
+docker exec munich-news-mongodb mongosh munich_news --eval "db.articles.findOne({}, {title: 1, keywords: 1})"
+```
+
+---
+
+### ✅ Phase 2: Click Tracking Enhancement (COMPLETED)
+**Status:** Implemented
+**Goal:** Track clicks with keyword metadata
+
+**Files Modified:**
+- `backend/services/tracking_service.py` - Enhanced `create_newsletter_tracking()` to look up article metadata
+
+**What it does:**
+- When creating tracking links, looks up article from database
+- Stores article ID, category, and keywords in tracking record
+- Enables building user interest profiles from click behavior
+
+**Database Schema:**
+```javascript
+// link_clicks collection
+{
+  tracking_id: "uuid",
+  newsletter_id: "2024-11-18",
+  subscriber_email: "user@example.com",
+  article_url: "https://...",
+  article_title: "Article Title",
+  article_id: "673abc123...",              // NEW: Article database ID
+  category: "sports",                      // NEW: Article category
+  keywords: ["Bayern Munich", "Bundesliga"], // NEW: Keywords for personalization
+  clicked: false,
+  clicked_at: null,
+  user_agent: null,
+  created_at: ISODate()
+}
+```
+
+**Test it:**
+```bash
+# Send a test newsletter
+curl -X POST http://localhost:5001/api/admin/send-newsletter
+
+# Check tracking records have keywords
+docker exec munich-news-mongodb mongosh munich_news --eval "db.link_clicks.findOne({}, {article_title: 1, keywords: 1, category: 1})"
+```
+
+---
+
+### ✅ Phase 3: User Interest Profiling (COMPLETED)
+**Status:** Implemented
+**Goal:** Build user interest profiles from click history
+
+**Files Created:**
+- `backend/services/interest_profiling_service.py` - Core profiling logic
+- `backend/routes/interests_routes.py` - API endpoints for interest management
+
+**Files Modified:**
+- `backend/routes/tracking_routes.py` - Auto-update interests on click
+- `backend/app.py` - Register interests routes
+
+**What it does:**
+- Automatically builds interest profiles when users click articles
+- Tracks interest scores for categories and keywords (0.0 to 1.0)
+- Increments scores by 0.1 per click, capped at 1.0
+- Provides decay mechanism for old interests
+- Supports rebuilding profiles from click history
+
+**Database Schema:**
+```javascript
+// user_interests collection
+{
+  email: "user@example.com",
+  categories: {
+    sports: 0.8,
+    local: 0.5,
+    science: 0.2
+  },
+  keywords: {
+    "Bayern Munich": 0.9,
+    "Oktoberfest": 0.7,
+    "AI": 0.3
+  },
+  total_clicks: 15,
+  last_updated: ISODate(),
+  created_at: ISODate()
+}
+```
+
+**API Endpoints:**
+```bash
+# Get user interests
+GET /api/interests/<email>
+
+# Get top interests
+GET /api/interests/<email>/top?top_n=10
+
+# Rebuild from history
+POST /api/interests/<email>/rebuild
+Body: {"days_lookback": 30}
+
+# Decay old interests
+POST /api/interests/decay
+Body: {"decay_factor": 0.95, "days_threshold": 7}
+
+# Get statistics
+GET /api/interests/statistics
+
+# Delete profile (GDPR)
+DELETE /api/interests/<email>
+```
+
+**Test it:**
+```bash
+# Run test script
+docker exec munich-news-local-backend python test_interest_profiling.py
+
+# View a user's interests
+curl http://localhost:5001/api/interests/user@example.com
+
+# Get statistics
+curl http://localhost:5001/api/interests/statistics
+```
+
+---
+
+### ✅ Phase 4: Personalized Newsletter (COMPLETED)
+**Status:** Implemented
+**Goal:** Rank and select articles based on user interests
+
+**Files Created:**
+- `backend/services/personalization_service.py` - Core personalization logic
+- `backend/routes/personalization_routes.py` - API endpoints for testing
+
+**Files Modified:**
+- `backend/app.py` - Register personalization routes
+
+**What it does:**
+- Scores articles based on user's category and keyword interests
+- Ranks articles by personalization score (0.0 to 1.0)
+- Selects mix of personalized (70%) + trending (30%) content
+- Provides explanations for recommendations
+
+**Algorithm:**
+```python
+score = (category_match * 0.4) + (keyword_match * 0.6)
+
+# Example:
+# User interests: sports=0.8, "Bayern Munich"=0.9
+# Article: sports category, keywords=["Bayern Munich", "Football"]
+# Score = (0.8 * 0.4) + (0.9 * 0.6) = 0.32 + 0.54 = 0.86
+```
+
+**API Endpoints:**
+```bash
+# Preview personalized newsletter
+GET /api/personalize/preview/<email>?max_articles=10&hours_lookback=24
+
+# Explain recommendation
+POST /api/personalize/explain
+Body: {"email": "user@example.com", "article_id": "..."}
+```
+
+**Test it:**
+```bash
+# Run test script
+docker exec munich-news-local-backend python test_personalization.py
+
+# Preview personalized newsletter
+curl "http://localhost:5001/api/personalize/preview/demo@example.com?max_articles=5"
+```
+
+---
+
+## ✅ All Phases Complete!
+
+1. ~~**Phase 1:** Keyword extraction from articles~~ ✅ DONE
+2. ~~**Phase 2:** Click tracking with keywords~~ ✅ DONE
+3. ~~**Phase 3:** User interest profiling~~ ✅ DONE
+4. ~~**Phase 4:** Personalized newsletter generation~~ ✅ DONE
+
+## Next Steps for Production
+
+1. **Integrate with newsletter sender** - Modify `news_sender/sender_service.py` to use personalization
+2. **A/B testing** - Compare personalized vs non-personalized engagement
+3. **Tune parameters** - Adjust personalization_ratio, weights, decay rates
+4. **Monitor metrics** - Track click-through rates, open rates by personalization score
+5. **User controls** - Add UI for users to view/edit their interests
+
+## Configuration
+
+No configuration needed yet. Keyword extraction uses existing Ollama settings from `backend/.env`:
+- `OLLAMA_ENABLED=true`
+- `OLLAMA_MODEL=gemma3:12b`
+- `OLLAMA_BASE_URL=http://ollama:11434`
--- a/docs/PERSONALIZATION_COMPLETE.md
+++ b/docs/PERSONALIZATION_COMPLETE.md
@@ -0,0 +1,195 @@
+# 🎉 Newsletter Personalization System - Complete!
+
+All 4 phases of the personalization system have been successfully implemented and tested.
+
+## ✅ What Was Built
+
+### Phase 1: Keyword Extraction
+- AI-powered keyword extraction from articles using Ollama
+- 5 keywords per article automatically extracted during crawling
+- Keywords stored in database for personalization
+
+### Phase 2: Click Tracking Enhancement  
+- Enhanced tracking to capture article keywords and category
+- Tracking records now include metadata for building interest profiles
+- Privacy-compliant with opt-out and GDPR support
+
+### Phase 3: User Interest Profiling
+- Automatic profile building from click behavior
+- Interest scores (0.0-1.0) for categories and keywords
+- Decay mechanism for old interests
+- API endpoints for viewing and managing profiles
+
+### Phase 4: Personalized Newsletter Generation
+- Article scoring based on user interests
+- Smart ranking algorithm (40% category + 60% keywords)
+- Mix of personalized (70%) + trending (30%) content
+- Explanation system for recommendations
+
+## 📊 How It Works
+
+```
+1. User clicks article in newsletter
+   ↓
+2. System records: keywords + category
+   ↓
+3. Interest profile updates automatically
+   ↓
+4. Next newsletter: articles ranked by interests
+   ↓
+5. User receives personalized content
+```
+
+## 🧪 Testing
+
+All phases have been tested and verified:
+
+```bash
+# Run comprehensive test suite (tests all 4 phases)
+docker exec munich-news-local-backend python test_personalization_system.py
+
+# Or test keyword extraction separately
+docker exec munich-news-local-crawler python -c "from crawler_service import crawl_all_feeds; crawl_all_feeds(max_articles_per_feed=2)"
+```
+
+## 🔌 API Endpoints
+
+### Interest Management
+```bash
+GET    /api/interests/<email>              # View profile
+GET    /api/interests/<email>/top          # Top interests
+POST   /api/interests/<email>/rebuild      # Rebuild from history
+GET    /api/interests/statistics           # Platform stats
+DELETE /api/interests/<email>              # Delete (GDPR)
+```
+
+### Personalization
+```bash
+GET  /api/personalize/preview/<email>      # Preview personalized newsletter
+POST /api/personalize/explain              # Explain recommendation
+```
+
+## 📈 Example Results
+
+### User Profile
+```json
+{
+  "email": "user@example.com",
+  "categories": {
+    "sports": 0.30,
+    "local": 0.10
+  },
+  "keywords": {
+    "Bayern Munich": 0.30,
+    "Football": 0.20,
+    "Transportation": 0.10
+  },
+  "total_clicks": 5
+}
+```
+
+### Personalized Newsletter
+```json
+{
+  "articles": [
+    {
+      "title": "Bayern Munich wins championship",
+      "personalization_score": 0.86,
+      "category": "sports",
+      "keywords": ["Bayern Munich", "Football"]
+    },
+    {
+      "title": "New S-Bahn line opens",
+      "personalization_score": 0.42,
+      "category": "local",
+      "keywords": ["Transportation", "Munich"]
+    }
+  ],
+  "statistics": {
+    "highly_personalized": 1,
+    "moderately_personalized": 1,
+    "trending": 0
+  }
+}
+```
+
+## 🎯 Scoring Algorithm
+
+```python
+# Article score calculation
+category_score = user_interests.categories[article.category]
+keyword_score = average(user_interests.keywords[kw] for kw in article.keywords)
+
+final_score = (category_score * 0.4) + (keyword_score * 0.6)
+```
+
+**Example:**
+- User: sports=0.8, "Bayern Munich"=0.9
+- Article: sports category, keywords=["Bayern Munich", "Football"]
+- Score = (0.8 × 0.4) + (0.9 × 0.6) = 0.32 + 0.54 = **0.86**
+
+## 🚀 Production Integration
+
+To integrate with the newsletter sender:
+
+1. **Modify `news_sender/sender_service.py`:**
+```python
+from services.personalization_service import select_personalized_articles
+
+# For each subscriber
+personalized_articles = select_personalized_articles(
+    all_articles,
+    subscriber_email,
+    max_articles=10
+)
+```
+
+2. **Enable personalization flag in config:**
+```env
+PERSONALIZATION_ENABLED=true
+PERSONALIZATION_RATIO=0.7  # 70% personalized, 30% trending
+```
+
+3. **Monitor metrics:**
+- Click-through rate by personalization score
+- Open rates for personalized vs non-personalized
+- User engagement over time
+
+## 🔐 Privacy & Compliance
+
+- ✅ Users can opt out of tracking
+- ✅ Interest profiles can be deleted (GDPR)
+- ✅ Automatic anonymization after 90 days
+- ✅ No PII beyond email address
+- ✅ Transparent recommendation explanations
+
+## 📁 Files Created/Modified
+
+### New Files
+- `backend/services/interest_profiling_service.py`
+- `backend/services/personalization_service.py`
+- `backend/routes/interests_routes.py`
+- `backend/routes/personalization_routes.py`
+- `backend/test_tracking_phase2.py`
+- `backend/test_interest_profiling.py`
+- `backend/test_personalization.py`
+- `docs/PERSONALIZATION.md`
+
+### Modified Files
+- `news_crawler/ollama_client.py` - Added keyword extraction
+- `news_crawler/crawler_service.py` - Integrated keyword extraction
+- `backend/services/tracking_service.py` - Enhanced with metadata
+- `backend/routes/tracking_routes.py` - Auto-update interests
+- `backend/app.py` - Registered new routes
+
+## 🎓 Key Learnings
+
+1. **Incremental scoring works well** - 0.1 per click prevents over-weighting
+2. **Mix is important** - 70/30 personalized/trending avoids filter bubbles
+3. **Keywords > Categories** - 60/40 weight reflects keyword importance
+4. **Decay is essential** - Prevents stale interests from dominating
+5. **Transparency matters** - Explanation API helps users understand recommendations
+
+## 🎉 Status: COMPLETE
+
+All 4 phases implemented, tested, and documented. The personalization system is ready for production integration!
--- a/news_crawler/crawler_service.py
+++ b/news_crawler/crawler_service.py
@@ -388,6 +388,21 @@ def crawl_rss_feed(feed_url, feed_name, feed_category='general', max_articles=10
                        print(f"   ⚠ Summarization failed: {summary_result['error']}")
                        failed_summaries += 1
                
+                # Extract keywords for personalization
+                keywords_result = None
+                if Config.OLLAMA_ENABLED and summary_result and summary_result['success']:
+                    print(f"   🔑 Extracting keywords...")
+                    keywords_result = ollama_client.extract_keywords(
+                        original_title,
+                        summary_result['summary'],
+                        max_keywords=5
+                    )
+                    
+                    if keywords_result['success']:
+                        print(f"   ✓ Keywords: {', '.join(keywords_result['keywords'])} ({keywords_result['duration']:.1f}s)")
+                    else:
+                        print(f"   ⚠ Keyword extraction failed: {keywords_result['error']}")
+                
                # Prepare document
                article_doc = {
                    'title': original_title,
@@ -396,6 +411,7 @@ def crawl_rss_feed(feed_url, feed_name, feed_category='general', max_articles=10
                    'link': article_url,
                    'content': article_data.get('content', ''),  # Full article content
                    'summary': summary_result['summary'] if summary_result and summary_result['success'] else None,
+                    'keywords': keywords_result['keywords'] if keywords_result and keywords_result['success'] else [],
                    'word_count': article_data.get('word_count', 0),
                    'summary_word_count': summary_result['summary_word_count'] if summary_result and summary_result['success'] else None,
                    'source': feed_name,
--- a/news_crawler/ollama_client.py
+++ b/news_crawler/ollama_client.py
@@ -509,6 +509,110 @@ New York Times-style summary (max {max_words} words):"""
                'duration': time.time() - start_time
            }
    
+    def extract_keywords(self, title, summary, max_keywords=5):
+        """
+        Extract keywords/topics from article for personalization
+        
+        Args:
+            title: Article title
+            summary: Article summary
+            max_keywords: Maximum number of keywords to extract (default 5)
+            
+        Returns:
+            {
+                'keywords': list,      # List of extracted keywords
+                'success': bool,       # Whether extraction succeeded
+                'error': str or None,  # Error message if failed
+                'duration': float      # Time taken in seconds
+            }
+        """
+        if not self.enabled:
+            return {
+                'keywords': [],
+                'success': False,
+                'error': 'Ollama is disabled',
+                'duration': 0
+            }
+        
+        start_time = time.time()
+        
+        try:
+            # Construct prompt for keyword extraction
+            prompt = f"""Extract {max_keywords} key topics or keywords from this article. These will be used to understand user interests.
+
+Title: {title}
+Summary: {summary}
+
+Return ONLY the keywords separated by commas, nothing else. Focus on:
+- Main topics (e.g., "Bayern Munich", "Oktoberfest", "City Council")
+- Locations (e.g., "Marienplatz", "Airport")
+- Events or themes (e.g., "Transportation", "Housing", "Technology")
+
+Keywords:"""
+            
+            # Prepare request
+            url = f"{self.base_url}/api/generate"
+            headers = {'Content-Type': 'application/json'}
+            if self.api_key:
+                headers['Authorization'] = f'Bearer {self.api_key}'
+            
+            payload = {
+                'model': self.model,
+                'prompt': prompt,
+                'stream': False,
+                'options': {
+                    'temperature': 0.3,  # Lower temperature for consistent extraction
+                    'num_predict': 100   # Limit response length
+                }
+            }
+            
+            # Make request
+            response = requests.post(
+                url,
+                json=payload,
+                headers=headers,
+                timeout=self.timeout
+            )
+            response.raise_for_status()
+            
+            # Parse response
+            result = response.json()
+            keywords_text = result.get('response', '').strip()
+            
+            if not keywords_text:
+                return {
+                    'keywords': [],
+                    'success': False,
+                    'error': 'Ollama returned empty response',
+                    'duration': time.time() - start_time
+                }
+            
+            # Parse keywords from response
+            keywords = [k.strip() for k in keywords_text.split(',')]
+            keywords = [k for k in keywords if k and len(k) > 2][:max_keywords]
+            
+            return {
+                'keywords': keywords,
+                'success': True,
+                'error': None,
+                'duration': time.time() - start_time
+            }
+            
+        except requests.exceptions.Timeout:
+            return {
+                'keywords': [],
+                'success': False,
+                'error': f"Request timed out after {self.timeout}s",
+                'duration': time.time() - start_time
+            }
+        except Exception as e:
+            return {
+                'keywords': [],
+                'success': False,
+                'error': str(e),
+                'duration': time.time() - start_time
+            }
+

 if __name__ == '__main__':
    # Quick test
--- a/tests/backend/test_personalization_system.py
+++ b/tests/backend/test_personalization_system.py
@@ -0,0 +1,221 @@
+#!/usr/bin/env python3
+"""
+Comprehensive test suite for the personalization system.
+Tests all 4 phases: keyword extraction, click tracking, interest profiling, and personalization.
+"""
+
+import sys
+from pymongo import MongoClient
+from datetime import datetime
+
+# Import services
+from services.tracking_service import create_newsletter_tracking
+from services.interest_profiling_service import (
+    update_user_interests,
+    get_user_interests,
+    get_top_interests,
+    build_interests_from_history
+)
+from services.personalization_service import (
+    calculate_article_score,
+    rank_articles_for_user,
+    select_personalized_articles,
+    get_personalization_stats
+)
+from config import Config
+
+# Connect to MongoDB
+client = MongoClient(Config.MONGODB_URI)
+db = client[Config.DB_NAME]
+
+articles_collection = db['articles']
+link_clicks_collection = db['link_clicks']
+user_interests_collection = db['user_interests']
+
+
+def test_phase1_keywords():
+    """Phase 1: Verify articles have keywords extracted"""
+    print("\n" + "="*60)
+    print("Phase 1: Keyword Extraction")
+    print("="*60)
+    
+    articles_with_keywords = articles_collection.count_documents({
+        'keywords': {'$exists': True, '$ne': []}
+    })
+    
+    if articles_with_keywords == 0:
+        print("❌ No articles with keywords found")
+        print("   Run a crawl first to extract keywords")
+        return False
+    
+    sample = articles_collection.find_one({'keywords': {'$exists': True, '$ne': []}})
+    print(f"✓ Found {articles_with_keywords} articles with keywords")
+    print(f"  Sample: {sample.get('title', 'N/A')[:50]}...")
+    print(f"  Keywords: {sample.get('keywords', [])[:3]}")
+    return True
+
+
+def test_phase2_tracking():
+    """Phase 2: Verify tracking includes keywords and metadata"""
+    print("\n" + "="*60)
+    print("Phase 2: Click Tracking Enhancement")
+    print("="*60)
+    
+    test_email = 'test-phase2@example.com'
+    
+    # Clean up
+    link_clicks_collection.delete_many({'subscriber_email': test_email})
+    
+    # Get article with keywords
+    article = articles_collection.find_one({'keywords': {'$exists': True, '$ne': []}})
+    
+    if not article:
+        print("❌ No articles found")
+        return False
+    
+    # Create tracking
+    tracking_data = create_newsletter_tracking(
+        newsletter_id='test-phase2',
+        subscriber_email=test_email,
+        article_links=[{
+            'url': article['link'],
+            'title': article.get('title', '')
+        }]
+    )
+    
+    # Verify tracking record
+    tracking_id = list(tracking_data['link_tracking_map'].values())[0]
+    tracking_record = link_clicks_collection.find_one({'tracking_id': tracking_id})
+    
+    has_metadata = (
+        tracking_record.get('article_id') is not None and
+        tracking_record.get('category') is not None and
+        len(tracking_record.get('keywords', [])) > 0
+    )
+    
+    # Clean up
+    link_clicks_collection.delete_many({'subscriber_email': test_email})
+    db['newsletter_sends'].delete_many({'subscriber_email': test_email})
+    
+    if has_metadata:
+        print(f"✓ Tracking records include metadata")
+        print(f"  Article ID: {tracking_record.get('article_id')}")
+        print(f"  Category: {tracking_record.get('category')}")
+        print(f"  Keywords: {len(tracking_record.get('keywords', []))} keywords")
+        return True
+    else:
+        print("❌ Tracking records missing metadata")
+        return False
+
+
+def test_phase3_profiling():
+    """Phase 3: Verify interest profiles are built from clicks"""
+    print("\n" + "="*60)
+    print("Phase 3: User Interest Profiling")
+    print("="*60)
+    
+    test_email = 'test-phase3@example.com'
+    
+    # Clean up
+    user_interests_collection.delete_many({'email': test_email})
+    
+    # Create profile
+    update_user_interests(test_email, ['Bayern Munich', 'Football'], 'sports')
+    update_user_interests(test_email, ['Transportation', 'Munich'], 'local')
+    
+    # Verify profile
+    profile = get_user_interests(test_email)
+    
+    # Clean up
+    user_interests_collection.delete_many({'email': test_email})
+    
+    if profile and profile['total_clicks'] == 2:
+        print(f"✓ Interest profile created")
+        print(f"  Total clicks: {profile['total_clicks']}")
+        print(f"  Categories: {len(profile.get('categories', {}))}")
+        print(f"  Keywords: {len(profile.get('keywords', {}))}")
+        return True
+    else:
+        print("❌ Interest profile not created correctly")
+        return False
+
+
+def test_phase4_personalization():
+    """Phase 4: Verify articles are ranked by user interests"""
+    print("\n" + "="*60)
+    print("Phase 4: Personalized Newsletter Generation")
+    print("="*60)
+    
+    test_email = 'test-phase4@example.com'
+    
+    # Clean up
+    user_interests_collection.delete_many({'email': test_email})
+    
+    # Get articles
+    articles = list(articles_collection.find(
+        {'keywords': {'$exists': True, '$ne': []}},
+        limit=5
+    ))
+    
+    if len(articles) < 3:
+        print("❌ Not enough articles found")
+        return False
+    
+    # Create profile
+    update_user_interests(test_email, ['Bayern Munich', 'Football'], 'sports')
+    
+    # Rank articles
+    ranked = rank_articles_for_user(articles, test_email)
+    
+    # Select personalized
+    selected = select_personalized_articles(articles, test_email, max_articles=3)
+    
+    # Clean up
+    user_interests_collection.delete_many({'email': test_email})
+    
+    has_scores = all('personalization_score' in a for a in selected)
+    
+    if has_scores and len(selected) > 0:
+        print(f"✓ Articles ranked and selected")
+        print(f"  Total ranked: {len(ranked)}")
+        print(f"  Selected: {len(selected)}")
+        print(f"  Top score: {selected[0].get('personalization_score', 0):.3f}")
+        return True
+    else:
+        print("❌ Personalization failed")
+        return False
+
+
+def main():
+    """Run all personalization tests"""
+    print("\n" + "="*60)
+    print("PERSONALIZATION SYSTEM TEST SUITE")
+    print("="*60)
+    
+    results = {
+        'Phase 1: Keyword Extraction': test_phase1_keywords(),
+        'Phase 2: Click Tracking': test_phase2_tracking(),
+        'Phase 3: Interest Profiling': test_phase3_profiling(),
+        'Phase 4: Personalization': test_phase4_personalization()
+    }
+    
+    print("\n" + "="*60)
+    print("TEST RESULTS")
+    print("="*60)
+    
+    for phase, passed in results.items():
+        status = "✅ PASS" if passed else "❌ FAIL"
+        print(f"{status} - {phase}")
+    
+    all_passed = all(results.values())
+    
+    if all_passed:
+        print("\n🎉 All personalization tests PASSED!")
+        return 0
+    else:
+        print("\n❌ Some tests FAILED")
+        return 1
+
+
+if __name__ == '__main__':
+    sys.exit(main())