""" Newsletter Personalization Service for Munich News Daily. Ranks and selects articles based on user interest profiles. """ from typing import Dict, List, Optional from datetime import datetime, timedelta from services.interest_profiling_service import get_user_interests def calculate_article_score( article: Dict, user_interests: Optional[Dict], category_weight: float = 0.4, keyword_weight: float = 0.6 ) -> float: """ Calculate personalization score for an article based on user interests. Score is calculated as: - Category match: 0-1.0 based on user's interest in the category - Keyword match: Average of user's interest in article keywords - Final score: (category_score * 0.4) + (keyword_score * 0.6) Args: article: Article dictionary with 'category' and 'keywords' fields user_interests: User interest profile (None for non-personalized) category_weight: Weight for category matching (default: 0.4) keyword_weight: Weight for keyword matching (default: 0.6) Returns: float: Personalization score between 0.0 and 1.0 """ # If no user interests, return neutral score if not user_interests: return 0.5 # Get article metadata article_category = article.get('category', 'general') article_keywords = article.get('keywords', []) # Calculate category score user_categories = user_interests.get('categories', {}) category_score = user_categories.get(article_category, 0.0) # Calculate keyword score (average of all matching keywords) user_keywords = user_interests.get('keywords', {}) keyword_scores = [] for keyword in article_keywords: if keyword in user_keywords: keyword_scores.append(user_keywords[keyword]) # Average keyword score (0.0 if no matches) keyword_score = sum(keyword_scores) / len(keyword_scores) if keyword_scores else 0.0 # Weighted final score final_score = (category_score * category_weight) + (keyword_score * keyword_weight) return round(final_score, 3) def rank_articles_for_user( articles: List[Dict], subscriber_email: str, personalization_ratio: float = 0.7 ) -> List[Dict]: """ Rank articles for a specific user based on their interests. Mixes personalized content with trending content to avoid filter bubbles. Args: articles: List of article dictionaries subscriber_email: Email address of the user personalization_ratio: Ratio of personalized vs trending (default: 0.7 = 70% personalized) Returns: list: Articles sorted by personalization score with score added """ # Get user interests user_interests = get_user_interests(subscriber_email) # Calculate score for each article scored_articles = [] for article in articles: score = calculate_article_score(article, user_interests) # Add score to article (don't modify original) article_with_score = article.copy() article_with_score['personalization_score'] = score scored_articles.append(article_with_score) # Sort by score (highest first) scored_articles.sort(key=lambda x: x['personalization_score'], reverse=True) return scored_articles def select_personalized_articles( articles: List[Dict], subscriber_email: str, max_articles: int = 10, personalization_ratio: float = 0.7, min_score_threshold: float = 0.1 ) -> List[Dict]: """ Select and rank articles for a personalized newsletter. Strategy: - Top N * personalization_ratio articles: Highest scoring (personalized) - Remaining articles: Most recent (trending/diverse content) - Ensures mix of personalized + diverse content Args: articles: List of available articles subscriber_email: Email address of the user max_articles: Maximum number of articles to include (default: 10) personalization_ratio: Ratio of personalized content (default: 0.7) min_score_threshold: Minimum score to consider personalized (default: 0.1) Returns: list: Selected articles with personalization scores """ if not articles: return [] # Rank all articles ranked_articles = rank_articles_for_user(articles, subscriber_email, personalization_ratio) # Calculate split num_personalized = int(max_articles * personalization_ratio) num_trending = max_articles - num_personalized # Get personalized articles (high scoring) personalized = [ a for a in ranked_articles if a['personalization_score'] >= min_score_threshold ][:num_personalized] # Get trending articles (most recent, not already selected) personalized_ids = {a.get('_id') for a in personalized} trending = [ a for a in ranked_articles if a.get('_id') not in personalized_ids ][:num_trending] # Combine: personalized first, then trending selected = personalized + trending # Ensure we don't exceed max_articles return selected[:max_articles] def get_personalization_explanation( article: Dict, user_interests: Optional[Dict] ) -> Dict[str, any]: """ Generate explanation for why an article was recommended. Useful for transparency and debugging. Args: article: Article dictionary user_interests: User interest profile Returns: dict: Explanation containing: - score: Overall personalization score - category_match: Category score - keyword_matches: List of matching keywords with scores - reason: Human-readable explanation """ if not user_interests: return { 'score': 0.5, 'category_match': 0.0, 'keyword_matches': [], 'reason': 'No personalization data available' } article_category = article.get('category', 'general') article_keywords = article.get('keywords', []) user_categories = user_interests.get('categories', {}) user_keywords = user_interests.get('keywords', {}) # Category match category_score = user_categories.get(article_category, 0.0) # Keyword matches keyword_matches = [] for keyword in article_keywords: if keyword in user_keywords: keyword_matches.append({ 'keyword': keyword, 'score': user_keywords[keyword] }) # Calculate overall score overall_score = calculate_article_score(article, user_interests) # Generate reason if overall_score >= 0.5: reason = f"High match with your interests in {article_category}" if keyword_matches: top_keywords = [m['keyword'] for m in keyword_matches[:2]] reason += f" and topics like {', '.join(top_keywords)}" elif overall_score >= 0.3: reason = f"Moderate match with your interests" else: reason = "Trending article for diverse content" return { 'score': overall_score, 'category_match': category_score, 'keyword_matches': keyword_matches, 'reason': reason } def get_personalization_stats( selected_articles: List[Dict], subscriber_email: str ) -> Dict[str, any]: """ Get statistics about personalization for a newsletter. Args: selected_articles: Articles selected for the newsletter subscriber_email: Email address of the user Returns: dict: Statistics containing: - total_articles: Number of articles - avg_score: Average personalization score - highly_personalized: Number of articles with score >= 0.5 - moderately_personalized: Number with score 0.3-0.5 - trending: Number with score < 0.3 """ if not selected_articles: return { 'total_articles': 0, 'avg_score': 0.0, 'highly_personalized': 0, 'moderately_personalized': 0, 'trending': 0 } scores = [a.get('personalization_score', 0.0) for a in selected_articles] avg_score = sum(scores) / len(scores) highly_personalized = sum(1 for s in scores if s >= 0.5) moderately_personalized = sum(1 for s in scores if 0.3 <= s < 0.5) trending = sum(1 for s in scores if s < 0.3) return { 'total_articles': len(selected_articles), 'avg_score': round(avg_score, 3), 'highly_personalized': highly_personalized, 'moderately_personalized': moderately_personalized, 'trending': trending } def batch_personalize_newsletters( articles: List[Dict], subscribers: List[str], max_articles_per_user: int = 10 ) -> Dict[str, List[Dict]]: """ Generate personalized article selections for multiple subscribers. Useful for batch newsletter generation. Args: articles: List of available articles subscribers: List of subscriber email addresses max_articles_per_user: Max articles per newsletter (default: 10) Returns: dict: Mapping of email -> personalized article list """ personalized_newsletters = {} for subscriber_email in subscribers: personalized_articles = select_personalized_articles( articles, subscriber_email, max_articles=max_articles_per_user ) personalized_newsletters[subscriber_email] = personalized_articles return personalized_newsletters