#!/usr/bin/env python3
"""
Comprehensive test suite for the personalization system.

Tests all 4 phases: keyword extraction, click tracking, interest profiling,
and personalization.

Each phase prints a short report and returns True (pass) or False (fail).
The script exits with status 0 when every phase passes and 1 otherwise.
The phases talk to the MongoDB database named by ``Config.MONGODB_URI`` /
``Config.DB_NAME`` and create (then delete) documents for dedicated
``test-phaseN@example.com`` addresses.
"""

import sys
from pymongo import MongoClient
from datetime import datetime

# Import services
from services.tracking_service import create_newsletter_tracking
from services.interest_profiling_service import (
    update_user_interests,
    get_user_interests,
    get_top_interests,
    build_interests_from_history
)
from services.personalization_service import (
    calculate_article_score,
    rank_articles_for_user,
    select_personalized_articles,
    get_personalization_stats
)
from config import Config

# Connect to MongoDB
client = MongoClient(Config.MONGODB_URI)
db = client[Config.DB_NAME]

articles_collection = db['articles']
link_clicks_collection = db['link_clicks']
user_interests_collection = db['user_interests']

# Query filter matching articles that carry a non-empty ``keywords`` list.
_HAS_KEYWORDS_FILTER = {'keywords': {'$exists': True, '$ne': []}}


def _print_banner(title):
    """Print *title* framed by two 60-character ``=`` rules."""
    print("\n" + "="*60)
    print(title)
    print("="*60)


def _run_phase(name, test_func):
    """Run one phase and return its boolean result.

    Any exception raised by *test_func* is reported and counted as a
    failure so that the remaining phases still run and the results table
    is always printed.
    """
    try:
        return bool(test_func())
    except Exception as exc:  # top-level boundary: report, don't abort the suite
        print(f"❌ {name} raised {type(exc).__name__}: {exc}")
        return False


def test_phase1_keywords():
    """Phase 1: Verify articles have keywords extracted.

    Returns:
        True if at least one article with a non-empty ``keywords`` list
        exists, False otherwise.
    """
    _print_banner("Phase 1: Keyword Extraction")

    articles_with_keywords = articles_collection.count_documents(
        _HAS_KEYWORDS_FILTER
    )

    if articles_with_keywords == 0:
        print("❌ No articles with keywords found")
        print("   Run a crawl first to extract keywords")
        return False

    # find_one returns None when nothing matches (e.g. the documents were
    # removed between the count and this query), so fall back to an empty dict.
    sample = articles_collection.find_one(_HAS_KEYWORDS_FILTER) or {}
    print(f"✓ Found {articles_with_keywords} articles with keywords")
    print(f"  Sample: {sample.get('title', 'N/A')[:50]}...")
    print(f"  Keywords: {sample.get('keywords', [])[:3]}")
    return True


def test_phase2_tracking():
    """Phase 2: Verify tracking includes keywords and metadata.

    Creates tracking data for one article, looks up the first generated
    tracking record and checks that it has ``article_id``, ``category``
    and a non-empty ``keywords`` value.

    Returns:
        True if the tracking record carries all three pieces of metadata,
        False otherwise (including when no article or record is found).
    """
    _print_banner("Phase 2: Click Tracking Enhancement")

    test_email = 'test-phase2@example.com'

    # Clean up
    link_clicks_collection.delete_many({'subscriber_email': test_email})

    # Get article with keywords
    article = articles_collection.find_one(_HAS_KEYWORDS_FILTER)

    if not article:
        print("❌ No articles found")
        return False

    try:
        # Create tracking
        tracking_data = create_newsletter_tracking(
            newsletter_id='test-phase2',
            subscriber_email=test_email,
            article_links=[{
                'url': article['link'],
                'title': article.get('title', '')
            }]
        )

        # Verify tracking record
        tracking_ids = list(tracking_data['link_tracking_map'].values())
        tracking_record = None
        if tracking_ids:
            tracking_record = link_clicks_collection.find_one(
                {'tracking_id': tracking_ids[0]}
            )
    finally:
        # Clean up even when the service call or the lookup raises, so no
        # test documents are left behind.
        link_clicks_collection.delete_many({'subscriber_email': test_email})
        db['newsletter_sends'].delete_many({'subscriber_email': test_email})

    if tracking_record is None:
        print("❌ Tracking record not found")
        return False

    has_metadata = (
        tracking_record.get('article_id') is not None and
        tracking_record.get('category') is not None and
        bool(tracking_record.get('keywords'))  # tolerates a stored None
    )

    if has_metadata:
        print(f"✓ Tracking records include metadata")
        print(f"  Article ID: {tracking_record.get('article_id')}")
        print(f"  Category: {tracking_record.get('category')}")
        print(f"  Keywords: {len(tracking_record.get('keywords', []))} keywords")
        return True

    print("❌ Tracking records missing metadata")
    return False


def test_phase3_profiling():
    """Phase 3: Verify interest profiles are built from clicks.

    Records two interest updates for a test address and checks that the
    resulting profile reports ``total_clicks == 2``.

    Returns:
        True if the profile exists with exactly two clicks, False otherwise.
    """
    _print_banner("Phase 3: User Interest Profiling")

    test_email = 'test-phase3@example.com'

    # Clean up
    user_interests_collection.delete_many({'email': test_email})

    try:
        # Create profile
        update_user_interests(test_email, ['Bayern Munich', 'Football'], 'sports')
        update_user_interests(test_email, ['Transportation', 'Munich'], 'local')

        # Verify profile
        profile = get_user_interests(test_email)
    finally:
        # Clean up regardless of whether the service calls succeeded.
        user_interests_collection.delete_many({'email': test_email})

    # .get() so a profile without the counter is a failure, not a KeyError.
    if profile and profile.get('total_clicks') == 2:
        print(f"✓ Interest profile created")
        print(f"  Total clicks: {profile['total_clicks']}")
        print(f"  Categories: {len(profile.get('categories', {}))}")
        print(f"  Keywords: {len(profile.get('keywords', {}))}")
        return True

    print("❌ Interest profile not created correctly")
    return False


def test_phase4_personalization():
    """Phase 4: Verify articles are ranked by user interests.

    Builds a small interest profile, ranks up to five articles for the
    test address and selects at most three of them.

    Returns:
        True if at least one article was selected and every selected
        article has a ``personalization_score`` key, False otherwise
        (including when fewer than three articles are available).
    """
    _print_banner("Phase 4: Personalized Newsletter Generation")

    test_email = 'test-phase4@example.com'

    # Clean up
    user_interests_collection.delete_many({'email': test_email})

    # Get articles
    articles = list(articles_collection.find(_HAS_KEYWORDS_FILTER, limit=5))

    if len(articles) < 3:
        print("❌ Not enough articles found")
        return False

    try:
        # Create profile
        update_user_interests(test_email, ['Bayern Munich', 'Football'], 'sports')

        # Rank articles
        ranked = rank_articles_for_user(articles, test_email)

        # Select personalized
        selected = select_personalized_articles(articles, test_email, max_articles=3)
    finally:
        # Clean up regardless of whether ranking/selection succeeded.
        user_interests_collection.delete_many({'email': test_email})

    has_scores = all('personalization_score' in a for a in selected)

    # all() is True for an empty sequence, hence the explicit length check.
    if has_scores and len(selected) > 0:
        print(f"✓ Articles ranked and selected")
        print(f"  Total ranked: {len(ranked)}")
        print(f"  Selected: {len(selected)}")
        print(f"  Top score: {selected[0].get('personalization_score', 0):.3f}")
        return True

    print("❌ Personalization failed")
    return False


def main():
    """Run all personalization tests.

    Returns:
        0 if every phase passed, 1 if any phase failed or raised.
    """
    _print_banner("PERSONALIZATION SYSTEM TEST SUITE")

    phases = [
        ('Phase 1: Keyword Extraction', test_phase1_keywords),
        ('Phase 2: Click Tracking', test_phase2_tracking),
        ('Phase 3: Interest Profiling', test_phase3_profiling),
        ('Phase 4: Personalization', test_phase4_personalization),
    ]
    # Phases run in order; a crash in one is recorded as a failure instead
    # of aborting the rest of the suite.
    results = {name: _run_phase(name, test_func) for name, test_func in phases}

    _print_banner("TEST RESULTS")

    for phase, passed in results.items():
        status = "✅ PASS" if passed else "❌ FAIL"
        print(f"{status} - {phase}")

    if all(results.values()):
        print("\n🎉 All personalization tests PASSED!")
        return 0

    print("\n❌ Some tests FAILED")
    return 1


if __name__ == '__main__':
    sys.exit(main())