update
This commit is contained in:
221
tests/backend/test_personalization_system.py
Normal file
221
tests/backend/test_personalization_system.py
Normal file
@@ -0,0 +1,221 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Comprehensive test suite for the personalization system.
|
||||
Tests all 4 phases: keyword extraction, click tracking, interest profiling, and personalization.
|
||||
"""
|
||||
|
||||
import sys
|
||||
from pymongo import MongoClient
|
||||
from datetime import datetime
|
||||
|
||||
# Import services
|
||||
from services.tracking_service import create_newsletter_tracking
|
||||
from services.interest_profiling_service import (
|
||||
update_user_interests,
|
||||
get_user_interests,
|
||||
get_top_interests,
|
||||
build_interests_from_history
|
||||
)
|
||||
from services.personalization_service import (
|
||||
calculate_article_score,
|
||||
rank_articles_for_user,
|
||||
select_personalized_articles,
|
||||
get_personalization_stats
|
||||
)
|
||||
from config import Config
|
||||
|
||||
# Open the MongoDB connection described by the project configuration
client = MongoClient(Config.MONGODB_URI)
db = client[Config.DB_NAME]

# Collection handles shared by the phase tests below
articles_collection, link_clicks_collection, user_interests_collection = (
    db[collection_name]
    for collection_name in ('articles', 'link_clicks', 'user_interests')
)
|
||||
|
||||
|
||||
def test_phase1_keywords():
    """Phase 1: check that at least one stored article carries keywords.

    Counts the documents in ``articles_collection`` whose ``keywords``
    field exists and is not an empty list, then prints one sample.

    Returns:
        True when at least one such article exists, False otherwise.
    """
    banner = "=" * 60
    print("\n" + banner)
    print("Phase 1: Keyword Extraction")
    print(banner)

    # Same filter is used for both the count and the sample lookup
    keyword_filter = {'keywords': {'$exists': True, '$ne': []}}
    keyword_article_count = articles_collection.count_documents(keyword_filter)

    if keyword_article_count != 0:
        example = articles_collection.find_one(keyword_filter)
        print(f"✓ Found {keyword_article_count} articles with keywords")
        print(f" Sample: {example.get('title', 'N/A')[:50]}...")
        print(f" Keywords: {example.get('keywords', [])[:3]}")
        return True

    print("❌ No articles with keywords found")
    print(" Run a crawl first to extract keywords")
    return False
|
||||
|
||||
|
||||
def test_phase2_tracking():
    """Phase 2: Verify tracking includes keywords and metadata.

    Picks one article with non-empty keywords, creates newsletter tracking
    for a dedicated test subscriber via ``create_newsletter_tracking``, and
    reads the resulting record back from ``link_clicks_collection`` by its
    tracking id. The record passes when it has an ``article_id``, a
    ``category`` and at least one keyword.

    Test rows are removed from ``link_clicks`` and ``newsletter_sends`` in a
    ``finally`` block, so they are cleaned up even when tracking creation or
    the lookup raises.

    Returns:
        True when the tracking record carries the expected metadata,
        False when no article is available, no tracking record was
        produced, or the metadata is incomplete.
    """
    print("\n" + "="*60)
    print("Phase 2: Click Tracking Enhancement")
    print("="*60)

    test_email = 'test-phase2@example.com'

    # Clean up leftovers from a previous (possibly aborted) run
    link_clicks_collection.delete_many({'subscriber_email': test_email})

    # Get article with keywords
    article = articles_collection.find_one({'keywords': {'$exists': True, '$ne': []}})

    if not article:
        print("❌ No articles found")
        return False

    tracking_record = None
    try:
        # Create tracking
        tracking_data = create_newsletter_tracking(
            newsletter_id='test-phase2',
            subscriber_email=test_email,
            article_links=[{
                'url': article['link'],
                'title': article.get('title', '')
            }]
        )

        # Verify tracking record; an empty map means nothing was tracked
        tracking_ids = list(tracking_data['link_tracking_map'].values())
        if tracking_ids:
            tracking_record = link_clicks_collection.find_one(
                {'tracking_id': tracking_ids[0]}
            )
    finally:
        # Clean up, regardless of whether the steps above succeeded
        link_clicks_collection.delete_many({'subscriber_email': test_email})
        db['newsletter_sends'].delete_many({'subscriber_email': test_email})

    # A missing record, or a keywords field stored as None/empty, is a failure
    has_metadata = (
        tracking_record is not None and
        tracking_record.get('article_id') is not None and
        tracking_record.get('category') is not None and
        bool(tracking_record.get('keywords'))
    )

    if has_metadata:
        print("✓ Tracking records include metadata")
        print(f" Article ID: {tracking_record.get('article_id')}")
        print(f" Category: {tracking_record.get('category')}")
        print(f" Keywords: {len(tracking_record.get('keywords', []))} keywords")
        return True
    else:
        print("❌ Tracking records missing metadata")
        return False
|
||||
|
||||
|
||||
def test_phase3_profiling():
    """Phase 3: Verify interest profiles are built from clicks.

    Records two interest updates for a dedicated test email through
    ``update_user_interests`` and reads the profile back with
    ``get_user_interests``. The test passes when the profile exists and its
    ``total_clicks`` equals 2.

    The test profile is deleted in a ``finally`` block so it does not linger
    in ``user_interests`` if either service call raises.

    Returns:
        True when the profile reports exactly two clicks, False otherwise.
    """
    print("\n" + "="*60)
    print("Phase 3: User Interest Profiling")
    print("="*60)

    test_email = 'test-phase3@example.com'

    # Clean up leftovers from a previous (possibly aborted) run
    user_interests_collection.delete_many({'email': test_email})

    try:
        # Create profile
        update_user_interests(test_email, ['Bayern Munich', 'Football'], 'sports')
        update_user_interests(test_email, ['Transportation', 'Munich'], 'local')

        # Verify profile
        profile = get_user_interests(test_email)
    finally:
        # Clean up, regardless of whether the steps above succeeded
        user_interests_collection.delete_many({'email': test_email})

    # .get avoids a KeyError when the profile lacks a click counter
    if profile and profile.get('total_clicks') == 2:
        print("✓ Interest profile created")
        print(f" Total clicks: {profile['total_clicks']}")
        print(f" Categories: {len(profile.get('categories', {}))}")
        print(f" Keywords: {len(profile.get('keywords', {}))}")
        return True
    else:
        print("❌ Interest profile not created correctly")
        return False
|
||||
|
||||
|
||||
def test_phase4_personalization():
    """Phase 4: Verify articles are ranked by user interests.

    Loads up to five articles with non-empty keywords, creates an interest
    profile for a dedicated test email, then runs both
    ``rank_articles_for_user`` and ``select_personalized_articles`` (capped
    at three articles). The test passes when at least one article is
    selected and every selected article has a ``personalization_score``.

    The test profile is deleted in a ``finally`` block so it does not linger
    in ``user_interests`` if profiling, ranking or selection raises.

    Returns:
        True when personalization produced scored articles, False when
        fewer than three articles are available or the selection is empty
        or unscored.
    """
    print("\n" + "="*60)
    print("Phase 4: Personalized Newsletter Generation")
    print("="*60)

    test_email = 'test-phase4@example.com'

    # Clean up leftovers from a previous (possibly aborted) run
    user_interests_collection.delete_many({'email': test_email})

    # Get articles
    articles = list(articles_collection.find(
        {'keywords': {'$exists': True, '$ne': []}},
        limit=5
    ))

    if len(articles) < 3:
        print("❌ Not enough articles found")
        return False

    try:
        # Create profile
        update_user_interests(test_email, ['Bayern Munich', 'Football'], 'sports')

        # Rank articles
        ranked = rank_articles_for_user(articles, test_email)

        # Select personalized
        selected = select_personalized_articles(articles, test_email, max_articles=3)
    finally:
        # Clean up, regardless of whether the steps above succeeded
        user_interests_collection.delete_many({'email': test_email})

    has_scores = all('personalization_score' in a for a in selected)

    # all() is True for an empty selection, so emptiness is checked separately
    if has_scores and len(selected) > 0:
        print("✓ Articles ranked and selected")
        print(f" Total ranked: {len(ranked)}")
        print(f" Selected: {len(selected)}")
        print(f" Top score: {selected[0].get('personalization_score', 0):.3f}")
        return True
    else:
        print("❌ Personalization failed")
        return False
|
||||
|
||||
|
||||
def main():
    """Run the four phase tests in order and print a pass/fail summary.

    Returns:
        0 when every phase test returned a truthy value, 1 otherwise.
    """
    rule = "=" * 60

    print("\n" + rule)
    print("PERSONALIZATION SYSTEM TEST SUITE")
    print(rule)

    # Label -> test function; executed top to bottom
    phase_checks = (
        ('Phase 1: Keyword Extraction', test_phase1_keywords),
        ('Phase 2: Click Tracking', test_phase2_tracking),
        ('Phase 3: Interest Profiling', test_phase3_profiling),
        ('Phase 4: Personalization', test_phase4_personalization),
    )
    results = {label: run_check() for label, run_check in phase_checks}

    print("\n" + rule)
    print("TEST RESULTS")
    print(rule)

    for label, outcome in results.items():
        if outcome:
            verdict = "✅ PASS"
        else:
            verdict = "❌ FAIL"
        print(f"{verdict} - {label}")

    if not all(results.values()):
        print("\n❌ Some tests FAILED")
        return 1

    print("\n🎉 All personalization tests PASSED!")
    return 0
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Use main()'s return value as the process exit status
    exit_status = main()
    sys.exit(exit_status)
|
||||
Reference in New Issue
Block a user