97 lines
2.8 KiB
Python
97 lines
2.8 KiB
Python
#!/usr/bin/env python
"""
Quick RSS feed check - run from the project root with the backend venv activated.

Reads the feed documents stored in the database, fetches each active feed and
prints what the URL, summary and date extraction helpers return for its first
entries.

Usage:
    cd /path/to/munich-news
    source backend/venv/bin/activate  # or backend/venv/Scripts/activate on Windows
    python test_feeds_quick.py
"""
import sys

# The relative 'backend' entry is resolved against the current working
# directory, which is why the script has to be started from the project root.
# It must be on sys.path before the imports below run.
sys.path.insert(0, 'backend')

from pymongo import MongoClient
from config import Config
import feedparser
from utils.rss_utils import (
    extract_article_url,
    extract_article_summary,
    extract_published_date,
)

# Opening banner: rule, title, rule - one line each.
print("=" * 80, "RSS Feed Test - Checking Database Feeds", "=" * 80, sep="\n")

# Load every configured RSS feed document into memory.
# MongoClient is used as a context manager so the connection is closed as soon
# as the query result has been materialised; the remainder of the visible
# script only works with the in-memory `feeds` list.
with MongoClient(Config.MONGODB_URI) as client:
    db = client[Config.DB_NAME]
    feeds = list(db['rss_feeds'].find())

if not feeds:
    # Nothing to test: show how to register a feed, then exit with a
    # non-zero status so callers can tell the run did not check anything.
    print("\n❌ No RSS feeds in database!")
    print("\nAdd a feed first:")
    print(" curl -X POST http://localhost:5001/api/rss-feeds \\")
    print(" -H 'Content-Type: application/json' \\")
    print(" -d '{\"name\": \"Test Feed\", \"url\": \"https://rss.sueddeutsche.de/rss/Politik\"}'")
    sys.exit(1)

print(f"\n✓ Found {len(feeds)} feed(s) in database\n")

def _report_entry(index, entry):
    """Print what the extraction helpers return for a single feed entry.

    Args:
        index: 1-based position of the entry; only used in the heading.
        entry: dict-like feedparser entry. It is read via ``.get()`` and
            ``.keys()`` and passed to the ``utils.rss_utils`` helpers.

    Exceptions raised by the helpers are not caught here; they propagate to
    the per-feed handler in the loop below.
    """
    print(f"\n--- Entry {index} ---")
    title = entry.get('title', 'No title')
    print(f"Title: {title[:70]}")

    # URL extraction: on failure, dump the fields that could have held a URL
    # so the reason is visible without a debugger.
    article_url = extract_article_url(entry)
    if article_url:
        print(f"✓ URL extracted: {article_url}")
    else:
        print("❌ Could not extract URL")
        print(f" Available fields: {list(entry.keys())[:10]}")
        print(f" link: {entry.get('link', 'N/A')}")
        print(f" guid: {entry.get('guid', 'N/A')}")

    # Summary and date are optional; only report them when present.
    summary = extract_article_summary(entry)
    if summary:
        print(f"✓ Summary: {summary[:80]}...")

    pub_date = extract_published_date(entry)
    if pub_date:
        print(f"✓ Date: {pub_date}")


# Test each feed: print its metadata, fetch it and check the first 3 entries.
for feed_doc in feeds:
    name = feed_doc.get('name', 'Unknown')
    url = feed_doc.get('url', '')
    active = feed_doc.get('active', True)  # a missing flag counts as active

    print(f"\n{'='*80}")
    print(f"Feed: {name}")
    print(f"URL: {url}")
    print(f"Active: {active}")
    print('='*80)

    if not active:
        print("⏭ Skipping (inactive)")
        continue

    if not url:
        # Without a URL there is nothing to fetch; say so instead of letting
        # the parser report a misleading "No entries found".
        print("❌ No URL stored for this feed")
        continue

    try:
        print("Fetching RSS feed...")
        feed = feedparser.parse(url)

        if not feed.entries:
            print("❌ No entries found")
            # feedparser records fetch/parse problems on the result
            # (bozo / bozo_exception) rather than raising, so surface them.
            if feed.get('bozo'):
                print(f" Parse error: {feed.get('bozo_exception')}")
            continue

        print(f"✓ Found {len(feed.entries)} entries\n")

        # Test first 3 entries
        for i, entry in enumerate(feed.entries[:3], 1):
            _report_entry(i, entry)

    except Exception as e:
        # Deliberate per-feed boundary: one broken feed must not stop the
        # remaining feeds from being checked.
        print(f"❌ Error: {e}")

# Closing banner: blank line, rule, message, rule.
print("\n" + "=" * 80, "Test complete!", "=" * 80, sep="\n")