This commit is contained in:
2025-11-11 14:09:21 +01:00
parent bcd0a10576
commit 1075a91eac
57 changed files with 5598 additions and 1366 deletions

View File

@@ -0,0 +1,83 @@
#!/usr/bin/env python
"""
Test script to verify crawler functionality
"""
from crawler_service import extract_article_content, get_active_rss_feeds
import sys
def test_content_extraction():
    """Check that article content can be extracted from a sample URL.

    Calls ``extract_article_content`` on a fixed BBC News URL with a
    10-second timeout and prints a short summary of whatever came back.

    Returns:
        bool: True when the extractor returned a truthy result, False when
        it returned nothing or raised an exception.
    """
    print("Testing content extraction...")
    # Test with a simple news site
    test_url = "https://www.bbc.com/news"
    print(f"Extracting content from: {test_url}")

    # Report extractor errors as a failed check instead of letting them
    # abort the script, mirroring how test_database_connection handles
    # failures, so the caller can still print its summary.
    try:
        result = extract_article_content(test_url, timeout=10)
    except Exception as e:
        print(f"✗ Content extraction failed: {e}")
        return False

    if not result:
        print("✗ Content extraction failed")
        return False

    # dict.get() only falls back to its default when the key is absent; a
    # key that is present with a None value would break the slice and the
    # len() call below, so normalise None explicitly.
    title = result.get('title')
    if title is None:
        title = 'N/A'
    content = result.get('content')
    if content is None:
        content = ''

    print("✓ Content extraction successful!")
    print(f" Title: {title[:50]}...")
    print(f" Content length: {len(content)} chars")
    print(f" Word count: {result.get('word_count', 0)}")
    return True
def test_database_connection():
    """Check that the active RSS feeds can be fetched.

    Calls ``get_active_rss_feeds`` and prints how many feeds came back,
    listing each feed's name and URL, or a hint on how to add a feed when
    there are none.

    Returns:
        bool: True when the feeds were fetched and printed without error,
        False when any exception was raised along the way.
    """
    print("\nTesting database connection...")
    try:
        active_feeds = get_active_rss_feeds()
        print("✓ Database connection successful!")
        print(f" Found {len(active_feeds)} active RSS feed(s)")

        if not active_feeds:
            # Nothing configured yet: show how to register a feed
            print("\n ⚠ No active feeds found. Add feeds via the backend API:")
            print(" curl -X POST http://localhost:5001/api/rss-feeds \\")
            print(" -H 'Content-Type: application/json' \\")
            print(" -d '{\"name\": \"Test Feed\", \"url\": \"https://example.com/rss\"}'")
        else:
            print("\n Active feeds:")
            for entry in active_feeds:
                print(f" - {entry['name']}: {entry['url']}")
    except Exception as exc:
        # Any failure in the block above is reported as a connection failure
        print(f"✗ Database connection failed: {exc}")
        return False
    return True
def main():
    """Run both checks, print a results summary and return an exit status.

    Returns:
        int: 0 when the database check and the extraction check both
        passed, otherwise 1.
    """
    rule = "=" * 60

    def verdict(passed):
        # Label shown next to each check in the summary
        return '✓ PASS' if passed else '✗ FAIL'

    print(rule)
    print("News Crawler - Test Suite")
    print(rule + "\n")

    # The database check runs first, then content extraction
    database_passed = test_database_connection()
    extraction_passed = test_content_extraction()

    print("\n" + rule)
    print("Test Results:")
    print(f" Database Connection: {verdict(database_passed)}")
    print(f" Content Extraction: {verdict(extraction_passed)}")
    print(rule + "\n")

    if not (database_passed and extraction_passed):
        print("✗ Some tests failed. Please check the errors above.")
        return 1

    print("✓ All tests passed! Crawler is ready to use.")
    print("\nRun the crawler with:")
    print(" python crawler_service.py")
    return 0
if __name__ == '__main__':
    # Use main()'s return value as the process exit status
    exit_status = main()
    sys.exit(exit_status)