#!/usr/bin/env python
"""
Test script to verify crawler functionality
"""
import sys

from crawler_service import extract_article_content, get_active_rss_feeds


def test_content_extraction() -> bool:
    """Test content extraction from a sample URL.

    Calls ``extract_article_content`` on a fixed URL with a 10 second
    timeout and prints a short summary of the result.

    Returns:
        True when the extractor returns a truthy result; False when it
        returns a falsy result or raises.
    """
    print("Testing content extraction...")

    # Test with a simple news site
    test_url = "https://www.bbc.com/news"
    print(f"Extracting content from: {test_url}")

    # Report an extractor exception as a failed check instead of letting it
    # abort the whole suite before the results summary is printed (this
    # mirrors the handling in test_database_connection).
    try:
        result = extract_article_content(test_url, timeout=10)
    except Exception as e:
        print(f"✗ Content extraction failed: {e}")
        return False

    if not result:
        print("✗ Content extraction failed")
        return False

    # NOTE(review): result is presumably a dict with 'title', 'content' and
    # 'word_count' keys - confirm against crawler_service.
    # dict.get only applies its default when the key is missing, so a key
    # that is present but None needs an explicit fallback before slicing
    # or taking len().
    title = result.get('title')
    if title is None:
        title = 'N/A'
    content = result.get('content')
    if content is None:
        content = ''

    print("✓ Content extraction successful!")
    print(f" Title: {str(title)[:50]}...")
    print(f" Content length: {len(content)} chars")
    print(f" Word count: {result.get('word_count', 0)}")
    return True


def test_database_connection() -> bool:
    """Test the database connection by listing the active RSS feeds.

    Calls ``get_active_rss_feeds`` and prints each feed's name and URL,
    or a hint on how to add a feed when none are returned.

    Returns:
        True when the feeds were fetched and listed; False when any
        exception was raised along the way.
    """
    print("\nTesting database connection...")

    # The broad except is deliberate: this is a diagnostic script, so any
    # failure is reported as a failed check rather than a traceback.
    try:
        feeds = get_active_rss_feeds()
        print("✓ Database connection successful!")
        print(f" Found {len(feeds)} active RSS feed(s)")

        if feeds:
            print("\n Active feeds:")
            for feed in feeds:
                print(f" - {feed['name']}: {feed['url']}")
        else:
            print("\n ⚠ No active feeds found. Add feeds via the backend API:")
            print(" curl -X POST http://localhost:5001/api/rss-feeds \\")
            print(" -H 'Content-Type: application/json' \\")
            print(" -d '{\"name\": \"Test Feed\", \"url\": \"https://example.com/rss\"}'")

        return True
    except Exception as e:
        print(f"✗ Database connection failed: {e}")
        return False


def main() -> int:
    """Run both checks and print a summary.

    Returns:
        0 when both checks pass, 1 otherwise (used as the process exit
        status).
    """
    print("="*60)
    print("News Crawler - Test Suite")
    print("="*60 + "\n")

    # Test database connection
    db_ok = test_database_connection()

    # Test content extraction
    extract_ok = test_content_extraction()

    print("\n" + "="*60)
    print("Test Results:")
    print(f" Database Connection: {'✓ PASS' if db_ok else '✗ FAIL'}")
    print(f" Content Extraction: {'✓ PASS' if extract_ok else '✗ FAIL'}")
    print("="*60 + "\n")

    if db_ok and extract_ok:
        print("✓ All tests passed! Crawler is ready to use.")
        print("\nRun the crawler with:")
        print(" python crawler_service.py")
        return 0

    print("✗ Some tests failed. Please check the errors above.")
    return 1


if __name__ == '__main__':
    sys.exit(main())