#!/usr/bin/env python3
"""
News Crawler Worker - Listens to Redis queue and processes crawl tasks
"""
import redis
import json
import os
import time

from crawler_service import crawl_all_feeds

REDIS_URL = os.getenv('REDIS_URL', 'redis://redis:6379')
QUEUE_NAME = 'news_crawl_queue'


def get_redis_client():
    """Get a Redis client connected to REDIS_URL."""
    return redis.from_url(REDIS_URL, decode_responses=True)


def process_crawl_task(message):
    """Process a single crawl task pulled from the queue."""
    try:
        max_articles = message.get('max_articles', 10)
        print(f"\nšŸ“Ø Received task: {message.get('task')}")
        print(f"   Max articles per feed: {max_articles}")
        print(f"   Timestamp: {message.get('timestamp')}")

        # Run the crawler
        result = crawl_all_feeds(max_articles_per_feed=max_articles)

        print(f"āœ… Task completed: {result.get('total_articles_crawled')} articles crawled")
        return True
    except Exception as e:
        print(f"āŒ Task failed: {e}")
        import traceback
        traceback.print_exc()
        return False


def main():
    """Main worker loop: block on the Redis queue and process tasks as they arrive."""
    print("=" * 70)
    print("šŸ“° News Crawler Worker Starting")
    print("=" * 70)
    print(f"Redis URL: {REDIS_URL}")
    print(f"Queue: {QUEUE_NAME}")
    print("Waiting for tasks...")
    print("=" * 70)

    r = get_redis_client()

    while True:
        try:
            # Block and wait for messages (timeout 1 second)
            result = r.brpop(QUEUE_NAME, timeout=1)
            if result:
                queue_name, message_json = result
                message = json.loads(message_json)
                process_crawl_task(message)
        except KeyboardInterrupt:
            print("\n\nšŸ‘‹ Worker stopped by user")
            break
        except Exception as e:
            print(f"\nāŒ Worker error: {e}")
            time.sleep(5)  # Wait before retrying


if __name__ == '__main__':
    main()
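
# --- Usage sketch (illustrative, not part of the worker) --------------------
# A producer can enqueue a task by LPUSH-ing a JSON payload onto the same
# queue; since the worker pops with BRPOP (from the right), LPUSH + BRPOP
# gives FIFO ordering. The 'task', 'max_articles', and 'timestamp' keys
# mirror what process_crawl_task() reads; the 'crawl' task name below is an
# assumption, as the producer side is not shown in this file.
#
#   import json, time, redis
#
#   r = redis.from_url(REDIS_URL, decode_responses=True)
#   r.lpush(QUEUE_NAME, json.dumps({
#       'task': 'crawl',          # assumed task name, not defined here
#       'max_articles': 10,
#       'timestamp': time.time(),
#   }))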