This commit is contained in:
2025-11-14 12:51:18 +01:00
parent 433a16ee0e
commit 869ca3a894
20 changed files with 1606 additions and 38 deletions

72
news_crawler/worker.py Normal file
View File

@@ -0,0 +1,72 @@
#!/usr/bin/env python3
"""
News Crawler Worker - Listens to Redis queue and processes crawl tasks
"""
import redis
import json
import os
import time
from crawler_service import crawl_all_feeds
REDIS_URL = os.getenv('REDIS_URL', 'redis://redis:6379')
QUEUE_NAME = 'news_crawl_queue'
def get_redis_client():
    """Build a Redis client from the REDIS_URL env setting.

    ``decode_responses=True`` makes the client return ``str`` instead of
    ``bytes``, so queue payloads can be fed straight to ``json.loads``.
    """
    client = redis.from_url(REDIS_URL, decode_responses=True)
    return client
def process_crawl_task(message):
    """Execute one crawl task described by *message* (a decoded queue dict).

    Reads ``max_articles`` (default 10), ``task`` and ``timestamp`` from the
    message, runs the crawler, and reports progress on stdout.

    Returns:
        True on success, False when any exception occurred — errors are
        printed (with traceback) and never propagated to the worker loop.
    """
    try:
        limit = message.get('max_articles', 10)
        print(f"\n📨 Received task: {message.get('task')}")
        print(f" Max articles per feed: {limit}")
        print(f" Timestamp: {message.get('timestamp')}")
        # Delegate the actual crawling to the service layer.
        outcome = crawl_all_feeds(max_articles_per_feed=limit)
        print(f"✅ Task completed: {outcome.get('total_articles_crawled')} articles crawled")
    except Exception as err:
        print(f"❌ Task failed: {err}")
        import traceback
        traceback.print_exc()
        return False
    else:
        return True
def main():
    """Worker entry point: consume crawl tasks from the Redis queue forever.

    Uses a blocking pop with a 1-second timeout so the loop wakes up
    regularly and Ctrl-C (KeyboardInterrupt) stays responsive. Any other
    error is printed and retried after a short back-off.
    """
    banner = "=" * 70
    print(banner)
    print("📰 News Crawler Worker Starting")
    print(banner)
    print(f"Redis URL: {REDIS_URL}")
    print(f"Queue: {QUEUE_NAME}")
    print("Waiting for tasks...")
    print(banner)
    client = get_redis_client()
    while True:
        try:
            # brpop returns (queue_name, payload) or None on timeout.
            popped = client.brpop(QUEUE_NAME, timeout=1)
            if popped is None:
                continue
            _queue, payload = popped
            process_crawl_task(json.loads(payload))
        except KeyboardInterrupt:
            print("\n\n👋 Worker stopped by user")
            break
        except Exception as err:
            print(f"\n❌ Worker error: {err}")
            time.sleep(5)  # Back off briefly before retrying
if __name__ == '__main__':
    main()