#!/usr/bin/env python3
"""
News Crawler Worker - Listens to Redis queue and processes crawl tasks
"""
import redis
import json
import os
import time
from crawler_service import crawl_all_feeds

REDIS_URL = os.getenv('REDIS_URL', 'redis://redis:6379')
QUEUE_NAME = 'news_crawl_queue'
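# The default URL assumes a Docker-network host named "redis" (e.g. a
# docker-compose service); set REDIS_URL when running outside that network.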


def get_redis_client():
    """Get Redis client"""
    return redis.from_url(REDIS_URL, decode_responses=True)


def process_crawl_task(message):
    """Process a crawl task"""
    try:
        max_articles = message.get('max_articles', 10)
        print(f"\n📨 Received task: {message.get('task')}")
        print(f" Max articles per feed: {max_articles}")
        print(f" Timestamp: {message.get('timestamp')}")

        # Run the crawler
        result = crawl_all_feeds(max_articles_per_feed=max_articles)

        print(f"✅ Task completed: {result.get('total_articles_crawled')} articles crawled")
        return True

    except Exception as e:
        print(f"❌ Task failed: {e}")
        import traceback
        traceback.print_exc()
        return False


def main():
    """Main worker loop"""
    print("="*70)
    print("📰 News Crawler Worker Starting")
    print("="*70)
    print(f"Redis URL: {REDIS_URL}")
    print(f"Queue: {QUEUE_NAME}")
    print("Waiting for tasks...")
    print("="*70)

    r = get_redis_client()

    while True:
        try:
            # Block and wait for messages (timeout 1 second)
            result = r.brpop(QUEUE_NAME, timeout=1)

            if result:
                queue_name, message_json = result
                message = json.loads(message_json)
                process_crawl_task(message)

        except KeyboardInterrupt:
            print("\n\n👋 Worker stopped by user")
            break
        except Exception as e:
            print(f"\n❌ Worker error: {e}")
            time.sleep(5)  # Wait before retrying


if __name__ == '__main__':
    main()
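

# Example: enqueueing a task (a minimal producer sketch, not part of this
# worker). The key names ('task', 'max_articles', 'timestamp') mirror what
# process_crawl_task() reads above; the 'crawl_all' label and the timestamp
# format are assumptions. LPUSH pairs with the worker's BRPOP for FIFO order.
#
#   import json, time, redis
#
#   r = redis.from_url('redis://redis:6379', decode_responses=True)
#   r.lpush('news_crawl_queue', json.dumps({
#       'task': 'crawl_all',
#       'max_articles': 10,
#       'timestamp': time.time(),
#   }))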