2025-11-14 12:51:18 +01:00
parent 433a16ee0e
commit 869ca3a894
20 changed files with 1606 additions and 38 deletions


@@ -0,0 +1,70 @@
#!/usr/bin/env python3
"""
Transport Crawler Worker - Listens to Redis queue and processes crawl tasks
"""
import redis
import json
import os
import time
import traceback

from crawler_service import run_crawler

REDIS_URL = os.getenv('REDIS_URL', 'redis://redis:6379')
QUEUE_NAME = 'transport_crawl_queue'


def get_redis_client():
    """Get Redis client"""
    return redis.from_url(REDIS_URL, decode_responses=True)
def process_crawl_task(message):
    """Process a single crawl task from the queue"""
    try:
        print(f"\n📨 Received task: {message.get('task')}")
        print(f"   Timestamp: {message.get('timestamp')}")

        # Run the crawler
        result = run_crawler()

        print(f"✅ Task completed: {result.get('total_disruptions')} disruptions found")
        return True
    except Exception as e:
        print(f"❌ Task failed: {e}")
        traceback.print_exc()
        return False
def main():
    """Main worker loop"""
    print("=" * 70)
    print("🚇 Transport Crawler Worker Starting")
    print("=" * 70)
    print(f"Redis URL: {REDIS_URL}")
    print(f"Queue: {QUEUE_NAME}")
    print("Waiting for tasks...")
    print("=" * 70)

    r = get_redis_client()

    while True:
        try:
            # Block and wait for messages (1 second timeout so Ctrl+C is handled promptly)
            result = r.brpop(QUEUE_NAME, timeout=1)
            if result:
                queue_name, message_json = result
                message = json.loads(message_json)
                process_crawl_task(message)
        except KeyboardInterrupt:
            print("\n\n👋 Worker stopped by user")
            break
        except Exception as e:
            print(f"\n❌ Worker error: {e}")
            time.sleep(5)  # Wait before retrying


if __name__ == '__main__':
    main()
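
The worker pops JSON messages from `transport_crawl_queue` with `BRPOP`, so any producer only needs to `LPUSH` a matching JSON payload onto the same list. A minimal producer sketch is shown below; it is not part of this commit, and the `task` value is a hypothetical example, since the worker only logs the `task` and `timestamp` fields before calling `run_crawler()`.

#!/usr/bin/env python3
"""Illustrative sketch: enqueue a crawl task for the worker (not part of this commit)."""
import json
import os
import time

import redis

REDIS_URL = os.getenv('REDIS_URL', 'redis://redis:6379')
QUEUE_NAME = 'transport_crawl_queue'


def enqueue_crawl_task():
    """Push a message shaped the way process_crawl_task() expects."""
    r = redis.from_url(REDIS_URL, decode_responses=True)
    message = {
        'task': 'crawl_disruptions',                    # hypothetical task name; the worker only logs it
        'timestamp': time.strftime('%Y-%m-%dT%H:%M:%S'),
    }
    # BRPOP pops from the right of the list, so LPUSH on the left gives FIFO ordering.
    r.lpush(QUEUE_NAME, json.dumps(message))


if __name__ == '__main__':
    enqueue_crawl_task()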