commit 869ca3a894
parent 433a16ee0e
Date: 2025-11-14 12:51:18 +01:00
20 changed files with 1606 additions and 38 deletions

@@ -12,7 +12,8 @@ admin_bp = Blueprint('admin', __name__)
 @admin_bp.route('/api/admin/trigger-crawl', methods=['POST'])
 def trigger_crawl():
     """
-    Manually trigger the news crawler
+    Manually trigger the news crawler asynchronously via a Redis queue.
+    Uses a Redis message queue for non-blocking execution.
 
     Request body (optional):
     {
@@ -20,6 +21,10 @@ def trigger_crawl():
     }
     """
     try:
+        import redis
+        import json
+        import time
+
         # Handle both JSON and empty body
         try:
             data = request.get_json(silent=True) or {}
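
For context, a smoke test of the new contract might look like the sketch below. It is illustrative only: the base URL http://localhost:5000 is an assumption, not part of this commit, and the 202 status it expects is introduced in the next hunk.

    import requests

    # Hypothetical smoke test for the queued trigger endpoint.
    # Base URL and port are assumptions; adjust for your deployment.
    resp = requests.post(
        'http://localhost:5000/api/admin/trigger-crawl',
        json={'max_articles': 20},
        timeout=10,
    )
    # The endpoint now replies immediately with 202 Accepted
    # instead of blocking until the crawl completes.
    assert resp.status_code == 202, resp.text
    print(resp.json())
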
@@ -35,41 +40,29 @@ def trigger_crawl():
                 'error': 'max_articles must be an integer between 1 and 100'
             }), 400
 
-        # Execute crawler in crawler container using docker exec
-        try:
-            result = subprocess.run(
-                ['docker', 'exec', 'munich-news-crawler', 'python', 'crawler_service.py', str(max_articles)],
-                capture_output=True,
-                text=True,
-                timeout=300  # 5 minute timeout
-            )
-
-            # Check result
-            success = result.returncode == 0
-
-            return jsonify({
-                'success': success,
-                'message': f'Crawler {"executed successfully" if success else "failed"}',
-                'max_articles': max_articles,
-                'output': result.stdout[-1000:] if result.stdout else '',  # Last 1000 chars
-                'errors': result.stderr[-500:] if result.stderr else ''
-            }), 200 if success else 500
-
-        except FileNotFoundError:
-            return jsonify({
-                'success': False,
-                'error': 'Docker command not found. Make sure Docker is installed and the socket is mounted.'
-            }), 500
-        except subprocess.TimeoutExpired:
-            return jsonify({
-                'success': False,
-                'error': 'Crawler timed out after 5 minutes'
-            }), 500
+        # Get Redis client
+        redis_url = os.getenv('REDIS_URL', 'redis://redis:6379')
+        r = redis.from_url(redis_url, decode_responses=True)
+
+        # Publish message to Redis queue
+        message = {
+            'task': 'crawl_news',
+            'max_articles': max_articles,
+            'timestamp': time.time()  # Unix epoch seconds
+        }
+        r.lpush('news_crawl_queue', json.dumps(message))
+
+        # Return immediately without waiting for the crawl to finish
+        return jsonify({
+            'success': True,
+            'message': 'News crawl task queued',
+            'max_articles': max_articles
+        }), 202  # 202 Accepted
 
     except Exception as e:
         return jsonify({
             'success': False,
-            'error': f'Failed to run crawler: {str(e)}'
+            'error': f'Failed to queue news crawl: {str(e)}'
         }), 500
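
The consumer side of the queue is not shown in this file; it presumably lives in one of the other 20 changed files, likely in the crawler container. Below is a minimal sketch of such a worker, assuming the same REDIS_URL default and message shape used above; the run_crawl helper and the BRPOP loop are illustrative, not this commit's actual implementation.

    import json
    import os

    import redis

    def run_crawl(max_articles: int) -> None:
        # Stand-in for the real crawl logic (crawler_service.py in this repo).
        print(f'crawling up to {max_articles} articles')

    def main() -> None:
        r = redis.from_url(os.getenv('REDIS_URL', 'redis://redis:6379'),
                           decode_responses=True)
        while True:
            # LPUSH in the endpoint pairs with BRPOP here, so the queue is FIFO.
            # brpop blocks until a message arrives and returns a (key, value) pair.
            _key, raw = r.brpop('news_crawl_queue')
            task = json.loads(raw)
            if task.get('task') == 'crawl_news':
                run_crawl(int(task.get('max_articles', 20)))

    if __name__ == '__main__':
        main()

Handing the task off via LPUSH and returning 202 makes the endpoint's latency independent of crawl duration; the earlier docker-exec version held the HTTP request open for up to five minutes.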