update
@@ -12,7 +12,8 @@ admin_bp = Blueprint('admin', __name__)
 @admin_bp.route('/api/admin/trigger-crawl', methods=['POST'])
 def trigger_crawl():
     """
-    Manually trigger the news crawler
+    Manually trigger the news crawler asynchronously via Redis queue
+    Uses Redis message queue for non-blocking execution
 
     Request body (optional):
     {
@@ -20,6 +21,9 @@ def trigger_crawl():
     }
     """
     try:
+        import redis
+        import json
+
         # Handle both JSON and empty body
         try:
             data = request.get_json(silent=True) or {}
@@ -35,41 +39,29 @@ def trigger_crawl():
                 'error': 'max_articles must be an integer between 1 and 100'
             }), 400
 
-        # Execute crawler in crawler container using docker exec
-        try:
-            result = subprocess.run(
-                ['docker', 'exec', 'munich-news-crawler', 'python', 'crawler_service.py', str(max_articles)],
-                capture_output=True,
-                text=True,
-                timeout=300  # 5 minute timeout
-            )
-
-            # Check result
-            success = result.returncode == 0
-
-            return jsonify({
-                'success': success,
-                'message': f'Crawler {"executed successfully" if success else "failed"}',
-                'max_articles': max_articles,
-                'output': result.stdout[-1000:] if result.stdout else '',  # Last 1000 chars
-                'errors': result.stderr[-500:] if result.stderr else ''
-            }), 200 if success else 500
-
-        except FileNotFoundError:
-            return jsonify({
-                'success': False,
-                'error': 'Docker command not found. Make sure Docker is installed and the socket is mounted.'
-            }), 500
-
-        except subprocess.TimeoutExpired:
-            return jsonify({
-                'success': False,
-                'error': 'Crawler timed out after 5 minutes'
-            }), 500
+        # Get Redis client
+        redis_url = os.getenv('REDIS_URL', 'redis://redis:6379')
+        r = redis.from_url(redis_url, decode_responses=True)
+
+        # Publish message to Redis queue
+        message = {
+            'task': 'crawl_news',
+            'max_articles': max_articles,
+            'timestamp': str(os.times())
+        }
+        r.lpush('news_crawl_queue', json.dumps(message))
+
+        # Return immediately without waiting
+        return jsonify({
+            'success': True,
+            'message': 'News crawl task queued',
+            'max_articles': max_articles
+        }), 202  # 202 Accepted
 
     except Exception as e:
         return jsonify({
             'success': False,
-            'error': f'Failed to run crawler: {str(e)}'
+            'error': f'Failed to queue news crawl: {str(e)}'
         }), 500
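
For reference, the new endpoint can be exercised with a simple client call. A minimal sketch using the requests library (an assumption; any HTTP client works), with a hypothetical host of localhost:5000:

    import requests

    # Trigger an async crawl of up to 20 articles; the endpoint now
    # replies as soon as the task is queued instead of blocking for
    # up to 5 minutes on docker exec.
    resp = requests.post(
        'http://localhost:5000/api/admin/trigger-crawl',
        json={'max_articles': 20},  # optional body; may be omitted
        timeout=10,
    )
    print(resp.status_code)  # expected: 202
    print(resp.json())       # {'success': True, 'message': 'News crawl task queued', ...}

The 202 Accepted status fits the new design: the request is accepted for processing but the crawl completes later, so callers should not treat the response as confirmation that articles were actually fetched.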
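The consumer side is not part of this commit. Assuming the crawler container runs a worker loop that pops from the same news_crawl_queue list, it might look roughly like the sketch below; the run_crawl helper is hypothetical, and only the queue name, the 'crawl_news' task value, and the max_articles field are taken from the diff:

    import json
    import os
    import redis

    def run_crawl(max_articles):
        # Hypothetical stand-in for the real crawler entry point
        # (e.g. whatever crawler_service.py ultimately invokes).
        print(f'crawling up to {max_articles} articles')

    def worker_loop():
        r = redis.from_url(os.getenv('REDIS_URL', 'redis://redis:6379'),
                           decode_responses=True)
        while True:
            # BLPOP blocks until a message arrives or the timeout elapses,
            # pairing with the LPUSH in trigger_crawl. Note that
            # LPUSH + BLPOP treats the list as LIFO; use RPUSH or BRPOP
            # if strict FIFO ordering matters.
            item = r.blpop('news_crawl_queue', timeout=5)
            if item is None:
                continue  # no work yet; poll again
            _, raw = item
            message = json.loads(raw)
            if message.get('task') == 'crawl_news':
                run_crawl(message['max_articles'])

    if __name__ == '__main__':
        worker_loop()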