2025-11-14 12:51:18 +01:00
parent 433a16ee0e
commit 869ca3a894
20 changed files with 1606 additions and 38 deletions

View File

@@ -10,6 +10,7 @@ from routes.newsletter_routes import newsletter_bp
 from routes.tracking_routes import tracking_bp
 from routes.analytics_routes import analytics_bp
 from routes.admin_routes import admin_bp
+from routes.transport_routes import transport_bp
 
 # Initialize Flask app
 app = Flask(__name__)
@@ -27,6 +28,7 @@ app.register_blueprint(newsletter_bp)
 app.register_blueprint(tracking_bp)
 app.register_blueprint(analytics_bp)
 app.register_blueprint(admin_bp)
+app.register_blueprint(transport_bp)
 
 # Health check endpoint
 @app.route('/health')

View File: requirements.txt

@@ -5,4 +5,5 @@ python-dotenv==1.0.0
 pymongo==4.6.1
 requests==2.31.0
 Jinja2==3.1.2
+redis==5.0.1

View File: routes/admin_routes.py

@@ -12,7 +12,8 @@ admin_bp = Blueprint('admin', __name__)
 @admin_bp.route('/api/admin/trigger-crawl', methods=['POST'])
 def trigger_crawl():
     """
-    Manually trigger the news crawler
+    Manually trigger the news crawler asynchronously via Redis queue
+    Uses Redis message queue for non-blocking execution
 
     Request body (optional):
     {
@@ -20,6 +21,9 @@ def trigger_crawl():
     }
     """
     try:
+        import redis
+        import json
+
         # Handle both JSON and empty body
         try:
             data = request.get_json(silent=True) or {}
@@ -35,41 +39,29 @@ def trigger_crawl():
                 'error': 'max_articles must be an integer between 1 and 100'
             }), 400
 
-        # Execute crawler in crawler container using docker exec
-        try:
-            result = subprocess.run(
-                ['docker', 'exec', 'munich-news-crawler', 'python', 'crawler_service.py', str(max_articles)],
-                capture_output=True,
-                text=True,
-                timeout=300  # 5 minute timeout
-            )
-
-            # Check result
-            success = result.returncode == 0
-
-            return jsonify({
-                'success': success,
-                'message': f'Crawler {"executed successfully" if success else "failed"}',
-                'max_articles': max_articles,
-                'output': result.stdout[-1000:] if result.stdout else '',  # Last 1000 chars
-                'errors': result.stderr[-500:] if result.stderr else ''
-            }), 200 if success else 500
-        except FileNotFoundError:
-            return jsonify({
-                'success': False,
-                'error': 'Docker command not found. Make sure Docker is installed and the socket is mounted.'
-            }), 500
-        except subprocess.TimeoutExpired:
-            return jsonify({
-                'success': False,
-                'error': 'Crawler timed out after 5 minutes'
-            }), 500
+        # Get Redis client
+        redis_url = os.getenv('REDIS_URL', 'redis://redis:6379')
+        r = redis.from_url(redis_url, decode_responses=True)
+
+        # Publish message to Redis queue
+        message = {
+            'task': 'crawl_news',
+            'max_articles': max_articles,
+            'timestamp': str(os.times())
+        }
+        r.lpush('news_crawl_queue', json.dumps(message))
+
+        # Return immediately without waiting
+        return jsonify({
+            'success': True,
+            'message': 'News crawl task queued',
+            'max_articles': max_articles
+        }), 202  # 202 Accepted
 
     except Exception as e:
         return jsonify({
             'success': False,
-            'error': f'Failed to run crawler: {str(e)}'
+            'error': f'Failed to queue news crawl: {str(e)}'
         }), 500
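The LPUSH above only enqueues work; nothing in this diff consumes `news_crawl_queue`. A minimal sketch of what the worker side might look like, assuming redis-py's blocking BRPOP and a hypothetical `crawl_news()` entry point in crawler_service.py (neither appears in this commit):

```python
# worker.py - hypothetical consumer for the queue filled by trigger_crawl().
# Not part of this commit; assumes crawler_service exposes crawl_news(n).
import json
import os

import redis

REDIS_URL = os.getenv('REDIS_URL', 'redis://redis:6379')


def main():
    r = redis.from_url(REDIS_URL, decode_responses=True)
    while True:
        # BRPOP blocks until a message arrives; paired with the LPUSH
        # in trigger_crawl() it behaves as a FIFO queue.
        _queue, raw = r.brpop('news_crawl_queue')
        task = json.loads(raw)
        if task.get('task') == 'crawl_news':
            max_articles = int(task.get('max_articles', 10))
            # Placeholder call; the real entry point in
            # crawler_service.py may differ.
            from crawler_service import crawl_news
            crawl_news(max_articles)


if __name__ == '__main__':
    main()
```

One reviewer note on the payload: `str(os.times())` captures process CPU times, not wall-clock time, so if `timestamp` is meant for auditing, `time.time()` or an ISO datetime string is probably the intended value.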

View File: routes/transport_routes.py

@@ -0,0 +1,74 @@
+from flask import Blueprint, jsonify
+from database import db
+import redis
+import os
+import json
+
+transport_bp = Blueprint('transport', __name__)
+
+REDIS_URL = os.getenv('REDIS_URL', 'redis://redis:6379')
+
+
+def get_redis_client():
+    """Get Redis client"""
+    return redis.from_url(REDIS_URL, decode_responses=True)
+
+
+@transport_bp.route('/api/transport/crawl', methods=['POST'])
+def trigger_transport_crawl():
+    """Trigger transport disruption crawl asynchronously via Redis queue"""
+    try:
+        r = get_redis_client()
+
+        # Publish message to Redis queue
+        message = {
+            'task': 'crawl_transport',
+            'timestamp': str(os.times())
+        }
+        r.lpush('transport_crawl_queue', json.dumps(message))
+
+        # Return immediately without waiting
+        return jsonify({
+            'status': 'success',
+            'message': 'Transport crawl task queued'
+        }), 202  # 202 Accepted
+
+    except Exception as e:
+        return jsonify({
+            'status': 'error',
+            'message': 'Failed to queue transport crawl',
+            'details': str(e)
+        }), 500
+
+
+@transport_bp.route('/api/transport/disruptions', methods=['GET'])
+def get_transport_disruptions():
+    """Get current transport disruptions from MongoDB"""
+    try:
+        collection = db['transport_alerts']
+
+        # Get active disruptions
+        disruptions = list(collection.find(
+            {'is_active': True},
+            {'_id': 0}
+        ).sort('updated_at', -1))
+
+        # Convert datetime to ISO format
+        for d in disruptions:
+            if d.get('start_time'):
+                d['start_time'] = d['start_time'].isoformat()
+            if d.get('end_time'):
+                d['end_time'] = d['end_time'].isoformat()
+            if d.get('updated_at'):
+                d['updated_at'] = d['updated_at'].isoformat()
+
+        return jsonify({
+            'total': len(disruptions),
+            'disruptions': disruptions
+        }), 200
+
+    except Exception as e:
+        return jsonify({
+            'error': 'Failed to fetch disruptions from database',
+            'details': str(e)
+        }), 500
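Together the two endpoints form a fire-and-forget flow: the POST queues the crawl and returns 202 immediately, and the GET serves whatever a worker last wrote to `transport_alerts`. A quick smoke test using the requests library already pinned in requirements.txt, assuming the API is reachable at localhost:5000 (host and port are not specified anywhere in this diff):

```python
# Smoke test for the new transport endpoints. Assumes the Flask app
# listens on localhost:5000 (host/port are not part of this commit).
import requests

BASE = 'http://localhost:5000'

# Fire-and-forget: returns 202 before any crawling has happened.
resp = requests.post(f'{BASE}/api/transport/crawl')
print(resp.status_code, resp.json())  # e.g. 202 {'status': 'success', ...}

# Reads whatever a worker has already stored; immediately after
# queueing, this may still show the previous crawl's results.
resp = requests.get(f'{BASE}/api/transport/disruptions')
data = resp.json()
print(data['total'], 'active disruptions')
```

Since `get_redis_client()` builds a new client on every request, a module-level client could be reused instead; redis-py keeps an internal connection pool per client, so that would avoid a reconnect per call, though at low traffic it hardly matters.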