This commit is contained in:
2025-11-14 12:51:18 +01:00
parent 433a16ee0e
commit 869ca3a894
20 changed files with 1606 additions and 38 deletions

View File

@@ -0,0 +1,233 @@
#!/usr/bin/env python3
"""
Transport Crawler Service - Main orchestrator
Fetches disruptions from multiple sources and displays them
"""
from datetime import datetime
from mvg_api_client import MVGClient
from db_api_client import DBClient
def print_header():
    """Print the crawler banner with the current timestamp."""
    separator = "=" * 70
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print("\n" + separator)
    print("🚇 Munich Transport Disruption Crawler")
    print(separator)
    print(f"Time: {timestamp}")
    print(separator)
def print_disruption_summary(all_disruptions):
    """Print an aggregate summary of disruptions grouped by type and source.

    Args:
        all_disruptions: list of disruption dicts; each may carry 'type'
            and 'source' keys (a missing key is counted as 'unknown').
    """
    # Function-scope import matches this file's style (see save_to_mongodb).
    from collections import Counter

    if not all_disruptions:
        print("\n✅ No disruptions found - All lines operating normally!")
        return

    print(f"\n📊 SUMMARY: {len(all_disruptions)} Active Disruptions")
    print("=" * 70)

    # Counter replaces the manual dict-increment loop; counts are identical.
    by_type = Counter(d.get('type', 'unknown') for d in all_disruptions)
    type_icons = {
        'maintenance': '🔧',
        'disruption': '⚠️',
        'delay': '⏱️',
        'info': ''
    }
    print("\nBy Type:")
    for dtype, count in sorted(by_type.items()):
        icon = type_icons.get(dtype, '')
        print(f" {icon} {dtype.title()}: {count}")

    by_source = Counter(d.get('source', 'unknown') for d in all_disruptions)
    print("\nBy Source:")
    for source, count in sorted(by_source.items()):
        print(f"{source}: {count}")
def print_disruptions(disruptions, title):
    """Print each disruption with icons, affected lines, time range,
    truncated description, and severity.

    Args:
        disruptions: list of disruption dicts (keys: title, lines, type,
            start_time, end_time, description, severity — all optional;
            start_time/end_time are datetime objects when present).
        title: section heading printed above the list.
    """
    if not disruptions:
        return

    print(f"\n{title}")
    print("-"*70)

    # Icon tables hoisted out of the loop (loop-invariant).
    type_icons = {
        'maintenance': '🔧',
        'disruption': '⚠️',
        'delay': '⏱️',
        'info': ''
    }
    severity_icons = {
        'high': '🔴',
        'medium': '🟡',
        'low': '🟢'
    }

    for i, d in enumerate(disruptions, 1):
        icon = type_icons.get(d.get('type', 'info'), '')
        print(f"\n{icon} [{i}] {d.get('title', 'No title')}")

        # Lines affected
        lines = d.get('lines', [])
        if lines:
            line_str = ', '.join(lines)
            print(f" 🚇 Lines: {line_str}")

        # Time range — BUG FIX: the original appended an empty string as the
        # separator (`time_str += ""`), gluing "From: …" and "Until: …"
        # together; the parts are now joined with a space.
        start = d.get('start_time')
        end = d.get('end_time')
        if start or end:
            parts = []
            if start:
                parts.append(f"From: {start.strftime('%d.%m %H:%M')}")
            if end:
                parts.append(f"Until: {end.strftime('%d.%m %H:%M')}")
            print(" ".join(parts))

        # Description, truncated to keep the console report readable
        desc = d.get('description', '')
        if desc:
            if len(desc) > 150:
                desc = desc[:150] + "..."
            print(f" 📝 {desc}")

        # Severity
        severity = d.get('severity', 'medium')
        severity_icon = severity_icons.get(severity, '')
        print(f" {severity_icon} Severity: {severity}")
def save_to_mongodb(disruptions):
    """Persist disruptions to the 'transport_alerts' collection in MongoDB.

    All existing alerts are first marked inactive, then each current
    disruption is upserted (keyed on its 'id') with is_active=True, so
    documents for resolved disruptions are kept but flagged inactive.

    Args:
        disruptions: list of disruption dicts; each must carry the keys
            id, title, description, lines, type, severity, start_time,
            end_time, source (KeyError on a missing key is caught below).

    Returns:
        True on success, False on any error (best-effort; never raises).
    """
    try:
        from pymongo import MongoClient
        import os

        mongo_uri = os.getenv('MONGODB_URI', 'mongodb://admin:changeme@mongodb:27017/')
        # RESOURCE FIX: the original never closed the client; the context
        # manager guarantees the connection is released.
        with MongoClient(mongo_uri) as client:
            collection = client['munich_news']['transport_alerts']
            print("\n💾 Saving to MongoDB...")

            # Mark all existing alerts as inactive; the upserts below
            # re-activate the ones that are still current.
            collection.update_many({}, {'$set': {'is_active': False}})

            saved_count = 0
            for d in disruptions:
                # 'id' is the source-assigned unique identifier.
                collection.update_one(
                    {'alert_id': d['id']},
                    {
                        '$set': {
                            'alert_id': d['id'],
                            'title': d['title'],
                            'description': d['description'],
                            'lines': d['lines'],
                            'type': d['type'],
                            'severity': d['severity'],
                            'start_time': d['start_time'],
                            'end_time': d['end_time'],
                            'source': d['source'],
                            'is_active': True,
                            'updated_at': datetime.utcnow()
                        }
                    },
                    upsert=True
                )
                saved_count += 1

            print(f"✓ Saved {saved_count} disruptions to MongoDB")
            return True
    except Exception as e:
        # Deliberate best-effort: persistence failure must not kill the crawl.
        print(f"✗ MongoDB error: {e}")
        return False
def run_crawler():
    """Fetch disruptions from every source, print a human-readable report,
    emit a JSON dump, and persist the results to MongoDB.

    Returns:
        dict with timestamp, per-source counts, and JSON-serializable
        disruption records.
    """
    import json

    print_header()

    print("\n📡 Fetching data from sources...")
    print("-"*70)

    # MVG covers U-Bahn, Tram and Bus; DB covers the S-Bahn network.
    mvg_disruptions = MVGClient().get_disruptions()
    sbahn_disruptions = DBClient().get_sbahn_disruptions()
    all_disruptions = [*mvg_disruptions, *sbahn_disruptions]

    print_disruption_summary(all_disruptions)

    if mvg_disruptions:
        print_disruptions(mvg_disruptions, "\n🚇 MVG DISRUPTIONS (U-Bahn, Tram, Bus)")
    if sbahn_disruptions:
        print_disruptions(sbahn_disruptions, "\n🚆 S-BAHN DISRUPTIONS")

    print("\n" + "="*70)
    print("📄 JSON OUTPUT")
    print("="*70)

    def _serialize(d):
        # Datetimes become ISO strings; missing keys become None.
        start = d.get('start_time')
        end = d.get('end_time')
        return {
            'id': d.get('id'),
            'title': d.get('title'),
            'description': d.get('description'),
            'lines': d.get('lines', []),
            'type': d.get('type'),
            'severity': d.get('severity'),
            'start_time': start.isoformat() if start else None,
            'end_time': end.isoformat() if end else None,
            'source': d.get('source')
        }

    output = {
        'timestamp': datetime.now().isoformat(),
        'total_disruptions': len(all_disruptions),
        'mvg_disruptions': len(mvg_disruptions),
        'sbahn_disruptions': len(sbahn_disruptions),
        'disruptions': [_serialize(d) for d in all_disruptions]
    }
    print(json.dumps(output, indent=2, ensure_ascii=False))

    save_to_mongodb(all_disruptions)

    print("\n" + "="*70)
    print("✓ Crawler finished")
    print("="*70 + "\n")
    return output
if __name__ == '__main__':
    try:
        disruptions = run_crawler()
    except KeyboardInterrupt:
        # Graceful exit on Ctrl+C.
        print("\n\n👋 Crawler stopped by user")
    except Exception as e:
        # Top-level boundary: report the error with its full traceback.
        import traceback
        print(f"\n\n❌ Crawler error: {e}")
        traceback.print_exc()