update

2025-11-14 12:51:18 +01:00
parent 433a16ee0e
commit 869ca3a894
20 changed files with 1606 additions and 38 deletions
--- a/transport_crawler/db_api_client.py
+++ b/transport_crawler/db_api_client.py
@@ -0,0 +1,789 @@
+#!/usr/bin/env python3
+"""
+Deutsche Bahn API Client - Fetch S-Bahn disruptions using Selenium
+"""
+import requests
+from datetime import datetime
+import time
+
+class DBClient:
+    """Client for Deutsche Bahn (S-Bahn) disruptions"""
+    
+    # DB S-Bahn München map page
+    MAP_URL = "https://karte.bahn.de/en/region/DB_SBahn_Muenchen"
+    
+    def __init__(self):
+        """Create the shared HTTP session with browser-like request headers."""
+        browser_headers = {
+            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36',
+            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+            'Accept-Language': 'en-US,en;q=0.9,de;q=0.8',
+        }
+        # One session is kept on the instance; _parse_map_page uses it for API requests.
+        http_session = requests.Session()
+        http_session.headers.update(browser_headers)
+        self.session = http_session
+    
+    def get_sbahn_disruptions(self):
+        """
+        Fetch S-Bahn disruptions for Munich from DB Karte using Selenium.
+
+        Starts a headless Chromium (system chromedriver first, webdriver-manager
+        as fallback), loads MAP_URL and reads the sidebar disruption boxes. When
+        that yields nothing, the rendered page source is parsed instead. A
+        screenshot and the page source are written to /tmp as best-effort
+        debugging aids.
+
+        Returns:
+            list: Disruption dicts. Empty when nothing was found, when a
+            required package cannot be imported, or when any other error
+            occurred (errors are printed, not raised).
+        """
+        print("\n🔍 Fetching S-Bahn disruptions from DB Karte (using Selenium)...")
+
+        driver = None
+        try:
+            from selenium import webdriver
+            from selenium.webdriver.chrome.options import Options
+            from selenium.webdriver.chrome.service import Service
+            from selenium.webdriver.common.by import By
+            from selenium.webdriver.support.ui import WebDriverWait
+            from selenium.webdriver.support import expected_conditions as EC
+            import os
+
+            # Setup Chrome options for Chromium
+            chrome_options = Options()
+            chrome_options.add_argument('--headless')
+            chrome_options.add_argument('--no-sandbox')
+            chrome_options.add_argument('--disable-dev-shm-usage')
+            chrome_options.add_argument('--disable-blink-features=AutomationControlled')
+            chrome_options.add_argument('--window-size=1920,1080')
+            chrome_options.add_argument('--disable-gpu')
+            chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
+            chrome_options.add_experimental_option('useAutomationExtension', False)
+
+            # Set realistic user agent
+            chrome_options.add_argument('user-agent=Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36')
+
+            # Use system Chromium if available (Docker container)
+            chrome_bin = os.getenv('CHROME_BIN', '/usr/bin/chromium')
+            chromedriver_path = os.getenv('CHROMEDRIVER_PATH', '/usr/bin/chromedriver')
+
+            if os.path.exists(chrome_bin):
+                chrome_options.binary_location = chrome_bin
+                print(f"   Using system Chromium: {chrome_bin}")
+
+            print("   Starting Chromium browser...")
+
+            # Try the system chromedriver first, then let webdriver-manager fetch one
+            try:
+                if os.path.exists(chromedriver_path):
+                    service = Service(chromedriver_path)
+                    driver = webdriver.Chrome(service=service, options=chrome_options)
+                else:
+                    driver = webdriver.Chrome(options=chrome_options)
+            except Exception as e:
+                print(f"   ✗ Failed to start Chromium: {e}")
+                print(f"   ℹ️  Falling back to webdriver-manager...")
+                try:
+                    from webdriver_manager.chrome import ChromeDriverManager
+                    service = Service(ChromeDriverManager().install())
+                    driver = webdriver.Chrome(service=service, options=chrome_options)
+                except Exception as e2:
+                    print(f"   ✗ webdriver-manager also failed: {e2}")
+                    raise
+
+            print(f"   Loading: {self.MAP_URL}")
+            driver.get(self.MAP_URL)
+
+            print("   Waiting for page to load...")
+
+            # Wait for disruption boxes to appear
+            try:
+                print("   Waiting for disruption boxes...")
+                WebDriverWait(driver, 15).until(
+                    EC.presence_of_element_located((By.CSS_SELECTOR, "div[data-cy='disruptionbox']"))
+                )
+                # Give extra time for all boxes to load
+                time.sleep(3)
+                print("   ✓ Disruption boxes should be loaded")
+            except Exception as e:
+                # No boxes within the timeout: keep going, the fallbacks below still run
+                print(f"   ⚠ Timeout waiting for disruption boxes: {e}")
+                time.sleep(5)
+
+            print(f"   ✓ Page loaded (title: {driver.title[:50]}...)")
+
+            # Debug: save a screenshot (best-effort, must not abort the crawl)
+            try:
+                screenshot_path = "/tmp/db_karte_screenshot.png"
+                driver.save_screenshot(screenshot_path)
+                print(f"   📸 Screenshot saved to: {screenshot_path}")
+            except Exception as e:
+                print(f"   ⚠ Could not save screenshot: {e}")
+
+            print("   Analyzing page structure...")
+            page_source = driver.page_source
+
+            # Debug: save the page source for inspection (best-effort)
+            try:
+                with open("/tmp/db_karte_source.html", "w", encoding="utf-8") as f:
+                    f.write(page_source)
+                print(f"   📄 Page source saved to: /tmp/db_karte_source.html")
+            except OSError as e:
+                print(f"   ⚠ Could not save page source: {e}")
+
+            # Primary path: read the disruption boxes from the sidebar
+            disruptions = self._find_and_click_disruptions(driver)
+
+            # Fallback path: parse the rendered page source
+            if not disruptions:
+                print("   No clickable disruptions found, parsing page source...")
+
+                # Debug: show what elements are on the page
+                from bs4 import BeautifulSoup
+                soup = BeautifulSoup(page_source, 'html.parser')
+
+                print(f"   Page stats: {len(soup.find_all('div'))} divs, {len(soup.find_all('button'))} buttons")
+
+                # Look for any text mentioning disruptions
+                text = soup.get_text().lower()
+                if 'disruption' in text or 'störung' in text or 'incident' in text:
+                    print(f"   ℹ️  Page contains disruption-related text")
+
+                # Check for common map libraries
+                source_lower = page_source.lower()
+                if 'leaflet' in source_lower:
+                    print(f"   ℹ️  Page uses Leaflet maps")
+                if 'mapbox' in source_lower:
+                    print(f"   ℹ️  Page uses Mapbox")
+                if 'google.maps' in source_lower:
+                    print(f"   ℹ️  Page uses Google Maps")
+
+                disruptions = self._parse_selenium_page(page_source, driver)
+
+            if disruptions:
+                print(f"✓ Found {len(disruptions)} S-Bahn disruptions")
+            else:
+                print(f"   ℹ️  No S-Bahn disruptions found (all lines operating normally)")
+
+            return disruptions
+
+        except ImportError as e:
+            print(f"   ✗ Selenium not available: {e}")
+            print(f"   ℹ️  Install with: pip install selenium webdriver-manager")
+            return []
+        except Exception as e:
+            print(f"   ✗ Error: {e}")
+            import traceback
+            traceback.print_exc()
+            return []
+        finally:
+            if driver:
+                try:
+                    driver.quit()
+                except Exception as e:
+                    # A failing shutdown must not replace the result computed above
+                    print(f"   ⚠ Could not close browser cleanly: {e}")
+    
+    def _find_and_click_disruptions(self, driver):
+        """Read the sidebar disruption boxes and open each one for its time range.
+
+        Works in two passes: first the title, subtitle, line badges and icon
+        colour of every box are collected; then each box is clicked, the
+        From/To range is read from the page text, and MAP_URL is reloaded to
+        get back to the list.
+
+        Args:
+            driver: Selenium WebDriver that already has MAP_URL loaded.
+
+        Returns:
+            list: One disruption dict per box whose basic info could be read.
+            start_time/end_time are None when the detail view could not be
+            read. Empty when there are no boxes or an unexpected error occurs.
+        """
+        try:
+            from selenium.webdriver.common.by import By
+            # These two were missing in this scope, so the wait below raised
+            # NameError and was silently skipped.
+            from selenium.webdriver.support.ui import WebDriverWait
+            from selenium.webdriver.support import expected_conditions as EC
+
+            box_selector = "div[data-cy='disruptionbox']"
+            disruptions = []
+
+            print("   Looking for disruption boxes...")
+
+            # Find all disruption boxes in the sidebar
+            disruption_boxes = driver.find_elements(By.CSS_SELECTOR, box_selector)
+
+            if not disruption_boxes:
+                print("   No disruption boxes found")
+                return []
+
+            print(f"   Found {len(disruption_boxes)} disruption boxes")
+
+            # First pass: collect all basic info without clicking
+            basic_info = []
+            for i, box in enumerate(disruption_boxes):
+                try:
+                    disruption_id = box.get_attribute('id')
+
+                    title_elem = box.find_element(By.CSS_SELECTOR, "span[data-cy='disruptionboxTitle']")
+                    title = title_elem.text.strip()
+
+                    # The subtitle carries the disruption type text
+                    subtitle_elem = box.find_element(By.CSS_SELECTOR, "span[data-cy='disruptionboxSubtitle']")
+                    subtitle = subtitle_elem.text.strip()
+
+                    # Affected lines: only badges starting with 'S' are kept
+                    badge_list = box.find_element(By.CSS_SELECTOR, "div[data-cy='disruptionBadgeList']")
+                    badges = badge_list.find_elements(By.CSS_SELECTOR, "span[data-cy='disruptionBadge']")
+                    lines = []
+                    for badge in badges:
+                        line_text = badge.text.strip()
+                        if line_text and line_text.startswith('S'):
+                            lines.append(line_text)
+
+                    # Severity is derived from the icon colour; 'medium' when unknown
+                    severity = 'medium'
+                    try:
+                        icon = box.find_element(By.CSS_SELECTOR, "img[data-cy='disruptionboxIcon']")
+                        icon_src = icon.get_attribute('src') or ''
+                        if 'red' in icon_src:
+                            severity = 'high'
+                        elif 'orange' in icon_src:
+                            severity = 'medium'
+                        elif 'yellow' in icon_src:
+                            severity = 'low'
+                    except Exception:
+                        # The icon is optional; keep the default severity
+                        pass
+
+                    basic_info.append({
+                        'id': disruption_id or f"sbahn_{i}",
+                        'title': title,
+                        'subtitle': subtitle,
+                        'lines': lines,
+                        'severity': severity,
+                        'index': i
+                    })
+
+                    print(f"   ✓ [{i}] {title[:60]}... (Lines: {', '.join(lines)})")
+
+                except Exception as e:
+                    print(f"   ✗ Error extracting disruption {i}: {e}")
+                    continue
+
+            # Second pass: click each one to get time details
+            print(f"\n   Extracting time details for {len(basic_info)} disruptions...")
+            for info in basic_info:
+                print(f"   Processing disruption {info['index']}...")
+                start_time = None
+                end_time = None
+                try:
+                    # Make sure we're back at the list view
+                    driver.execute_script("window.scrollTo(0, 0);")
+                    time.sleep(0.5)
+
+                    # Wait for boxes to be present again
+                    try:
+                        WebDriverWait(driver, 3).until(
+                            EC.presence_of_element_located((By.CSS_SELECTOR, box_selector))
+                        )
+                    except Exception:
+                        # Carry on; the index check below handles missing boxes
+                        pass
+
+                    # Element references from the first pass are not reused; refetch each time
+                    boxes = driver.find_elements(By.CSS_SELECTOR, box_selector)
+                    print(f"   Found {len(boxes)} boxes after refetch")
+
+                    if info['index'] >= len(boxes):
+                        print(f"   ⚠ Box {info['index']} not found (only {len(boxes)} boxes available)")
+                        continue
+
+                    box = boxes[info['index']]
+                    button = box.find_element(By.TAG_NAME, "button")
+
+                    # Click to open details (JS click)
+                    driver.execute_script("arguments[0].scrollIntoView(true);", button)
+                    time.sleep(0.3)
+                    driver.execute_script("arguments[0].click();", button)
+                    time.sleep(1.5)  # Wait for detail panel to fully open
+
+                    # Extract time from page text
+                    detail_text = driver.find_element(By.TAG_NAME, "body").text
+
+                    # Debug: show a snippet of the detail text
+                    if "From:" in detail_text and "To:" in detail_text:
+                        snippet_start = detail_text.find("From:")
+                        snippet_end = detail_text.find("To:", snippet_start) + 50
+                        snippet = detail_text[snippet_start:snippet_end]
+                        print(f"   Time snippet: {snippet.replace(chr(10), ' ')}")
+
+                    start_time, end_time = self._extract_time_range(detail_text)
+
+                    # Go back to original page to reset the view
+                    driver.get(self.MAP_URL)
+                    time.sleep(3)  # Wait for page to reload and boxes to appear
+
+                except Exception as e:
+                    # The disruption is still reported, with whatever times were read
+                    print(f"   ⚠ Could not get time for disruption {info['index']}: {e}")
+
+                # Built once for both outcomes so the type is classified the same way
+                disruptions.append({
+                    'id': info['id'],
+                    'title': info['title'],
+                    'description': info['subtitle'],
+                    'lines': info['lines'],
+                    'type': self._classify_type(info['title'] + ' ' + info['subtitle']),
+                    'start_time': start_time,
+                    'end_time': end_time,
+                    'severity': info['severity'],
+                    'source': 'db_karte_sidebar',
+                    'created_at': datetime.utcnow()
+                })
+
+                time_info = ""
+                if start_time:
+                    time_info += f" From: {start_time.strftime('%d.%m %H:%M')}"
+                if end_time:
+                    time_info += f" To: {end_time.strftime('%d.%m %H:%M')}"
+
+                if time_info:
+                    print(f"   ✓ [{info['index']}]{time_info}")
+
+            return disruptions
+
+        except Exception as e:
+            print(f"   ✗ Error finding disruption boxes: {e}")
+            return []
+    
+    def _extract_disruption_details(self, driver):
+        """Build a disruption dict from the currently visible popup/modal.
+
+        The first displayed element with more than 20 characters of text that
+        matches one of the popup selectors is used; when none matches, the text
+        of the whole <body> is used instead.
+
+        Args:
+            driver: Selenium WebDriver showing the detail view.
+
+        Returns:
+            dict | None: The disruption, or None when the text has no S-Bahn
+            line reference or an error occurred (the error is printed).
+        """
+        try:
+            from selenium.webdriver.common.by import By
+            import zlib
+
+            # Look for popup/modal/tooltip containers
+            popup_selectors = [
+                "div[class*='popup']",
+                "div[class*='modal']",
+                "div[class*='tooltip']",
+                "div[class*='detail']",
+                "div[class*='info']",
+                "[role='dialog']",
+                "[role='tooltip']",
+            ]
+
+            popup = None
+            for selector in popup_selectors:
+                try:
+                    elements = driver.find_elements(By.CSS_SELECTOR, selector)
+                    popup = next(
+                        (elem for elem in elements if elem.is_displayed() and len(elem.text) > 20),
+                        None,
+                    )
+                except Exception:
+                    # A failing selector must not stop the remaining ones
+                    continue
+                if popup is not None:
+                    break
+
+            if popup is None:
+                # No popup container matched: fall back to the whole page text
+                popup_text = driver.find_element(By.TAG_NAME, "body").text
+            else:
+                popup_text = popup.text
+
+            # Check if it's S-Bahn related
+            if not self._contains_sbahn_reference(popup_text):
+                return None
+
+            # Title is the first line, capped at 100 characters
+            title = popup_text.split('\n', 1)[0][:100]
+
+            start_time, end_time = self._extract_time_range(popup_text)
+            lines = self._extract_lines_from_text(popup_text)
+
+            # crc32 instead of hash(): str hashes are salted per process, which
+            # made the id differ between runs for the same text.
+            text_digest = zlib.crc32(popup_text.encode('utf-8', errors='replace')) % 10000
+
+            return {
+                'id': f"sbahn_detail_{text_digest}",
+                'title': title,
+                'description': popup_text[:500],
+                'lines': lines,
+                'type': self._classify_type(title),
+                'start_time': start_time,
+                'end_time': end_time,
+                'severity': self._determine_severity(popup_text),
+                'source': 'db_karte_detail',
+                'created_at': datetime.utcnow()
+            }
+
+        except Exception as e:
+            print(f"   ⚠ Could not extract disruption details: {e}")
+            return None
+    
+    def _extract_time_range(self, text):
+        """Extract the start and end time of a disruption from free text.
+
+        Two formats are recognised:
+          1. "From: Th. 2025-11-13, 10:02 To: Th. 2025-11-13, 14:30"
+             (line breaks between the parts are allowed).
+          2. German "ab DD.MM.YYYY HH:MM" / "bis DD.MM.YYYY HH:MM", tried only
+             when format 1 produced no start time.
+
+        Args:
+            text: Text to search; None or empty yields (None, None).
+
+        Returns:
+            tuple: (start_time, end_time) as naive datetimes; each is None
+            when it could not be found or parsed.
+        """
+        import re
+        from datetime import datetime
+
+        start_time = None
+        end_time = None
+
+        if not text:
+            return start_time, end_time
+
+        # Remove newlines first to make matching easier
+        text_clean = text.replace('\n', ' ').replace('\r', ' ')
+
+        pattern = r'From:\s*[A-Za-z]{2}\.\s*(\d{4}-\d{2}-\d{2}),\s*(\d{2}:\d{2})\s*To:\s*[A-Za-z]{2}\.\s*(\d{4}-\d{2}-\d{2}),\s*(\d{2}:\d{2})'
+        match = re.search(pattern, text_clean)
+
+        if match:
+            try:
+                start_time = datetime.strptime(f"{match.group(1)} {match.group(2)}", "%Y-%m-%d %H:%M")
+                end_time = datetime.strptime(f"{match.group(3)} {match.group(4)}", "%Y-%m-%d %H:%M")
+            except ValueError as e:
+                # Digits matched but do not form a valid date/time (e.g. month 13)
+                print(f"   ⚠ Error parsing time: {e}")
+
+        # Fallback: Try other German formats
+        if not start_time:
+            # \b keeps "ab"/"bis" from matching inside longer words such as "Stab"
+            ab_pattern = r'\bab\s+(\d{1,2}\.\d{1,2}\.\d{4})[,\s]+(\d{1,2}:\d{2})'
+            bis_pattern = r'\bbis\s+(\d{1,2}\.\d{1,2}\.\d{4})[,\s]+(\d{1,2}:\d{2})'
+
+            ab_match = re.search(ab_pattern, text, re.IGNORECASE)
+            if ab_match:
+                try:
+                    start_time = datetime.strptime(f"{ab_match.group(1)} {ab_match.group(2)}", "%d.%m.%Y %H:%M")
+                except ValueError:
+                    # Not a real calendar date; leave start_time unset
+                    pass
+
+            bis_match = re.search(bis_pattern, text, re.IGNORECASE)
+            if bis_match:
+                try:
+                    end_time = datetime.strptime(f"{bis_match.group(1)} {bis_match.group(2)}", "%d.%m.%Y %H:%M")
+                except ValueError:
+                    # Not a real calendar date; leave end_time unset
+                    pass
+
+        return start_time, end_time
+    
+    def _determine_severity(self, text):
+        """Map German keywords in *text* to 'high', 'medium' or 'low'."""
+        lowered = text.lower()
+
+        # Checked in order: the first level with a matching keyword wins.
+        keyword_levels = (
+            ('high', ('ausfall', 'gesperrt', 'eingestellt', 'komplett')),
+            ('medium', ('verspätung', 'verzögerung', 'teilweise')),
+        )
+        for level, keywords in keyword_levels:
+            for keyword in keywords:
+                if keyword in lowered:
+                    return level
+
+        # No keyword matched
+        return 'low'
+    
+    def _parse_selenium_page(self, page_source, driver):
+        """Fallback parser for the page rendered by Selenium.
+
+        Three methods are tried, each only when the previous one found nothing:
+          1. Live elements matching disruption-like CSS selectors.
+          2. _parse_map_page on the page source.
+          3. Any <p>/<div>/<span> whose text mentions an S-Bahn line.
+
+        Args:
+            page_source: HTML of the rendered page as str.
+            driver: Selenium WebDriver that rendered the page.
+
+        Returns:
+            list: Disruption dicts, de-duplicated on the first 50 characters
+            of the title; empty on error (the error is printed).
+        """
+        try:
+            from bs4 import BeautifulSoup
+            from selenium.webdriver.common.by import By
+
+            print("   Analyzing rendered page...")
+            soup = BeautifulSoup(page_source, 'html.parser')
+            disruptions = []
+
+            # Method 1: Try to find disruption elements directly via Selenium
+            selectors = [
+                "div[class*='disruption']",
+                "div[class*='stoerung']",
+                "div[class*='incident']",
+                "div[class*='message']",
+                "div[class*='alert']",
+                "[data-disruption]",
+                "[data-incident]"
+            ]
+
+            for selector in selectors:
+                try:
+                    elements = driver.find_elements(By.CSS_SELECTOR, selector)
+                    if elements:
+                        print(f"   Found {len(elements)} elements with selector: {selector}")
+                    for elem in elements:
+                        text = elem.text.strip()
+                        if len(text) > 20 and self._contains_sbahn_reference(text):
+                            disruptions.append(self._create_disruption_from_text(text))
+                except Exception as e:
+                    # A failing selector must not stop the remaining ones
+                    print(f"   ✗ Selenium element search error: {e}")
+                    continue
+
+            # Method 2: Parse the page source with BeautifulSoup
+            if not disruptions:
+                print("   Trying BeautifulSoup parsing...")
+                disruptions = self._parse_map_page(page_source.encode(), page_source)
+
+            # Method 3: Check for any text mentioning S-Bahn lines with disruptions
+            if not disruptions:
+                print("   Checking page text for S-Bahn mentions...")
+                page_text = soup.get_text()
+                if self._contains_sbahn_reference(page_text):
+                    # Extract paragraphs or sections mentioning S-Bahn
+                    for elem in soup.find_all(['p', 'div', 'span']):
+                        text = elem.get_text(strip=True)
+                        if len(text) > 30 and self._contains_sbahn_reference(text):
+                            if self._extract_lines_from_text(text):
+                                disruptions.append(self._create_disruption_from_text(text))
+
+            # Remove duplicates; the first entry for each title prefix wins
+            seen = set()
+            unique = []
+            for d in disruptions:
+                key = d['title'][:50]
+                if key not in seen:
+                    seen.add(key)
+                    unique.append(d)
+
+            return unique
+
+        except Exception as e:
+            print(f"   ✗ Parse error: {e}")
+            import traceback
+            traceback.print_exc()
+            return []
+    
+    def _contains_sbahn_reference(self, text):
+        """Return True when *text* mentions an S-Bahn line such as S1, S 3 or S-8.
+
+        The match is case-insensitive and must start at a word boundary, so
+        the trailing "s" of ordinary words ("Gleis 1", "bis 8 Uhr") no longer
+        counts as a line reference. None or empty text yields False.
+        """
+        import re
+        if not text:
+            return False
+        return bool(re.search(r'\bS[\s-]?[1-8]', text, re.IGNORECASE))
+    
+    def _create_disruption_from_text(self, text):
+        """Create a disruption dict from a block of free text.
+
+        The title is the text up to the first '.', capped at 100 characters;
+        the description is the first 500 characters. No times are extracted
+        and the severity is always 'medium'.
+
+        Args:
+            text: Text of the element that mentions the disruption.
+
+        Returns:
+            dict: Disruption with source 'db_karte_selenium'.
+        """
+        import zlib
+
+        # str.split always returns at least one item, so no fallback is needed
+        title = text.split('.', 1)[0][:100]
+
+        # crc32 instead of hash(): str hashes are salted per process, which
+        # made the id differ between runs for the same text.
+        text_digest = zlib.crc32(text.encode('utf-8', errors='replace')) % 10000
+
+        return {
+            'id': f"sbahn_{text_digest}",
+            'title': title,
+            'description': text[:500],
+            'lines': self._extract_lines_from_text(text),
+            'type': self._classify_type(title),
+            'start_time': None,
+            'end_time': None,
+            'severity': 'medium',
+            'source': 'db_karte_selenium',
+            'created_at': datetime.utcnow()
+        }
+    
+    def _parse_map_page(self, html_content, html_text):
+        """Parse the DB Karte map page for S-Bahn disruptions.
+
+        Three methods are combined:
+          1. Flat JSON objects embedded in <script> tags that contain a
+             disruption-like key.
+          2. Up to three distinct URLs found in the page that look like API
+             endpoints, fetched with the instance's HTTP session.
+          3. Visible disruption containers, only when 1 and 2 found nothing.
+
+        Args:
+            html_content: Page HTML handed to BeautifulSoup.
+            html_text: Page HTML as str, searched for API URLs.
+
+        Returns:
+            list: Disruption dicts de-duplicated by title; empty on error
+            (the error is printed).
+        """
+        try:
+            from bs4 import BeautifulSoup
+            import re
+            import json
+
+            disruptions = []
+
+            # Method 1: Look for embedded JSON data in script tags
+            print("   Analyzing page for disruption data...")
+
+            soup = BeautifulSoup(html_content, 'html.parser')
+
+            # Only flat objects (no nested braces) containing one of the keys match
+            json_pattern = re.compile(
+                r'\{[^{}]*(?:"disruption"|"störung"|"incident"|"message")[^{}]*\}',
+                re.IGNORECASE,
+            )
+
+            for script in soup.find_all('script'):
+                if not script.string:
+                    continue
+                for match in json_pattern.finditer(script.string):
+                    try:
+                        data = json.loads(match.group())
+                    except json.JSONDecodeError:
+                        continue
+                    if self._is_disruption_data(data):
+                        disruption = self._parse_disruption_json(data)
+                        if disruption:
+                            disruptions.append(disruption)
+
+            # Method 2: Look for API endpoint URLs in the page
+            api_pattern = r'https?://[^\s"\']+(?:api|disruption|stoerung)[^\s"\']+'
+            api_urls = re.findall(api_pattern, html_text, re.IGNORECASE)
+
+            if api_urls:
+                print(f"   Found {len(api_urls)} potential API endpoints")
+                # De-duplicate first (keeping page order), then take three;
+                # slicing first could leave fewer than three distinct URLs.
+                for api_url in list(dict.fromkeys(api_urls))[:3]:
+                    try:
+                        print(f"   Trying API: {api_url[:60]}...")
+                        api_response = self.session.get(api_url, timeout=10)
+                        if api_response.status_code == 200:
+                            api_data = api_response.json()
+                            disruptions.extend(self._parse_api_response(api_data))
+                    except Exception as e:
+                        # Best-effort probing: a dead or non-JSON endpoint is skipped
+                        print(f"   ⚠ API request failed: {e}")
+                        continue
+
+            # Method 3: Look for visible disruption messages on the page
+            if not disruptions:
+                print("   Checking for visible disruption messages...")
+                disruptions = self._scrape_visible_disruptions(soup)
+
+            # Remove duplicates based on title; the first occurrence wins
+            seen_titles = set()
+            unique_disruptions = []
+            for d in disruptions:
+                if d['title'] not in seen_titles:
+                    seen_titles.add(d['title'])
+                    unique_disruptions.append(d)
+
+            return unique_disruptions
+
+        except Exception as e:
+            print(f"   ✗ Parse error: {e}")
+            import traceback
+            traceback.print_exc()
+            return []
+    
+    def _is_disruption_data(self, data):
+        """Tell whether a decoded JSON value looks like disruption data.
+
+        Only dicts qualify; the check is a substring search over the
+        lower-cased string form of the whole dict, so keys and values both count.
+        """
+        if isinstance(data, dict):
+            haystack = str(data).lower()
+            for marker in ('disruption', 'störung', 'incident', 'message', 'title', 'description'):
+                if marker in haystack:
+                    return True
+        return False
+    
+    def _parse_disruption_json(self, data):
+        """Build a disruption dict from one decoded JSON object.
+
+        The title is taken from 'title', 'headline' or 'message' (first
+        non-empty one) and the description from 'description', 'text' or
+        'content'.
+
+        Args:
+            data: Decoded JSON value; anything other than a dict is rejected.
+
+        Returns:
+            dict | None: The disruption, or None when *data* is not a dict or
+            its title is not a string of at least 5 characters.
+        """
+        import zlib
+
+        # Validate explicitly instead of relying on a blanket except
+        if not isinstance(data, dict):
+            return None
+
+        title = data.get('title') or data.get('headline') or data.get('message', '')
+        if not isinstance(title, str) or len(title) < 5:
+            return None
+
+        # crc32 instead of hash(): str hashes are salted per process, which
+        # made the fallback id differ between runs for the same title.
+        fallback_id = f"json_{zlib.crc32(title.encode('utf-8', errors='replace'))}"
+
+        return {
+            'id': data.get('id', fallback_id),
+            'title': title,
+            'description': data.get('description') or data.get('text') or data.get('content', ''),
+            'lines': self._extract_lines_from_text(title),
+            'type': self._classify_type(title),
+            'start_time': None,
+            'end_time': None,
+            'severity': data.get('severity', 'medium'),
+            'source': 'db_karte_json',
+            'created_at': datetime.utcnow()
+        }
+    
+    def _parse_api_response(self, data):
+        """Parse a decoded API response into disruption dicts.
+
+        A dict is unwrapped through its 'disruptions', 'items' or 'data' key
+        (first one present); a dict with none of these is treated as a single
+        item. Only lists are iterated, any other shape yields an empty list.
+
+        Args:
+            data: Decoded JSON response (dict, list or anything else).
+
+        Returns:
+            list: Disruptions accepted by _parse_disruption_json.
+        """
+        disruptions = []
+
+        try:
+            # Handle different response formats
+            if isinstance(data, dict):
+                for wrapper_key in ('disruptions', 'items', 'data'):
+                    if wrapper_key in data:
+                        data = data[wrapper_key]
+                        break
+                else:
+                    data = [data]
+
+            if isinstance(data, list):
+                for item in data:
+                    disruption = self._parse_disruption_json(item)
+                    if disruption:
+                        disruptions.append(disruption)
+        except Exception as e:
+            # Best-effort: keep what was parsed so far, but say what went wrong
+            print(f"   ⚠ Could not parse API response: {e}")
+
+        return disruptions
+    
+    def _scrape_visible_disruptions(self, soup):
+        """Scrape visible disruption messages from the parsed page.
+
+        Elements matching disruption-like CSS selectors are kept when their
+        text is longer than 20 characters and mentions "S-Bahn" or an S-Bahn
+        line. The title is the first heading/<strong> inside the element, or
+        the first 100 characters of its text.
+
+        Args:
+            soup: BeautifulSoup document of the page.
+
+        Returns:
+            list: Disruption dicts with source 'db_karte_page'; on error the
+            entries collected so far are returned and the error is printed.
+        """
+        disruptions = []
+
+        # Look for common disruption container classes
+        selectors = [
+            'div[class*="disruption"]',
+            'div[class*="stoerung"]',
+            'div[class*="incident"]',
+            'div[class*="message"]',
+            'div[class*="alert"]',
+            'article[class*="disruption"]',
+        ]
+
+        try:
+            for selector in selectors:
+                for elem in soup.select(selector):
+                    text = elem.get_text(strip=True)
+                    if len(text) <= 20:
+                        continue
+
+                    # Use the shared line detector rather than a hard-coded list,
+                    # which lacked 's5' and the "S 1" / "S-1" spellings.
+                    mentions_sbahn = 's-bahn' in text.lower() or self._contains_sbahn_reference(text)
+                    if not mentions_sbahn:
+                        continue
+
+                    # Extract title (first heading, else the start of the text)
+                    title_elem = elem.find(['h1', 'h2', 'h3', 'h4', 'strong'])
+                    title = title_elem.get_text(strip=True) if title_elem else text[:100]
+
+                    disruptions.append({
+                        'id': f"visible_{len(disruptions)}",
+                        'title': title,
+                        'description': text[:500],
+                        'lines': self._extract_lines_from_text(text),
+                        'type': self._classify_type(title),
+                        'start_time': None,
+                        'end_time': None,
+                        'severity': 'medium',
+                        'source': 'db_karte_page',
+                        'created_at': datetime.utcnow()
+                    })
+        except Exception as e:
+            # Best-effort: keep what was collected, but say what went wrong
+            print(f"   ⚠ Error scraping visible disruptions: {e}")
+
+        return disruptions
+    
+    def _extract_lines_from_text(self, text):
+        """Extract S-Bahn line names from text.
+
+        Matches S1, s2, "S 3", "S-4" and so on (digits 1-8, case-insensitive)
+        and normalises each to the form "S3". The match must start at a word
+        boundary, so the trailing "s" of ordinary words ("Gleis 1",
+        "bis 8 Uhr") is not reported as a line.
+
+        NOTE(review): only the first digit is read, so a two-digit name such
+        as "S20" is still reported as "S2", as before.
+
+        Args:
+            text: Text to search; None or empty yields [].
+
+        Returns:
+            list: Unique line names in sorted order.
+        """
+        import re
+        if not text:
+            return []
+        matches = re.findall(r'\bS[\s-]?[1-8]', text, re.IGNORECASE)
+        # Normalize to format like "S1", "S2" by dropping the separator
+        lines = {re.sub(r'[^\dS]', '', m.upper()) for m in matches}
+        # Sorted so the result does not depend on set iteration order
+        return sorted(lines)
+    
+    def _classify_type(self, title):
+        """Derive the disruption type from German keywords in the title."""
+        lowered = title.lower()
+
+        # Checked in order: the first type with a matching keyword wins.
+        keyword_types = (
+            ('maintenance', ('bauarbeit', 'wartung')),
+            ('disruption', ('ausfall', 'störung')),
+            ('delay', ('verspätung',)),
+        )
+        for type_name, keywords in keyword_types:
+            if any(keyword in lowered for keyword in keywords):
+                return type_name
+
+        # No keyword matched
+        return 'info'
+
+
+def test_db_client():
+    """Run the client once and print a summary of the disruptions it found."""
+    rule = "=" * 70
+    print(rule)
+    print("🚆 Deutsche Bahn S-Bahn Client Test")
+    print(rule)
+
+    found = DBClient().get_sbahn_disruptions()
+
+    if found:
+        print(f"\n📊 Total S-Bahn Disruptions: {len(found)}")
+        print(rule)
+
+        # One short entry per disruption, numbered from 1
+        for position, entry in enumerate(found, start=1):
+            print(f"\n[{position}] {entry['title']}")
+            print(f"    Lines: {', '.join(entry['lines'])}")
+            print(f"    Type: {entry['type']}")
+
+        print("\n" + rule)
+    else:
+        print("\n⚠ No S-Bahn disruptions found (or not yet implemented)")
+
+
+# Running the module directly performs one crawl and prints the result.
+if __name__ == '__main__':
+    test_db_client()