Dependency Discovery
Dependency discovery is the foundation of accurate service mapping. NopeSight combines multiple techniques to automatically discover how components communicate and depend on one another, building a complete picture of your service architecture and data flows.
Discovery Techniques
Network Traffic Analysis
class NetworkTrafficAnalyzer:
    def __init__(self):
        self.packet_capture = PacketCapture()
        self.flow_analyzer = FlowAnalyzer()
        self.protocol_decoder = ProtocolDecoder()

    def discover_dependencies(self, target_service):
        """Discover dependencies through network traffic analysis."""
        # Capture traffic to and from the target service
        capture_filter = (f"host {target_service.ip} or "
                          f"host {target_service.hostname}")
        packets = self.packet_capture.capture(
            filter=capture_filter,
            duration=300,  # 5 minutes
            interfaces=self.get_relevant_interfaces()
        )

        # Group packets into flows
        flows = self.flow_analyzer.extract_flows(packets)
        dependencies = []
        for flow in flows:
            # Decode the application-layer protocol
            protocol_info = self.protocol_decoder.decode(flow)
            dependency = {
                'source': self.identify_component(flow.source),
                'destination': self.identify_component(flow.destination),
                'protocol': protocol_info.protocol,
                'service_type': protocol_info.service_type,
                'communication_pattern': self.analyze_pattern(flow),
                'volume': flow.bytes_transferred,
                'frequency': flow.packet_count,
                'latency': flow.avg_latency,
                'confidence': self.calculate_confidence(flow, protocol_info)
            }

            # Add application-specific details
            if protocol_info.protocol == 'HTTP':
                dependency['http_details'] = {
                    'methods': protocol_info.methods,
                    'endpoints': protocol_info.endpoints,
                    'status_codes': protocol_info.status_codes
                }
            elif protocol_info.protocol == 'SQL':
                dependency['database_details'] = {
                    'queries': protocol_info.query_types,
                    'tables': protocol_info.tables_accessed,
                    'database': protocol_info.database_name
                }
            dependencies.append(dependency)

        return self.consolidate_dependencies(dependencies)
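The final consolidate_dependencies call is where repeated observations of the same flow collapse into a single edge. A minimal sketch of one way to implement it, assuming each dependency is a dict shaped like the ones built above (the merge rules here are illustrative, not NopeSight's actual logic):

def consolidate_dependencies(dependencies):
    """Merge repeated observations of the same (source, destination,
    protocol) triple into a single dependency record."""
    merged = {}
    for dep in dependencies:
        key = (dep['source'], dep['destination'], dep['protocol'])
        if key not in merged:
            merged[key] = dict(dep)
        else:
            existing = merged[key]
            # Accumulate traffic volume and frequency across observations
            existing['volume'] += dep['volume']
            existing['frequency'] += dep['frequency']
            # Keep the highest confidence seen for this edge
            existing['confidence'] = max(existing['confidence'],
                                         dep['confidence'])
    return list(merged.values())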
Application Configuration Mining
Configuration Sources:
  Application Configs:
    - database.yml
    - application.properties
    - config.json
    - .env files

  Container Configs:
    - docker-compose.yml
    - kubernetes manifests
    - helm charts

  Infrastructure as Code:
    - terraform files
    - cloudformation templates
    - ansible playbooks

  Service Mesh:
    - istio configurations
    - envoy configs
    - consul definitions
Configuration Parser
import yaml

class ConfigurationParser:
    def parse_application_config(self, config_file):
        """Extract dependencies from configuration files."""
        dependencies = []
        config_type = self.identify_config_type(config_file)

        if config_type == 'spring_boot':
            # Parse Spring Boot application.properties
            config = self.parse_properties_file(config_file)

            # Database connections
            if 'spring.datasource.url' in config:
                db_dep = self.parse_jdbc_url(config['spring.datasource.url'])
                dependencies.append({
                    'type': 'database',
                    'target': db_dep,
                    'driver': config.get('spring.datasource.driver-class-name'),
                    'pool_size': config.get('spring.datasource.hikari.maximum-pool-size', 10)
                })

            # Message queues
            if 'spring.rabbitmq.host' in config:
                dependencies.append({
                    'type': 'message_queue',
                    'target': {
                        'host': config['spring.rabbitmq.host'],
                        'port': config.get('spring.rabbitmq.port', 5672),
                        'virtual_host': config.get('spring.rabbitmq.virtual-host', '/')
                    },
                    'protocol': 'AMQP'
                })

            # External services
            for key, value in config.items():
                if key.endswith('.url') and 'http' in value:
                    service_name = key.split('.')[0]
                    dependencies.append({
                        'type': 'external_service',
                        'name': service_name,
                        'target': self.parse_url(value),
                        'protocol': 'HTTP'
                    })

        elif config_type == 'docker_compose':
            # Parse Docker Compose file
            compose = yaml.safe_load(config_file)
            for service_name, service_config in compose.get('services', {}).items():
                # Environment variables
                env_vars = service_config.get('environment', {})
                dependencies.extend(self.parse_env_dependencies(env_vars))

                # depends_on entries are startup-order dependencies
                for dep in service_config.get('depends_on', []):
                    dependencies.append({
                        'type': 'container_dependency',
                        'source': service_name,
                        'target': dep,
                        'startup_dependency': True
                    })

                # Legacy network links
                for link in service_config.get('links', []):
                    dependencies.append({
                        'type': 'container_link',
                        'source': service_name,
                        'target': link.split(':')[0]
                    })

        return dependencies
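parse_jdbc_url above is referenced but not shown. A minimal sketch of what it might do, assuming standard jdbc:<engine>://host:port/database URLs (the parsing rules and return shape are assumptions; some drivers append extra parameters this sketch ignores):

from urllib.parse import urlparse

def parse_jdbc_url(jdbc_url):
    """Split a JDBC URL such as jdbc:postgresql://db1:5432/orders
    into its engine, host, port, and database components."""
    # Strip the 'jdbc:' prefix so urlparse sees an ordinary URL
    parsed = urlparse(jdbc_url.removeprefix('jdbc:'))
    return {
        'engine': parsed.scheme,      # e.g. 'postgresql', 'mysql'
        'host': parsed.hostname,
        'port': parsed.port,
        'database': parsed.path.lstrip('/')
    }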
Log Analysis
class LogAnalyzer:
    def __init__(self):
        self.patterns = self.load_dependency_patterns()
        self.ml_model = self.load_ml_model()

    def analyze_logs_for_dependencies(self, log_source):
        """Extract dependencies from application logs."""
        dependencies = []

        # Stream logs
        for log_entry in self.stream_logs(log_source):
            # Check for known connection patterns
            for pattern in self.patterns:
                match = pattern.search(log_entry.message)
                if match:
                    dependency = self.extract_dependency_from_match(
                        match,
                        pattern.type,
                        log_entry
                    )
                    dependencies.append(dependency)

            # ML-based detection
            if self.ml_model.is_connection_log(log_entry):
                ml_dependency = self.ml_model.extract_dependency(log_entry)
                dependencies.append(ml_dependency)

        # Consolidate and validate
        return self.validate_dependencies(dependencies)

    def extract_dependency_from_match(self, match, pattern_type, log_entry):
        """Extract dependency information from a regex match."""
        if pattern_type == 'database_connection':
            return {
                'type': 'database',
                'source': log_entry.source,
                'target': {
                    'host': match.group('host'),
                    'port': match.group('port'),
                    'database': match.group('database')
                },
                'timestamp': log_entry.timestamp,
                'status': match.group('status')
            }
        elif pattern_type == 'api_call':
            return {
                'type': 'api',
                'source': log_entry.source,
                'target': {
                    'url': match.group('url'),
                    'method': match.group('method'),
                    'endpoint': match.group('endpoint')
                },
                'response_time': match.group('response_time'),
                'status_code': match.group('status_code')
            }
        elif pattern_type == 'service_discovery':
            return {
                'type': 'service_registry',
                'source': log_entry.source,
                'target': match.group('service_name'),
                'action': match.group('action'),  # register/discover/health_check
                'registry': match.group('registry_url')
            }
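load_dependency_patterns is not shown above. As one illustration, the named groups the extractor expects (host, port, database, status, and so on) could come from regexes like the following; these are hypothetical examples, not NopeSight's shipped pattern set:

import re

# Hypothetical patterns whose named groups line up with
# extract_dependency_from_match above.
DEPENDENCY_PATTERNS = [
    # e.g. "Connected to database orders at db1.internal:5432 (status=OK)"
    ('database_connection', re.compile(
        r'Connected to database (?P<database>\S+) at '
        r'(?P<host>[\w.-]+):(?P<port>\d+) \(status=(?P<status>\w+)\)')),
    # e.g. "GET https://api.example.com/v1/users -> 200 in 42ms"
    ('api_call', re.compile(
        r'(?P<method>GET|POST|PUT|DELETE) (?P<url>https?://[\w.-]+)'
        r'(?P<endpoint>/\S*) -> (?P<status_code>\d{3}) '
        r'in (?P<response_time>\d+)ms')),
]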
API and Service Mesh Discovery
Service Mesh Discovery:
  Istio:
    - Service registry
    - Virtual services
    - Destination rules
    - Traffic policies

  Consul:
    - Service catalog
    - Health checks
    - Connect proxies

  Kubernetes:
    - Service definitions
    - Ingress rules
    - Network policies
    - Service endpoints
Service Mesh Analyzer
class ServiceMeshAnalyzer:
    def discover_from_istio(self, namespace=None):
        """Discover dependencies from the Istio service mesh."""
        istio_client = IstioClient()
        dependencies = []

        # Get all virtual services
        virtual_services = istio_client.get_virtual_services(namespace)
        for vs in virtual_services:
            source_service = vs.metadata.name

            # Analyze routing rules
            for http_route in vs.spec.http:
                for destination in http_route.route:
                    target_service = destination.destination.host
                    dependency = {
                        'type': 'service_mesh',
                        'source': source_service,
                        'target': target_service,
                        'weight': destination.weight,
                        'subset': destination.destination.subset,
                        'routing_rules': self.extract_routing_rules(http_route)
                    }

                    # Enrich with destination rule details
                    dest_rule = istio_client.get_destination_rule(
                        target_service,
                        namespace
                    )
                    if dest_rule:
                        dependency['load_balancing'] = dest_rule.spec.trafficPolicy
                        dependency['subsets'] = dest_rule.spec.subsets

                    dependencies.append(dependency)

        # Supplement routing config with observed telemetry
        telemetry_deps = self.analyze_istio_telemetry()
        dependencies.extend(telemetry_deps)

        return dependencies
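The Kubernetes rows in the table above can be covered with the official Python client. A minimal sketch that walks the cluster's Endpoints objects to map each Service to the pods backing it (a starting point under those assumptions, not the full implementation):

from kubernetes import client, config

def discover_k8s_service_backends():
    """Map each Kubernetes Service to the pods that back it,
    using the cluster's Endpoints objects."""
    config.load_kube_config()  # use load_incluster_config() inside a pod
    v1 = client.CoreV1Api()

    dependencies = []
    for ep in v1.list_endpoints_for_all_namespaces().items:
        service = f"{ep.metadata.namespace}/{ep.metadata.name}"
        for subset in ep.subsets or []:
            for address in subset.addresses or []:
                if address.target_ref:  # normally a reference to a Pod
                    dependencies.append({
                        'type': 'k8s_service_backend',
                        'source': service,
                        'target': address.target_ref.name,
                    })
    return dependencies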
Dependency Types
Synchronous Dependencies
class SynchronousDependencyDetector:
    def detect_sync_dependencies(self, service):
        """Detect synchronous dependencies such as API calls."""
        sync_deps = []

        # HTTP/REST API calls
        http_calls = self.trace_http_calls(service)
        for call in http_calls:
            dependency = {
                'type': 'sync_api',
                'protocol': 'HTTP',
                'source': service.id,
                'target': call.destination,
                'endpoints': call.endpoints,
                'methods': call.methods,
                'avg_response_time': call.avg_response_time,
                'timeout': call.timeout_config,
                'retry_policy': call.retry_config,
                'circuit_breaker': call.circuit_breaker_config
            }
            sync_deps.append(dependency)

        # RPC calls (gRPC, Thrift, etc.)
        rpc_calls = self.trace_rpc_calls(service)
        for call in rpc_calls:
            dependency = {
                'type': 'sync_rpc',
                'protocol': call.protocol,
                'source': service.id,
                'target': call.service,
                'methods': call.methods,
                'avg_latency': call.avg_latency,
                'connection_pooling': call.pool_config
            }
            sync_deps.append(dependency)

        # Database queries
        db_calls = self.trace_database_calls(service)
        for call in db_calls:
            dependency = {
                'type': 'sync_database',
                'protocol': call.protocol,
                'source': service.id,
                'target': call.database,
                'query_types': call.query_types,
                'tables': call.tables_accessed,
                'avg_query_time': call.avg_query_time,
                'connection_pool': call.pool_config
            }
            sync_deps.append(dependency)

        return sync_deps
Asynchronous Dependencies
Async Dependency Types:
  Message Queues:
    - RabbitMQ
    - Apache Kafka
    - AWS SQS
    - Redis Pub/Sub

  Event Streams:
    - Kafka Streams
    - AWS Kinesis
    - Azure Event Hubs

  Batch Processing:
    - Scheduled jobs
    - ETL pipelines
    - File transfers
Async Dependency Detector
class AsynchronousDependencyDetector {
  async detectAsyncDependencies(service) {
    const asyncDeps = [];

    // Message queue dependencies
    const mqDeps = await this.detectMessageQueueDeps(service);
    asyncDeps.push(...mqDeps);

    // Event streaming dependencies
    const streamDeps = await this.detectStreamingDeps(service);
    asyncDeps.push(...streamDeps);

    // Scheduled job dependencies
    const jobDeps = await this.detectScheduledJobDeps(service);
    asyncDeps.push(...jobDeps);

    return asyncDeps;
  }

  async detectMessageQueueDeps(service) {
    const deps = [];

    // Detect Kafka dependencies
    const kafkaConfig = await this.getKafkaConfig(service);
    if (kafkaConfig) {
      // Producer dependencies: service -> topic
      kafkaConfig.producerTopics.forEach(topic => {
        deps.push({
          type: 'async_producer',
          protocol: 'kafka',
          source: service.id,
          target: topic,
          messageRate: this.getMessageRate(service, topic),
          messageSize: this.getAvgMessageSize(service, topic),
          serialization: kafkaConfig.serialization
        });
      });

      // Consumer dependencies: topic -> service
      kafkaConfig.consumerTopics.forEach(topic => {
        deps.push({
          type: 'async_consumer',
          protocol: 'kafka',
          source: topic,
          target: service.id,
          consumerGroup: kafkaConfig.consumerGroup,
          processingRate: this.getProcessingRate(service, topic),
          lag: this.getConsumerLag(service, topic)
        });
      });
    }

    return deps;
  }
}
Data Dependencies
class DataDependencyAnalyzer:
    def analyze_data_dependencies(self, service):
        """Analyze data dependencies and flows."""
        data_deps = {
            'reads': [],
            'writes': [],
            'transformations': [],
            'replication': []
        }

        # Analyze database access patterns
        db_access = self.analyze_database_access(service)
        for access in db_access:
            if access.type == 'READ':
                data_deps['reads'].append({
                    'source': access.database,
                    'tables': access.tables,
                    'frequency': access.frequency,
                    'volume': access.avg_rows,
                    'query_patterns': access.patterns,
                    'cache_usage': access.cache_hit_rate
                })
            elif access.type == 'WRITE':
                data_deps['writes'].append({
                    'target': access.database,
                    'tables': access.tables,
                    'frequency': access.frequency,
                    'volume': access.avg_rows,
                    'transaction_size': access.avg_transaction_size,
                    'write_patterns': access.patterns
                })

        # Analyze data transformations
        transformations = self.trace_data_transformations(service)
        for transform in transformations:
            data_deps['transformations'].append({
                'input_sources': transform.inputs,
                'output_targets': transform.outputs,
                'transformation_type': transform.type,
                'processing_time': transform.avg_duration,
                'data_volume': transform.avg_volume
            })

        # Analyze data replication
        replication = self.detect_data_replication(service)
        data_deps['replication'] = replication

        return data_deps
Dependency Validation
Confidence Scoring
class DependencyValidator:
    def calculate_confidence_score(self, dependency):
        """Calculate a confidence score for a discovered dependency."""
        score = 0.0
        max_score = 0.0

        # Evidence from multiple sources
        evidence_weights = {
            'network_traffic': 0.3,
            'configuration': 0.25,
            'logs': 0.2,
            'service_mesh': 0.25
        }
        for evidence_type, weight in evidence_weights.items():
            max_score += weight
            if dependency.has_evidence(evidence_type):
                score += weight * dependency.get_evidence_strength(evidence_type)

        # Additional factors
        factors = {
            'consistent_pattern': 0.1,   # Consistent over time
            'bidirectional': 0.05,       # Two-way communication
            'documented': 0.05,          # Found in documentation
            'high_volume': 0.05,         # Significant traffic
            'critical_path': 0.05        # On critical path
        }
        for factor, weight in factors.items():
            if self.check_factor(dependency, factor):
                score += weight

        confidence = (score / (max_score + sum(factors.values()))) * 100
        return {
            'score': confidence,
            'category': self.categorize_confidence(confidence),
            'evidence': dependency.get_evidence_summary(),
            'missing_evidence': self.identify_missing_evidence(dependency)
        }

    def categorize_confidence(self, score):
        if score >= 90:
            return 'verified'
        elif score >= 70:
            return 'high_confidence'
        elif score >= 50:
            return 'moderate_confidence'
        else:
            return 'low_confidence'
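As a worked example of this formula: a dependency backed by network traffic (strength 0.9) and configuration (strength 0.8) that also shows a consistent pattern over time scores 0.3 × 0.9 + 0.25 × 0.8 + 0.1 = 0.57. The denominator is the full evidence weight (1.0) plus all factor weights (0.3), so confidence = 0.57 / 1.3 × 100 ≈ 44%, which categorize_confidence still labels low_confidence. The scoring therefore effectively demands corroboration from most evidence sources before an edge is treated as verified.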
Manual Validation Interface
Validation Workflow:
  Review Queue:
    - Low confidence dependencies
    - Critical service dependencies
    - External dependencies
    - Unusual patterns

  Validation Actions:
    - Confirm dependency
    - Reject dependency
    - Modify details
    - Add documentation
    - Set expiration

  Validation Sources:
    - Development team input
    - Architecture documentation
    - Code review
    - Test results
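One way the queue and actions above might be applied programmatically; the class, method names, and record fields below are illustrative assumptions, not NopeSight's API:

from datetime import datetime, timedelta

class ManualValidationQueue:
    """Illustrative review queue for the workflow above."""

    def __init__(self, dependency_store):
        self.store = dependency_store

    def pending_review(self, confidence_threshold=50):
        # Queue anything below the confidence threshold for human review
        return [d for d in self.store.all()
                if d['confidence'] < confidence_threshold]

    def confirm(self, dependency, reviewer, ttl_days=90):
        dependency['status'] = 'confirmed'
        dependency['reviewed_by'] = reviewer
        # 'Set expiration': confirmed entries are re-reviewed periodically
        dependency['expires_at'] = datetime.utcnow() + timedelta(days=ttl_days)
        self.store.update(dependency)

    def reject(self, dependency, reviewer, reason):
        dependency['status'] = 'rejected'
        dependency['reviewed_by'] = reviewer
        dependency['reason'] = reason
        self.store.update(dependency)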
Dependency Mapping
Building Dependency Graphs
import networkx as nx

class DependencyGraphBuilder:
    def build_service_graph(self, dependencies):
        """Build the complete dependency graph."""
        graph = nx.DiGraph()

        # Add nodes
        components = set()
        for dep in dependencies:
            components.add(dep['source'])
            components.add(dep['target'])
        for component in components:
            node_data = self.get_component_data(component)
            graph.add_node(component, **node_data)

        # Add edges
        for dep in dependencies:
            edge_data = {
                'type': dep['type'],
                'protocol': dep.get('protocol'),
                'confidence': dep.get('confidence', 100),
                'weight': self.calculate_edge_weight(dep),
                'metadata': dep.get('metadata', {})
            }
            graph.add_edge(dep['source'], dep['target'], **edge_data)

        # Analyze graph properties
        self.add_graph_analytics(graph)
        return graph

    def add_graph_analytics(self, graph):
        """Add analytical properties to the graph."""
        # Calculate centrality metrics
        degree_centrality = nx.degree_centrality(graph)
        betweenness_centrality = nx.betweenness_centrality(graph)
        for node in graph.nodes():
            graph.nodes[node]['degree_centrality'] = degree_centrality[node]
            graph.nodes[node]['betweenness_centrality'] = betweenness_centrality[node]
            graph.nodes[node]['is_hub'] = degree_centrality[node] > 0.5
            graph.nodes[node]['is_critical_path'] = betweenness_centrality[node] > 0.3

        # Identify clusters (community detection on the undirected projection)
        clusters = nx.community.louvain_communities(graph.to_undirected())
        for i, cluster in enumerate(clusters):
            for node in cluster:
                graph.nodes[node]['cluster'] = i

        # Find cycles
        cycles = list(nx.simple_cycles(graph))
        graph.graph['has_cycles'] = len(cycles) > 0
        graph.graph['cycles'] = cycles
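A short usage sketch, assuming a populated list of dependency dicts from the discovery steps above (the node attributes queried here are the ones add_graph_analytics attaches):

builder = DependencyGraphBuilder()
graph = builder.build_service_graph(discovered_dependencies)

# Surface structurally important services from the attached analytics
hubs = [n for n, data in graph.nodes(data=True) if data['is_hub']]
print(f"Hub services: {hubs}")

if graph.graph['has_cycles']:
    print(f"Dependency cycles found: {graph.graph['cycles']}")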
Dependency Visualization
class DependencyVisualizer {
  constructor(containerId) {
    this.container = document.getElementById(containerId);
    this.width = this.container.clientWidth;
    this.height = this.container.clientHeight;
    this.svg = d3.select(this.container).append('svg')
      .attr('width', this.width)
      .attr('height', this.height);
    this.simulation = d3.forceSimulation();
  }

  visualizeDependencies(dependencyGraph) {
    const { nodes, links } = this.prepareGraphData(dependencyGraph);

    // Set up the force simulation
    this.simulation
      .nodes(nodes)
      .force('link', d3.forceLink(links).id(d => d.id).distance(100))
      .force('charge', d3.forceManyBody().strength(-300))
      .force('center', d3.forceCenter(this.width / 2, this.height / 2))
      .force('collision', d3.forceCollide().radius(30));

    // Create links
    const link = this.svg.append('g')
      .selectAll('line')
      .data(links)
      .enter().append('line')
      .attr('stroke', d => this.getLinkColor(d))
      .attr('stroke-width', d => Math.sqrt(d.weight))
      .attr('marker-end', 'url(#arrowhead)');

    // Create nodes
    const node = this.svg.append('g')
      .selectAll('circle')
      .data(nodes)
      .enter().append('circle')
      .attr('r', d => this.getNodeSize(d))
      .attr('fill', d => this.getNodeColor(d))
      .call(this.drag());

    // Add labels
    const label = this.svg.append('g')
      .selectAll('text')
      .data(nodes)
      .enter().append('text')
      .text(d => d.name)
      .attr('font-size', 12)
      .attr('dx', 15)
      .attr('dy', 4);

    // Add tooltips
    this.addTooltips(node, link);

    // Update positions on each simulation tick
    this.simulation.on('tick', () => {
      link
        .attr('x1', d => d.source.x)
        .attr('y1', d => d.source.y)
        .attr('x2', d => d.target.x)
        .attr('y2', d => d.target.y);
      node
        .attr('cx', d => d.x)
        .attr('cy', d => d.y);
      label
        .attr('x', d => d.x)
        .attr('y', d => d.y);
    });
  }
}
Continuous Discovery
Real-time Dependency Tracking
import asyncio

class RealTimeDependencyTracker:
    def __init__(self):
        self.trackers = {
            'network': NetworkTracker(),
            'logs': LogTracker(),
            'metrics': MetricsTracker(),
            'traces': TraceTracker()
        }
        self.dependency_store = DependencyStore()

    async def start_tracking(self, service):
        """Start real-time dependency tracking."""
        tasks = []
        for tracker_name, tracker in self.trackers.items():
            task = asyncio.create_task(
                self.track_dependencies(service, tracker, tracker_name)
            )
            tasks.append(task)

        # Run all trackers concurrently
        await asyncio.gather(*tasks)

    async def track_dependencies(self, service, tracker, tracker_name):
        """Track dependencies from a specific source."""
        async for event in tracker.stream_events(service):
            if event.is_dependency_event():
                dependency = tracker.extract_dependency(event)

                # Update the dependency store
                existing = self.dependency_store.get(
                    dependency.source,
                    dependency.target
                )
                if existing:
                    # Update the existing dependency
                    updated = self.merge_dependency_data(existing, dependency)
                    self.dependency_store.update(updated)
                else:
                    # New dependency discovered
                    self.dependency_store.add(dependency)
                    await self.notify_new_dependency(dependency)

                # Check for anomalies
                if self.is_anomalous(dependency):
                    await self.handle_anomaly(dependency)
Change Detection
Change Detection:
  Monitored Changes:
    - New dependencies
    - Removed dependencies
    - Protocol changes
    - Latency changes
    - Volume changes
    - Error rate changes

  Detection Methods:
    - Baseline comparison
    - Statistical analysis
    - Pattern matching
    - ML anomaly detection

  Notifications:
    - Real-time alerts
    - Daily summaries
    - Change reports
    - Impact analysis
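A minimal sketch of the baseline-comparison method from the list above: diff the current dependency set against a stored snapshot and flag additions, removals, and latency drift. The threshold and data shapes are illustrative assumptions:

def detect_changes(baseline, current, latency_threshold=0.5):
    """Compare the current dependency snapshot against a baseline.

    Both arguments map (source, target) pairs to dependency records
    that carry at least a 'latency' field.
    """
    changes = []
    for edge in current.keys() - baseline.keys():
        changes.append({'change': 'new_dependency', 'edge': edge})
    for edge in baseline.keys() - current.keys():
        changes.append({'change': 'removed_dependency', 'edge': edge})
    for edge in baseline.keys() & current.keys():
        old, new = baseline[edge]['latency'], current[edge]['latency']
        # Flag latency drift beyond the relative threshold (default 50%)
        if old > 0 and abs(new - old) / old > latency_threshold:
            changes.append({'change': 'latency_change', 'edge': edge,
                            'from': old, 'to': new})
    return changes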
Best Practices
1. Discovery Coverage
   - ✅ Use multiple discovery methods
   - ✅ Cover all protocols and patterns
   - ✅ Include external dependencies
   - ✅ Regular validation cycles

2. Accuracy
   - ✅ Validate low-confidence dependencies
   - ✅ Cross-reference multiple sources
   - ✅ Document assumptions
   - ✅ Regular accuracy audits

3. Performance
   - ✅ Minimize discovery overhead
   - ✅ Use sampling for high-volume traffic (see the sketch after this list)
   - ✅ Cache discovery results
   - ✅ Incremental updates

4. Maintenance
   - ✅ Automate discovery updates
   - ✅ Track dependency changes
   - ✅ Archive historical data
   - ✅ Regular cleanup
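A minimal sketch of the sampling practice above: hash-based flow sampling keeps discovery overhead bounded while remaining deterministic per flow, so a given flow is always either fully observed or skipped. The rate and key choice are illustrative assumptions:

import hashlib

def should_sample_flow(source, destination, sample_rate=0.05):
    """Deterministically sample a fixed fraction of flows.

    Hashing the flow key (rather than sampling at random) means every
    packet of a given flow gets the same decision, so sampled flows
    are observed completely.
    """
    key = f"{source}->{destination}".encode()
    bucket = int(hashlib.sha256(key).hexdigest(), 16) % 10_000
    return bucket < sample_rate * 10_000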
Troubleshooting
Common Issues
Missing Dependencies
Symptoms:
  - Incomplete service maps
  - Unexplained failures
  - Performance issues

Causes:
  - Encrypted traffic
  - Custom protocols
  - Async patterns
  - External services

Solutions:
  - Add protocol decoders
  - Analyze application logs
  - Review configurations
  - Manual documentation
False Dependencies
Symptoms:
  - Incorrect relationships
  - Circular dependencies
  - Too many connections

Causes:
  - Shared infrastructure
  - Test traffic
  - Health checks
  - Monitoring noise

Solutions:
  - Filter known patterns (see the sketch below)
  - Validate with teams
  - Set confidence thresholds
  - Exclude monitoring traffic
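A minimal sketch of filtering health checks and monitoring noise before dependencies are recorded; the user-agent and path lists are illustrative assumptions to tune for your environment:

# Traffic that looks like a dependency but is infrastructure noise.
NOISE_USER_AGENTS = {'kube-probe', 'Prometheus', 'ELB-HealthChecker'}
NOISE_PATHS = {'/healthz', '/livez', '/readyz', '/metrics', '/ping'}

def is_noise(dependency):
    """Return True for flows that should not become dependencies."""
    http = dependency.get('http_details', {})
    # e.g. "kube-probe/1.27" -> "kube-probe"
    if http.get('user_agent', '').split('/')[0] in NOISE_USER_AGENTS:
        return True
    if any(endpoint in NOISE_PATHS for endpoint in http.get('endpoints', [])):
        return True
    return False

def filter_dependencies(dependencies):
    return [d for d in dependencies if not is_noise(d)]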
Next Steps
- 📖 Business Services - Defining business services
- 📖 Impact Analysis - Analyzing service impacts
- 📖 Service Health - Monitoring service health