Line data Source code
1 : /*
2 : * Copyright (c) 2013 Juniper Networks, Inc. All rights reserved.
3 : */
4 :
5 : #include "base/os.h"
6 : #include <sys/types.h>
7 : #include <arpa/inet.h>
8 : #include <netinet/in.h>
9 : #include <netinet/ip6.h>
10 : #include <netinet/icmp6.h>
11 :
12 : #include "init/agent_param.h"
13 : #include "cmn/agent_cmn.h"
14 : #include "diag/diag.h"
15 : #include "base/address_util.h"
16 : #include "oper/ecmp_load_balance.h"
17 : #include "oper/interface_common.h"
18 : #include "oper/nexthop.h"
19 : #include "oper/route_common.h"
20 : #include "oper/vrf.h"
21 : #include "oper/tunnel_nh.h"
22 : #include "pkt/control_interface.h"
23 : #include "pkt/pkt_handler.h"
24 : #include "pkt/proto.h"
25 : #include "pkt/flow_table.h"
26 : #include "pkt/flow_proto.h"
27 : #include "pkt/pkt_types.h"
28 : #include "pkt/pkt_init.h"
29 : #include "cmn/agent_stats.h"
30 : #include "pkt/packet_buffer.h"
31 : #include "vr_types.h"
32 : #include "vr_defs.h"
33 : #include "vr_mpls.h"
34 :
35 : #define PKT_TRACE(obj, arg) \
36 : do { \
37 : std::ostringstream _str; \
38 : _str << arg; \
39 : Pkt##obj::TraceMsg(PacketTraceBuf, __FILE__, __LINE__, _str.str()); \
40 : } while (false) \
41 :
42 : const std::size_t PktTrace::kPktMaxTraceSize;
43 :
44 : ////////////////////////////////////////////////////////////////////////////////
45 : namespace {
46 22 : inline bool IsBfdkeepalivePkt(const AgentHdr &hdr)
47 : {
48 : // For the BFD traped packets, BFD state info is available in 'cmd_param'
49 22 : if ((hdr.cmd == AGENT_TRAP_BFD) && (hdr.cmd_param == 0xc0)) {
50 0 : return true;
51 : }
52 22 : return false;
53 : }
54 :
55 : } // namespace
56 :
57 2 : PktHandler::PktHandler(Agent *agent, PktModule *pkt_module) :
58 4 : stats_(), agent_(agent), pkt_module_(pkt_module),
59 2 : work_queue_(TaskScheduler::GetInstance()->GetTaskId("Agent::PktHandler"), 0,
60 : boost::bind(&PktHandler::ProcessPacket, this, _1)),
61 2 : work_queue_bfd_ka_(TaskScheduler::GetInstance()->GetTaskId("Agent::BFD_KA"), 0,
62 2 : boost::bind(&PktHandler::ProcessBfdDataPacket, this, _1))
63 : {
64 2 : work_queue_.set_name("Packet Handler Queue");
65 2 : work_queue_.set_measure_busy_time(agent_->MeasureQueueDelay());
66 : // BFD keep alive packet processing
67 2 : work_queue_bfd_ka_.set_name("Packet BFD Data Queue");
68 2 : work_queue_bfd_ka_.set_measure_busy_time(agent_->MeasureQueueDelay());
69 34 : for (int i = 0; i < MAX_MODULES; ++i) {
70 32 : if (i == PktHandler::DHCP || i == PktHandler::DHCPV6 ||
71 : i == PktHandler::DNS)
72 6 : pkt_trace_.at(i).set_pkt_trace_size(512);
73 : else
74 26 : pkt_trace_.at(i).set_pkt_trace_size(128);
75 : }
76 2 : }
77 :
78 4 : PktHandler::~PktHandler() {
79 2 : work_queue_.Shutdown();
80 4 : }
81 :
82 16 : void PktHandler::Register(PktModuleName type, Proto *proto) {
83 16 : proto_list_.at(type) = proto;
84 16 : if (type == BFD) {
85 1 : bfd_keepalive_proto_ = proto;
86 : }
87 16 : }
88 :
89 0 : uint32_t PktHandler::EncapHeaderLen() const {
90 0 : return agent_->pkt()->control_interface()->EncapsulationLength();
91 : }
92 :
93 : // Send packet to tap interface
94 112 : void PktHandler::Send(const AgentHdr &hdr, const PacketBufferPtr &buff) {
95 112 : stats_.PktSent(PktHandler::PktModuleName(buff->module()));
96 112 : pkt_trace_.at(buff->module()).AddPktTrace(PktTrace::Out, buff->data_len(),
97 : buff->data(), &hdr);
98 112 : if (agent_->pkt()->control_interface()->Send(hdr, buff) <= 0) {
99 0 : PKT_TRACE(Err, "Error sending packet");
100 : }
101 112 : return;
102 : }
103 :
104 25 : void PktHandler::CalculatePortIP(PktInfo *pkt) {
105 25 : const Interface *in = NULL;
106 25 : const VmInterface *intf = NULL;
107 25 : const Interface *pkt_in_intf = NULL;
108 25 : bool ingress= false;
109 : VmInterface::FatFlowIgnoreAddressType ignore_addr;
110 : uint8_t protocol;
111 :
112 : const NextHop *nh =
113 25 : agent()->nexthop_table()->FindNextHop(pkt->agent_hdr.nh);
114 25 : if (!nh) {
115 25 : return;
116 : }
117 :
118 25 : if (nh->GetType() == NextHop::INTERFACE) {
119 21 : const InterfaceNH *intf_nh = static_cast<const InterfaceNH *>(nh);
120 21 : in = intf_nh->GetInterface();
121 4 : } else if (nh->GetType() == NextHop::VLAN) {
122 0 : const VlanNH *vlan_nh = static_cast<const VlanNH *>(nh);
123 0 : in = vlan_nh->GetInterface();
124 4 : } else if (nh->GetType() == NextHop::COMPOSITE) {
125 0 : const CompositeNH *comp_nh = static_cast<const CompositeNH *>(nh);
126 :
127 0 : if (comp_nh->composite_nh_type() == Composite::LOCAL_ECMP) {
128 0 : in = comp_nh->GetFirstLocalEcmpMemberInterface();
129 : }
130 : }
131 :
132 25 : if (in) {
133 21 : intf = dynamic_cast<const VmInterface *>(in);
134 : }
135 :
136 25 : if (!intf) {
137 4 : return;
138 : }
139 :
140 21 : if (intf->fat_flow_list().list_.size() == 0) {
141 18 : return;
142 : }
143 :
144 3 : if (intf->ExcludeFromFatFlow(pkt->family, pkt->ip_saddr, pkt->ip_daddr)) {
145 0 : return;
146 : }
147 3 : uint16_t sport = pkt->sport;
148 3 : if (pkt->ip_proto == IPPROTO_ICMP || pkt->ip_proto == IPPROTO_IGMP) {
149 0 : sport = 0;
150 : }
151 :
152 3 : protocol = pkt->ip_proto;
153 : /*
154 : * For ICMPv6 change the protocol to ICMP as the same value is used when it is
155 : * stored in the fat flow rule
156 : */
157 3 : if (pkt->ip_proto == IPPROTO_ICMPV6) {
158 0 : protocol = IPPROTO_ICMP;
159 : }
160 :
161 3 : pkt->is_fat_flow_src_prefix = false;
162 3 : pkt->is_fat_flow_dst_prefix = false;
163 :
164 3 : if (pkt->sport < pkt->dport) {
165 2 : if (intf->IsFatFlowPortBased(protocol, sport, &ignore_addr)) {
166 2 : pkt->dport = 0;
167 2 : pkt->ignore_address = ignore_addr;
168 2 : return;
169 : }
170 :
171 0 : if (intf->IsFatFlowPortBased(protocol, pkt->dport, &ignore_addr)) {
172 0 : pkt->sport = 0;
173 0 : pkt->ignore_address = ignore_addr;
174 0 : return;
175 : }
176 : } else {
177 1 : if (intf->IsFatFlowPortBased(protocol, pkt->dport, &ignore_addr)) {
178 1 : if (pkt->dport == pkt->sport) {
179 0 : pkt->same_port_number = true;
180 : }
181 1 : pkt->sport = 0;
182 1 : pkt->ignore_address = ignore_addr;
183 1 : return;
184 : }
185 :
186 0 : if (intf->IsFatFlowPortBased(protocol, sport, &ignore_addr)) {
187 0 : pkt->dport = 0;
188 0 : pkt->ignore_address = ignore_addr;
189 0 : return;
190 : }
191 : }
192 : /* If Fat-flow port is 0, then both source and destination ports have to
193 : * be ignored */
194 0 : if (intf->IsFatFlowPortBased(protocol, 0, &ignore_addr)) {
195 0 : pkt->sport = 0;
196 0 : pkt->dport = 0;
197 0 : pkt->ignore_address = ignore_addr;
198 0 : return;
199 : }
200 :
201 : // Check for fat flow based on prefix aggregation
202 0 : pkt_in_intf = Agent::GetInstance()->interface_table()->FindInterface(
203 0 : pkt->agent_hdr.ifindex);
204 0 : ingress = PktFlowInfo::ComputeDirection(pkt_in_intf);
205 : // set the src and dst prefix to src & dst ip to begin with
206 0 : pkt->ip_ff_src_prefix = pkt->ip_saddr;
207 0 : pkt->ip_ff_dst_prefix = pkt->ip_daddr;
208 0 : intf->IsFatFlowPrefixAggregation(ingress, protocol,
209 0 : (uint16_t *)&pkt->sport,
210 0 : (uint16_t *) &pkt->dport,
211 : &pkt->same_port_number,
212 : &pkt->ip_ff_src_prefix,
213 : &pkt->ip_ff_dst_prefix,
214 : &pkt->is_fat_flow_src_prefix,
215 : &pkt->is_fat_flow_dst_prefix,
216 : &pkt->ignore_address);
217 : }
218 :
219 47 : bool PktHandler::IsBFDHealthCheckPacket(const PktInfo *pkt_info,
220 : const Interface *intrface) {
221 47 : if (intrface->type() == Interface::VM_INTERFACE &&
222 74 : pkt_info->ip_proto == IPPROTO_UDP &&
223 27 : (pkt_info->dport == BFD_SINGLEHOP_CONTROL_PORT ||
224 27 : pkt_info->dport == BFD_MULTIHOP_CONTROL_PORT ||
225 27 : pkt_info->dport == BFD_ECHO_PORT)) {
226 0 : return true;
227 : }
228 :
229 47 : return false;
230 : }
231 :
232 47 : bool PktHandler::IsSegmentHealthCheckPacket(const PktInfo *pkt_info,
233 : const Interface *intrface) {
234 74 : if (intrface->type() == Interface::VM_INTERFACE &&
235 27 : pkt_info->ip_proto == IPPROTO_ICMP) {
236 0 : if (pkt_info->icmp_chksum == 0xffff) {
237 0 : return true;
238 : }
239 0 : return pkt_info->is_segment_hc_pkt;
240 : }
241 47 : return false;
242 : }
243 :
244 : // Validate the BFD Keepalive packet received
245 0 : PktHandler::PktModuleName PktHandler::ParseBfdDataPacket(const AgentHdr &hdr,
246 : PktInfo *pkt_info,
247 : uint8_t *pkt) {
248 :
249 0 : PktType::Type pkt_type = PktType::INVALID;
250 0 : pkt_info->agent_hdr = hdr;
251 :
252 0 : int len = 0, ret = 0;
253 :
254 : // Parse packet before computing forwarding mode. Packet is parsed
255 : // independent of packet forwarding mode
256 0 : len += ParseEthernetHeader(pkt_info, (pkt + len));
257 0 : if (pkt_info->ether_type != ETHERTYPE_IP) {
258 0 : agent_->stats()->incr_pkt_drop_due_to_invalid_ethertype();
259 0 : return INVALID;
260 : }
261 :
262 : // IP Packets
263 0 : ret = ParseIpPacket(pkt_info, pkt_type, (pkt + len));
264 0 : if(ret == -1) {
265 0 : return INVALID;
266 : }
267 0 : len += ret;
268 :
269 : // Check if the BFD KA packet
270 0 : if (pkt_info->ip_proto == IPPROTO_UDP &&
271 0 : pkt_info->dport == BFD_SINGLEHOP_CONTROL_PORT &&
272 0 : (pkt_info->data[0] & 0x20)) {
273 0 : return BFD;
274 : }
275 :
276 0 : return INVALID;
277 : }
278 :
279 : // Process the packet received from tap interface
280 25 : PktHandler::PktModuleName PktHandler::ParsePacket(const AgentHdr &hdr,
281 : PktInfo *pkt_info,
282 : uint8_t *pkt) {
283 25 : PktType::Type pkt_type = PktType::INVALID;
284 25 : Interface *intf = NULL;
285 :
286 25 : pkt_info->agent_hdr = hdr;
287 25 : if (!IsValidInterface(hdr.ifindex, &intf)) {
288 0 : return INVALID;
289 : }
290 :
291 : // Parse packet before computing forwarding mode. Packet is parsed
292 : // indepndent of packet forwarding mode
293 25 : if (ParseUserPkt(pkt_info, intf, pkt_type, pkt) < 0) {
294 0 : return INVALID;
295 : }
296 :
297 25 : if (pkt_info->agent_hdr.cmd == AgentHdr::TRAP_TOR_CONTROL_PKT) {
298 : // In case of a control packet from a TOR served by us, the ifindex
299 : // is modified to index of the VM interface; validate this interface.
300 0 : if (!IsValidInterface(pkt_info->agent_hdr.ifindex, &intf)) {
301 0 : return INVALID;
302 : }
303 : }
304 :
305 25 : pkt_info->vrf = pkt_info->agent_hdr.vrf;
306 :
307 :
308 25 : if (hdr.cmd == AgentHdr::TRAP_MAC_MOVE ||
309 25 : hdr.cmd == AgentHdr::TRAP_MAC_LEARN ||
310 25 : hdr.cmd == AgentHdr::TRAP_MAC_IP_LEARNING) {
311 0 : return MAC_LEARNING;
312 : }
313 :
314 25 : bool is_flow_packet = IsFlowPacket(pkt_info);
315 : // Look for DHCP packets if corresponding service is enabled
316 : // Service processing over-rides ACL/Flow and forwarding configuration
317 25 : if (!is_flow_packet && intf->dhcp_enabled() && (pkt_type == PktType::UDP)) {
318 0 : if (pkt_info->ip && (pkt_info->dport == DHCP_SERVER_PORT ||
319 0 : pkt_info->sport == DHCP_CLIENT_PORT)) {
320 0 : return DHCP;
321 : }
322 : }
323 25 : if (!is_flow_packet && intf->dhcp_enabled_v6() && (pkt_type == PktType::UDP)) {
324 0 : if (pkt_info->ip6 && (pkt_info->dport == DHCPV6_SERVER_PORT ||
325 0 : pkt_info->sport == DHCPV6_CLIENT_PORT)) {
326 0 : return DHCPV6;
327 : }
328 : }
329 :
330 : // Handle ARP packet
331 25 : if (pkt_type == PktType::ARP) {
332 0 : return ARP;
333 : }
334 :
335 : // Packets needing flow
336 25 : if (is_flow_packet) {
337 25 : CalculatePortIP(pkt_info);
338 25 : if ((pkt_info->ip && pkt_info->family == Address::INET) ||
339 0 : (pkt_info->ip6 && pkt_info->family == Address::INET6)) {
340 25 : return FLOW;
341 : } else {
342 0 : PKT_TRACE(Err, "Flow trap for non-IP packet for interface "
343 : "index <" << hdr.ifindex << ">");
344 0 : agent_->stats()->pkt_drop_due_to_flow_trap();
345 0 : return INVALID;
346 : }
347 : }
348 :
349 : // Look for DNS packets if corresponding service is enabled
350 : // Service processing over-rides ACL/Flow
351 0 : if (intf->dns_enabled() && (pkt_type == PktType::UDP)) {
352 0 : if (pkt_info->dport == DNS_SERVER_PORT) {
353 0 : return DNS;
354 : }
355 : }
356 :
357 : // Look for IP packets that need ARP resolution
358 0 : if (pkt_info->ip && hdr.cmd == AgentHdr::TRAP_RESOLVE) {
359 0 : return ARP;
360 : }
361 :
362 : // send time exceeded ICMP messages to diag module
363 0 : if (IsDiagPacket(pkt_info)) {
364 0 : return DIAG;
365 : }
366 :
367 0 : if (IsBFDHealthCheckPacket(pkt_info, intf)) {
368 0 : return BFD;
369 : }
370 :
371 0 : if (pkt_type == PktType::ICMP && IsSegmentHealthCheckPacket(pkt_info,
372 : intf)) {
373 0 : return DIAG;
374 : }
375 :
376 0 : if (pkt_type == PktType::ICMP && IsGwPacket(intf, pkt_info->ip_daddr)) {
377 0 : return ICMP;
378 : }
379 :
380 : // Look for IP6 packets that need NDP resolution
381 0 : if (pkt_info->ip6 && hdr.cmd == AgentHdr::TRAP_RESOLVE) {
382 0 : return ICMPV6;
383 : }
384 :
385 0 : if (pkt_type == PktType::ICMPV6) {
386 0 : if (hdr.cmd == AgentHdr::TRAP_HANDLE_DF) {
387 0 : return ICMPV6_ERROR;
388 : }
389 0 : return ICMPV6;
390 : }
391 :
392 0 : if (pkt_type == PktType::IGMP) {
393 0 : return IGMP;
394 : }
395 :
396 0 : if(pkt_info->ip6 && hdr.cmd == AgentHdr::TRAP_HANDLE_DF) {
397 0 : return ICMPV6_ERROR;
398 : }
399 :
400 0 : if (pkt_info->ip && hdr.cmd == AgentHdr::TRAP_HANDLE_DF) {
401 0 : return ICMP_ERROR;
402 : }
403 :
404 0 : if (hdr.cmd == AgentHdr::TRAP_DIAG && (pkt_info->ip || pkt_info->ip6)) {
405 0 : return DIAG;
406 : }
407 :
408 0 : return INVALID;
409 : }
410 :
411 22 : void PktHandler::HandleRcvPkt(const AgentHdr &hdr, const PacketBufferPtr &buff){
412 : // Enqueue packets to a workqueue to decouple from ASIO and run in
413 : // exclusion with DB
414 : boost::shared_ptr<PacketBufferEnqueueItem>
415 22 : info(new PacketBufferEnqueueItem(hdr, buff));
416 :
417 : // BFD Keepalive packets are enqueued to a seperate work queue
418 : // so that its job run without dependence on dB tasks
419 22 : if (IsBfdkeepalivePkt(hdr)) {
420 0 : work_queue_bfd_ka_.Enqueue(info);
421 : } else {
422 22 : work_queue_.Enqueue(info);
423 : }
424 22 : }
425 :
426 22 : bool PktHandler::ProcessPacket(boost::shared_ptr<PacketBufferEnqueueItem> item) {
427 22 : agent_->stats()->incr_pkt_exceptions();
428 22 : const AgentHdr &hdr = item->hdr;
429 22 : const PacketBufferPtr &buff = item->buff;
430 22 : boost::shared_ptr<PktInfo> pkt_info (new PktInfo(buff));
431 22 : uint8_t *pkt = buff->data();
432 22 : PktModuleName mod = ParsePacket(hdr, pkt_info.get(), pkt);
433 22 : PktModuleEnqueue(mod, hdr, pkt_info, pkt);
434 22 : if (mod == INVALID) {
435 0 : pkt_info.reset();
436 : }
437 :
438 22 : return true;
439 22 : }
440 : // Process BFD keepalives (BFD packets with state 'UP') in a seperate task
441 : // that is independent of Db Task
442 0 : bool PktHandler::ProcessBfdDataPacket(boost::shared_ptr<PacketBufferEnqueueItem> item) {
443 0 : const AgentHdr &hdr = item->hdr;
444 0 : const PacketBufferPtr &buff = item->buff;
445 0 : uint8_t *pkt = buff->data();
446 : PktModuleName mod;
447 :
448 : // In certain Agent restart race condition, Agent would restart but
449 : // vrouter is not in sync yet with agent restart. In this condition
450 : // there could be BFD packets from the prior session could be trapped
451 : // in vrouter and injected to the Agent's PKT0 i/f So, don't process
452 : // BFD packets received, prior to agent init done.
453 0 : if (!bfd_keepalive_proto_ || (agent_->init_done() == false)) {
454 0 : PKT_TRACE(Err, "BFD proto_list is not Initialized. Ignoring");
455 0 : return true;
456 : }
457 :
458 0 : boost::shared_ptr<PktInfo> pkt_info (new PktInfo(buff));
459 0 : mod = ParseBfdDataPacket(hdr, pkt_info.get(), pkt);
460 0 : if (mod == BFD) {
461 0 : pkt_info->is_bfd_keepalive = true;
462 0 : pkt_info->packet_buffer()->set_module(mod);
463 0 : bfd_keepalive_proto_->Enqueue(pkt_info);
464 0 : proto_list_.at(BFD)->ProcessStats(PktStatsType::PKT_RX_ENQUEUE);
465 : } else {
466 0 : proto_list_.at(BFD)->ProcessStats(PktStatsType::PKT_RX_DROP_STATS);
467 0 : pkt_info.reset();
468 : }
469 0 : return true;
470 0 : }
471 :
472 22 : void PktHandler::PktModuleEnqueue(PktModuleName mod, const AgentHdr &hdr,
473 : boost::shared_ptr<PktInfo> pkt_info,
474 : uint8_t * pkt) {
475 22 : pkt_info->packet_buffer()->set_module(mod);
476 22 : stats_.PktRcvd(mod);
477 22 : if (mod == INVALID) {
478 0 : agent_->stats()->incr_pkt_dropped();
479 0 : pkt_trace_.at(mod).AddPktTrace(PktTrace::In, pkt_info->len, pkt, &hdr);
480 0 : return;
481 : }
482 22 : Enqueue(mod, pkt_info);
483 : }
484 :
485 : // Compute L2/L3 forwarding mode for pacekt.
486 : // Forwarding mode is L3 if,
487 : // - Packet uses L3 label
488 : // - DMAC in packet is VRRP Mac or VHOST MAC or receiving physical interface MAC
489 : // Else forwarding mode is L2
490 0 : bool PktHandler::ComputeForwardingMode(PktInfo *pkt_info,
491 : const Interface *intf) const {
492 0 : if (pkt_info->tunnel.type.GetType() == TunnelType::MPLS_GRE ||
493 0 : pkt_info->tunnel.type.GetType() == TunnelType::MPLS_UDP) {
494 0 : return pkt_info->l3_label;
495 : }
496 :
497 0 : if (pkt_info->dmac == agent_->vrrp_mac()) {
498 0 : return true;
499 : }
500 :
501 0 : if (pkt_info->dmac == agent_->vhost_interface()->mac()) {
502 0 : return true;
503 : }
504 :
505 0 : if (intf && intf->type() == Interface::PHYSICAL &&
506 0 : pkt_info->dmac == intf->mac()) {
507 0 : return true;
508 : }
509 :
510 0 : return false;
511 : }
512 :
513 25 : void PktHandler::SetOuterIp(PktInfo *pkt_info, uint8_t *pkt) {
514 25 : if (pkt_info->ether_type != ETHERTYPE_IP) {
515 0 : return;
516 : }
517 25 : struct ip *ip_hdr = (struct ip *)pkt;
518 25 : pkt_info->tunnel.ip = ip_hdr;
519 25 : pkt_info->tunnel.ip_saddr = ntohl(ip_hdr->ip_src.s_addr);
520 25 : pkt_info->tunnel.ip_daddr = ntohl(ip_hdr->ip_dst.s_addr);
521 : }
522 :
523 25 : void PktHandler::SetOuterMac(PktInfo *pkt_info) {
524 25 : pkt_info->tunnel.eth = pkt_info->eth;
525 25 : }
526 :
527 :
528 37 : int PktHandler::ParseEthernetHeader(PktInfo *pkt_info, uint8_t *pkt) {
529 37 : int len = 0;
530 37 : pkt_info->eth = (struct ether_header *) (pkt + len);
531 37 : pkt_info->smac = MacAddress(pkt_info->eth->ether_shost);
532 37 : pkt_info->dmac = MacAddress(pkt_info->eth->ether_dhost);
533 37 : pkt_info->ether_type = ntohs(pkt_info->eth->ether_type);
534 37 : len += sizeof(struct ether_header);
535 :
536 : //strip service vlan and customer vlan in packet
537 37 : while (pkt_info->ether_type == ETHERTYPE_VLAN ||
538 37 : pkt_info->ether_type == ETHERTYPE_QINQ) {
539 0 : pkt_info->ether_type = ntohs(*((uint16_t *)(pkt + len + 2)));
540 0 : len += VLAN_HDR_LEN;
541 : }
542 :
543 37 : if (pkt_info->ether_type == ETHERTYPE_PBB) {
544 : //Parse inner payload
545 0 : pkt_info->pbb_header = (uint32_t *)(pkt + len);
546 0 : pkt_info->b_smac = pkt_info->smac;
547 0 : pkt_info->b_dmac = pkt_info->dmac;
548 0 : pkt_info->i_sid = ntohl(*(pkt_info->pbb_header)) & 0x00FFFFFF;
549 0 : len += ParseEthernetHeader(pkt_info, pkt + len + PBB_HEADER_LEN);
550 : }
551 :
552 37 : return len;
553 : }
554 :
555 36 : int PktHandler::ParseIpPacket(PktInfo *pkt_info, PktType::Type &pkt_type,
556 : uint8_t *pkt) {
557 36 : int len = 0;
558 36 : uint16_t ip_payload_len = 0;
559 36 : if (pkt_info->ether_type == ETHERTYPE_IP) {
560 36 : struct ip *ip = (struct ip *)(pkt + len);
561 36 : pkt_info->ip = ip;
562 36 : pkt_info->family = Address::INET;
563 36 : pkt_info->ip_saddr = IpAddress(Ip4Address(ntohl(ip->ip_src.s_addr)));
564 36 : pkt_info->ip_daddr = IpAddress(Ip4Address(ntohl(ip->ip_dst.s_addr)));
565 36 : pkt_info->ip_proto = ip->ip_p;
566 36 : pkt_info->ttl = ip->ip_ttl;
567 36 : uint8_t ip_header_len = (ip->ip_hl << 2);
568 36 : ip_payload_len = ip->ip_len - ip_header_len;
569 36 : len += ip_header_len;
570 0 : } else if (pkt_info->ether_type == ETHERTYPE_IPV6) {
571 0 : pkt_info->family = Address::INET6;
572 0 : ip6_hdr *ip = (ip6_hdr *)(pkt + len);
573 0 : pkt_info->ip6 = ip;
574 0 : pkt_info->ip = NULL;
575 : Ip6Address::bytes_type addr;
576 :
577 0 : for (int i = 0; i < 16; i++) {
578 0 : addr[i] = ip->ip6_src.s6_addr[i];
579 : }
580 0 : pkt_info->ip_saddr = IpAddress(Ip6Address(addr));
581 :
582 0 : for (int i = 0; i < 16; i++) {
583 0 : addr[i] = ip->ip6_dst.s6_addr[i];
584 : }
585 0 : pkt_info->ip_daddr = IpAddress(Ip6Address(addr));
586 0 : pkt_info->ttl = ip->ip6_hlim;
587 :
588 0 : uint8_t proto = ip->ip6_ctlun.ip6_un1.ip6_un1_nxt;
589 0 : len += sizeof(ip6_hdr);
590 0 : if (proto == IPPROTO_FRAGMENT) {
591 0 : ip6_frag *nxt = (ip6_frag *)(pkt + len);
592 0 : proto = nxt->ip6f_nxt;
593 0 : len += sizeof(ip6_frag);
594 : }
595 :
596 0 : pkt_info->ip_proto = proto;
597 : } else {
598 0 : LOG(ERROR,
599 : "Error EthType = Non IP/IPv6. BackTrace: " << AgentBackTrace(1));
600 0 : agent_->stats()->incr_pkt_drop_due_to_invalid_ethertype();
601 0 : return -1;
602 : }
603 :
604 36 : switch (pkt_info->ip_proto) {
605 28 : case IPPROTO_UDP : {
606 28 : pkt_info->transp.udp = (udphdr *) (pkt + len);
607 28 : len += sizeof(udphdr);
608 28 : pkt_info->data = (pkt + len);
609 :
610 28 : pkt_info->dport = ntohs(pkt_info->transp.udp->uh_dport);
611 28 : pkt_info->sport = ntohs(pkt_info->transp.udp->uh_sport);
612 28 : pkt_type = PktType::UDP;
613 28 : break;
614 : }
615 :
616 0 : case IPPROTO_TCP : {
617 0 : pkt_info->transp.tcp = (tcphdr *) (pkt + len);
618 0 : len += sizeof(tcphdr);
619 0 : pkt_info->data = (pkt + len);
620 :
621 0 : pkt_info->dport = ntohs(pkt_info->transp.tcp->th_dport);
622 0 : pkt_info->sport = ntohs(pkt_info->transp.tcp->th_sport);
623 0 : pkt_info->tcp_ack = pkt_info->transp.tcp->th_flags & TH_ACK;
624 0 : pkt_type = PktType::TCP;
625 0 : break;
626 : }
627 :
628 0 : case IPPROTO_SCTP : {
629 0 : pkt_info->transp.sctp = (sctphdr *) (pkt + len);
630 0 : len += sizeof(sctphdr);
631 0 : pkt_info->data = (pkt + len);
632 0 : pkt_info->dport = ntohs(pkt_info->transp.sctp->th_dport);
633 0 : pkt_info->sport = ntohs(pkt_info->transp.sctp->th_sport);
634 0 : pkt_type = PktType::SCTP;
635 0 : break;
636 : }
637 :
638 0 : case IPPROTO_ICMP: {
639 0 : pkt_info->transp.icmp = (struct icmp *) (pkt + len);
640 0 : pkt_type = PktType::ICMP;
641 :
642 0 : struct icmp *icmp = (struct icmp *)(pkt + len);
643 0 : pkt_info->icmp_chksum = icmp->icmp_cksum;
644 0 : pkt_info->dport = htons(icmp->icmp_type);
645 0 : if (icmp->icmp_type == ICMP_ECHO || icmp->icmp_type == ICMP_ECHOREPLY) {
646 0 : pkt_info->dport = ICMP_ECHOREPLY;
647 0 : pkt_info->sport = htons(icmp->icmp_id);
648 0 : pkt_info->data = (pkt + len + ICMP_MINLEN);
649 0 : uint16_t icmp_payload_len = ip_payload_len - ICMP_MINLEN;
650 0 : if (icmp_payload_len >= sizeof(AgentDiagPktData)) {
651 0 : AgentDiagPktData *ad = (AgentDiagPktData *)pkt_info->data;
652 0 : string value(ad->data_, (sizeof(ad->data_) - 1));
653 0 : if (value.compare(0, (sizeof(ad->data_) - 1),
654 0 : DiagTable::kDiagData) == 0) {
655 0 : pkt_info->is_segment_hc_pkt = true;
656 : }
657 0 : }
658 0 : } else if (IsFlowPacket(pkt_info) &&
659 0 : ((icmp->icmp_type == ICMP_DEST_UNREACH) ||
660 0 : (icmp->icmp_type == ICMP_TIME_EXCEEDED))) {
661 : //Agent has to look at inner payload
662 : //and recalculate the parameter
663 : //Handle this only for packets requiring flow miss
664 0 : if(ParseIpPacket(pkt_info, pkt_type, pkt + len + sizeof(icmp)) == -1) {
665 0 : return -1;
666 : }
667 : //Swap the key parameter, which would be used as key
668 0 : IpAddress src_ip = pkt_info->ip_saddr;
669 0 : pkt_info->ip_saddr = pkt_info->ip_daddr;
670 0 : pkt_info->ip_daddr = src_ip;
671 0 : if (pkt_info->ip_proto != IPPROTO_ICMP) {
672 0 : uint16_t port = pkt_info->sport;
673 0 : pkt_info->sport = pkt_info->dport;
674 0 : pkt_info->dport = port;
675 : }
676 : } else {
677 0 : pkt_info->sport = 0;
678 : }
679 0 : break;
680 : }
681 :
682 0 : case IPPROTO_ICMPV6: {
683 0 : pkt_type = PktType::ICMPV6;
684 0 : icmp6_hdr *icmp = (icmp6_hdr *)(pkt + len);
685 0 : pkt_info->transp.icmp6 = icmp;
686 :
687 0 : pkt_info->dport = htons(icmp->icmp6_type);
688 0 : if (icmp->icmp6_type == ICMP6_ECHO_REQUEST ||
689 0 : icmp->icmp6_type == ICMP6_ECHO_REPLY) {
690 0 : pkt_info->dport = ICMP6_ECHO_REPLY;
691 0 : pkt_info->sport = htons(icmp->icmp6_id);
692 0 : } else if (IsFlowPacket(pkt_info) &&
693 0 : icmp->icmp6_type < ICMP6_ECHO_REQUEST) {
694 : //Agent has to look at inner payload
695 : //and recalculate the parameter
696 : //Handle this only for packets requiring flow miss
697 0 : if(ParseIpPacket(pkt_info, pkt_type, pkt + len + sizeof(icmp)) == -1) {
698 0 : return -1;
699 : }
700 : //Swap the key parameter, which would be used as key
701 0 : IpAddress src_ip = pkt_info->ip_saddr;
702 0 : pkt_info->ip_saddr = pkt_info->ip_daddr;
703 0 : pkt_info->ip_daddr = src_ip;
704 0 : if (pkt_info->ip_proto != IPPROTO_ICMPV6) {
705 0 : uint16_t port = pkt_info->sport;
706 0 : pkt_info->sport = pkt_info->dport;
707 0 : pkt_info->dport = port;
708 : }
709 : } else {
710 0 : pkt_info->sport = 0;
711 : }
712 0 : break;
713 : }
714 :
715 0 : case IPPROTO_IGMP: {
716 0 : pkt_info->transp.igmp = (struct igmp *) (pkt + len);
717 0 : pkt_type = PktType::IGMP;
718 :
719 0 : pkt_info->dport = 0;
720 0 : pkt_info->sport = 0;
721 0 : pkt_info->data = (pkt + len);
722 0 : break;
723 : }
724 :
725 8 : default: {
726 8 : pkt_type = PktType::IP;
727 8 : pkt_info->dport = 0;
728 8 : pkt_info->sport = 0;
729 8 : break;
730 : }
731 : }
732 :
733 36 : return len;
734 : }
735 :
736 8 : int PktHandler::ParseControlWord(PktInfo *pkt_info, uint8_t *pkt,
737 : const MplsLabel *mpls) {
738 8 : uint32_t ret = 0;
739 8 : if (mpls->IsFabricMulticastReservedLabel() == true) {
740 : //Check if there is a control word
741 0 : uint32_t *control_word = (uint32_t *)(pkt);
742 0 : if (*control_word == kMulticastControlWord) {
743 0 : pkt_info->l3_label = false;
744 0 : ret += kMulticastControlWordSize + sizeof(VxlanHdr) +
745 : sizeof(udphdr) + sizeof(ip);
746 : }
747 8 : } else if (pkt_info->l3_label == false) {
748 6 : bool layer2_control_word = false;
749 : const InterfaceNH *intf_nh =
750 6 : dynamic_cast<const InterfaceNH *>(mpls->nexthop());
751 6 : if (intf_nh && intf_nh->layer2_control_word()) {
752 0 : layer2_control_word = true;
753 : }
754 :
755 : const CompositeNH *comp_nh =
756 6 : dynamic_cast<const CompositeNH *>(mpls->nexthop());
757 6 : if (comp_nh && comp_nh->layer2_control_word()) {
758 0 : layer2_control_word = true;
759 : }
760 :
761 : const VrfNH *vrf_nh =
762 6 : dynamic_cast<const VrfNH *>(mpls->nexthop());
763 6 : if (vrf_nh && vrf_nh->layer2_control_word()) {
764 0 : layer2_control_word = true;
765 : }
766 :
767 : //Check if there is a control word
768 6 : uint32_t *control_word = (uint32_t *)(pkt);
769 6 : if (layer2_control_word && *control_word == kMulticastControlWord) {
770 0 : ret += kMulticastControlWordSize;
771 : }
772 : }
773 :
774 8 : return ret;
775 : }
776 :
777 8 : int PktHandler::ParseMplsHdr(PktInfo *pkt_info, uint8_t *pkt) {
778 8 : MplsHdr *hdr = (MplsHdr *)(pkt);
779 :
780 : // MPLS Header validation. Check for,
781 : // - There is single label
782 8 : uint32_t mpls_host = ntohl(hdr->hdr);
783 8 : pkt_info->tunnel.label = (mpls_host & 0xFFFFF000) >> 12;
784 8 : uint32_t ret = sizeof(MplsHdr);
785 :
786 8 : if ((mpls_host & 0x100) == 0) {
787 : // interpret outer label 0xffffff as no label
788 0 : if (((mpls_host & 0xFFFFF000) >> 12) !=
789 : MplsTable::kInvalidLabel) {
790 0 : pkt_info->tunnel.label = MplsTable::kInvalidLabel;
791 0 : PKT_TRACE(Err, "Unexpected MPLS Label Stack. Ignoring");
792 0 : return -1;
793 : }
794 0 : hdr = (MplsHdr *) (pkt + 4);
795 0 : mpls_host = ntohl(hdr->hdr);
796 0 : pkt_info->tunnel.label = (mpls_host & 0xFFFFF000) >> 12;
797 0 : if ((mpls_host & 0x100) == 0) {
798 0 : pkt_info->tunnel.label = MplsTable::kInvalidLabel;
799 0 : PKT_TRACE(Err, "Unexpected MPLS Label Stack. Ignoring");
800 0 : return -1;
801 : }
802 0 : ret += sizeof(MplsHdr);
803 :
804 : }
805 :
806 8 : uint32_t label = pkt_info->tunnel.label;
807 8 : MplsLabelKey mpls_key(label);
808 :
809 8 : const MplsLabel *mpls = static_cast<const MplsLabel *>(
810 8 : agent_->mpls_table()->FindActiveEntry(&mpls_key));
811 8 : if (mpls == NULL) {
812 0 : PKT_TRACE(Err, "Invalid MPLS Label <" << label << ">. Ignoring");
813 0 : pkt_info->tunnel.label = MplsTable::kInvalidLabel;
814 0 : return -1;
815 : }
816 :
817 8 : pkt_info->l3_label = true;
818 8 : const InterfaceNH *nh = dynamic_cast<const InterfaceNH *>(mpls->nexthop());
819 8 : if (nh && nh->IsBridge()) {
820 6 : pkt_info->l3_label = false;
821 : }
822 :
823 8 : const CompositeNH *cnh = dynamic_cast<const CompositeNH *>(mpls->nexthop());
824 8 : if (cnh && cnh->composite_nh_type() != Composite::LOCAL_ECMP) {
825 0 : pkt_info->l3_label = false;
826 : }
827 :
828 8 : const VrfNH *vrf_nh = dynamic_cast<const VrfNH *>(mpls->nexthop());
829 8 : if (vrf_nh && vrf_nh->bridge_nh() == true) {
830 0 : pkt_info->l3_label = false;
831 : }
832 :
833 8 : ret += ParseControlWord(pkt_info, pkt + ret, mpls);
834 8 : return ret;
835 8 : }
836 :
837 : // Parse MPLSoGRE header
838 8 : int PktHandler::ParseMPLSoGRE(PktInfo *pkt_info, uint8_t *pkt) {
839 8 : GreHdr *gre = (GreHdr *)(pkt);
840 8 : if (gre->protocol != ntohs(VR_GRE_PROTO_MPLS)) {
841 0 : PKT_TRACE(Err, "Non-MPLS protocol <" << ntohs(gre->protocol) <<
842 : "> in GRE header");
843 0 : return -1;
844 : }
845 :
846 8 : int len = sizeof(GreHdr);
847 :
848 : int tmp;
849 8 : tmp = ParseMplsHdr(pkt_info, (pkt + len));
850 8 : if (tmp < 0) {
851 0 : return tmp;
852 : }
853 8 : len += tmp;
854 :
855 8 : if (pkt_info->l3_label == false) {
856 6 : tmp = ParseEthernetHeader(pkt_info, (pkt + len));
857 6 : if (tmp < 0)
858 0 : return tmp;
859 6 : len += tmp;
860 : }
861 :
862 8 : pkt_info->tunnel.type.SetType(TunnelType::MPLS_GRE);
863 8 : return (len);
864 : }
865 :
866 0 : int PktHandler::ParseMPLSoUDP(PktInfo *pkt_info, uint8_t *pkt) {
867 0 : int len = ParseMplsHdr(pkt_info, pkt);
868 0 : if (len < 0) {
869 0 : return len;
870 : }
871 :
872 0 : if (pkt_info->l3_label == false) {
873 0 : len += ParseEthernetHeader(pkt_info, (pkt + len));
874 : }
875 :
876 0 : pkt_info->tunnel.type.SetType(TunnelType::MPLS_UDP);
877 0 : return len;
878 : }
879 :
880 3 : int PktHandler::ParseVxlan(PktInfo *pkt_info, uint8_t *pkt) {
881 3 : VxlanHdr *vxlan = (VxlanHdr *)(pkt);
882 3 : pkt_info->tunnel.vxlan_id = htonl(vxlan->vxlan_id) >> 8;
883 :
884 3 : int len = sizeof(vxlan);
885 3 : len += ParseEthernetHeader(pkt_info, (pkt + len));
886 :
887 3 : pkt_info->tunnel.type.SetType(TunnelType::VXLAN);
888 3 : pkt_info->l3_label = false;
889 3 : return len;
890 : }
891 :
892 3 : int PktHandler::ParseUDPTunnels(PktInfo *pkt_info, uint8_t *pkt) {
893 3 : int len = 0;
894 3 : if (pkt_info->dport == VXLAN_UDP_DEST_PORT)
895 3 : len = ParseVxlan(pkt_info, (pkt + len));
896 0 : else if (pkt_info->dport == MPLS_OVER_UDP_DEST_PORT ||
897 0 : pkt_info->dport == IANA_MPLS_OVER_UDP_DEST_PORT)
898 0 : len = ParseMPLSoUDP(pkt_info, (pkt + len));
899 :
900 3 : return len;
901 : }
902 :
903 36 : bool PktHandler::ValidateIpPacket(PktInfo *pkt_info) {
904 : // For ICMP, IGMP, make sure IP is IPv4, else fail the parsing
905 : // so that we don't go ahead and access the ip header later
906 36 : if (((pkt_info->ip_proto == IPPROTO_ICMP) ||
907 36 : (pkt_info->ip_proto == IPPROTO_IGMP)) && (!pkt_info->ip)) {
908 0 : return false;
909 : }
910 : // If ip proto is ICMPv6, then make sure IP is IPv6, else fail
911 : // the parsing so that we don't go ahead and access the ip6 header
912 : // later
913 36 : if ((pkt_info->ip_proto == IPPROTO_ICMPV6) && (!pkt_info->ip6)) {
914 0 : return false;
915 : }
916 36 : return true;
917 : }
918 :
919 25 : int PktHandler::ParseUserPkt(PktInfo *pkt_info, Interface *intf,
920 : PktType::Type &pkt_type, uint8_t *pkt) {
921 25 : int len = 0, ret = 0;
922 : bool pkt_ok;
923 :
924 : // get to the actual packet header
925 25 : len += ParseEthernetHeader(pkt_info, (pkt + len));
926 :
927 : // Parse payload
928 25 : if (pkt_info->ether_type == ETHERTYPE_ARP) {
929 0 : pkt_info->arp = (ether_arp *) (pkt + len);
930 0 : pkt_type = PktType::ARP;
931 : union {
932 : uint8_t data[sizeof(in_addr_t)];
933 : in_addr_t addr;
934 : } bytes;
935 0 : memcpy(bytes.data, pkt_info->arp->arp_spa, sizeof(in_addr_t));
936 0 : in_addr_t spa = ntohl(bytes.addr);
937 0 : pkt_info->ip_saddr = IpAddress(Ip4Address(spa));
938 0 : return len;
939 : }
940 :
941 : // Identify NON-IP Packets
942 25 : if (pkt_info->ether_type != ETHERTYPE_IP &&
943 0 : pkt_info->ether_type != ETHERTYPE_IPV6) {
944 0 : pkt_info->data = (pkt + len);
945 0 : pkt_type = PktType::NON_IP;
946 0 : return len;
947 : }
948 : // Copy IP fields from outer header assuming tunnel is present. If tunnel
949 : // is not present, the values here will be ignored
950 25 : SetOuterMac(pkt_info);
951 25 : SetOuterIp(pkt_info, (pkt + len));
952 :
953 : // IP Packets
954 25 : ret = ParseIpPacket(pkt_info, pkt_type, (pkt + len));
955 25 : if(ret == -1) {
956 0 : return -1;
957 : }
958 25 : len += ret;
959 :
960 25 : if (!ValidateIpPacket(pkt_info)) {
961 0 : agent_->stats()->incr_pkt_invalid_ip_pkt();
962 0 : return -1;
963 : }
964 :
965 : // If packet is an IP fragment and not flow trap, ignore it
966 25 : if (IgnoreFragmentedPacket(pkt_info)) {
967 0 : agent_->stats()->incr_pkt_fragments_dropped();
968 0 : return -1;
969 : }
970 :
971 : // If it is a packet from TOR that we serve, dont parse any further
972 25 : if (IsManagedTORPacket(intf, pkt_info, pkt_type, (pkt + len), &pkt_ok)) {
973 0 : return len;
974 25 : } else if (!pkt_ok) {
975 : // invalid pkt received from tor
976 0 : agent_->stats()->incr_pkt_invalid_frm_tor();
977 0 : return -1;
978 : }
979 :
980 25 : if (IsDiagPacket(pkt_info) &&
981 0 : (pkt_info->agent_hdr.cmd != AgentHdr::TRAP_ROUTER_ALERT)) {
982 0 : return len;
983 : }
984 :
985 : // If tunneling is not enabled on interface or if it is a DHCP packet,
986 : // dont parse any further
987 : // if (intf->IsTunnelEnabled() == false || IsDHCPPacket(pkt_info)) {
988 25 : if (intf->IsTunnelEnabled() == false) {
989 14 : agent_->stats()->incr_pkt_drop_due_to_disable_tnl();
990 14 : return len;
991 : }
992 11 : else if (IsDHCPPacket(pkt_info)) {
993 0 : return len;
994 : }
995 :
996 11 : int tunnel_len = 0;
997 : // Do tunnel processing only if IP-DA is ours
998 22 : if (pkt_info->family == Address::INET &&
999 22 : pkt_info->ip_daddr.to_v4() == agent_->router_id()) {
1000 : // Look for supported headers
1001 11 : switch (pkt_info->ip_proto) {
1002 8 : case IPPROTO_GRE :
1003 : // Parse MPLSoGRE tunnel
1004 8 : tunnel_len = ParseMPLSoGRE(pkt_info, (pkt + len));
1005 8 : break;
1006 :
1007 3 : case IPPROTO_UDP:
1008 : // Parse MPLSoUDP tunnel
1009 3 : tunnel_len = ParseUDPTunnels(pkt_info, (pkt + len));
1010 3 : break;
1011 :
1012 0 : default:
1013 0 : break;
1014 : }
1015 : }
1016 :
1017 11 : if (tunnel_len < 0) {
1018 : // Found tunnel packet, but error in decoding
1019 0 : pkt_type = PktType::INVALID;
1020 0 : agent_->stats()->incr_pkt_drop_due_to_decode_error();
1021 0 : return tunnel_len;
1022 : }
1023 :
1024 11 : if (tunnel_len == 0) {
1025 0 : return len;
1026 : }
1027 :
1028 11 : len += tunnel_len;
1029 :
1030 : // Find IPv4/IPv6 Packet based on first nibble in payload
1031 11 : if (((pkt + len)[0] & 0x60) == 0x60) {
1032 0 : pkt_info->ether_type = ETHERTYPE_IPV6;
1033 : } else {
1034 11 : pkt_info->ether_type = ETHERTYPE_IP;
1035 : }
1036 :
1037 :
1038 11 : ret = ParseIpPacket(pkt_info, pkt_type, (pkt + len));
1039 11 : if(ret == -1) {
1040 0 : return -1;
1041 : }
1042 11 : len += ret;
1043 :
1044 : // validate inner iphdr
1045 11 : if (!ValidateIpPacket(pkt_info)) {
1046 0 : agent_->stats()->incr_pkt_invalid_ip_pkt();
1047 0 : return -1;
1048 : }
1049 :
1050 11 : return len;
1051 : }
1052 :
1053 : // Enqueue an inter-task message to the specified module
1054 97 : void PktHandler::SendMessage(PktModuleName mod, InterTaskMsg *msg) {
1055 97 : if (mod < MAX_MODULES) {
1056 97 : boost::shared_ptr<PktInfo> pkt_info(new PktInfo(mod, msg));
1057 97 : if (!(proto_list_.at(mod)->Enqueue(pkt_info))) {
1058 0 : PKT_TRACE(Err, "Threshold exceeded while enqueuing IPC Message <" <<
1059 : mod << ">");
1060 : }
1061 97 : }
1062 97 : }
1063 :
1064 25 : bool PktHandler::IgnoreFragmentedPacket(PktInfo *pkt_info) {
1065 25 : if (pkt_info->ip) {
1066 25 : uint16_t offset = htons(pkt_info->ip->ip_off);
1067 25 : if (((offset & IP_MF) || (offset & IP_OFFMASK)) &&
1068 0 : !IsFlowPacket(pkt_info))
1069 0 : return true;
1070 0 : } else if (pkt_info->ip6) {
1071 0 : uint8_t proto = pkt_info->ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt;
1072 0 : if (proto == IPPROTO_FRAGMENT) {
1073 0 : if (IsFlowPacket(pkt_info) ||
1074 0 : pkt_info->agent_hdr.cmd == AgentHdr::TRAP_HANDLE_DF) {
1075 0 : return false;
1076 : } else {
1077 0 : return true;
1078 : }
1079 : }
1080 : }
1081 :
1082 25 : return false;
1083 : }
1084 :
1085 11 : bool PktHandler::IsDHCPPacket(PktInfo *pkt_info) {
1086 : // Do not consider source port in case we are looking at UDP tunnel header
1087 11 : if (pkt_info->dport == VXLAN_UDP_DEST_PORT ||
1088 8 : pkt_info->dport == MPLS_OVER_UDP_DEST_PORT ||
1089 8 : pkt_info->dport == IANA_MPLS_OVER_UDP_DEST_PORT) {
1090 3 : return false;
1091 : }
1092 :
1093 8 : if (pkt_info->dport == DHCP_SERVER_PORT ||
1094 8 : pkt_info->sport == DHCP_CLIENT_PORT) {
1095 : // we dont handle DHCPv6 coming from fabric
1096 0 : return true;
1097 : }
1098 8 : return false;
1099 : }
1100 :
1101 1 : bool PktHandler::IsToRDevice(uint32_t vrf_id, const IpAddress &ip) {
1102 1 : if (agent()->tsn_enabled() == false)
1103 1 : return false;
1104 :
1105 0 : if (ip.is_v4() == false)
1106 0 : return false;
1107 0 : Ip4Address ip4 = ip.to_v4();
1108 :
1109 0 : VrfEntry *vrf = agent()->vrf_table()->FindVrfFromId(vrf_id);
1110 0 : if (vrf == NULL)
1111 0 : return false;
1112 :
1113 : BridgeAgentRouteTable *table = static_cast<BridgeAgentRouteTable *>
1114 0 : (vrf->GetBridgeRouteTable());
1115 0 : if (table == NULL)
1116 0 : return false;
1117 :
1118 0 : BridgeRouteEntry *rt = table->FindRoute(MacAddress::BroadcastMac());
1119 0 : if (rt == NULL)
1120 0 : return false;
1121 :
1122 0 : const CompositeNH *nh = dynamic_cast<const CompositeNH *>
1123 0 : (rt->GetActiveNextHop());
1124 0 : if (nh == NULL)
1125 0 : return false;
1126 :
1127 0 : ComponentNHList::const_iterator it = nh->begin();
1128 0 : while (it != nh->end()) {
1129 0 : const CompositeNH *tor_nh = dynamic_cast<const CompositeNH *>
1130 0 : ((*it)->nh());
1131 0 : if (tor_nh == NULL) {
1132 0 : it++;
1133 0 : continue;
1134 : }
1135 :
1136 0 : if (tor_nh->composite_nh_type() != Composite::TOR &&
1137 0 : tor_nh->composite_nh_type() != Composite::EVPN) {
1138 0 : it++;
1139 0 : continue;
1140 : }
1141 :
1142 0 : ComponentNHList::const_iterator tor_it = tor_nh->begin();
1143 0 : while (tor_it != tor_nh->end()) {
1144 0 : const TunnelNH *tun_nh = dynamic_cast<const TunnelNH *>
1145 0 : ((*tor_it)->nh());
1146 0 : if (tun_nh == NULL) {
1147 0 : tor_it++;
1148 0 : continue;
1149 : }
1150 0 : if (*tun_nh->GetDip() == ip4) {
1151 0 : return true;
1152 : }
1153 0 : tor_it++;
1154 : }
1155 :
1156 0 : it++;
1157 : }
1158 :
1159 0 : return false;
1160 : }
1161 :
1162 : // We can receive DHCP / DNS packets on physical port from TOR ports managed
1163 : // by a TOR services node. Check if the source mac is the mac address of a
1164 : // VM interface available in the node.
1165 25 : bool PktHandler::IsManagedTORPacket(Interface *intf, PktInfo *pkt_info,
1166 : PktType::Type &pkt_type, uint8_t *pkt, bool *pkt_ok) {
1167 25 : *pkt_ok = true;
1168 25 : if (intf->type() != Interface::PHYSICAL) {
1169 14 : return false;
1170 : }
1171 :
1172 11 : if (pkt_type != PktType::UDP || pkt_info->dport != VXLAN_UDP_DEST_PORT)
1173 8 : return false;
1174 :
1175 : // Get VXLAN id and point to original L2 frame after the VXLAN header
1176 3 : pkt += 8;
1177 :
1178 : // get to the actual packet header
1179 3 : pkt += ParseEthernetHeader(pkt_info, pkt);
1180 :
1181 : ether_addr addr;
1182 3 : memcpy(addr.ether_addr_octet, pkt_info->eth->ether_shost, ETH_ALEN);
1183 3 : MacAddress address(addr);
1184 3 : const VrfEntry *vrf = agent_->vrf_table()->
1185 3 : FindVrfFromId(pkt_info->agent_hdr.vrf);
1186 3 : if (vrf == NULL)
1187 0 : return false;
1188 : BridgeAgentRouteTable *bridge_table =
1189 3 : dynamic_cast<BridgeAgentRouteTable *>(vrf->GetBridgeRouteTable());
1190 3 : if(bridge_table == NULL) {
1191 0 : LOG(ERROR,
1192 : "Error bridge_table == NULL. BackTrace: " << AgentBackTrace(1));
1193 0 : _Exit(0);
1194 : }
1195 3 : const VmInterface *vm_intf = bridge_table->FindVmFromDhcpBinding(address);
1196 3 : if (vm_intf == NULL) {
1197 2 : return false;
1198 : }
1199 :
1200 1 : if (IsToRDevice(vm_intf->vrf_id(), pkt_info->ip_saddr) == false) {
1201 1 : return false;
1202 : }
1203 :
1204 : // update agent_hdr to reflect the VM interface data
1205 : // cmd_param is set to physical interface id
1206 0 : pkt_info->agent_hdr.cmd = AgentHdr::TRAP_TOR_CONTROL_PKT;
1207 0 : pkt_info->agent_hdr.cmd_param = pkt_info->agent_hdr.ifindex;
1208 0 : pkt_info->agent_hdr.ifindex = vm_intf->id();
1209 :
1210 : // Parse payload
1211 0 : if (pkt_info->ether_type == ETHERTYPE_ARP) {
1212 0 : pkt_info->arp = (ether_arp *) pkt;
1213 0 : pkt_type = PktType::ARP;
1214 0 : return true;
1215 : }
1216 :
1217 : // Identify NON-IP Packets
1218 0 : if (pkt_info->ether_type != ETHERTYPE_IP &&
1219 0 : pkt_info->ether_type != ETHERTYPE_IPV6) {
1220 0 : pkt_info->data = pkt;
1221 0 : pkt_type = PktType::NON_IP;
1222 0 : return true;
1223 : }
1224 :
1225 : // IP Packets
1226 0 : if(ParseIpPacket(pkt_info, pkt_type, pkt) == -1) {
1227 0 : *pkt_ok = false; //Don't process any further
1228 0 : return false;
1229 : }
1230 0 : if (!ValidateIpPacket(pkt_info)) {
1231 0 : *pkt_ok = false;
1232 0 : return false;
1233 : }
1234 0 : return true;
1235 : }
1236 :
1237 25 : bool PktHandler::IsFlowPacket(PktInfo *pkt_info) {
1238 25 : if (pkt_info->agent_hdr.cmd == AgentHdr::TRAP_FLOW_MISS ||
1239 0 : pkt_info->agent_hdr.cmd == AgentHdr::TRAP_FLOW_ACTION_HOLD) {
1240 25 : return true;
1241 : }
1242 0 : return false;
1243 : }
1244 :
1245 0 : bool PktHandler::IsFlowPacket(const AgentHdr &agent_hdr) {
1246 0 : if (agent_hdr.cmd == AgentHdr::TRAP_FLOW_MISS ||
1247 0 : agent_hdr.cmd == AgentHdr::TRAP_FLOW_ACTION_HOLD) {
1248 0 : return true;
1249 : }
1250 0 : return false;
1251 : }
1252 :
1253 25 : bool PktHandler::IsDiagPacket(PktInfo *pkt_info) {
1254 25 : if (pkt_info->agent_hdr.cmd == AgentHdr::TRAP_ZERO_TTL ||
1255 25 : pkt_info->agent_hdr.cmd == AgentHdr::TRAP_ICMP_ERROR
1256 25 : || pkt_info->agent_hdr.cmd == AgentHdr::TRAP_ROUTER_ALERT)
1257 0 : return true;
1258 25 : return false;
1259 : }
1260 :
1261 : // Check if the packet is destined to the VM's default GW
1262 0 : bool PktHandler::IsGwPacket(const Interface *intf, const IpAddress &dst_ip) {
1263 0 : if (!intf || intf->type() != Interface::VM_INTERFACE)
1264 0 : return false;
1265 :
1266 0 : const VmInterface *vm_intf = static_cast<const VmInterface *>(intf);
1267 0 : if (vm_intf->vmi_type() != VmInterface::GATEWAY) {
1268 : //Gateway interface doesnt have IP address
1269 0 : if (dst_ip.is_v6() && vm_intf->primary_ip6_addr().is_unspecified())
1270 0 : return false;
1271 0 : else if (dst_ip.is_v4() && vm_intf->primary_ip_addr().is_unspecified())
1272 0 : return false;
1273 : }
1274 0 : const VnEntry *vn = vm_intf->vn();
1275 0 : if (vn) {
1276 0 : const std::vector<VnIpam> &ipam = vn->GetVnIpam();
1277 0 : for (unsigned int i = 0; i < ipam.size(); ++i) {
1278 0 : if (dst_ip.is_v4()) {
1279 0 : if (!ipam[i].IsV4()) {
1280 0 : continue;
1281 : }
1282 0 : if (ipam[i].default_gw == dst_ip ||
1283 0 : ipam[i].dns_server == dst_ip) {
1284 0 : return true;
1285 : }
1286 : } else {
1287 0 : if (!ipam[i].IsV6()) {
1288 0 : continue;
1289 : }
1290 0 : if (ipam[i].default_gw == dst_ip ||
1291 0 : ipam[i].dns_server == dst_ip) {
1292 0 : return true;
1293 : }
1294 : }
1295 :
1296 : }
1297 : }
1298 :
1299 0 : return false;
1300 : }
1301 :
1302 25 : bool PktHandler::IsValidInterface(uint32_t ifindex, Interface **intrface) {
1303 25 : Interface *intf = agent_->interface_table()->FindInterface(ifindex);
1304 25 : if (intf == NULL) {
1305 0 : PKT_TRACE(Err, "Invalid interface index <" << ifindex << ">");
1306 0 : agent_->stats()->incr_pkt_invalid_interface();
1307 0 : return false;
1308 : }
1309 :
1310 25 : *intrface = intf;
1311 25 : return true;
1312 : }
1313 :
1314 0 : void PktHandler::PktTraceIterate(PktModuleName mod, PktTraceCallback cb) {
1315 0 : if (!cb.empty()) {
1316 0 : PktTrace &pkt(pkt_trace_.at(mod));
1317 0 : pkt.Iterate(cb);
1318 : }
1319 0 : }
1320 :
1321 22 : void PktHandler::PktStats::PktRcvd(PktModuleName mod) {
1322 22 : if (mod < MAX_MODULES)
1323 22 : received[mod]++;
1324 22 : }
1325 :
1326 112 : void PktHandler::PktStats::PktSent(PktModuleName mod) {
1327 112 : if (mod < MAX_MODULES)
1328 112 : sent[mod]++;
1329 112 : }
1330 :
1331 0 : void PktHandler::PktStats::PktQThresholdExceeded(PktModuleName mod) {
1332 0 : if (mod < MAX_MODULES)
1333 0 : q_threshold_exceeded[mod]++;
1334 0 : }
1335 :
1336 : ///////////////////////////////////////////////////////////////////////////////
1337 :
1338 25 : PktInfo::PktInfo(const PacketBufferPtr &buff) :
1339 25 : module(PktHandler::INVALID),
1340 25 : pkt(buff->data()), len(buff->data_len()), max_pkt_len(buff->buffer_len()),
1341 25 : data(), ipc(), family(Address::UNSPEC), type(PktType::INVALID), agent_hdr(),
1342 25 : ether_type(-1), ip_saddr(), ip_daddr(), ip_proto(), sport(), dport(),
1343 25 : ttl(0), icmp_chksum(0), tcp_ack(false), tunnel(),
1344 25 : l3_label(false), is_bfd_keepalive(false), is_segment_hc_pkt(false),
1345 25 : ignore_address(VmInterface::IGNORE_NONE), same_port_number(false),
1346 25 : is_fat_flow_src_prefix(false), ip_ff_src_prefix(),
1347 25 : is_fat_flow_dst_prefix(false), ip_ff_dst_prefix(),
1348 100 : eth(), arp(), ip(), ip6(), packet_buffer_(buff) {
1349 25 : transp.tcp = 0;
1350 25 : }
1351 :
1352 0 : PktInfo::PktInfo(const PacketBufferPtr &buff, const AgentHdr &hdr) :
1353 0 : pkt(buff->data()), len(buff->data_len()), max_pkt_len(buff->buffer_len()),
1354 0 : data(), ipc(), family(Address::UNSPEC), type(PktType::INVALID),
1355 0 : agent_hdr(hdr), ether_type(-1), ip_saddr(), ip_daddr(), ip_proto(), sport(),
1356 0 : dport(), ttl(0), icmp_chksum(0), tcp_ack(false), tunnel(),
1357 0 : l3_label(false), is_bfd_keepalive(false), is_segment_hc_pkt(false),
1358 0 : ignore_address(VmInterface::IGNORE_NONE), same_port_number(false),
1359 0 : is_fat_flow_src_prefix(false), ip_ff_src_prefix(),
1360 0 : is_fat_flow_dst_prefix(false), ip_ff_dst_prefix(),
1361 0 : eth(), arp(), ip(), ip6(), packet_buffer_(buff) {
1362 0 : transp.tcp = 0;
1363 0 : }
1364 :
1365 185 : PktInfo::PktInfo(Agent *agent, uint32_t buff_len, PktHandler::PktModuleName mod,
1366 185 : uint32_t mdata) :
1367 185 : module(mod),
1368 185 : len(), max_pkt_len(), data(), ipc(), family(Address::UNSPEC),
1369 185 : type(PktType::INVALID), agent_hdr(), ether_type(-1), ip_saddr(), ip_daddr(),
1370 370 : ip_proto(), sport(), dport(), ttl(0), icmp_chksum(0), tcp_ack(false),
1371 185 : tunnel(), l3_label(false), is_bfd_keepalive(false), is_segment_hc_pkt(false),
1372 185 : ignore_address(VmInterface::IGNORE_NONE), same_port_number(false),
1373 185 : is_fat_flow_src_prefix(false), ip_ff_src_prefix(),
1374 185 : is_fat_flow_dst_prefix(false), ip_ff_dst_prefix(),
1375 555 : eth(),arp(), ip(), ip6() {
1376 :
1377 : packet_buffer_ = agent->pkt()->packet_buffer_manager()->Allocate
1378 185 : (module, buff_len, mdata);
1379 185 : pkt = packet_buffer_->data();
1380 185 : len = packet_buffer_->data_len();
1381 185 : max_pkt_len = packet_buffer_->buffer_len();
1382 :
1383 185 : transp.tcp = 0;
1384 185 : }
1385 :
1386 123 : PktInfo::PktInfo(PktHandler::PktModuleName mod, InterTaskMsg *msg) :
1387 123 : module(mod),
1388 123 : pkt(), len(), max_pkt_len(0), data(), ipc(msg), family(Address::UNSPEC),
1389 123 : type(PktType::MESSAGE), agent_hdr(), ether_type(-1), ip_saddr(), ip_daddr(),
1390 246 : ip_proto(), sport(), dport(), ttl(0), icmp_chksum(0), tcp_ack(false),
1391 123 : tunnel(), l3_label(false), is_bfd_keepalive(false), is_segment_hc_pkt(false),
1392 123 : ignore_address(VmInterface::IGNORE_NONE), same_port_number(false),
1393 123 : is_fat_flow_src_prefix(false), ip_ff_src_prefix(),
1394 123 : is_fat_flow_dst_prefix(false), ip_ff_dst_prefix(),
1395 369 : eth(), arp(), ip(), ip6(), packet_buffer_() {
1396 123 : transp.tcp = 0;
1397 123 : }
1398 :
1399 663 : PktInfo::~PktInfo() {
1400 663 : }
1401 :
1402 141 : const AgentHdr &PktInfo::GetAgentHdr() const {
1403 141 : return agent_hdr;
1404 : }
1405 :
1406 22 : void PktInfo::reset_packet_buffer() {
1407 22 : packet_buffer_.reset();
1408 22 : }
1409 :
1410 5 : void PktInfo::AllocPacketBuffer(Agent *agent, uint32_t module, uint16_t len,
1411 : uint32_t mdata) {
1412 : packet_buffer_ = agent->pkt()->packet_buffer_manager()->Allocate
1413 5 : (module, len, mdata);
1414 5 : pkt = packet_buffer_->data();
1415 5 : len = packet_buffer_->data_len();
1416 5 : max_pkt_len = packet_buffer_->buffer_len();
1417 5 : }
1418 :
1419 112 : void PktInfo::set_len(uint32_t x) {
1420 112 : packet_buffer_->set_len(x);
1421 112 : len = x;
1422 112 : }
1423 :
1424 0 : void PktInfo::UpdateHeaderPtr() {
1425 0 : eth = (struct ether_header *)(pkt);
1426 0 : ip = (struct ip *)(eth + 1);
1427 0 : transp.tcp = (struct tcphdr *)(ip + 1);
1428 0 : }
1429 :
1430 0 : std::size_t PktInfo::hash(const Agent *agent,
1431 : const EcmpLoadBalance &ecmp_load_balance) const {
1432 0 : std::size_t seed = 0;
1433 : // Bug: 1687879
1434 : // Consider that source compute has 2 member ECMP - compute-1 and compute-2
1435 : // Compute-1 is chosed in hash is even and Compute-2 is chosen if hash
1436 : // is add.
1437 : //
1438 : // On Compute-1, the hash computation uses same 5-tuple and will always
1439 : // result in even number. As a result, flows from Compute-1 will go to
1440 : // even-numbered ECMP members and never odd-numbered members.
1441 : // If Compute-2 happens to have only 2 members, all flows go to ecmp-index
1442 : // 0 and never to 1
1443 : //
1444 : // Solution:
1445 : // We need to ensure that hash computed in Compute-1 and Compute-2 are
1446 : // different. We also want to have same hash on agent restarts. So, include
1447 : // vhost-ip also to compute hash
1448 0 : if (agent->params()->flow_use_rid_in_hash()) {
1449 0 : boost::hash_combine(seed, agent->router_id().to_ulong());
1450 : }
1451 :
1452 0 : if (family == Address::INET) {
1453 0 : if (ecmp_load_balance.is_source_ip_set()) {
1454 0 : boost::hash_combine(seed, ip_saddr.to_v4().to_ulong());
1455 : }
1456 0 : if (ecmp_load_balance.is_destination_ip_set()) {
1457 0 : boost::hash_combine(seed, ip_daddr.to_v4().to_ulong());
1458 : }
1459 0 : } else if (family == Address::INET6) {
1460 0 : if (ecmp_load_balance.is_source_ip_set()) {
1461 : uint32_t words[4];
1462 0 : memcpy(words, ip_saddr.to_v6().to_bytes().data(), sizeof(words));
1463 0 : boost::hash_combine(seed, words[0]);
1464 0 : boost::hash_combine(seed, words[1]);
1465 0 : boost::hash_combine(seed, words[2]);
1466 0 : boost::hash_combine(seed, words[3]);
1467 : }
1468 :
1469 0 : if (ecmp_load_balance.is_destination_ip_set()) {
1470 : uint32_t words[4];
1471 0 : memcpy(words, ip_daddr.to_v6().to_bytes().data(), sizeof(words));
1472 0 : boost::hash_combine(seed, words[0]);
1473 0 : boost::hash_combine(seed, words[1]);
1474 0 : boost::hash_combine(seed, words[2]);
1475 0 : boost::hash_combine(seed, words[3]);
1476 : }
1477 : } else {
1478 0 : assert(0);
1479 : }
1480 0 : if (ecmp_load_balance.is_ip_protocol_set()) {
1481 0 : boost::hash_combine(seed, ip_proto);
1482 : }
1483 0 : if (ecmp_load_balance.is_source_port_set()) {
1484 0 : boost::hash_combine(seed, sport);
1485 : }
1486 0 : if (ecmp_load_balance.is_destination_port_set()) {
1487 0 : boost::hash_combine(seed, dport);
1488 : }
1489 :
1490 : // When only the sport changes by 2, its observed that only the lower 32
1491 : // bits of hash changes. Just hash combine upper and lower 32 bit numbers
1492 : // to randomize in case of incremental sport numbers
1493 0 : std::size_t hash = 0;
1494 0 : boost::hash_combine(hash, (seed & 0xFFFFFFFF));
1495 0 : boost::hash_combine(hash, (seed >> 16));
1496 0 : return hash;
1497 : }
1498 :
1499 0 : uint32_t PktInfo::GetUdpPayloadLength() const {
1500 0 : if (ip_proto == IPPROTO_UDP) {
1501 0 : return ntohs(transp.udp->uh_ulen) - sizeof(udphdr);
1502 : }
1503 0 : return 0;
1504 : }
1505 :
1506 97 : void PktHandler::AddPktTrace(PktModuleName module, PktTrace::Direction dir,
1507 : const PktInfo *pkt) {
1508 97 : pkt_trace_.at(module).AddPktTrace(PktTrace::In, pkt->len, pkt->pkt,
1509 : &pkt->agent_hdr);
1510 97 : }
1511 :
1512 22 : void PktHandler::Enqueue(PktModuleName module,
1513 : boost::shared_ptr<PktInfo> pkt_info) {
1514 22 : if (!(proto_list_.at(module)->Enqueue(pkt_info))) {
1515 0 : stats_.PktQThresholdExceeded(module);
1516 : }
1517 22 : return;
1518 : }
1519 :
1520 : ///////////////////////////////////////////////////////////////////////////////
1521 209 : void PktTrace::Pkt::Copy(Direction d, std::size_t l, uint8_t *msg,
1522 : std::size_t pkt_trace_size, const AgentHdr *hdr) {
1523 209 : uint16_t hdr_len = sizeof(AgentHdr);
1524 209 : dir = d;
1525 209 : len = l + hdr_len;
1526 209 : memcpy(pkt, hdr, hdr_len);
1527 209 : memcpy(pkt + hdr_len, msg, std::min(l, (pkt_trace_size - hdr_len)));
1528 209 : }
1529 :
1530 209 : void PktTrace::AddPktTrace(Direction dir, std::size_t len, uint8_t *msg,
1531 : const AgentHdr *hdr) {
1532 209 : if (num_buffers_) {
1533 209 : end_ = (end_ + 1) % num_buffers_;
1534 209 : pkt_buffer_[end_].Copy(dir, len, msg, pkt_trace_size_, hdr);
1535 209 : count_ = std::min((count_ + 1), (uint32_t) num_buffers_);
1536 : }
1537 209 : }
|