/****************************************** * File: nt_bench.c * Purpose: benchmarks for NT. ************************************************************** * FILE: nt_bench.c * * DESCRIPTION: netapi user space transport * library test application : benchmarks * * REVISION HISTORY: rev 0.0.1 * * Copyright (c) Texas Instruments Incorporated 2010-2011 * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the * distribution. * * Neither the name of Texas Instruments Incorporated nor the names of * its contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *****************************************/ #include #include #include #include #include #include #include //#include "trie.h" #include "string.h" #include "netapi.h" //#include "pktio.h" #include "net_test.h" #include "net_test_sa_utils.h" #include "net_test_utils.h" #include "net_test_thread_utils.h" #include #if defined(DEVICE_K2H) #include #include #elif defined (DEVICE_K2K) #include #include #elif defined (DEVICE_K2L) #include #include #elif defined (DEVICE_K2E) #include #include #else /*Default */ #include #include #endif /* Device */ extern Rm_ServiceHandle *rmClientServiceHandle; extern Rm_ServiceHandle *rmClientServiceHandle; #define netapi_timing_start hplib_mUtilGetPmuCCNT static int scnt=0; static int QUIT=0; static int XMIT=0; static int CAP=0; volatile int RESET=0; //to reset stats __thread int our_core; void benchmarks1(void); void benchmarks2(Pktlib_HeapHandle h , int n_trials); void benchmarks3(Pktlib_HeapHandle h , int n_trials); //sig handler void netTest_utilMySig(int x) { QUIT=1; scnt+=1; printf(">ifdma-test: recv'd signal %d cnt=%d\n",x,scnt); if (scnt > 10) {printf(">ifdma-test: WARNING EXITING WITH PROPER SHUTDOWN, LUTS LEFT ACTIVE\n");exit(1);} } void recv_cb_net(struct PKTIO_HANDLE_Tag * channel, Ti_Pkt* p_recv[], PKTIO_METADATA_T meta[], int n_pkts, uint64_t ts ); /*************debug********************/ void netTest_utilDumpDescr(unsigned long *p, int n) { printf("--------dump of descriptor %d %x\n", n, (int) p); printf("> %x %x %x %x %x %x %x %x\n",p[0],p[1],p[2],p[3],p[4],p[5],p[6],p[7]); printf("> %x %x %x %x %x %x %x %x\n",p[8],p[9],p[10],p[11],p[12],p[13],p[14],p[15]); printf("-----------------------------\n"); } void netTest_utilDumpHeader(unsigned long *p, int n, int a, int r) { printf("--------dump of header %d %x appID=%x flag1=%x\n", n, (int) p,a,r); printf("> %0x %0x %0x %0x %0x %0x %0x %0x\n", ntohl(p[0]),ntohl(p[1]),ntohl(p[2]),ntohl(p[3]), ntohl(p[4]),ntohl(p[5]),ntohl(p[6]),ntohl(p[7]) ); #if 0 printf("> %x %x %x %x %x %x %x %x\n",p[8],p[9],p[10],p[11],p[12],p[13],p[14],p[15]); printf("> %x %x %x %x %x %x %x %x\n",p[16],p[17],p[18],p[19],p[20],p[21],p[22],p[23]); printf("> %x %x %x %x %x %x %x %x\n",p[24],p[25],p[26],p[27],p[28],p[29],p[30],p[31]); #endif printf("-----------------------------\n"); } /*****************************************/ unsigned long long CALIB=0; unsigned long long calibrate_idle(void) { volatile unsigned long long at1; volatile unsigned long long at2; volatile unsigned long pt1; volatile unsigned long pt2; unsigned long long calib; at1 = hplib_mUtilGetTimestamp(); pt1=netapi_timing_start(); for(;;) { pt2=netapi_timing_start() ; if ((pt2-pt1) >= 100000) break; } at2 = hplib_mUtilGetTimestamp(); calib = ((unsigned long long) (pt2-pt1))/(at2-at1); printf("calibrate: arm time=%lld -> arm cycles=%d calib=%lld\n", at2-at1, pt2-pt1, calib); return calib; } /******************************************* *************NETAPI OBJECTS*************** *****************************************/ static NETAPI_CFG_T our_netapi_default_cfg= { TUNE_NETAPI_PERM_MEM_SZ, 128, //start of packet offset for hw to place data on rx for default flow TUNE_NETAPI_QM_CONFIG_MAX_DESC_NUM, //max number of descriptors in system TUNE_NETAPI_NUM_GLOBAL_DESC, //total we will use TUNE_NETAPI_DEFAULT_NUM_BUFFERS, //#descriptors+buffers in default heap 64, //#descriptors w/o buffers in default heap TUNE_NETAPI_DEFAULT_BUFFER_SIZE+128+128, //size of buffers in default heap 128, //tail room 256, //extra room 0, NULL, -1, -1 }; Pktlib_HeapHandle OurHeap; //default heap, used by producer PKTIO_CFG_T netcp_rx_cfg={PKTIO_RX, PKTIO_NA, PKTIO_NA, 8}; PKTIO_CFG_T netcp_tx_cfg={PKTIO_TX, PKTIO_NA, PKTIO_NA, 12}; PKTIO_HANDLE_T * netcp_rx_chan; PKTIO_HANDLE_T * netcp_tx_chan; NETAPI_T netapi_handle; PKTIO_CONTROL_T zap_channel_control={PKTIO_CLEAR, NULL}; PKTIO_CONTROL_T poll_cannel_control={PKTIO_SET_POLL_FLAGS, NULL, nwal_POLL_DEFAULT_GLOB_PKT_Q}; //template for fast path nwalTxPktInfo_t txPktInfoNoCrypto = { NULL, /* p_pkt */ NWAL_TX_FLAG1_META_DATA_VALID, /* txFlags */ 0, /* lpbackPass */ 0, /* enetport */ 0, /* msuSize */ 0, /* startOffset */ 0, /* saOffBytes */ 0, /* saPayLoadLen */ 0 , /* saAhIcvOffBytes */ 0, /* saAhMacSize */ 0, /* etherLenOffBytes */ netTest_MAC_HEADER_LEN, /* ipOffBytes */ netTest_MAC_HEADER_LEN +netTest_IP_HEADER_LEN, /* l4OffBytes */ netTest_UDP_HEADER_LEN, /* l4HdrLen */ 0, /* pseudoHdrChecksum */ 0 /* pLoadLen */ }; NETCP_CFG_ROUTE_T test_route= { 0, NULL, NULL, 0, 0, 0, 1 }; /*************************END NETAPI OBJECTS***********************/ static unsigned char all_mac[]={0,0,0,0,0,0}; static unsigned long last_header[32/sizeof(unsigned long)]; //stats int pkt_rx=0; int pkt_tx=0; unsigned long long pkt_rx_cycles=0L; unsigned long long pkt_tx_cycles=0L; unsigned long long pkt_cb_cycles=0L; unsigned long long idle_cycles=0L; volatile unsigned long long start_time; unsigned long long end_time; //********************************** //producer thread //********************************* void producer_thread(int coreid) { int err; int i; Ti_Pkt * tip; unsigned char * pData; int len; PKTIO_METADATA_T meta = {PKTIO_META_IFDMA_TX,{0},0}; int np; cpu_set_t cpu_set; unsigned long t1; unsigned long t2; CPU_ZERO( &cpu_set); CPU_SET( 2, &cpu_set); hplib_utilSetupThread(0, &cpu_set, hplib_spinLock_Type_LOL); start_time = hplib_mUtilGetTimestamp(); //DAL we poll the default pktio channel for pkts from net for(i=0;!((volatile int) QUIT);i++) { t1 = netapi_timing_start(); np = netapi_pktioPoll(netcp_rx_chan,NULL,&err); t2 = netapi_timing_start(); pkt_rx+=np; if (np) { pkt_rx_cycles += (t2-t1); } else { idle_cycles += (t2-t1); } if (RESET) { idle_cycles=0LL; start_time = hplib_mUtilGetTimestamp(); pkt_rx=pkt_tx=0; pkt_rx_cycles=pkt_tx_cycles=0LL; pkt_cb_cycles=0LL; RESET=0; } } end_time = hplib_mUtilGetTimestamp(); printf("nt-bench: receiver DONE %d pkts rx, pkt poll cycles=% u; pkt rx cycle=%u pkt tx cycles=%u idle cycles=%lld duration=%lld ticks idle pct= %lld\n", pkt_rx, (unsigned) (pkt_rx ? (pkt_rx_cycles - pkt_cb_cycles)/pkt_rx : 0), (unsigned) (pkt_rx ? (pkt_rx_cycles) /pkt_rx : 0), (unsigned) (pkt_tx ? (pkt_tx_cycles) /pkt_tx : 0), idle_cycles, (end_time-start_time), (idle_cycles*100) /( CALIB* (end_time-start_time)) ); printf("Leaving producer_thread\n"); } //****************************** // main program //***************************** int main(int argc, char **argv) { int err,i; int32_t errCode; Pktlib_HeapIfTable* pPktifTable; Pktlib_HeapCfg heapCfg; long t1, t2 ; cpu_set_t cpu_set; NETCP_CFG_ROUTE_T test_route; //install signal handler for ^c signal(SIGINT,netTest_utilMySig); if (initRm()) { printf("main: initRm() returned error\n"); exit(1); } CPU_ZERO( &cpu_set); CPU_SET( 0, &cpu_set); hplib_utilSetupThread(0, &cpu_set, hplib_spinLock_Type_LOL); /*******************************************/ /*************NETAPI STARTUP****************/ /*******************************************/ /* create netapi */ our_netapi_default_cfg.rmHandle = rmClientServiceHandle; netapi_handle = netapi_init(NETAPI_SYS_MASTER, &our_netapi_default_cfg); if (netapi_handle == NULL) { printf("main: netapi_init failure, exiting\n"); exit(1); } /* open the main heap */ OurHeap = Pktlib_findHeapByName("netapi"); if (!OurHeap) {printf("findheapbyname fail\n"); exit(1);} //if we want to relay network packets, we create a handle to the //default netcp receive queue here netcp_rx_chan= netapi_pktioOpen(netapi_handle, NETCP_RX, (PKTIO_CB) recv_cb_net, &netcp_rx_cfg, &err); if (!netcp_rx_chan) {printf("pktio open RX failed err=%d\n",err); exit(1);} netcp_tx_chan= netapi_pktioOpen(netapi_handle, NETCP_TX, (PKTIO_CB) NULL, &netcp_tx_cfg, &err); if (!netcp_tx_chan) {printf("pktio open TX failed err=%d\n",err); exit(1);} else //install a fast path template into the NETCP TX channel { PKTIO_CONTROL_T control2; control2.op = PKTIO_UPDATE_FAST_PATH; PKTIO_CFG_T cfg2; cfg2.fast_path_cfg.fp_send_option = PKTIO_FP_NO_CRYPTO_NO_CKSUM_PORT; cfg2.fast_path_cfg.txPktInfo= &txPktInfoNoCrypto; netapi_pktioControl(netcp_tx_chan, NULL, &cfg2, &control2, &err); } /*********************************************/ /*****************end NETAPI STARTUP**********/ /*********************************************/ /************************************************* ********************some basic benchmarks********* **************************************************/ printf("\n\n*******STARTING MEM ACCESS BENCHMARK*********\n\n"); benchmarks1(); printf("\n\n*******STARTING RAW BENCHMARK2*********\n\n"); benchmarks2(OurHeap, 20); printf("\n\n*******STARTING RAW BENCHMARK3*********\n\n"); benchmarks3(OurHeap, 20); printf("\n\n******STARTING RECV BENCHMARK (q to quit)*****\n\n"); //now creaate a simple netcp rule //to get a lot of packets memset(&test_route, 0, sizeof(NETCP_CFG_ROUTE_T)); test_route.valid_params = NETCP_CFG_VALID_PARAM_MATCH_ACTION_DEST; test_route.match_destination = NETCP_CFG_ACTION_TO_SW; test_route.routeType = 0; test_route.p_flow = NULL; test_route.p_dest_q = NULL; netapi_netcpCfgCreateMacInterface( netapi_handle, &all_mac[0], NULL, 0,0, (NETCP_CFG_ROUTE_HANDLE_T) &test_route, (NETCP_CFG_VLAN_T ) NULL , //future 0, 1, &err); //calibrate idle CALIB = calibrate_idle(); //************************************** //Create a worked thread //*************************************** { pthread_t *thrs; int procs =1; int c; thrs = malloc( sizeof( pthread_t ) * procs ); if (thrs == NULL) { perror( "malloc" ); return -1; } printf( "benchmark-test: Starting %d threads...\n", procs ); if (pthread_create( &thrs[0], NULL, (void*)producer_thread, (void *)0 )) { perror( "pthread_create" ); exit(1); } //this thread of execution (main) now just waits on user input for(;;) { printf(">"); c=getchar(); if (c=='c') {CAP=!CAP; printf("CAPTURE= %d\n", CAP); } else if (c=='q') {QUIT=1;break;} else if (c=='t') {XMIT=!XMIT; printf("XMIT= %d\n", XMIT); } else if (c=='s') { unsigned long long et= hplib_mUtilGetTimestamp(); printf(">NT_BENCH STATS: %d received %d xmitted, %lld idle cycles, %lld duration ticks idle=%lld\n", pkt_rx,pkt_tx, idle_cycles, (et-start_time), (idle_cycles*100)/(CALIB*(et-start_time))); Pktlib_HeapStats pktLibHeapStats; Pktlib_getHeapStats(OurHeap, &pktLibHeapStats); printf("main heap stats> #free=%d #zb=%d #garbage=%d\n", pktLibHeapStats.numFreeDataPackets, pktLibHeapStats.numZeroBufferPackets, pktLibHeapStats.numPacketsinGarbage); } else if (c=='r') { RESET=1; } else if (c=='h') { printf("> 'q' to quit, 's' for stats, 't' to toggle transmit 'c' to toggle capture, 'd' to dump capture, 'r' to reset idle counters, 'h' for help\n"); } else if (c=='d') { netTest_utilDumpHeader(&last_header[0], 0,0,0); } } //wait for completion printf("main task now pending on thread completion\n"); for (i = 0; i < procs; i++) pthread_join( thrs[i], NULL ); free( thrs ); } /************************************************* ************CLEAN UP**************************** ************************************************/ //get rid of rule, in the case that we are relaying packets //also close our netcp rx channel netapi_netcpCfgDelMac(netapi_handle,0,&err); netapi_pktioClose(netcp_rx_chan,&err); netapi_pktioClose(netcp_tx_chan,&err); //done netapi_shutdown(netapi_handle); //!finished! } static inline void sendit(Ti_Pkt *tip, int len, int out_port) { int err=0; PKTIO_METADATA_T meta2 = {PKTIO_META_TX,{0},0}; nwalTxPktInfo_t meta_tx2={0}; if (len<60) { unsigned int templen; char * p_pkt; len=60; Pktlib_getDataBuffer(tip,(uint8_t**)&p_pkt,&templen);//ignore templen Cppi_setData (Cppi_DescType_HOST, (Cppi_Desc *) tip, p_pkt,len); } Pktlib_setPacketLen(tip,len); meta_tx2.txFlag1 = NWAL_TX_FLAG1_META_DATA_VALID ; meta_tx2.ploadLen = len ; meta_tx2.enetPort=out_port; meta2.u.tx_meta=&meta_tx2; netapi_pktioSend(netcp_tx_chan,tip,&meta2,&err); } //receive callback for packets from net (for consumer) void recv_cb_net(struct PKTIO_HANDLE_Tag * channel, Ti_Pkt* p_recv[], PKTIO_METADATA_T meta[], int n_pkts, uint64_t ts ) { int i; Ti_Pkt * tip; int len; unsigned long t1; unsigned long t2; unsigned long t3; unsigned long t4; pasahoLongInfo_t* protoInfo; int ifno; int out_port; t1= netapi_timing_start(); for(i=0;i just output to other port! //---------------------------------------------------- if (ifno ==1) out_port=2; else out_port=1; t3= netapi_timing_start(); sendit(tip,len,out_port); pkt_tx+=1; t4= netapi_timing_start(); pkt_tx_cycles += (t4-t3); } else { Pktlib_freePacket((Ti_Pkt*)tip); } } t2 = netapi_timing_start(); pkt_cb_cycles += (t2-t1); } //SOME BENCHMARKS //sonme benchmarks void benchmarks1(void) { int i,j; unsigned long v1pop; unsigned long v2pop; unsigned long v1push; unsigned long v2push; unsigned long v1read; unsigned long v2read; unsigned long v1write; unsigned long v2write; unsigned long v1read2; unsigned long v2read2; #define N 100 Ti_Pkt pkts[N]; unsigned char * p_pkt; int len; int sum=0; int sum2=0; char *p=(char *) malloc(1000); if(p) { for(i=0;i<1000;i++) p[i] = i; //alloc v1pop=netapi_timing_start(); for(i=0;i