// This is adapted from a benchmark written by John Ellis and Pete Kovac
// of Post Communications.
// It was modified by Hans Boehm of Silicon Graphics.
// Translated to C++ 30 May 1997 by William D Clinger of Northeastern Univ.
// Translated to C 15 March 2000 by Hans Boehm, now at HP Labs.
//
//      This is no substitute for real applications.  No actual application
//      is likely to behave in exactly this way.  However, this benchmark was
//      designed to be more representative of real applications than other
//      Java GC benchmarks of which we are aware.
//      It attempts to model those properties of allocation requests that
//      are important to current GC techniques.
//      It is designed to be used either to obtain a single overall performance
//      number, or to give a more detailed estimate of how collector
//      performance varies with object lifetimes.  It prints the time
//      required to allocate and collect balanced binary trees of various
//      sizes.  Smaller trees result in shorter object lifetimes.  Each cycle
//      allocates roughly the same amount of memory.
//      Two data structures are kept around during the entire process, so
//      that the measured performance is representative of applications
//      that maintain some live in-memory data.  One of these is a tree
//      containing many pointers.  The other is a large array containing
//      double precision floating point numbers.  Both should be of comparable
//      size.
//
//      The results are only really meaningful together with a specification
//      of how much memory was used.  It is possible to trade memory for
//      better time performance.  This benchmark should be run in a 32 MB
//      heap, though we don't currently know how to enforce that uniformly.
//
//      Unlike the original Ellis and Kovac benchmark, we do not attempt
//      to measure pause times.  This facility should eventually be added back
//      in.  There are several reasons for omitting it for now.  The original
//      implementation depended on assumptions about the thread scheduler
//      that don't hold uniformly.
The results really measure both the // scheduler and GC. Pause time measurements tend to not fit well with // current benchmark suites. As far as we know, none of the current // commercial Java implementations seriously attempt to minimize GC pause // times. #include #include #include #include "rl.h" #ifdef GC # include "gc.h" #endif #ifdef PROFIL extern void init_profiling(); extern dump_profile(); #endif // These macros were a quick hack for the Macintosh. // // #define currentTime() clock() // #define elapsedTime(x) ((1000*(x))/CLOCKS_PER_SEC) #define currentTime() stats_rtclock() #define elapsedTime(x) (x) /* Get the current time in milliseconds */ unsigned stats_rtclock( void ) { struct timeval t; struct timezone tz; if (gettimeofday( &t, &tz ) == -1) return 0; return (t.tv_sec * 1000 + t.tv_usec / 1000); } static const int kStretchTreeDepth = 18; // about 16Mb static const int kLongLivedTreeDepth = 16; // about 4Mb static const int kArraySize = 500000; // about 4Mb static const int kMinTreeDepth = 4; static const int kMaxTreeDepth = 16; typedef struct Node0_struct { Rl left; Rl right; int i, j; } Node0; typedef Rl* Node; #define TO_NODE(rl) ((Node0*) (rl)->ref) #ifdef DEBUG int allocated = 0; int freed = 0; #endif void init_Node(Node me, Node l, Node r) { Node0* node = TO_NODE(me); rl_set(&node->left, me->ref, l); rl_set(&node->right, me->ref, r); } void destroy_Node(void* me) { #ifdef DEBUG freed++; #endif Node0* node = (Node0*)me; if(node->left.ref) rl_free(&node->left); if(node->right.ref) rl_free(&node->right); } // Nodes used by a tree of a given size static int TreeSize(int i) { return ((1 << (i + 1)) - 1); } // Number of iterations to use for a given tree depth static int NumIters(int i) { return 2 * TreeSize(kStretchTreeDepth) / TreeSize(i); } // Build tree top down, assigning to older objects. 
static void Populate(int iDepth, Node thisNode) { Node0* node = TO_NODE(thisNode); if (iDepth<=0) { node->left.ref = NULL; node->right.ref = NULL; return; } else { iDepth--; rl_alloc(&node->left, node, sizeof(Node0), destroy_Node); rl_alloc(&node->right, node, sizeof(Node0), destroy_Node); #ifdef DEBUG allocated+=2; #endif Populate (iDepth, &node->left); Populate (iDepth, &node->right); } } // Build tree bottom-up static void MakeTree(Node result, int iDepth) { if (iDepth<=0) { rl_alloc(result, NULL, sizeof(Node0), destroy_Node); #ifdef DEBUG allocated++; #endif Node0* node = TO_NODE(result); node->left.ref = NULL; node->right.ref = NULL; /* result is implicitly initialized in both cases. */ } else { Rl left, right; MakeTree(&left, iDepth-1); MakeTree(&right, iDepth-1); rl_alloc(result, NULL, sizeof(Node0), destroy_Node); #ifdef DEBUG allocated++; #endif init_Node(result, &left, &right); rl_free(&left); rl_free(&right); } } static void PrintDiagnostics() { #if 0 long lFreeMemory = Runtime.getRuntime().freeMemory(); long lTotalMemory = Runtime.getRuntime().totalMemory(); System.out.print(" Total memory available=" + lTotalMemory + " bytes"); System.out.println(" Free memory=" + lFreeMemory + " bytes"); #endif } static void TimeConstruction(int depth) { long tStart, tFinish; int iNumIters = NumIters(depth); Rl tempTree; int i; printf("Creating %d trees of depth %d\n", iNumIters, depth); tStart = currentTime(); for (i = 0; i < iNumIters; ++i) { rl_alloc(&tempTree, NULL, sizeof(Node0), destroy_Node); Populate(depth, &tempTree); rl_free(&tempTree); } tFinish = currentTime(); printf("\tTop down construction took %d msec\n", elapsedTime(tFinish - tStart)); tStart = currentTime(); for (i = 0; i < iNumIters; ++i) { MakeTree(&tempTree, depth); rl_free(&tempTree); } tFinish = currentTime(); printf("\tBottom up construction took %d msec\n", elapsedTime(tFinish - tStart)); } int main() { Node root; Rl longLivedTree; Rl tempTree; long tStart, tFinish; long tElapsed; int i, d; 
double *array; printf("Garbage Collector Test\n"); printf(" Live storage will peak at %d bytes.\n\n", 2 * sizeof(Node0) * TreeSize(kLongLivedTreeDepth) + sizeof(double) * kArraySize); printf(" Stretching memory with a binary tree of depth %d\n", kStretchTreeDepth); PrintDiagnostics(); # ifdef PROFIL init_profiling(); # endif tStart = currentTime(); // Stretch the memory space quickly MakeTree(&tempTree, kStretchTreeDepth); #ifdef DEBUG printf("Made tree %i/%i\n", freed, allocated); #endif rl_free(&tempTree); #ifdef DEBUG printf("Deleted tree %i/%i\n", freed, allocated); #endif // Create a long lived object printf(" Creating a long-lived binary tree of depth %d\n", kLongLivedTreeDepth); rl_alloc(&longLivedTree, NULL, sizeof(Node0), destroy_Node); Populate(kLongLivedTreeDepth, &longLivedTree); #ifdef DEBUG printf("Populated tree %i/%i\n", freed, allocated); #endif // Create long-lived array, filling half of it printf(" Creating a long-lived array of %d doubles\n", kArraySize); array = malloc(kArraySize * sizeof(double)); for (i = 0; i < kArraySize/2; ++i) { array[i] = 1.0/i; } PrintDiagnostics(); for (d = kMinTreeDepth; d <= kMaxTreeDepth; d += 2) { TimeConstruction(d); } #ifdef DEBUG printf("Smol trees %i/%i\n", freed, allocated); #endif if (longLivedTree.ref == 0 || array[1000] != 1.0/1000) fprintf(stderr, "Failed\n"); // fake reference to LongLivedTree // and array // to keep them from being optimized away tFinish = currentTime(); tElapsed = elapsedTime(tFinish-tStart); PrintDiagnostics(); printf("Completed in %d msec\n", tElapsed); # ifdef PROFIL dump_profile(); # endif }