Data Structures and Algorithms/Sorting Data/Bucket Sort in C
Appearance
(Redirected from BucketsortInC)
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * This is a demo program to show how good BucketSort can be. * * * * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * * Copyright (C) 2010 by Ingenieurbüro Henning, Kiel, Germany * * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published * * by the Free Software Foundation. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ #include <stdio.h> #define MAXKEYS 100000 // max. number of data records #define MAXWLNG 25 // max. length of the words in the data record #define MAXBUCKETS 20 // max. number of buckets struct FiAd // register of buckets with the star-points in linked lists { int StartIndex;// element in linked list where the bucket starts int LastIndex; // last element of the bucket (to find it fast) int NumInFifo; // number of references in the fifo }; struct FiFo // structure of the linked lists used as bucket { int PosIndex; // where in the table can this element be found int Next; // pointer to next element in this bucket }; struct DataRec // the structure of the test data { char TheChar; short TheShint; int TheInt; float TheFloat; double TheDouble; int ThePos; int TheLength; char TheKey[MAXWLNG + 1]; }; // local subroutines void BuckSortInt (struct DataRec* Source, struct FiFo* InFif, int Start, int NumLines, int NumBuckets, int* NewPosis, int* NewPosNr); void WriteText (char * TextName); struct DataRec DataRecs [MAXKEYS]; // the input list (unsorted) struct DataRec SortedRecs [MAXKEYS]; // the sorted output list int NumRecs; // number of data records // the input and output files; you have to adapt them to your needs char In_ListN_1 [40] = "/home/user/EinList.bin"; char OutTextT_2 [40] = "/home/user/BucketSort.txt"; int main(int argc, char *argv[]) { struct FiFo Positions [MAXKEYS]; // the sequence of records int NewPosis [MAXKEYS]; // where it is now int RecLng = sizeof(struct DataRec);// size of one data record int PosNr = 0; // position to store result int I, LastInt, NoBuckets; FILE *fp; NumRecs = 100000; // number of records to sort NoBuckets = 7; // number of recursion levels // we get the input data which we want to sort fp = fopen(In_ListN_1, "rb"); if(fp != NULL) { fread(DataRecs, NumRecs * sizeof(struct DataRec), 1, fp); fclose(fp); } else { printf("something is wrong with the file\n"); return; }; // we set up a linked list with references to all records for(I = 0; I < NumRecs; I++) // filling linked list { Positions[I].PosIndex = I; // record x is at position x Positions[I].Next = I + 1; // pointer to the next element }; Positions[I - 1].Next = -1; // end of the linked list // we indirectly sort the data records; correct sequence will be in NumRecs BuckSortInt(DataRecs, Positions, 0, NumRecs, NoBuckets, NewPosis, &PosNr); // we copy the records in correct order to the array SortedRecs for(I = 0; I < NumRecs; I++) { memmove(&SortedRecs[I], &DataRecs[NewPosis[I]], RecLng); }; // and we check for correct sorting LastInt = 0x80000000; for(I = 0; I < NumRecs; I++) { if(SortedRecs[I].TheInt < LastInt) { printf("sorting not correct\n"); return; }; LastInt = SortedRecs[I].TheInt; }; printf("sorting correct\n"); WriteText(OutTextT_2); } void BuckSortInt(struct DataRec* Source, struct FiFo* InFif, int Start, int NumLines, int NumBuckets, int* NewPosis, int* NewPosNr) { int I, K, Index, LastLast, FirstFree, Next; int StepSize, MinVal, MaxVal, ActVal; long long Interval; int BuckBorder[MAXBUCKETS]; // border values between buckets struct FiAd* FifoAdmin; // the fifo administration struct FiFo* Fifo; // the left fifo if(NumBuckets > MAXBUCKETS) return; // we reserve space for the chained lists, depending on length of table FifoAdmin = (struct FiAd*) malloc(NumBuckets * sizeof(struct FiAd)); Fifo = (struct FiFo*) malloc(NumLines * sizeof(struct FiFo)); for(K = 0; K < NumBuckets; K++) { FifoAdmin[K].StartIndex = FifoAdmin[K].LastIndex = -1; FifoAdmin[K].NumInFifo = 0; }; FirstFree = 0; // first free element // now we find the smallest and biggest element for the actual bucket MinVal = 0x7FFFFFFF; // min. value to biggest value MaxVal = 0x80000000; // max. value to smallest value Next = Start; // this is the next element while(Next >= 0) // while bucket is not empty ... { Index = InFif[Next].PosIndex; // next record of this bucket ActVal = Source[Index].TheInt; // the key value of the record if(ActVal < MinVal) MinVal = ActVal; // key smaller => swap value if(ActVal > MaxVal) MaxVal = ActVal; // key bigger => swap value Next = InFif[Next].Next; // ponter to next element }; // only identical key values => the record numbers go directly to the result if(MinVal == MaxVal) // all keys identical ? { Next = Start; // this is the next element while(Next >= 0) // while bucket is not empty ... { NewPosis[NewPosNr[0]] = InFif[Next].PosIndex;// final position of record NewPosNr[0]++; // we point to free element Next = InFif[Next].Next; // next element of linked list }; }; // now we make the border-array ready Interval = (long long) MaxVal - (long long) MinVal; // we get the domain StepSize = Interval / NumBuckets; // makes chunks of this size for(I = NumBuckets - 1; I > 0; I--) // loop over the buckets { BuckBorder[I] = MaxVal - StepSize; // lower border of the bucket MaxVal = MaxVal - StepSize; // lower border for next bucket }; BuckBorder[I] = MinVal; // this way is save //now we distribute the records of the input bucket to the new sub-buckets Next = Start; // this is the next element while(Next >= 0) // while bucket is not empty ... { Index = InFif[Next].PosIndex; // next record of this bucket ActVal = Source[Index].TheInt; // key value of the record for(I = NumBuckets - 1; I >= 0; I--) { if(ActVal >= BuckBorder[I]) // pointing to corrrect bucket? { if(FifoAdmin[I].StartIndex < 0) // already entries? { // this bucket is used the first time; we start the new linked list FifoAdmin[I].StartIndex = FirstFree; // start and end FifoAdmin[I].LastIndex = FirstFree; // are identical FifoAdmin[I].NumInFifo = 1; // one element in Fifo Fifo[FirstFree].PosIndex = Index; // number of record to list Fifo[FirstFree].Next = -1; // no further elements in list FirstFree++; } else { // this bucket was already used; we add the reference LastLast = FifoAdmin[I].LastIndex; // last entry FifoAdmin[I].NumInFifo++; // one element more in fifo FifoAdmin[I].LastIndex = FirstFree; // new last element Fifo[LastLast].Next = FirstFree; // linking ex-last element Fifo[FirstFree].PosIndex = Index; // the record number Fifo[FirstFree].Next = -1; // the linked list ends here FirstFree++; }; break; // record distributed }; }; Next = InFif[Next].Next; // next record of actual bucket }; // we make the recursion and call this function for any subbucket for(I = 0; I < NumBuckets; I++) { if(FifoAdmin[I].NumInFifo > 1) BuckSortInt(Source, Fifo, FifoAdmin[I].StartIndex, FifoAdmin[I].NumInFifo, NumBuckets, NewPosis, NewPosNr); else if(FifoAdmin[I].NumInFifo == 1) { NewPosis[NewPosNr[0]] = Fifo[FifoAdmin[I].StartIndex].PosIndex; NewPosNr[0]++; } }; // we must free the memory we got from the operating system free(Fifo); free(FifoAdmin); }; void WriteText(char * TextName) { FILE *fp; int I; char TChar; short TShint; int TInt; float TFloat; double TDouble; int TPos; int TLength; fp = fopen(TextName, "wb"); if(fp != NULL) { for(I = 0; I < NumRecs; I++) { TChar = SortedRecs[I].TheChar; TShint = SortedRecs[I].TheShint; TInt = SortedRecs[I].TheInt; TFloat = SortedRecs[I].TheFloat; TDouble = SortedRecs[I].TheDouble; TPos = SortedRecs[I].ThePos; TLength = SortedRecs[I].TheLength; fprintf(fp, "%6d %6d %12d %14e %14e %6d %4d %s\n", TChar, TShint, TInt, TFloat, TDouble, TPos, TLength, SortedRecs[I].TheKey); }; fclose(fp); } else { printf("Ausgabedatei %s kann nicht angelegt werden\n", TextName); return; }; };