-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
0e8ce4d
commit daf5145
Showing
6 changed files
with
212 additions
and
37 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
#include <algorithm> | ||
#include <iostream> | ||
#include <set> | ||
#include <string> | ||
#include <vector> | ||
|
||
/**
 * Comparator that orders (value, count) pairs by descending count.
 *
 * Only the second member of each pair is inspected; the first member is
 * ignored, so pairs with equal counts compare as equivalent.
 *
 * @tparam T      Type of the first member of the pairs.
 * @param pairOne First input pair.
 * @param pairTwo Second input pair.
 * @return Returns true if the second value of pairOne is strictly larger
 * than that of pairTwo.
 */
template<class T>
bool sortbysecond(const std::pair<T,int>& pairOne, const std::pair<T,int>& pairTwo){
    // Taken by const reference: a sort calls the comparator many times, and
    // copying both pairs on each call is wasteful when T is e.g. std::string.
    return pairOne.second > pairTwo.second;
}
|
||
/**
 * Returns the most frequently occurring values in the given data.
 *
 * Values are ordered by descending frequency. Values with the same
 * frequency are returned in ascending order (as defined by operator<),
 * so the result is deterministic.
 *
 * @tparam T   Element type; must be copyable and support operator< and
 *             operator==.
 * @param data Vector of data.
 * @param topK Maximum number of values to return.
 * @return A vector of at most topK distinct values, most frequent first.
 *         Fewer than topK values are returned when data contains fewer
 *         than topK distinct values. An empty vector is returned for
 *         invalid input: topK is not positive, or data holds fewer than
 *         topK elements.
 */
template<class T>
std::vector<T> getTopK(const std::vector<T>& data, int topK){
    using SizeType = typename std::vector<T>::size_type;
    std::vector<T> resultVector;

    // Invalid input: non-positive topK, or data size less than topK.
    // The sign is checked first so the cast below can never wrap around.
    if(topK <= 0 || data.size() < static_cast<SizeType>(topK)) return resultVector;

    // Sort a copy so that equal values become adjacent
    std::vector<T> sortedData(data);
    std::sort(sortedData.begin(), sortedData.end());

    // Collapse each run of equal values into one (value, frequency) pair.
    // The pairs end up in ascending order of value.
    std::vector<std::pair<T,int> > dataCounter;
    for(const auto& value : sortedData){
        if(!dataCounter.empty() && dataCounter.back().first == value){
            ++dataCounter.back().second;
        } else {
            dataCounter.push_back(std::make_pair(value, 1));
        }
    }

    // Order by descending frequency. stable_sort keeps the ascending value
    // order among entries with the same frequency; plain std::sort would
    // leave the order of ties unspecified.
    std::stable_sort(dataCounter.begin(), dataCounter.end(),
        [](const std::pair<T,int>& lhs, const std::pair<T,int>& rhs){
            return lhs.second > rhs.second;
        });

    // Copy out exactly the first topK values (or all of them if fewer exist)
    const SizeType limit = std::min<SizeType>(dataCounter.size(), static_cast<SizeType>(topK));
    resultVector.reserve(limit);
    for(SizeType i = 0; i < limit; ++i){
        resultVector.push_back(dataCounter[i].first);
    }

    return resultVector;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
#include "src/topK.cpp" | ||
#include <gtest/gtest.h> | ||
|
||
|
||
// Empty input: there is nothing to rank, so the result must be empty.
TEST (SimpleIntVector, NullInputTest){
    const std::vector<int> emptyInput;
    const int requested = 3;
    const auto topValues = getTopK(emptyInput, requested);
    ASSERT_EQ(topValues.size(), 0);
}
|
||
// Asking for more values (11) than there are elements (10) is treated as
// invalid input and must produce an empty result.
TEST (SimpleIntVector, KLargerThanSizeOfData){
    const std::vector<int> input{1, 2, 3, 7, 3, 3, 2, 2, 3, 1};
    const int requested = 11;
    const auto topValues = getTopK(input, requested);
    ASSERT_EQ(topValues.size(), 0);
}
|
||
// Frequencies in the data: 3 occurs four times, 2 three times, 1 twice and
// 7 once, so the three most frequent values are 3, 2, 1 in that order.
TEST (SimpleIntVector, Sample1){
    std::vector<int> data{1, 2, 3, 7, 3, 3, 2, 2, 3, 1};
    int topK = 3;
    auto resultVector = getTopK(data, topK);
    // Guard the indexing below: reading past the end of a short result
    // would be undefined behaviour instead of a clean test failure.
    ASSERT_GE(resultVector.size(), 3u);
    ASSERT_EQ(resultVector[0], 3);
    ASSERT_EQ(resultVector[1], 2);
    ASSERT_EQ(resultVector[2], 1);
}
|
||
// topK (7) exceeds the number of distinct values (4), so every distinct
// value is returned, ordered by frequency: 7 (x5), 3 (x4), 2 (x3), 1 (x2).
TEST (SimpleIntVector, Sample2){
    std::vector<int> data{1, 2, 3, 7, 7, 7, 7, 7, 3, 3, 2, 2, 3, 1};
    int topK = 7;
    auto resultVector = getTopK(data, topK);
    // Check the size before indexing so a short result fails cleanly
    // instead of reading out of bounds.
    ASSERT_EQ(resultVector.size(), 4u);
    ASSERT_EQ(resultVector[0], 7);
    ASSERT_EQ(resultVector[1], 3);
    ASSERT_EQ(resultVector[2], 2);
    ASSERT_EQ(resultVector[3], 1);
}
|
||
// Works for char data as well: 'a' occurs five times and 'm' four times,
// more often than any other letter in the input.
TEST (SimpleIntVector, SampleCharacters){
    std::vector<char> data{'p','a','n','o','r','a','m','a','a','n','d','m','e','m','o','r','a','n','d','u','m'};
    int topK = 2;
    auto resultVector = getTopK(data, topK);
    // Guard the indexing below against a result that is too short.
    ASSERT_GE(resultVector.size(), 2u);
    ASSERT_EQ(resultVector[0], 'a');
    ASSERT_EQ(resultVector[1], 'm');
}
|
||
// Every character occurs exactly once, so all frequencies tie.
// Expect ascending order for equal frequency.
TEST (SimpleIntVector, SampleNonRepeatingCharacters){
    std::vector<char> data{'c','d','e','l','m','o','x','y','z','a','b'};
    int topK = 2;
    auto resultVector = getTopK(data, topK);
    // Guard the indexing below against a result that is too short.
    ASSERT_GE(resultVector.size(), 2u);
    ASSERT_EQ(resultVector[0], 'a');
    ASSERT_EQ(resultVector[1], 'b');
}
|
||
|
||
// Works for std::string data: "apples" and "the" each occur three times,
// more than any other word, and "apples" sorts before "the".
TEST (SimpleIntVector, SampleStrings){
    std::vector<std::string> data{"There", "are", "many", "many", "apples", "in", "the", "tree",
                                  "but", "the", "apples", "are", "really", "really", "small",
                                  "than", "the", "apples", "from", "last", "year"};
    int topK = 2;
    auto resultVector = getTopK(data, topK);
    // Guard the indexing below against a result that is too short.
    ASSERT_GE(resultVector.size(), 2u);
    ASSERT_EQ(resultVector[0], "apples");
    ASSERT_EQ(resultVector[1], "the");
}
|
||
int main(int argc, char** argv){ | ||
testing::InitGoogleTest(&argc, argv); | ||
return RUN_ALL_TESTS(); | ||
} |