Alamo
Hash.H
Go to the documentation of this file.
1 #ifndef UTIL_HASH_HASH_H
2 #define UTIL_HASH_HASH_H
3 
4 #include <iostream>
5 #include <fstream>
6 #include <regex>
7 #include <filesystem>
8 #include <iomanip>
9 #include <sstream>
10 #include <vector>
11 #include <array>
12 #include <algorithm>
13 
14 namespace Util
15 {
16 namespace Hash
17 {
18 
// Check whether a string ends with a specific suffix.
//
// str:    string to inspect.
// suffix: suffix to look for; an empty suffix matches every string.
// Returns true when the last suffix.size() characters of str equal suffix.
//
// Declared inline because the definition lives in a header: without it,
// including this header from more than one translation unit produces
// multiple-definition errors at link time.
inline bool endsWith(const std::string& str, const std::string& suffix) {
    if (str.size() < suffix.size()) return false;
    return str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0;
}
24 
// Read a source file and return its contents with comments removed and
// whitespace normalized.
//
// filePath: path of the file to read.
// Returns the cleaned text, one '\n'-terminated line per input line that
// still has content.  Returns an empty string (after printing a message to
// std::cerr) when the file cannot be opened.
//
// Processing, per line:
//   * Comment markers are handled in the order they appear, so a "//" that
//     sits inside a /* ... */ comment does not hide the closing "*/", and any
//     number of block comments on one line are removed.
//   * Every run of whitespace is collapsed to a single space.
//   * Lines that are empty or whitespace-only after cleaning are dropped, so
//     adding or removing a comment-only line does not change the output.
//
// Limitation: string literals are not tracked, so "//" or "/*" inside a
// string literal is still treated as the start of a comment.
//
// Declared inline because the definition lives in a header.
inline std::string preprocessSourceFile(const std::string& filePath) {
    std::ifstream file(filePath);
    if (!file) {
        std::cerr << "Cannot open file: " << filePath << std::endl;
        return "";
    }

    // Built once; constructing a std::regex per line is needlessly expensive.
    const std::regex extraSpace("\\s+");

    std::ostringstream preprocessed;
    std::string line;
    bool inBlockComment = false;  // carries across lines

    while (std::getline(file, line)) {
        std::string code;  // the non-comment part of this line
        std::size_t pos = 0;

        while (pos < line.size()) {
            if (inBlockComment) {
                // Skip ahead to the end of the block comment, if it is on this line.
                const std::size_t endBlock = line.find("*/", pos);
                if (endBlock == std::string::npos) {
                    pos = line.size();
                } else {
                    pos = endBlock + 2;
                    inBlockComment = false;
                }
                continue;
            }

            const std::size_t lineComment = line.find("//", pos);
            const std::size_t startBlock = line.find("/*", pos);

            if (lineComment == std::string::npos && startBlock == std::string::npos) {
                // No more comments: the rest of the line is code.
                code.append(line, pos, std::string::npos);
                break;
            }

            if (lineComment < startBlock) {
                // A line comment comes first: keep what precedes it, drop the rest.
                code.append(line, pos, lineComment - pos);
                break;
            }

            // A block comment comes first: keep what precedes it and skip into it.
            code.append(line, pos, startBlock - pos);
            pos = startBlock + 2;
            inBlockComment = true;
        }

        // Collapse runs of whitespace to a single space.
        code = std::regex_replace(code, extraSpace, " ");

        // Append the cleaned line only if something other than whitespace is left.
        if (!code.empty() && code != " ") {
            preprocessed << code << "\n";
        }
    }

    return preprocessed.str();
}
79 
// Native implementation of SHA-256.
//
// Usage: feed data with update() (any number of calls), then call final() to
// get the digest as a 64-character lowercase hexadecimal string.  final()
// resets the object, so it can be reused for a new message afterwards.
class SHA256 {
public:
    SHA256() { reset(); }

    // Absorb `length` bytes starting at `data`.  Bytes are buffered in data_
    // and compressed into state_ each time a full 64-byte block is available.
    void update(const uint8_t* data, size_t length) {
        for (size_t i = 0; i < length; ++i) {
            data_[datalen_++] = data[i];
            if (datalen_ == 64) {
                transform();
                bitlen_ += 512;
                datalen_ = 0;
            }
        }
    }

    // Absorb the bytes of a string (the terminating NUL is not included).
    void update(const std::string& data) {
        update(reinterpret_cast<const uint8_t*>(data.data()), data.size());
    }

    // Finish the hash: pad the message, process the last block and return the
    // 32-byte digest as lowercase hex.  The object is reset afterwards so that
    // a following update()/final() sequence hashes a fresh message.
    std::string final() {
        uint8_t hash[32];
        pad();
        transform();
        toBytes(hash, state_, 8);

        std::ostringstream result;
        result << std::hex << std::setfill('0');
        for (const uint8_t byte : hash) {
            // Cast to int so the byte is printed as a number, not a character.
            result << std::setw(2) << static_cast<int>(byte);
        }

        reset();
        return result.str();
    }

    // Discard any buffered input and restore the initial hash state.
    void reset() {
        datalen_ = 0;
        bitlen_ = 0;
        state_ = { 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
                   0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 };
    }

private:
    std::array<uint32_t, 8> state_{};  // current hash value
    uint8_t data_[64] = {0};           // buffer for the block being filled
    uint32_t datalen_ = 0;             // number of valid bytes in data_
    uint64_t bitlen_ = 0;              // message bits already compressed

    // Round constants.  static constexpr so they are shared by all instances
    // and do not make the class non-copy-assignable.
    static constexpr std::array<uint32_t, 64> k_ = {
        0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b,
        0x59f111f1, 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01,
        0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7,
        0xc19bf174, 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
        0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, 0x983e5152,
        0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147,
        0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc,
        0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
        0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819,
        0xd6990624, 0xf40e3585, 0x106aa070, 0x19a4c116, 0x1e376c08,
        0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f,
        0x682e6ff3, 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
        0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
    };

    // Serialize `size` 32-bit words to bytes, most significant byte first.
    static void toBytes(uint8_t* output, const std::array<uint32_t, 8>& input, size_t size) {
        for (size_t i = 0; i < size; ++i) {
            output[i * 4 + 0] = static_cast<uint8_t>((input[i] >> 24) & 0xff);
            output[i * 4 + 1] = static_cast<uint8_t>((input[i] >> 16) & 0xff);
            output[i * 4 + 2] = static_cast<uint8_t>((input[i] >> 8) & 0xff);
            output[i * 4 + 3] = static_cast<uint8_t>((input[i] >> 0) & 0xff);
        }
    }

    // Rotate right; only called with 0 < n < 32.
    static uint32_t rotr(uint32_t x, uint32_t n) {
        return (x >> n) | (x << (32 - n));
    }

    static uint32_t choose(uint32_t e, uint32_t f, uint32_t g) {
        return (e & f) ^ (~e & g);
    }

    static uint32_t majority(uint32_t a, uint32_t b, uint32_t c) {
        return (a & b) ^ (a & c) ^ (b & c);
    }

    // Message-schedule functions (lower-case sigma).
    static uint32_t sig0(uint32_t x) {
        return rotr(x, 7) ^ rotr(x, 18) ^ (x >> 3);
    }

    static uint32_t sig1(uint32_t x) {
        return rotr(x, 17) ^ rotr(x, 19) ^ (x >> 10);
    }

    // Round functions (upper-case Sigma).  Each combines three rotations; using
    // a single rotation instead yields digests that are not SHA-256.
    static uint32_t bigSig0(uint32_t x) {
        return rotr(x, 2) ^ rotr(x, 13) ^ rotr(x, 22);
    }

    static uint32_t bigSig1(uint32_t x) {
        return rotr(x, 6) ^ rotr(x, 11) ^ rotr(x, 25);
    }

    // Compress the 64-byte block in data_ into state_.
    void transform() {
        std::array<uint32_t, 64> m;
        std::array<uint32_t, 8> v = state_;

        // Load the block as 16 big-endian words.  Widen each byte to uint32_t
        // before shifting: shifting the promoted int left by 24 would overflow
        // into the sign bit for bytes >= 0x80.
        for (size_t i = 0, j = 0; i < 16; ++i, j += 4) {
            m[i] = (static_cast<uint32_t>(data_[j]) << 24)
                 | (static_cast<uint32_t>(data_[j + 1]) << 16)
                 | (static_cast<uint32_t>(data_[j + 2]) << 8)
                 |  static_cast<uint32_t>(data_[j + 3]);
        }

        // Extend to the full 64-word message schedule.
        for (size_t i = 16; i < 64; ++i) {
            m[i] = sig1(m[i - 2]) + m[i - 7] + sig0(m[i - 15]) + m[i - 16];
        }

        // 64 rounds; v[0..7] are the working variables a..h.
        for (size_t i = 0; i < 64; ++i) {
            const uint32_t temp1 = v[7] + bigSig1(v[4]) + choose(v[4], v[5], v[6]) + k_[i] + m[i];
            const uint32_t temp2 = bigSig0(v[0]) + majority(v[0], v[1], v[2]);
            v[7] = v[6];
            v[6] = v[5];
            v[5] = v[4];
            v[4] = v[3] + temp1;
            v[3] = v[2];
            v[2] = v[1];
            v[1] = v[0];
            v[0] = temp1 + temp2;
        }

        for (size_t i = 0; i < 8; ++i) {
            state_[i] += v[i];
        }
    }

    // Append the padding to data_: a 0x80 byte, zeros, and the total message
    // length in bits as a 64-bit big-endian integer in bytes 56..63.  When the
    // buffered data leaves no room for the length, the current block is
    // compressed first and the length goes into an additional block.  The
    // caller compresses the final block.
    void pad() {
        size_t i = datalen_;

        data_[i++] = 0x80;
        if (datalen_ >= 56) {
            while (i < 64) {
                data_[i++] = 0x00;
            }
            transform();
            i = 0;
        }
        while (i < 56) {
            data_[i++] = 0x00;
        }

        bitlen_ += static_cast<uint64_t>(datalen_) * 8;
        for (size_t b = 0; b < 8; ++b) {
            data_[63 - b] = static_cast<uint8_t>(bitlen_ >> (8 * b));
        }
    }
};
230 
231 // Function to process and hash a single file
232 std::string processFile(const std::string& filePath) {
233  std::string preprocessed = preprocessSourceFile(filePath);
234  if (preprocessed.empty()) {
235  return "";
236  }
237 
238  SHA256 sha256;
239  sha256.update(preprocessed);
240  return sha256.final();
241 }
242 
243 // Recursive function to scan all files in a directory, process .cpp and .H files, and sort them
244 void processDirectory(const std::string& directory, std::vector<std::string>& sortedFiles) {
245  for (const auto& entry : std::filesystem::recursive_directory_iterator(directory)) {
246  if (entry.is_regular_file()) {
247  std::string filePath = entry.path().string();
248  if (endsWith(filePath, ".cpp") || endsWith(filePath, ".H")) {
249  sortedFiles.push_back(filePath);
250  }
251  }
252  }
253 
254  // Sort files in lexicographical order
255  std::sort(sortedFiles.begin(), sortedFiles.end());
256 }
257 
258 int getFinalHash(std::string srcDirectory) {
259  std::vector<std::string> sortedFiles;
260 
261  std::cout << "Scanning directory: " << srcDirectory << std::endl;
262  processDirectory(srcDirectory, sortedFiles);
263 
264  // Process and hash all files together
265  SHA256 sha256;
266  for (const auto& filePath : sortedFiles) {
267  std::cout << "Processing file: " << filePath << std::endl;
268  std::string fileHash = processFile(filePath);
269  sha256.update(fileHash);
270  }
271 
272  std::cout << "\nFinal Hash (SHA256 of all preprocessed content): " << sha256.final() << std::endl;
273 
274  return 0;
275 }
276 
277 
278 
279 }
280 }
281 
282 #endif
Util::Hash::SHA256::k_
const std::array< uint32_t, 64 > k_
Definition: Hash.H:127
Util::Hash::SHA256::bitlen_
uint64_t bitlen_
Definition: Hash.H:125
Util::Hash::SHA256::transform
void transform()
Definition: Hash.H:172
Util::Hash::SHA256::SHA256
SHA256()
Definition: Hash.H:83
Util::Hash::SHA256::update
void update(const uint8_t *data, size_t length)
Definition: Hash.H:85
Util::Hash::SHA256::pad
void pad()
Definition: Hash.H:202
Util::Hash::SHA256::choose
static uint32_t choose(uint32_t e, uint32_t f, uint32_t g)
Definition: Hash.H:156
Util::Hash::SHA256
Definition: Hash.H:81
Util
A collection of utility routines.
Definition: Set.cpp:7
Util::Hash::SHA256::final
std::string final()
Definition: Hash.H:100
Util::Hash::SHA256::majority
static uint32_t majority(uint32_t a, uint32_t b, uint32_t c)
Definition: Hash.H:160
Util::Hash::processDirectory
void processDirectory(const std::string &directory, std::vector< std::string > &sortedFiles)
Definition: Hash.H:244
Util::Hash::SHA256::sig0
static uint32_t sig0(uint32_t x)
Definition: Hash.H:164
Util::Hash::SHA256::data_
uint8_t data_[64]
Definition: Hash.H:123
Util::Hash::processFile
std::string processFile(const std::string &filePath)
Definition: Hash.H:232
Util::Hash::SHA256::state_
std::array< uint32_t, 8 > state_
Definition: Hash.H:122
Util::Hash::SHA256::sig1
static uint32_t sig1(uint32_t x)
Definition: Hash.H:168
Util::Hash::preprocessSourceFile
std::string preprocessSourceFile(const std::string &filePath)
Definition: Hash.H:26
Util::Hash::SHA256::rotr
static uint32_t rotr(uint32_t x, uint32_t n)
Definition: Hash.H:152
IO::hash
unsigned long hash
Definition: WriteMetaData.cpp:12
Util::Hash::SHA256::update
void update(const std::string &data)
Definition: Hash.H:96
Util::Hash::SHA256::toBytes
static void toBytes(uint8_t *output, const std::array< uint32_t, 8 > &input, size_t size)
Definition: Hash.H:143
Util::Hash::SHA256::reset
void reset()
Definition: Hash.H:114
Util::Hash::endsWith
bool endsWith(const std::string &str, const std::string &suffix)
Definition: Hash.H:20
Util::Hash::getFinalHash
int getFinalHash(std::string srcDirectory)
Definition: Hash.H:258
Util::Hash::SHA256::datalen_
uint32_t datalen_
Definition: Hash.H:124