Alamo
Hash.H
Go to the documentation of this file.
1#ifndef UTIL_HASH_HASH_H
2#define UTIL_HASH_HASH_H
3
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <filesystem>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <regex>
#include <sstream>
#include <string>
#include <vector>
13
14namespace Util
15{
16namespace Hash
17{
18
// Check whether `str` ends with `suffix`.
//
// Returns true when the last suffix.size() characters of `str` equal `suffix`
// (an empty suffix therefore always matches), false otherwise.
//
// Declared `inline` because this function is defined in a header: without it,
// every translation unit that includes the header emits its own external
// definition and the program fails to link (one-definition rule).
inline bool endsWith(const std::string& str, const std::string& suffix) {
    // A suffix longer than the string can never match; this guard also keeps
    // the reverse-iterator comparison below inside the bounds of `str`.
    if (str.size() < suffix.size()) return false;
    // Compare back-to-front so only the tail of `str` is inspected.
    return std::equal(suffix.rbegin(), suffix.rend(), str.rbegin());
}
24
// Read a source file and return its text with comments and redundant
// whitespace removed, so that the result only changes when the code changes.
//
// Normalisation applied to every input line:
//   * `// ...` line comments and `/* ... */` block comments are deleted.
//     Comment markers are scanned left to right, so whichever marker opens
//     first wins: a `//` inside a block comment does not truncate the line,
//     and any number of block comments on one line are removed.
//   * Every run of whitespace becomes a single space, and leading/trailing
//     whitespace is dropped.
//   * Lines that end up empty are omitted.
// Each surviving line is terminated with '\n'.
//
// Parameters:
//   filePath - path of the file to read.
// Returns:
//   The normalised text, or an empty string if the file cannot be opened (a
//   message is then written to std::cerr) or contains nothing but comments
//   and whitespace.
//
// Limitation: string and character literals are not recognised, so comment
// markers inside a literal (e.g. "http://...") are still treated as comments
// and whitespace inside literals is collapsed as well.
//
// Declared `inline` because the definition lives in a header.
inline std::string preprocessSourceFile(const std::string& filePath) {
    std::ifstream file(filePath);
    if (!file) {
        std::cerr << "Cannot open file: " << filePath << std::endl;
        return "";
    }

    // Same character set the former "\\s+" regex matched in the "C" locale,
    // without constructing a std::regex for every line.
    const auto isSpace = [](char c) {
        return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r';
    };

    std::string preprocessed;
    std::string line;
    std::string code;             // comment-free part of the current line
    bool inBlockComment = false;  // carried across lines for multi-line /* */

    while (std::getline(file, line)) {
        code.clear();
        size_t pos = 0;

        // Pass 1: strip comments, scanning left to right.
        while (pos < line.size()) {
            if (inBlockComment) {
                const size_t endBlock = line.find("*/", pos);
                if (endBlock == std::string::npos) {
                    break;  // the comment continues on the next line
                }
                inBlockComment = false;
                pos = endBlock + 2;
                continue;
            }

            const size_t slash = line.find('/', pos);
            if (slash == std::string::npos || slash + 1 == line.size()) {
                // No further comment marker can start on this line.
                code.append(line, pos, std::string::npos);
                break;
            }

            const char next = line[slash + 1];
            if (next == '/') {
                // Line comment: discard the rest of the line.
                code.append(line, pos, slash - pos);
                break;
            }
            if (next == '*') {
                // Block comment: keep what precedes it and skip past the
                // opener, so that "/*/" is not read as an opener plus closer.
                code.append(line, pos, slash - pos);
                inBlockComment = true;
                pos = slash + 2;
                continue;
            }

            // A lone '/' (e.g. division): keep it and continue after it.
            code.append(line, pos, slash + 1 - pos);
            pos = slash + 1;
        }

        // Pass 2: collapse whitespace runs and trim both ends while appending.
        const size_t lineStart = preprocessed.size();
        bool pendingSpace = false;
        for (const char c : code) {
            if (isSpace(c)) {
                pendingSpace = true;
                continue;
            }
            // Emit the separator lazily, so leading and trailing whitespace
            // never reaches the output.
            if (pendingSpace && preprocessed.size() > lineStart) {
                preprocessed += ' ';
            }
            pendingSpace = false;
            preprocessed += c;
        }

        // Only lines that contributed at least one character get a newline.
        if (preprocessed.size() > lineStart) {
            preprocessed += '\n';
        }
    }

    return preprocessed;
}
79
// Native implementation of SHA-256 (FIPS 180-4).
//
// Usage: feed data with update() (any number of calls, any chunking), then
// call final() to obtain the digest as 64 lowercase hexadecimal characters.
// final() resets the object, so the same instance can be reused for a new
// message afterwards.
class SHA256 {
public:
    SHA256() { reset(); }

    // Absorb `length` bytes starting at `data`. Bytes are buffered and the
    // compression function runs each time a full 64-byte block is available.
    void update(const uint8_t* data, size_t length) {
        for (size_t i = 0; i < length; ++i) {
            data_[datalen_++] = data[i];
            if (datalen_ == 64) {
                transform();
                bitlen_ += 512;  // one complete block = 512 message bits
                datalen_ = 0;
            }
        }
    }

    // Absorb the bytes of `data` (the terminating NUL is not included).
    void update(const std::string& data) {
        update(reinterpret_cast<const uint8_t*>(data.data()), data.size());
    }

    // Finish the message and return its digest as a 64-character lowercase
    // hex string. The object is reset to its initial state afterwards.
    std::string final() {
        uint8_t hash[32];
        pad();
        transform();
        toBytes(hash, state_, 8);

        std::ostringstream result;
        result << std::hex << std::setfill('0');
        for (const uint8_t byte : hash) {
            // setw applies to the next insertion only, so it is set per byte.
            result << std::setw(2) << static_cast<int>(byte);
        }

        // Padding has consumed the internal state; start clean so a later
        // update() begins a fresh message instead of continuing from it.
        reset();
        return result.str();
    }

    // Discard any buffered input and restore the initial hash value.
    void reset() {
        datalen_ = 0;
        bitlen_ = 0;
        state_ = { 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
                   0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 };
    }

private:
    std::array<uint32_t, 8> state_;  // working hash value H0..H7
    uint8_t data_[64] = {};          // current, partially filled block
    uint32_t datalen_ = 0;           // number of valid bytes in data_
    uint64_t bitlen_ = 0;            // message bits already compressed

    // Round constants K0..K63. Shared by all instances; a non-static const
    // member would cost 256 bytes per object and delete copy assignment.
    static constexpr std::array<uint32_t, 64> k_ = {
        0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b,
        0x59f111f1, 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01,
        0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7,
        0xc19bf174, 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
        0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, 0x983e5152,
        0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147,
        0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc,
        0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
        0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819,
        0xd6990624, 0xf40e3585, 0x106aa070, 0x19a4c116, 0x1e376c08,
        0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f,
        0x682e6ff3, 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
        0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
    };

    // Serialise the first `size` words of `input` into `output`, big-endian.
    // `output` must have room for size * 4 bytes.
    static void toBytes(uint8_t* output, const std::array<uint32_t, 8>& input, size_t size) {
        for (size_t i = 0; i < size; ++i) {
            output[i * 4 + 0] = static_cast<uint8_t>((input[i] >> 24) & 0xff);
            output[i * 4 + 1] = static_cast<uint8_t>((input[i] >> 16) & 0xff);
            output[i * 4 + 2] = static_cast<uint8_t>((input[i] >> 8) & 0xff);
            output[i * 4 + 3] = static_cast<uint8_t>((input[i] >> 0) & 0xff);
        }
    }

    // Rotate `x` right by `n` bits; only called with 0 < n < 32.
    static uint32_t rotr(uint32_t x, uint32_t n) {
        return (x >> n) | (x << (32 - n));
    }

    // Ch(e, f, g): each bit of e selects the matching bit of f (1) or g (0).
    static uint32_t choose(uint32_t e, uint32_t f, uint32_t g) {
        return (e & f) ^ (~e & g);
    }

    // Maj(a, b, c): bitwise majority vote.
    static uint32_t majority(uint32_t a, uint32_t b, uint32_t c) {
        return (a & b) ^ (a & c) ^ (b & c);
    }

    // Small sigma-0, used by the message schedule.
    static uint32_t sig0(uint32_t x) {
        return rotr(x, 7) ^ rotr(x, 18) ^ (x >> 3);
    }

    // Small sigma-1, used by the message schedule.
    static uint32_t sig1(uint32_t x) {
        return rotr(x, 17) ^ rotr(x, 19) ^ (x >> 10);
    }

    // Big sigma-0, applied to working variable `a` in every round.
    static uint32_t bigSig0(uint32_t x) {
        return rotr(x, 2) ^ rotr(x, 13) ^ rotr(x, 22);
    }

    // Big sigma-1, applied to working variable `e` in every round.
    static uint32_t bigSig1(uint32_t x) {
        return rotr(x, 6) ^ rotr(x, 11) ^ rotr(x, 25);
    }

    // Compress the 64-byte block held in data_ into state_.
    void transform() {
        std::array<uint32_t, 64> m;

        // Words 0..15: the block itself, read big-endian. Widen to uint32_t
        // before shifting so the shift never happens on a promoted int.
        for (size_t i = 0, j = 0; i < 16; ++i, j += 4) {
            m[i] = (static_cast<uint32_t>(data_[j]) << 24)
                 | (static_cast<uint32_t>(data_[j + 1]) << 16)
                 | (static_cast<uint32_t>(data_[j + 2]) << 8)
                 |  static_cast<uint32_t>(data_[j + 3]);
        }

        // Words 16..63: message schedule expansion.
        for (size_t i = 16; i < 64; ++i) {
            m[i] = sig1(m[i - 2]) + m[i - 7] + sig0(m[i - 15]) + m[i - 16];
        }

        // v[0..7] are the working variables a..h.
        std::array<uint32_t, 8> v = state_;

        for (size_t i = 0; i < 64; ++i) {
            // T1 = h + Sigma1(e) + Ch(e,f,g) + K[i] + W[i]
            const uint32_t temp1 = v[7] + bigSig1(v[4]) + choose(v[4], v[5], v[6]) + k_[i] + m[i];
            // T2 = Sigma0(a) + Maj(a,b,c)
            const uint32_t temp2 = bigSig0(v[0]) + majority(v[0], v[1], v[2]);
            v[7] = v[6];
            v[6] = v[5];
            v[5] = v[4];
            v[4] = v[3] + temp1;
            v[3] = v[2];
            v[2] = v[1];
            v[1] = v[0];
            v[0] = temp1 + temp2;
        }

        for (size_t i = 0; i < 8; ++i) {
            state_[i] += v[i];
        }
    }

    // Append the 0x80 marker, zero fill and the 64-bit big-endian message
    // length to data_, leaving one final block for the caller to transform().
    void pad() {
        size_t i = datalen_;

        if (datalen_ < 56) {
            // Marker and length both fit into the current block.
            data_[i++] = 0x80;
            while (i < 56) {
                data_[i++] = 0x00;
            }
        } else {
            // No room left for the 8 length bytes: close this block and put
            // the length into an additional, otherwise all-zero block.
            data_[i++] = 0x80;
            while (i < 64) {
                data_[i++] = 0x00;
            }
            transform();
            std::fill(std::begin(data_), std::end(data_), 0);
        }

        // Total length = bits in compressed blocks + bits still buffered.
        bitlen_ += datalen_ * 8;
        for (size_t byte = 0; byte < 8; ++byte) {
            data_[63 - byte] = static_cast<uint8_t>(bitlen_ >> (8 * byte));
        }
    }
};
230
231// Function to process and hash a single file
232std::string processFile(const std::string& filePath) {
233 std::string preprocessed = preprocessSourceFile(filePath);
234 if (preprocessed.empty()) {
235 return "";
236 }
237
238 SHA256 sha256;
239 sha256.update(preprocessed);
240 return sha256.final();
241}
242
243// Recursive function to scan all files in a directory, process .cpp and .H files, and sort them
244void processDirectory(const std::string& directory, std::vector<std::string>& sortedFiles) {
245 for (const auto& entry : std::filesystem::recursive_directory_iterator(directory)) {
246 if (entry.is_regular_file()) {
247 std::string filePath = entry.path().string();
248 if (endsWith(filePath, ".cpp") || endsWith(filePath, ".H")) {
249 sortedFiles.push_back(filePath);
250 }
251 }
252 }
253
254 // Sort files in lexicographical order
255 std::sort(sortedFiles.begin(), sortedFiles.end());
256}
257
258int getFinalHash(std::string srcDirectory) {
259 std::vector<std::string> sortedFiles;
260
261 std::cout << "Scanning directory: " << srcDirectory << std::endl;
262 processDirectory(srcDirectory, sortedFiles);
263
264 // Process and hash all files together
265 SHA256 sha256;
266 for (const auto& filePath : sortedFiles) {
267 std::cout << "Processing file: " << filePath << std::endl;
268 std::string fileHash = processFile(filePath);
269 sha256.update(fileHash);
270 }
271
272 std::cout << "\nFinal Hash (SHA256 of all preprocessed content): " << sha256.final() << std::endl;
273
274 return 0;
275}
276
277
278
279}
280}
281
282#endif
uint32_t datalen_
Definition Hash.H:124
static uint32_t sig0(uint32_t x)
Definition Hash.H:164
void update(const uint8_t *data, size_t length)
Definition Hash.H:85
std::array< uint32_t, 8 > state_
Definition Hash.H:122
std::string final()
Definition Hash.H:100
void update(const std::string &data)
Definition Hash.H:96
static void toBytes(uint8_t *output, const std::array< uint32_t, 8 > &input, size_t size)
Definition Hash.H:143
static uint32_t choose(uint32_t e, uint32_t f, uint32_t g)
Definition Hash.H:156
const std::array< uint32_t, 64 > k_
Definition Hash.H:127
uint8_t data_[64]
Definition Hash.H:123
static uint32_t rotr(uint32_t x, uint32_t n)
Definition Hash.H:152
void transform()
Definition Hash.H:172
static uint32_t majority(uint32_t a, uint32_t b, uint32_t c)
Definition Hash.H:160
static uint32_t sig1(uint32_t x)
Definition Hash.H:168
uint64_t bitlen_
Definition Hash.H:125
std::string preprocessSourceFile(const std::string &filePath)
Definition Hash.H:26
bool endsWith(const std::string &str, const std::string &suffix)
Definition Hash.H:20
int getFinalHash(std::string srcDirectory)
Definition Hash.H:258
std::string processFile(const std::string &filePath)
Definition Hash.H:232
void processDirectory(const std::string &directory, std::vector< std::string > &sortedFiles)
Definition Hash.H:244
A collection of utility routines.
Definition Set.cpp:8