/** * Mat - Add, mul, div, min, max or average matrices read from files, or * compute standard deviation or standar error. Files contain equal * number of rows and cols of float or int numbers, optionally with a * first header row and/or first index column. Headers and indices, if * present, are copied to the output without applying any operations. */ #include <stdlib.h> #include <unistd.h> #include <set> #include <fmt/format.h> #include "Util.hh" /** * Print error message and exit * @param file File name * @param line Line number * @param format fmt::print format string * @param args Additional arguments to fmt::print */ template <typename... T> void fail(string file, uint line, const char *format, const T & ... args) { fmt::print(stderr, "File {}, line {}: ", file, line); fmt::print(stderr, format, args...); fmt::print(stderr, "\n"); exit(1); } static const char *sepChars = " \t"; static const char *prefix = ""; static const char *suffix = ""; /** * Adorn each token in line with prefix and suffix */ string adorn(string line) { string errMsg; std::vector<string> tokens = Util::tokenize(line, sepChars, errMsg); ABORT_IF(!errMsg.empty(), "%s", errMsg.c_str()); string r; bool first = true; for (auto tok : tokens) { if (first) { first = false; } else { r += sepChars[0]; } r += prefix; r += tok; r += suffix; } return r; } int main(int argc, char *argv[]) { char *pname = argv[0]; bool help = false; bool hasHdr = false; bool chkHdr = false; bool hasIndex = false; // Process command line args // std::vector<Util::ParseOptSpec> optSpecs = { {"hdr", Util::OPTARG_NONE, &hasHdr, "", "files have header row"}, {"chkHdr", Util::OPTARG_NONE, &chkHdr, "", "headers must match"}, {"index", Util::OPTARG_NONE, &hasIndex, "", "files have index column"}, {"sep", Util::OPTARG_STR, &sepChars, "separator_chars", ""}, {"prefix", Util::OPTARG_STR, &prefix, "output_header_prefix", ""}, {"suffix", Util::OPTARG_STR, &suffix, "output_header_suffix", ""}, {"help", Util::OPTARG_NONE, &help, "", "" }}; std::vector<string> nonFlags = { "{add|sub|mul|div|min|max|avg|stdevp|stdevs|sterr} <file> ..." }; if (Util::parseOpts(argc, argv, optSpecs) != 0 || optind > argc - 2 || help) { Util::usage( parseOptsUsage(pname, optSpecs, true, nonFlags).c_str(), NULL); exit(EXIT_FAILURE); } uint nCols = 0; uint nRows = 0; // The first non-flag arg is the operation string opStr = argv[optind++]; Util::Operation op; if (Util::strCiEq(opStr, "ADD")) { op = Util::ADD; } else if (Util::strCiEq(opStr, "SUB")) { op = Util::SUB; } else if (Util::strCiEq(opStr, "MUL")) { op = Util::MUL; } else if (Util::strCiEq(opStr, "DIV")) { op = Util::DIV; } else if (Util::strCiEq(opStr, "MIN")) { op = Util::MIN; } else if (Util::strCiEq(opStr, "MAX")) { op = Util::MAX; } else if (Util::strCiEq(opStr, "AVG")) { op = Util::AVG; } else if (Util::strCiEq(opStr, "STDEVP")) { op = Util::STDEVP; } else if (Util::strCiEq(opStr, "STDEVS")) { op = Util::STDEVS; } else if (Util::strCiEq(opStr, "STERR")) { op = Util::STERR; } else { Util::usage( parseOptsUsage(pname, optSpecs, false, nonFlags).c_str(), NULL); exit(EXIT_FAILURE); } // The remaining args are file names. Process them one by one // and build the result matrix as we go. For STDEV and STDERR, // also build sqsum on the fly. uint numFiles = argc - optind; string firstFile; string hdr; vector<vector<double>> firstMat; vector<vector<double>> result; vector<vector<double>> sqsum; while (optind < argc) { const char *fname = argv[optind++]; if (firstFile.empty()) { firstFile = fname; } FILE *fp = fopen(fname, "r"); if (fp == NULL) { perror(fname); return 1; } vector<vector<double>> mat; uint lineNum = 0; char line[1024]; // Read a matrix of doubles from the file while ((fgets(line, sizeof(line), fp) != NULL)) { Util::chop(line); if (++lineNum == 1) { if (hasHdr) { // This is a header line if (hdr.empty()) { hdr = line; fmt::print("{}\n", adorn(hdr)); } else { ABORT_IF(chkHdr && (hdr != line), "%s and %s have different headers", firstFile.c_str(), fname); } continue; } } string errMsg; std::vector<string> tokens = Util::tokenize(line, sepChars, errMsg); if (!errMsg.empty()) { fail(fname, lineNum, "{}", errMsg); } if (tokens.size() == 0) { fail(fname, lineNum, "empty line", errMsg); } if (nCols == 0) { nCols = tokens.size(); } else if (tokens.size() != nCols) { fail(fname, lineNum, "Expected {} tokens, found {}", nCols, tokens.size()); } vector<double> row; for (auto tok : tokens) { size_t sz; try { row.push_back(std::stod(tok, &sz)); } catch (std::invalid_argument) { fail(fname, lineNum, "Bad double [{}]", tok); } } mat.push_back(row); } fclose(fp); if (nRows == 0) { // First file read nRows = mat.size(); result = mat; firstMat = mat; sqsum = Util::matrixSquare(mat); } else { if (mat.size() != nRows) { fail(fname, lineNum, "Expected {} rows, found {}", nRows, mat.size()); } if (hasIndex) { for (uint r = 0; r < nRows; r++) { if (mat[r][0] != firstMat[r][0]) { fail(fname, hasHdr ? r+2 : r+1, "Index differs from file {}", firstFile); } } } switch (op) { case Util::STDEVP: case Util::STDEVS: case Util::STERR: sqsum = Util::matrixAdd(sqsum, Util::matrixSquare(mat)); // fall thru case Util::ADD: case Util::AVG: result = Util::matrixAdd(result, mat); break; case Util::SUB: result = Util::matrixSub(result, mat); break; case Util::MUL: result = Util::matrixMul(result, mat); break; case Util::DIV: result = Util::matrixDiv(result, mat); break; case Util::MIN: result = Util::matrixMin(result, mat); break; case Util::MAX: result = Util::matrixMax(result, mat); break; default: TRACE_FATAL("Bad operation"); } } } switch (op) { case Util::AVG: for (auto &row : result) { for (auto &elem : row) { elem /= numFiles; } } break; case Util::STDEVP: case Util::STDEVS: case Util::STERR: // For standard deviation or error, calculate the standard // deviation of the sample for (uint r = 0; r < nRows; r++) { for (uint c = 0; c < nCols; c++) { // stdev = sqrt(sum-of-squares/n - (square-of-sum/n)^2) result[r][c] = sqrt(sqsum[r][c] / numFiles - pow((result[r][c] / numFiles), 2.0)); } } // For sample statistics, apply Bessel's correction if ((numFiles > 1) && (op == Util::STDEVS || op == Util::STERR)) { result = Util::matrixMul( result, sqrt(1. * numFiles/(numFiles - 1))); } // For standard error, divide by sqrt of sample size if (op == Util::STERR) { result = Util::matrixDiv(result, sqrt(numFiles)); } break; default: // no special processing ; } if (hasIndex) { for (uint r = 0; r < nRows; r++) { result[r][0] = firstMat[r][0]; } } fmt::print(Util::matrixToStr(result)); }