/** * @file columns.cc * * Copy selected columns from input to output * * Author: Peter Helfer * Date: 2016-12-10 */ #include <unistd.h> #include <vector> #include <string> using std::string; #include <fmt/format.h> #include "Util.hh" // A few abbreviations const int NONE = Util::OPTARG_NONE; const int INT = Util::OPTARG_INT; const int UINT = Util::OPTARG_UINT; const int DBLE = Util::OPTARG_DBLE; const int STR = Util::OPTARG_STR; bool help = false; static const char *fname = NULL; // default is stdin static const char *sepChars = " \t"; // input file separator chars static const char *osep = "\t"; // output separator static bool tabular = false; /** * Print error message and exit * @param file File name * @param line Line number * @param format fmt::print format string * @param args Additional arguments to fmt::print */ template <typename... T> void fail(string file, uint line, const char *format, const T & ... args) { fmt::print(stderr, "File {}, line {}: ", file, line); fmt::print(stderr, format, args...); fmt::print(stderr, "\n"); exit(1); } int main(int argc, char *argv[]) { char *pname = argv[0]; // Process command line args: // 1. parse the options // std::vector<Util::ParseOptSpec> optSpecs = { { "file", STR, &fname, "file_name", "default: stdin" }, { "sep", STR, &sepChars, "input_separator_chars", "default: \" \\t\""}, { "osep", STR, &osep, "output_separator_string", "default: \"\\t\"" }, { "t", NONE, &tabular, "", "tabular" }, { "help", NONE, &help, "", "" }}; if (parseOpts(argc, argv, optSpecs) != 0 || optind == argc || help) { std::vector<string> nonFlags = { "column_name [column_name ...] (or number)" }; Util::usageExit( parseOptsUsage( pname, optSpecs, true, nonFlags).c_str(), NULL); } // 2. Remaining cmd line args are column specifications // std::vector<string> colSpecs; while (optind < argc) { colSpecs.push_back(argv[optind++]); } // Open the input file // FILE *fp; if (fname == NULL) { fp = stdin; fname = "<stdin>"; // for diagnostics only } else { fp = fopen(fname, "r"); if (fp == NULL) { perror(fname); exit(errno); } } // Parse the header line // const uint LINELEN = 2048; char line[LINELEN]; uint lineNum = 1; if (fgets(line, LINELEN, fp) == NULL) { fmt::print(stderr, "{}: failed to read header line\n", fname); exit(errno); } Util::chop(line); string errMsg; std::vector<string> headers = Util::tokenize(line, sepChars, errMsg); if (!errMsg.empty()) { fail(fname, lineNum, "{}", errMsg); } // Determine which columns to copy // std::vector<uint> columnNumbers; for (auto colSpec : colSpecs) { // Try to convert colSpec to an int // char *p = NULL; uint colNum = strtol(colSpec.c_str(), &p, 10); if (*p == '\0') { // If colSpec is an int, take it as column number // if (colNum > 0 || colNum <= headers.size()) { columnNumbers.push_back(colNum - 1); // zero-based } else { fmt::print(stderr, "invalid column number: {}\n", colNum); exit(1); } } else { // Otherwise colSpec should be a column header uint i = 0; for (; i < headers.size(); i++) { if (Util::strCiEq(colSpec, headers[i])) { columnNumbers.push_back(i); break; } } if (i == headers.size()) { fail(fname, lineNum, "{}: column not found", colSpec); } } } // If tabular output was requested, then pipe through column(3) // FILE *outFile = stdout; if (tabular) { outFile = popen("column -t", "w"); ABORT_IF(outFile == NULL, "Ouch!"); } // Copy the selected columns of the header line // for (uint i = 0; i < columnNumbers.size(); i++) { fmt::print(outFile, "{}", headers[columnNumbers[i]]); if (i < columnNumbers.size() - 1) { fmt::print(outFile, "{}", osep); } } fmt::print(outFile, "\n"); // Read the rest of the file and copy the // selected columns in the specified order // while (fgets(line, LINELEN, fp) != NULL) { Util::chop(line); lineNum++; std::vector<string> tokens = Util::tokenize(line, sepChars, errMsg); if (!errMsg.empty()) { fail(fname, lineNum, "{}", errMsg); } if (tokens.size() != headers.size()) { fail(fname, lineNum, "Expected {} columns, found {}", headers.size(), tokens.size()); } for (uint i = 0; i < columnNumbers.size(); i++) { fmt::print(outFile, "{}", tokens[columnNumbers[i]]); if (i < columnNumbers.size() - 1) { fmt::print(outFile, "{}", osep); } } fmt::print(outFile, "\n"); } }