Skip to content

Commit

Permalink
Merge pull request #13 from p-ranav/feature/9
Browse files Browse the repository at this point in the history
Feature/9
  • Loading branch information
p-ranav authored Jul 27, 2020
2 parents d659f90 + 5b6990b commit a20992f
Show file tree
Hide file tree
Showing 12 changed files with 2,490 additions and 48 deletions.
73 changes: 69 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,20 @@
<img height="75" src="img/logo.png" alt="csv2"/>
</p>

## Table of Contents

* [CSV Reader](#csv-reader)
* [Performance Benchmark](#performance-benchmark)
* [Reader API](#reader-api)
* [CSV Writer](#csv-writer)
* [Writer API](#writer-api)
* [Compiling Tests](#compiling-tests)
* [Generating Single Header](#generating-single-header)
* [Contributing](#contributing)
* [License](#license)

## CSV Reader

```cpp
#include <csv2/reader.hpp>

Expand All @@ -24,7 +38,7 @@ int main() {
}
```

## Performance Benchmark
### Performance Benchmark

This benchmark measures the average execution time (of 5 runs after 3 warmup runs) for `csv2` to memory-map the input CSV file and iterate over every cell in the CSV. See `benchmark/main.cpp` for more details.

Expand All @@ -34,7 +48,7 @@ g++ -I../include -O3 -std=c++11 -o main main.cpp
./main <csv_file>
```

### Hardware
#### Hardware

```
MacBook Pro (15-inch, 2019)
Expand All @@ -43,7 +57,7 @@ Memory: 32 GB 2400 MHz DDR4
Operating System: macOS Catalina version 10.15.3
```

### Results (as of 23 APR 2020)
#### Results (as of 23 APR 2020)

| Dataset | File Size | Rows | Cols | Time |
|:--- | ---:| ---:| ---:| ---:|
Expand All @@ -59,7 +73,7 @@ Operating System: macOS Catalina version 10.15.3
| [SHA-1 password hash dump](https://www.kaggle.com/urvishramaiya/have-i-been-pwnd) | 11 GB | 262,974,241 | 2 | 19.505s |
| [DOHUI NOH scaled_data](https://www.kaggle.com/seaa0612/scaled-data) | 16 GB | 496,782 | 3213 | 32.780s |

## API
### Reader API

Here is the public API available to you:

Expand Down Expand Up @@ -123,6 +137,51 @@ public:
};
```
## CSV Writer
This library also provides a basic `csv2::Writer` class that can be used to write CSV rows to a file. Here is a basic usage example:
```cpp
#include <csv2/writer.hpp>
#include <vector>
#include <string>
using namespace csv2;
int main() {
std::ofstream stream("foo.csv");
Writer<delimiter<','>> writer(stream);
std::vector<std::vector<std::string>> rows =
{
{"a", "b", "c"},
{"1", "2", "3"},
{"4", "5", "6"}
};
writer.write_rows(rows);
stream.close();
}
```

### Writer API

Here is the public API available to you:

```cpp
template <class delimiter = delimiter<','>>
class Writer {
public:

// Construct using an std::ofstream
Writer(output_file_stream stream);

// Use this to write a single row to file
void write_row(container_of_strings row);

// Use this to write a list of rows to file
void write_rows(container_of_rows rows);
};
```
## Compiling Tests
```bash
Expand All @@ -133,6 +192,12 @@ cd test
./csv2_test
```

## Generating Single Header

```bash
python3 utils/amalgamate/amalgamate.py -c single_include.json -s .
```

## Contributing
Contributions are welcome; have a look at the [CONTRIBUTING.md](CONTRIBUTING.md) document for more information.

Expand Down
1 change: 1 addition & 0 deletions include/csv2/mio.hpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

/* Copyright 2017 https://github.com/mandreyel
*
* Permission is hereby granted, free of charge, to any person obtaining a copy of this
Expand Down
50 changes: 50 additions & 0 deletions include/csv2/parameters.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@

#pragma once
#include <utility>

namespace csv2 {

namespace trim_policy {
// Trim policy that performs no trimming.
struct no_trimming {
  // Returns the pair {start, end} exactly as received; the buffer is never read,
  // which is why its parameter is left unnamed.
  static std::pair<size_t, size_t> trim(const char * /*buffer*/, size_t start, size_t end) {
    return std::pair<size_t, size_t>(start, end);
  }
};

// Trim policy that strips every character listed in `character_list` from both
// ends of the range [start, end) of a buffer.
template <char... character_list> struct trim_characters {
private:
  // Base case of the recursion: no candidates left, so the character is kept.
  constexpr static bool is_trim_char(char) { return false; }

  // Compares `c` against the first candidate, then recurses over the remaining
  // ones. Kept as a single return expression so it stays a valid C++11
  // constexpr function.
  template <class... Others>
  constexpr static bool is_trim_char(char c, char candidate, Others... others) {
    return (c == candidate) ? true : is_trim_char(c, others...);
  }

public:
  // Returns the narrowed {start, end} pair. `end` is exclusive: the last
  // character inspected is buffer[end - 1]. If every character in the range is
  // a trim character, the result is an empty range (first == second).
  static std::pair<size_t, size_t> trim(const char *buffer, size_t start, size_t end) {
    size_t left = start;
    size_t right = end;

    // Advance past leading trim characters.
    for (; left != right; ++left) {
      if (!is_trim_char(buffer[left], character_list...)) {
        break;
      }
    }

    // Retreat past trailing trim characters.
    for (; left != right; --right) {
      if (!is_trim_char(buffer[right - 1], character_list...)) {
        break;
      }
    }

    return std::make_pair(left, right);
  }
};

using trim_whitespace = trim_characters<' ', '\t'>;
} // namespace trim_policy

// Compile-time tag type: exposes its character template argument as `value`.
template <char character> struct delimiter {
  static constexpr char value{character};
};

// Compile-time tag type: exposes its character template argument as `value`.
template <char character> struct quote_character {
  static constexpr char value{character};
};

// Compile-time tag type: exposes its boolean template argument as `value`.
template <bool flag> struct first_row_is_header {
  static constexpr bool value{flag};
};

}
46 changes: 2 additions & 44 deletions include/csv2/reader.hpp
Original file line number Diff line number Diff line change
@@ -1,55 +1,13 @@

#pragma once
#include <cstring>
#include <csv2/mio.hpp>
#include <csv2/parameters.hpp>
#include <istream>
#include <string>
#include <utility>

namespace csv2 {

namespace trim_policy {
// Trim policy that performs no trimming.
struct no_trimming {
  // Returns the pair {start, end} exactly as received; the buffer is never read,
  // which is why its parameter is left unnamed.
  static std::pair<size_t, size_t> trim(const char * /*buffer*/, size_t start, size_t end) {
    return std::pair<size_t, size_t>(start, end);
  }
};

// Trim policy that strips every character listed in `character_list` from both
// ends of the range [start, end) of a buffer.
template <char... character_list> struct trim_characters {
private:
  // Base case of the recursion: no candidates left, so the character is kept.
  constexpr static bool is_trim_char(char) { return false; }

  // Compares `c` against the first candidate, then recurses over the remaining
  // ones. Kept as a single return expression so it stays a valid C++11
  // constexpr function.
  template <class... Others>
  constexpr static bool is_trim_char(char c, char candidate, Others... others) {
    return (c == candidate) ? true : is_trim_char(c, others...);
  }

public:
  // Returns the narrowed {start, end} pair. `end` is exclusive: the last
  // character inspected is buffer[end - 1]. If every character in the range is
  // a trim character, the result is an empty range (first == second).
  static std::pair<size_t, size_t> trim(const char *buffer, size_t start, size_t end) {
    size_t left = start;
    size_t right = end;

    // Advance past leading trim characters.
    for (; left != right; ++left) {
      if (!is_trim_char(buffer[left], character_list...)) {
        break;
      }
    }

    // Retreat past trailing trim characters.
    for (; left != right; --right) {
      if (!is_trim_char(buffer[right - 1], character_list...)) {
        break;
      }
    }

    return std::make_pair(left, right);
  }
};

using trim_whitespace = trim_characters<' ', '\t'>;
} // namespace trim_policy

// Compile-time tag type: exposes its character template argument as `value`.
template <char character> struct delimiter {
  static constexpr char value{character};
};

// Compile-time tag type: exposes its character template argument as `value`.
template <char character> struct quote_character {
  static constexpr char value{character};
};

// Compile-time tag type: exposes its boolean template argument as `value`.
template <bool flag> struct first_row_is_header {
  static constexpr bool value{flag};
};

template <class delimiter = delimiter<','>, class quote_character = quote_character<'"'>,
class first_row_is_header = first_row_is_header<true>,
class trim_policy = trim_policy::trim_whitespace>
Expand Down
41 changes: 41 additions & 0 deletions include/csv2/writer.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@

#pragma once
#include <cstring>
#include <csv2/parameters.hpp>
#include <fstream>
#include <string>
#include <utility>
#include <iostream>

namespace csv2 {

// Writes rows of cells to an output file stream. Cells within a row are
// separated by `delimiter::value` and every row is terminated by '\n'.
// Cells are written verbatim: no quoting or escaping is applied.
//
// The writer only keeps a reference to the stream, so the stream must outlive
// the writer.
template <class delimiter = delimiter<','>>
class Writer {
  std::ofstream& stream_; // output stream for the writer (referenced, not owned)
public:
  // Binds the writer to `stream`, which must be bindable to std::ofstream&.
  template <typename Stream>
  Writer(Stream&& stream) : stream_(std::forward<Stream>(stream)) {}

  // Closes the referenced stream if it is still open. The is_open() guard
  // matters when the caller has already closed the stream: closing a stream
  // that is not open sets failbit, and would throw out of this destructor if
  // the caller enabled exceptions on the stream.
  ~Writer() {
    if (stream_.is_open()) {
      stream_.close();
    }
  }

  // Writes one row: the elements of `row` joined by the delimiter, followed
  // by '\n'. `row` may be any container exposing begin()/end() whose elements
  // can be streamed with operator<<. An empty row produces just "\n".
  template <typename Container>
  void write_row(Container&& row) {
    const auto& cells = row;
    auto it = cells.begin();
    const auto last = cells.end();
    if (it != last) {
      // First cell has no leading delimiter; every later cell does. This
      // avoids touching end() - 1 / back(), which are invalid on empty rows.
      stream_ << *it;
      for (++it; it != last; ++it) {
        stream_ << delimiter::value << *it;
      }
    }
    stream_ << '\n';
  }

  // Writes every row of `rows` in iteration order via write_row().
  template <typename Container>
  void write_rows(Container&& rows) {
    const auto& container_of_rows = rows;
    for (const auto& row : container_of_rows) {
      write_row(row);
    }
  }
};

}
11 changes: 11 additions & 0 deletions single_include.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"project": "CSV for Modern C++",
"target": "single_include/csv2/csv2.hpp",
"sources": [
"include/csv2/mio.hpp",
"include/csv2/parameters.hpp",
"include/csv2/reader.hpp",
"include/csv2/writer.hpp"
],
"include_paths": ["include"]
}
Loading

0 comments on commit a20992f

Please sign in to comment.