Skip to content

Commit

Permalink
Merge branch 'release-0.4.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
timdawborn committed Sep 5, 2014
2 parents f66fead + b3423e7 commit 4812d10
Show file tree
Hide file tree
Showing 52 changed files with 2,926 additions and 1,582 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,6 @@ dist/libschwa.pc
libtool
m4/*.m4
stamp-h1
/src/lib/schwa/_base.h
/src/lib/schwa/_config.h*
/src/lib/schwa/macros.h
/src/lib/schwa/port.h
9 changes: 9 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
0.4.0
* Don't distribute _config.h (whoops).
* Added `SCHWA_LIKELY` and `SCHWA_UNLIKELY` macro definitions to wrap around `__builtin_expect`.
* Made `--help` more helpful by adding a `--short-help` flag on all core tools (#14).
* Made `dr-count` behave more like `wc` when multiple input files are provided (#15).
* Corrected argument parsing for optional arguments (#17).
* Upgraded the Unicode database data from 6.3.0 to 7.0.0.
* Upgraded docrep reader and writer to use wire protocol version 3 (utilising the newly-added distinction between bytes and UTF-8 strings within MessagePack).

0.3.0
* Allowed `dr-count` and `dr-grep` to skip incomplete documents (#6).
* Corrected a bug in lazy docrep reading to ensure that it reads the desired number of bytes (#5).
Expand Down
4 changes: 2 additions & 2 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ SUBDIRS = src/third-party src/lib src/apps
pkgconfigdir = $(libdir)/pkgconfig
pkgconfig_DATA = dist/libschwa.pc

EXTRA_DIST = LICENCE doc src/ragel src/third-party/cityhash src/third-party/re2
EXTRA_DIST = CHANGELOG LICENCE doc src/ragel src/third-party/cityhash src/third-party/re2
dist-hook: clean-docs


Expand All @@ -27,5 +27,5 @@ clean-docs:

wc:
for d in src/lib src/apps; do \
find $${d} \( -name "*.cc" -or -name "*.h" \) | egrep -v '_(gen|test)\.cc' | xargs wc -l; \
find $${d} \( -name "*.cc" -or -name "*.h" \) | egrep -v '_(gen|test)\.cc' | grep -vF /third-party/ | xargs wc -l; \
done
2 changes: 2 additions & 0 deletions README
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
Schwa Lab core NLP tools, including the C++ implementation of docrep and the docrep command-line tools.

Documentation can be found on the GitHub wiki page: https://github.com/schwa-lab/libschwa/wiki
9 changes: 7 additions & 2 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ AC_INIT([libschwa], m4_normalize(m4_include([m4/version.m4])), [[email protected]
: ${CXXFLAGS=""}

m4_include([m4/version-set.m4])
AC_SUBST([libschwa_la_version_info], [3:0:0])
AC_SUBST([libschwa_la_version_info], [4:0:0])

AC_CONFIG_AUX_DIR([build-aux])
AC_CONFIG_MACRO_DIR([m4])
Expand Down Expand Up @@ -156,6 +156,11 @@ AC_COMPILE_IFELSE(
)
if test x$libschwa_have_builtin_expect = xyes ; then
AC_DEFINE([HAVE_BUILTIN_EXPECT], [1], [Define to 1 if the compiler supports __builtin_expect.])
AC_SUBST([SCHWA_LIKELY], ['(__builtin_expect(!!(x), 1))'])
AC_SUBST([SCHWA_UNLIKELY], ['(__builtin_expect(!!(x), 0))'])
else
AC_SUBST([SCHWA_LIKELY], ['(x)'])
AC_SUBST([SCHWA_UNLIKELY], ['(x)'])
fi

dnl Work out how to inline the various "host to big endian" functions based on what headers we found.
Expand Down Expand Up @@ -219,7 +224,7 @@ AC_CONFIG_FILES([
src/apps/dr-worker-example/Makefile
src/apps/schwa-tokenizer/Makefile
src/lib/Makefile
src/lib/schwa/_base.h:src/lib/schwa/_base.h.in
src/lib/schwa/macros.h:src/lib/schwa/macros.h.in
src/lib/schwa/port.h:src/lib/schwa/port.h.in
src/third-party/Makefile
dist/libschwa.pc:dist/libschwa.pc.in
Expand Down
4 changes: 2 additions & 2 deletions dist/libschwa.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

class Libschwa < Formula
homepage "https://github.com/schwa-lab/libschwa"
url "https://github.com/schwa-lab/libschwa/releases/download/0.2.2/libschwa-0.2.2.tar.gz"
sha1 "41d3e1a2dd3686da6ee0acb302e88eb84ca937bb"
url "https://github.com/schwa-lab/libschwa/releases/download/0.3.0/libschwa-0.3.0.tar.gz"
sha1 "9b44fb99547b3e8dd1a8e1e7bf24615f3f437bd6"

depends_on "pkg-config"
depends_on "zeromq" => :optional
Expand Down
8 changes: 4 additions & 4 deletions m4/version-set.m4
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
dnl Product version for header files. Maintained by dist/bump-version.sh
VERSION_MAJOR=0
VERSION_MINOR=3
VERSION_PATCH=1
VERSION_STRING='"0.3.1 (2014-07-06)"'
VERSION_MINOR=4
VERSION_PATCH=0
VERSION_STRING='"0.4.0 (2014-09-05)"'

AC_SUBST(VERSION_MAJOR)
AC_SUBST(VERSION_MINOR)
AC_SUBST(VERSION_PATCH)
AC_SUBST(VERSION_STRING)

VERSION_NOPATCH=0.3
VERSION_NOPATCH=0.4
AC_SUBST(VERSION_NOPATCH)
2 changes: 1 addition & 1 deletion m4/version.m4
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
dnl Product version for AC_INIT. Maintained by dist/bump-version.sh
0.3.1
0.4.0
25 changes: 17 additions & 8 deletions src/apps/dr-count/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,14 @@ namespace cf = schwa::config;
namespace dr = schwa::dr;
namespace io = schwa::io;

using Formatting = schwa::dr_count::Processor::Formatting;


namespace schwa {
namespace dr_count {

static void
main(const std::vector<std::string> &input_paths, std::ostream &out, bool all_stores, const std::string &store, bool count_bytes, bool cumulative, bool per_doc, Formatting formatting, const std::string &doc_id) {
main(const std::vector<std::string> &input_paths, std::ostream &out, bool all_stores, const std::string &store, bool count_bytes, bool cumulative, int every, Formatting formatting, const std::string &doc_id, bool no_header, bool no_footer, bool no_ndocs) {
// Construct the document processor.
Processor processor(out, all_stores, store, count_bytes, cumulative, per_doc, formatting, doc_id);
Processor processor(out, all_stores, store, count_bytes, cumulative, every, formatting, doc_id, no_header, no_footer, no_ndocs);

dr::FauxDoc doc;
dr::FauxDoc::Schema schema;
Expand All @@ -34,6 +32,12 @@ main(const std::vector<std::string> &input_paths, std::ostream &out, bool all_st
io::InputStream in(input_path);
dr::Reader reader(*in, schema);

// If there is more than one file to process, output the filename and reset the processor's state.
if (input_paths.size() > 1) {
out << input_path << std::endl;
processor.reset();
}

// Read the documents off the input stream.
try {
while (reader >> doc)
Expand All @@ -42,9 +46,9 @@ main(const std::vector<std::string> &input_paths, std::ostream &out, bool all_st
catch (dr::ReaderException &) {
LOG(WARNING) << "Failed to read document from '" << in.path() << "'" << std::endl;
}
}

processor.finalise();
processor.finalise();
}
}

} // namespace dr_count
Expand All @@ -53,17 +57,22 @@ main(const std::vector<std::string> &input_paths, std::ostream &out, bool all_st

int
main(int argc, char **argv) {
using Formatting = schwa::dr_count::Formatting;

// Construct an option parser.
cf::Main cfg("dr-count", "Count the number of documents or annotations in stores on a docrep stream.");
cf::Op<std::string> input_path(cfg, "input", 'i', "The input path", io::STDIN_STRING);
cf::Op<std::string> output_path(cfg, "output", 'o', "The output path", io::STDOUT_STRING);
cf::Op<bool> all_stores(cfg, "all", 'a', "Count docs and elements in all stores found on the first doc", false);
cf::Op<std::string> store(cfg, "store", 's', "Count docs and elements in the provided store only", cf::Flags::OPTIONAL);
cf::Op<bool> per_doc(cfg, "per-doc", 'e', "Show counts per doc instead of for the stream", false);
cf::Op<uint32_t> every(cfg, "every", 'e', "Show counts per doc instead of for the stream", 1, cf::Flags::OPTIONAL);
cf::Op<bool> count_bytes(cfg, "bytes", 'b', "Count bytes instead of number of instances", false);
cf::Op<bool> cumulative(cfg, "cumulative", 'c', "Show cumulative counts per doc", false);
cf::OpChoices<std::string> format(cfg, "format", 'f', "How to format the output data", {"aligned", "tabs"}, "aligned");
cf::Op<std::string> doc_id(cfg, "doc-id", 'd', "Output this expression before each document instead when outputting per-document counts", cf::Flags::OPTIONAL);
cf::Op<bool> no_header(cfg, "no-header", 'H', "Hide the column headings row", false);
cf::Op<bool> no_footer(cfg, "no-footer", 'F', "Hide the cumulative total footer row", false);
cf::Op<bool> no_ndocs(cfg, "no-ndocs", 'N', "Hide the ndocs column", false);

cfg.allow_unclaimed_args("[input-path...]");

Expand All @@ -89,7 +98,7 @@ main(int argc, char **argv) {
io::OutputStream out(output_path());

// Dispatch to main function.
schwa::dr_count::main(input_paths, *out, all_stores(), store(), count_bytes(), cumulative(), per_doc(), formatting, doc_id());
schwa::dr_count::main(input_paths, *out, all_stores(), store(), count_bytes(), cumulative(), every.was_mentioned() ? every() : -1, formatting, doc_id(), no_header(), no_footer(), no_ndocs());
})
return 0;
}
Loading

0 comments on commit 4812d10

Please sign in to comment.