Skip to content

Commit

Permalink
Merge branch 'release-0.3.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
timdawborn committed Jun 16, 2014
2 parents a87d7ea + c3f28ab commit 8185763
Show file tree
Hide file tree
Showing 234 changed files with 52,281 additions and 2,834 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ aclocal.in
aclocal.m4
autom4te.cache
build-aux
config.h*
config.log
config.status
configure
Expand All @@ -28,4 +27,5 @@ libtool
m4/*.m4
stamp-h1
/src/lib/schwa/_base.h
/src/lib/schwa/_config.h*
/src/lib/schwa/port.h
18 changes: 18 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,3 +1,21 @@
0.3.0
* Allowed `dr-count` and `dr-grep` to skip incomplete documents (#6).
* Corrected a bug in lazy docrep reading to ensure that it reads the desired number of bytes (#5).
* Many `dr-*` command-line tools now support multiple input files (#3).
* Renamed `dr-ui` to `dr-less`.
* Corrected construction of nested command-line options full name in help text (#11).
* Corrected a missing byte consume when reading msgpack data into a dynamic runtime structure (#9).
* Packaged re2 and cityhash into libschwa. re2 was added as full regular expression support wasn't added to gcc until 4.9, which hasn't become mainstream yet.
* Corrected bug where Ann subclasses were not destructed and re-constructed when stores were reused between reads.
* Added `dr-offsets` to output the byte offset of each document in a stream. Useful for debugging.
* Corrected `--help` to dispatch to each tool instead of being consumed by `dr` (#2).
* `dr` now lists the available tools it can dispatch to (#1).
* Added UnicodeString support for docrep fields.
* Added schwa::unicode namespace for all Unicode related things, including UnicodeString and Unicode-equivalent ctype functions.
* Added docrep sequence tag denormalising and renormalising functors.
* Allow dr::Slice<T> to be iterated through using a C++11 foreach loop.
* Added homebrew formula for easy installation on OS X.

0.2.2
* Added an explicit include check in configure.ac for all used C++11 header files.
* The special command-line flags --help and --version no longer result in a non-zero exit status.
Expand Down
2 changes: 1 addition & 1 deletion Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ SUBDIRS = src/third-party src/lib src/apps
pkgconfigdir = $(libdir)/pkgconfig
pkgconfig_DATA = dist/libschwa.pc

EXTRA_DIST = LICENCE doc src/ragel
EXTRA_DIST = LICENCE doc src/ragel src/third-party/cityhash src/third-party/re2
dist-hook: clean-docs


Expand Down
84 changes: 73 additions & 11 deletions configure.ac
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
AC_PREREQ(2.63)
AC_PREREQ([2.65])
AC_INIT([libschwa], m4_normalize(m4_include([m4/version.m4])), [[email protected]], [], [https://github.com/schwa-lab/libschwa])
: ${CXXFLAGS=""}

m4_include([m4/version-set.m4])
AC_SUBST([libschwa_la_version_info], [3:0:0])

AC_CONFIG_AUX_DIR([build-aux])
AC_CONFIG_MACRO_DIR([m4])
AC_CONFIG_SRCDIR([README])

dnl Initialise automake.
AM_INIT_AUTOMAKE([1.11 foreign -Wall -Werror parallel-tests subdir-objects])
AM_INIT_AUTOMAKE([1.11 foreign -Wall -Werror parallel-tests no-define subdir-objects])
AM_OPTIONS
AM_PROG_AR

Expand All @@ -27,7 +28,7 @@ dnl If CXXFLAGS was not set on entry and we are not debugging, default to -O4.
libschwa_test_CXXFLAGS="${CXXFLAGS+set}"
if test "$libschwa_test_CXXFLAGS" = "set"; then
if test "$libschwa_cv_enable_debug" != "yes"; then
CXXFLAGS='-O4'
CXXFLAGS='-O3'
else
CXXFLAGS='-O0'
fi
Expand All @@ -51,6 +52,7 @@ AC_PROG_CXX
AC_PROG_CXXCPP
AC_LANG([C++])
AX_CXX_COMPILE_STDCXX_11([noext], [mandatory])
AC_C_BIGENDIAN

dnl Initialise pkg-config.
PKG_PROG_PKG_CONFIG
Expand Down Expand Up @@ -84,19 +86,78 @@ AC_CHECK_HEADER([mutex], , AC_MSG_ERROR([C++11 mutex header not found]))
AC_CHECK_HEADER([new], , AC_MSG_ERROR([C++11 new header not found]))
AC_CHECK_HEADER([ostream], , AC_MSG_ERROR([C++11 ostream header not found]))
AC_CHECK_HEADER([random], , AC_MSG_ERROR([C++11 random header not found]))
AC_CHECK_HEADER([regex], , AC_MSG_ERROR([C++11 regex header not found]))
AC_CHECK_HEADER([set], , AC_MSG_ERROR([C++11 set header not found]))
AC_CHECK_HEADER([sstream], , AC_MSG_ERROR([C++11 sstream header not found]))
AC_CHECK_HEADER([stack], , AC_MSG_ERROR([C++11 stack header not found]))
AC_CHECK_HEADER([string], , AC_MSG_ERROR([C++11 string header not found]))
AC_CHECK_HEADER([thread], , AC_MSG_ERROR([C++11 thread header not found]))
AC_CHECK_HEADER([typeinfo], , AC_MSG_ERROR([C++11 typeinfo header not found]))
AC_CHECK_HEADER([unordered_set], , AC_MSG_ERROR([C++11 unordered_set header not found]))
AC_CHECK_HEADER([utility], , AC_MSG_ERROR([C++11 utility header not found]))
AC_CHECK_HEADER([vector], , AC_MSG_ERROR([C++11 vector header not found]))
AC_CHECK_HEADER([dirent.h], , AC_MSG_ERROR([POSIX dirent.h header not found]))
AC_CHECK_HEADER([fcntl.h], , AC_MSG_ERROR([POSIX fcntl.h header not found]))
AC_CHECK_HEADER([unistd.h], , AC_MSG_ERROR([POSIX unistd.h header not found]))
AC_CHECK_HEADERS([cxxabi.h endian.h libgen.h libproc.h limits.h machine/byte_order.h]) dnl <schwa/port.{h,cc}>

dnl Check for C headers.
AC_CHECK_HEADER([assert.h], , AC_MSG_ERROR([C assert.h header not found]))
AC_CHECK_HEADER([ctype.h], , AC_MSG_ERROR([C ctype.h header not found]))
AC_CHECK_HEADER([errno.h], , AC_MSG_ERROR([C errno.h header not found]))
AC_CHECK_HEADER([stdarg.h], , AC_MSG_ERROR([C stdarg.h header not found]))
AC_CHECK_HEADER([stddef.h], , AC_MSG_ERROR([C stddef.h header not found]))
AC_CHECK_HEADER([stdint.h], , AC_MSG_ERROR([C stdint.h header not found]))
AC_CHECK_HEADER([stdio.h], , AC_MSG_ERROR([C stdio.h header not found]))
AC_CHECK_HEADER([stdlib.h], , AC_MSG_ERROR([C stdlib.h header not found]))
AC_CHECK_HEADER([string.h], , AC_MSG_ERROR([C string.h header not found]))
AC_CHECK_HEADER([sys/mman.h], , AC_MSG_ERROR([C sys/mman.h header not found]))
AC_CHECK_HEADER([sys/resource.h], , AC_MSG_ERROR([C sys/resource.h header not found]))
AC_CHECK_HEADER([sys/stat.h], , AC_MSG_ERROR([C sys/stat.h header not found]))
AC_CHECK_HEADER([sys/time.h], , AC_MSG_ERROR([C sys/time.h header not found]))
AC_CHECK_HEADER([sys/types.h], , AC_MSG_ERROR([C sys/types.h header not found]))
AC_CHECK_HEADER([time.h], , AC_MSG_ERROR([C time.h header not found]))
AC_CHECK_HEADER([unistd.h], , AC_MSG_ERROR([C unistd.h header not found]))

dnl Check for optional headers.
AC_CHECK_HEADERS([pcre.h])

dnl Check for C functions.
AC_CHECK_FUNC([close], , AC_MSG_ERROR([C close function not found]))
AC_CHECK_FUNC([dup2], , AC_MSG_ERROR([C dup2 function not found]))
AC_CHECK_FUNC([fork], , AC_MSG_ERROR([C fork function not found]))
AC_CHECK_FUNC([fstat], , AC_MSG_ERROR([C fstat function not found]))
AC_CHECK_FUNC([mmap], , AC_MSG_ERROR([C mmap function not found]))
AC_CHECK_FUNC([open], , AC_MSG_ERROR([C open function not found]))
AC_CHECK_FUNC([pipe], , AC_MSG_ERROR([C pipe function not found]))
AC_CHECK_FUNC([stat], , AC_MSG_ERROR([C stat function not found]))
AC_CHECK_FUNC([strsep], , AC_MSG_ERROR([C strsep function not found]))
AC_CHECK_FUNC([munmap], , AC_MSG_ERROR([C munmap function not found]))

dnl Check for pthread.
dnl Defines: PTHREAD_CC, PTHREAD_CFLAGS, PTHREAD_LIBS, HAVE_PTHREAD
AX_PTHREAD

dnl Check for SIMD extensions.
dnl Defines: HAVE_MMX, HAVE_SSE, HAVE_SSE2, HAVE_SSE3, HAVE_SSSE3, HAVE_SSE4.1, HAVE_SSE4.2, HAVE_AVX, SIMD_FLAGS
AX_EXT

dnl Check for __builtin_expect.
AC_MSG_CHECKING([if the compiler supports __builtin_expect])
AC_COMPILE_IFELSE(
[AC_LANG_PROGRAM(, [[return __builtin_expect(1, 1) ? 1 : 0;]])],
[
libschwa_have_builtin_expect=yes
AC_MSG_RESULT([yes])
],
[
libschwa_have_builtin_expect=no
AC_MSG_RESULT([no])
]
)
if test x$libschwa_have_builtin_expect = xyes ; then
AC_DEFINE([HAVE_BUILTIN_EXPECT], [1], [Define to 1 if the compiler supports __builtin_expect.])
fi

dnl Work out how to inline the "host to big endian" functions for various platforms based on what headers we found.
if test "$ac_cv_header_endian_h" = "yes"; then
AC_SUBST([ENDIAN_CONVERSION_HEADER], [endian.h])
Expand Down Expand Up @@ -128,20 +189,20 @@ dnl Check if we have ØMQ >= 3.
PKG_CHECK_MODULES([ZMQLIB], [libzmq >= 3], [have_libzmq=yes], [have_libzmq=no])
AM_CONDITIONAL([HAVE_LIBZMQ], [test "$have_libzmq" = "yes" && test "$libschwa_cv_enable_libzmq" = "yes"])

dnl Construct our base set of CXXFLAGS, depending on the compiler.
LIBSCHWA_BASE_CXXFLAGS='-Wall -Wextra -Werror -pedantic -Wformat-security -Wpointer-arith -Wformat-nonliteral -Winit-self -Wfloat-equal -ffast-math -fstack-protector'
dnl Construct our base set of CPPFLAGS, depending on the compiler.
LIBSCHWA_BASE_CPPFLAGS='-Wall -Wextra -Werror -pedantic -Wformat-security -Wpointer-arith -Wformat-nonliteral -Winit-self -Wfloat-equal -ffast-math -fstack-protector'
if test "x$GXX" = xyes; then
dnl Add compiler specific flags.
if test "x$CLANG" = xyes; then
LIBSCHWA_BASE_CXXFLAGS="$LIBSCHWA_BASE_CXXFLAGS -fdiagnostics-show-option -fdiagnostics-show-template-tree -pedantic-errors"
LIBSCHWA_BASE_CPPFLAGS="$LIBSCHWA_BASE_CPPFLAGS -fdiagnostics-show-option -fdiagnostics-show-template-tree -pedantic-errors"
else
LIBSCHWA_BASE_CXXFLAGS="$LIBSCHWA_BASE_CXXFLAGS -Wdouble-promotion"
LIBSCHWA_BASE_CPPFLAGS="$LIBSCHWA_BASE_CPPFLAGS -Wdouble-promotion"
fi
fi
AC_SUBST([LIBSCHWA_BASE_CXXFLAGS])
AC_SUBST([LIBSCHWA_BASE_CPPFLAGS])

dnl Configure autoconf inputs and outputs.
AC_CONFIG_HEADERS([config.h])
AC_CONFIG_HEADERS([src/lib/schwa/_config.h])
AC_CONFIG_FILES([
Makefile
src/apps/Makefile
Expand All @@ -151,9 +212,10 @@ AC_CONFIG_FILES([
src/apps/dr-dist/Makefile
src/apps/dr-grep/Makefile
src/apps/dr-head/Makefile
src/apps/dr-less/Makefile
src/apps/dr-offsets/Makefile
src/apps/dr-sample/Makefile
src/apps/dr-tail/Makefile
src/apps/dr-ui/Makefile
src/apps/dr-worker-example/Makefile
src/apps/schwa-tokenizer/Makefile
src/lib/Makefile
Expand Down
49 changes: 30 additions & 19 deletions dist/create-deb-package.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,31 +4,40 @@ set -e

MAINTAINER_NAME='Tim Dawborn'
MAINTAINER_EMAIL='[email protected]'
WORKING_DIR=/tmp/create-deb-package

# Ensure we have a correct looking argument.
if [[ ${#} -ne 1 ]]; then
echo "Usage: ${0} libschwa-X.Y.Z.tar.gz"
exit 1
fi

# Relocate to the top level project directory.
cd $(dirname ${0})/..
if ! $(echo "${1}" | grep -q '^libschwa-[0-9]*\.[0-9]*\.[0-9]*\.tar\.gz$'); then
echo "Usage: ${0} libschwa-X.Y.Z.tar.gz"
exit 1
fi
VERSION=$(echo "${1}" | grep -o '[0-9]*\.[0-9]*\.[0-9]*')

# Check required dependencies for building a deb file.
sudo apt-get install build-essential autoconf automake autotools-dev dh-make debhelper devscripts fakeroot xutils lintian pbuilder
if [[ "$(uname)" != "Linux" ]]; then
echo 'Error: this script must be run on a Linux machine.'
exit 1
fi

# Work out absolute path to tarball.
TARBALL="$(cd $(dirname ${1}) && pwd)/$(basename ${1})"

# Create the distribution.
make dist
version=$(tail -n +2 m4/version.m4)
# Create the working directory.
WORKING_DIR=$(mktemp -d)
trap 'rm -rf "${WORKING_DIR}"' EXIT

# Create and relocate to the working directory.
rm -rf ${WORKING_DIR}
mkdir -p ${WORKING_DIR}
cp libschwa-${version}.tar.gz ${WORKING_DIR}
cd ${WORKING_DIR}
# Check required dependencies for building a deb file.
sudo apt-get install build-essential autoconf automake autotools-dev dh-make debhelper devscripts fakeroot xutils lintian pbuilder

# Extract the tarball and go into the extracted folder.
tar xzf libschwa-${version}.tar.gz
cd libschwa-${version}
tar xzf "${TARBALL}" -C ${WORKING_DIR}
cd ${WORKING_DIR}/libschwa-*

# Start the debianisation.
dh_make --email "${MAINTAINER_EMAIL}" --multi --file ../libschwa-${version}.tar.gz
dh_make --email "${MAINTAINER_EMAIL}" --multi --file "${TARBALL}"

# Update the generated debian files.
cat > debian/control <<EOF
Expand All @@ -49,11 +58,13 @@ EOF
cp LICENCE debian/copyright

# Build the source as a debian package.
dpkg-buildpackage -rfakeroot
dpkg-buildpackage -rfakeroot -us -uc

# Install the generated deb file on packages.schwa.org.
read -r -p 'Copy deb file to packages.schwa.org? [y/N] ' response
if [[ ${response} =~ ^([yY][eE][sS]|[yY])$ ]]; then
scp ../libschwa*.deb deb@ch2:packages/ubuntu/pool/main/precise
ssh deb@ch2 'packages/ubuntu/update-amd64.sh precise'
LSB_RELEASE=$(lsb_release -c -s)
scp ../libschwa*.deb setup@ch2:/var/www/sites/packages/ubuntu/pool/main/${LSB_RELEASE}
ssh setup@ch2 "/var/www/sites/packages/ubuntu/update.sh"
fi
cp ../libschwa*.deb /tmp
21 changes: 21 additions & 0 deletions dist/libschwa.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
require "formula"

# Homebrew formula that builds and installs libschwa from a release tarball
# using the project's autotools build (configure / make / make install).
class Libschwa < Formula
  homepage "https://github.com/schwa-lab/libschwa"
  url "https://github.com/schwa-lab/libschwa/releases/download/0.2.2/libschwa-0.2.2.tar.gz"
  sha1 "41d3e1a2dd3686da6ee0acb302e88eb84ca937bb"

  # pkg-config is a build tool consulted by ./configure, so declare it as a
  # build-time dependency rather than one that must stay installed afterwards.
  depends_on "pkg-config" => :build
  # ZeroMQ support is opt-in (`brew install libschwa --with-zeromq`).
  depends_on "zeromq" => :optional

  # Configure into the formula's prefix, build, run the project's own test
  # suite, and only then install; `system` aborts the install on any failure.
  def install
    system "./configure", "--prefix=#{prefix}"
    system "make"
    system "make", "check"
    system "make", "install"
  end

  # Smoke test: the installed tokenizer binary must start and report its version.
  test do
    system "#{bin}/schwa-tokenizer", "--version"
  end
end
74 changes: 74 additions & 0 deletions m4/ax_check_compile_flag.m4
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_check_compile_flag.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_CHECK_COMPILE_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS], [INPUT])
#
# DESCRIPTION
#
# Check whether the given FLAG works with the current language's compiler
# or gives an error. (Warnings, however, are ignored)
#
# ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on
# success/failure.
#
# If EXTRA-FLAGS is defined, it is added to the current language's default
# flags (e.g. CFLAGS) when the check is done. The check is thus made with
# the flags: "CFLAGS EXTRA-FLAGS FLAG". This can for example be used to
# force the compiler to issue an error when a bad flag is given.
#
# INPUT gives an alternative input source to AC_COMPILE_IFELSE.
#
# NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. Please keep this
# macro in sync with AX_CHECK_{PREPROC,LINK}_FLAG.
#
# LICENSE
#
# Copyright (c) 2008 Guido U. Draheim <[email protected]>
# Copyright (c) 2011 Maarten Bosmans <[email protected]>
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
#
# As a special exception, the respective Autoconf Macro's copyright owner
# gives unlimited permission to copy, distribute and modify the configure
# scripts that are the output of Autoconf when processing the Macro. You
# need not follow the terms of the GNU General Public License when using
# or distributing such scripts, even though portions of the text of the
# Macro appear in them. The GNU General Public License (GPL) does govern
# all other use of the material that constitutes the Autoconf Macro.
#
# This special exception to the GPL applies to versions of the Autoconf
# Macro released by the Autoconf Archive. When you make and distribute a
# modified version of the Autoconf Macro, you may extend this special
# exception to the GPL to apply to your modified version as well.

#serial 3

dnl AX_CHECK_COMPILE_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS], [INPUT])
dnl
dnl Outline of the body below:
dnl   1. The result is cached in a variable whose name is built from the current
dnl      language abbreviation, EXTRA-FLAGS (arg 4) and FLAG (arg 1).
dnl   2. The current language's FLAGS variable is saved, then EXTRA-FLAGS and
dnl      FLAG are appended to it for the duration of the test compile.
dnl   3. INPUT (arg 5) is compiled, or an empty program when INPUT is not given;
dnl      the cache variable is set to "yes" or "no" accordingly.
dnl   4. The saved FLAGS value is restored, so the macro itself never leaves
dnl      FLAG in the compiler flags; callers add it in ACTION-SUCCESS if wanted.
dnl   5. ACTION-SUCCESS (arg 2) runs when the cached result is "yes", otherwise
dnl      ACTION-FAILURE (arg 3); each defaults to the shell no-op ":".
AC_DEFUN([AX_CHECK_COMPILE_FLAG],
[AC_PREREQ(2.59)dnl for _AC_LANG_PREFIX
AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_[]_AC_LANG_ABBREV[]flags_$4_$1])dnl
AC_CACHE_CHECK([whether _AC_LANG compiler accepts $1], CACHEVAR, [
ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS
_AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1"
AC_COMPILE_IFELSE([m4_default([$5],[AC_LANG_PROGRAM()])],
[AS_VAR_SET(CACHEVAR,[yes])],
[AS_VAR_SET(CACHEVAR,[no])])
_AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags])
AS_IF([test x"AS_VAR_GET(CACHEVAR)" = xyes],
[m4_default([$2], :)],
[m4_default([$3], :)])
AS_VAR_POPDEF([CACHEVAR])dnl
])dnl AX_CHECK_COMPILE_FLAG
Loading

0 comments on commit 8185763

Please sign in to comment.